-rwxr-xr-x.ci/scripts/windows/docker.sh11
-rw-r--r--.ci/yuzu-mainline-step2.yml4
-rw-r--r--.gitmodules3
-rw-r--r--CMakeLists.txt134
-rw-r--r--CMakeModules/GenerateSCMRev.cmake67
-rw-r--r--README.md79
-rw-r--r--dist/qt_themes/default/style.qss20
-rw-r--r--dist/qt_themes/qdarkstyle/style.qss21
-rw-r--r--dist/qt_themes/qdarkstyle_midnight_blue/style.qss21
-rw-r--r--externals/CMakeLists.txt4
m---------externals/Vulkan-Headers0
m---------externals/cpp-httplib0
m---------externals/dynarmic0
-rw-r--r--externals/httplib/README.md15
-rw-r--r--externals/httplib/httplib.h6714
-rw-r--r--externals/libusb/CMakeLists.txt13
m---------externals/sirit0
-rw-r--r--src/CMakeLists.txt18
-rw-r--r--src/audio_core/CMakeLists.txt3
-rw-r--r--src/audio_core/audio_out.cpp3
-rw-r--r--src/audio_core/audio_renderer.cpp92
-rw-r--r--src/audio_core/audio_renderer.h6
-rw-r--r--src/audio_core/command_generator.cpp225
-rw-r--r--src/audio_core/command_generator.h32
-rw-r--r--src/audio_core/common.h2
-rw-r--r--src/audio_core/info_updater.cpp3
-rw-r--r--src/audio_core/sink_context.cpp15
-rw-r--r--src/audio_core/sink_context.h4
-rw-r--r--src/audio_core/stream.cpp7
-rw-r--r--src/audio_core/stream.h6
-rw-r--r--src/audio_core/voice_context.cpp88
-rw-r--r--src/audio_core/voice_context.h13
-rw-r--r--src/common/CMakeLists.txt70
-rw-r--r--src/common/common_sizes.h43
-rw-r--r--src/common/detached_tasks.cpp2
-rw-r--r--src/common/fs/file.cpp33
-rw-r--r--src/common/fs/file.h21
-rw-r--r--src/common/fs/fs.cpp5
-rw-r--r--src/common/fs/fs.h30
-rw-r--r--src/common/fs/fs_util.cpp4
-rw-r--r--src/common/fs/fs_util.h11
-rw-r--r--src/common/hex_util.h3
-rw-r--r--src/common/host_memory.cpp2
-rw-r--r--src/common/literals.h31
-rw-r--r--src/common/logging/backend.cpp30
-rw-r--r--src/common/logging/filter.cpp4
-rw-r--r--src/common/logging/types.h6
-rw-r--r--src/common/scm_rev.cpp.in2
-rw-r--r--src/common/settings.cpp35
-rw-r--r--src/common/settings.h426
-rw-r--r--src/common/thread_worker.cpp58
-rw-r--r--src/common/thread_worker.h103
-rw-r--r--src/common/unique_function.h62
-rw-r--r--src/common/uuid.h5
-rw-r--r--src/core/CMakeLists.txt88
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic_32.cpp26
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic_64.cpp24
-rw-r--r--src/core/core.cpp38
-rw-r--r--src/core/core.h12
-rw-r--r--src/core/crypto/key_manager.cpp2
-rw-r--r--src/core/file_sys/card_image.cpp18
-rw-r--r--src/core/file_sys/card_image.h3
-rw-r--r--src/core/file_sys/content_archive.cpp1
-rw-r--r--src/core/file_sys/patch_manager.cpp24
-rw-r--r--src/core/file_sys/patch_manager.h3
-rw-r--r--src/core/file_sys/registered_cache.cpp13
-rw-r--r--src/core/file_sys/sdmc_factory.cpp31
-rw-r--r--src/core/file_sys/sdmc_factory.h6
-rw-r--r--src/core/file_sys/submission_package.cpp71
-rw-r--r--src/core/file_sys/submission_package.h11
-rw-r--r--src/core/file_sys/system_archive/system_version.cpp48
-rw-r--r--src/core/file_sys/vfs_real.cpp7
-rw-r--r--src/core/frontend/applets/profile_select.cpp2
-rw-r--r--src/core/frontend/applets/software_keyboard.h2
-rw-r--r--src/core/frontend/applets/web_browser.h2
-rw-r--r--src/core/frontend/input.h1
-rw-r--r--src/core/hardware_interrupt_manager.cpp2
-rw-r--r--src/core/hle/api_version.h40
-rw-r--r--src/core/hle/ipc_helpers.h8
-rw-r--r--src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp37
-rw-r--r--src/core/hle/kernel/hle_ipc.cpp3
-rw-r--r--src/core/hle/kernel/k_address_space_info.cpp35
-rw-r--r--src/core/hle/kernel/k_auto_object.cpp9
-rw-r--r--src/core/hle/kernel/k_auto_object.h12
-rw-r--r--src/core/hle/kernel/k_memory_layout.board.nintendo_nx.cpp5
-rw-r--r--src/core/hle/kernel/k_memory_layout.h27
-rw-r--r--src/core/hle/kernel/k_page_table.cpp5
-rw-r--r--src/core/hle/kernel/k_process.cpp8
-rw-r--r--src/core/hle/kernel/k_resource_limit.cpp1
-rw-r--r--src/core/hle/kernel/k_server_session.cpp5
-rw-r--r--src/core/hle/kernel/k_trace.h6
-rw-r--r--src/core/hle/kernel/kernel.cpp114
-rw-r--r--src/core/hle/kernel/kernel.h17
-rw-r--r--src/core/hle/kernel/svc.cpp6
-rw-r--r--src/core/hle/service/acc/acc.cpp2
-rw-r--r--src/core/hle/service/acc/acc.h2
-rw-r--r--src/core/hle/service/acc/profile_manager.cpp3
-rw-r--r--src/core/hle/service/am/am.cpp12
-rw-r--r--src/core/hle/service/am/applets/applet_controller.cpp272
-rw-r--r--src/core/hle/service/am/applets/applet_controller.h148
-rw-r--r--src/core/hle/service/am/applets/applet_error.cpp194
-rw-r--r--src/core/hle/service/am/applets/applet_error.h (renamed from src/core/hle/service/am/applets/error.h)0
-rw-r--r--src/core/hle/service/am/applets/applet_general_backend.cpp255
-rw-r--r--src/core/hle/service/am/applets/applet_general_backend.h (renamed from src/core/hle/service/am/applets/general_backend.h)0
-rw-r--r--src/core/hle/service/am/applets/applet_profile_select.cpp78
-rw-r--r--src/core/hle/service/am/applets/applet_profile_select.h (renamed from src/core/hle/service/am/applets/profile_select.h)0
-rw-r--r--src/core/hle/service/am/applets/applet_software_keyboard.cpp1082
-rw-r--r--src/core/hle/service/am/applets/applet_software_keyboard.h166
-rw-r--r--src/core/hle/service/am/applets/applet_software_keyboard_types.h (renamed from src/core/hle/service/am/applets/software_keyboard_types.h)0
-rw-r--r--src/core/hle/service/am/applets/applet_web_browser.cpp487
-rw-r--r--src/core/hle/service/am/applets/applet_web_browser.h88
-rw-r--r--src/core/hle/service/am/applets/applet_web_browser_types.h (renamed from src/core/hle/service/am/applets/web_types.h)0
-rw-r--r--src/core/hle/service/am/applets/applets.cpp12
-rw-r--r--src/core/hle/service/am/applets/controller.cpp253
-rw-r--r--src/core/hle/service/am/applets/controller.h137
-rw-r--r--src/core/hle/service/am/applets/error.cpp194
-rw-r--r--src/core/hle/service/am/applets/general_backend.cpp255
-rw-r--r--src/core/hle/service/am/applets/profile_select.cpp78
-rw-r--r--src/core/hle/service/am/applets/software_keyboard.cpp1082
-rw-r--r--src/core/hle/service/am/applets/software_keyboard.h166
-rw-r--r--src/core/hle/service/am/applets/web_browser.cpp474
-rw-r--r--src/core/hle/service/am/applets/web_browser.h88
-rw-r--r--src/core/hle/service/aoc/aoc_u.cpp10
-rw-r--r--src/core/hle/service/aoc/aoc_u.h1
-rw-r--r--src/core/hle/service/apm/apm.cpp2
-rw-r--r--src/core/hle/service/apm/apm_controller.cpp89
-rw-r--r--src/core/hle/service/apm/apm_controller.h (renamed from src/core/hle/service/apm/controller.h)0
-rw-r--r--src/core/hle/service/apm/apm_interface.cpp138
-rw-r--r--src/core/hle/service/apm/apm_interface.h (renamed from src/core/hle/service/apm/interface.h)0
-rw-r--r--src/core/hle/service/apm/controller.cpp89
-rw-r--r--src/core/hle/service/apm/interface.cpp138
-rw-r--r--src/core/hle/service/audio/audout_u.cpp10
-rw-r--r--src/core/hle/service/audio/audren_u.cpp14
-rw-r--r--src/core/hle/service/audio/hwopus.cpp45
-rw-r--r--src/core/hle/service/audio/hwopus.h4
-rw-r--r--src/core/hle/service/bcat/backend/boxcat.cpp7
-rw-r--r--src/core/hle/service/bcat/bcat.h2
-rw-r--r--src/core/hle/service/bcat/bcat_module.cpp610
-rw-r--r--src/core/hle/service/bcat/bcat_module.h (renamed from src/core/hle/service/bcat/module.h)0
-rw-r--r--src/core/hle/service/bcat/module.cpp610
-rw-r--r--src/core/hle/service/filesystem/filesystem.cpp24
-rw-r--r--src/core/hle/service/filesystem/filesystem.h1
-rw-r--r--src/core/hle/service/friend/friend.cpp2
-rw-r--r--src/core/hle/service/friend/friend_interface.cpp21
-rw-r--r--src/core/hle/service/friend/friend_interface.h (renamed from src/core/hle/service/friend/interface.h)0
-rw-r--r--src/core/hle/service/friend/interface.cpp21
-rw-r--r--src/core/hle/service/glue/arp.cpp2
-rw-r--r--src/core/hle/service/glue/glue_manager.cpp78
-rw-r--r--src/core/hle/service/glue/glue_manager.h (renamed from src/core/hle/service/glue/manager.h)0
-rw-r--r--src/core/hle/service/glue/manager.cpp78
-rw-r--r--src/core/hle/service/hid/controllers/npad.cpp22
-rw-r--r--src/core/hle/service/hid/controllers/npad.h12
-rw-r--r--src/core/hle/service/hid/hid.cpp14
-rw-r--r--src/core/hle/service/hid/hid.h13
-rw-r--r--src/core/hle/service/kernel_helpers.cpp62
-rw-r--r--src/core/hle/service/kernel_helpers.h35
-rw-r--r--src/core/hle/service/mii/manager.cpp464
-rw-r--r--src/core/hle/service/mii/manager.h331
-rw-r--r--src/core/hle/service/mii/mii.cpp2
-rw-r--r--src/core/hle/service/mii/mii_manager.cpp465
-rw-r--r--src/core/hle/service/mii/mii_manager.h333
-rw-r--r--src/core/hle/service/mii/raw_data.h2
-rw-r--r--src/core/hle/service/nifm/nifm.cpp6
-rw-r--r--src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp2
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp85
-rw-r--r--src/core/hle/service/nvdrv/interface.cpp259
-rw-r--r--src/core/hle/service/nvdrv/nvdrv.cpp13
-rw-r--r--src/core/hle/service/nvdrv/nvdrv.h3
-rw-r--r--src/core/hle/service/nvdrv/nvdrv_interface.cpp259
-rw-r--r--src/core/hle/service/nvdrv/nvdrv_interface.h (renamed from src/core/hle/service/nvdrv/interface.h)0
-rw-r--r--src/core/hle/service/nvflinger/nvflinger.cpp8
-rw-r--r--src/core/hle/service/pctl/module.cpp406
-rw-r--r--src/core/hle/service/pctl/pctl.h2
-rw-r--r--src/core/hle/service/pctl/pctl_module.cpp406
-rw-r--r--src/core/hle/service/pctl/pctl_module.h (renamed from src/core/hle/service/pctl/module.h)0
-rw-r--r--src/core/hle/service/service.cpp23
-rw-r--r--src/core/hle/service/service.h7
-rw-r--r--src/core/hle/service/set/set.cpp2
-rw-r--r--src/core/hle/service/sm/controller.cpp80
-rw-r--r--src/core/hle/service/sm/sm.cpp67
-rw-r--r--src/core/hle/service/sm/sm.h14
-rw-r--r--src/core/hle/service/sm/sm_controller.cpp80
-rw-r--r--src/core/hle/service/sm/sm_controller.h (renamed from src/core/hle/service/sm/controller.h)0
-rw-r--r--src/core/hle/service/spl/csrng.cpp2
-rw-r--r--src/core/hle/service/spl/csrng.h2
-rw-r--r--src/core/hle/service/spl/module.cpp53
-rw-r--r--src/core/hle/service/spl/module.h37
-rw-r--r--src/core/hle/service/spl/spl.cpp84
-rw-r--r--src/core/hle/service/spl/spl.h2
-rw-r--r--src/core/hle/service/spl/spl_module.cpp175
-rw-r--r--src/core/hle/service/spl/spl_module.h48
-rw-r--r--src/core/hle/service/spl/spl_results.h31
-rw-r--r--src/core/hle/service/spl/spl_types.h232
-rw-r--r--src/core/hle/service/time/interface.cpp42
-rw-r--r--src/core/hle/service/time/time.cpp2
-rw-r--r--src/core/hle/service/time/time_interface.cpp42
-rw-r--r--src/core/hle/service/time/time_interface.h (renamed from src/core/hle/service/time/interface.h)0
-rw-r--r--src/core/hle/service/time/time_zone_content_manager.cpp2
-rw-r--r--src/core/loader/loader.cpp13
-rw-r--r--src/core/loader/loader.h13
-rw-r--r--src/core/loader/nro.cpp4
-rw-r--r--src/core/loader/nso.cpp4
-rw-r--r--src/core/loader/nsp.cpp34
-rw-r--r--src/core/loader/nsp.h4
-rw-r--r--src/core/loader/xci.cpp14
-rw-r--r--src/core/loader/xci.h3
-rw-r--r--src/core/perf_stats.cpp20
-rw-r--r--src/core/perf_stats.h6
-rw-r--r--src/core/reporter.cpp3
-rw-r--r--src/core/telemetry_session.cpp18
-rw-r--r--src/input_common/CMakeLists.txt19
-rwxr-xr-xsrc/input_common/analog_from_button.cpp1
-rw-r--r--src/input_common/gcadapter/gc_adapter.cpp7
-rw-r--r--src/input_common/main.cpp4
-rw-r--r--src/input_common/mouse/mouse_input.cpp16
-rw-r--r--src/input_common/mouse/mouse_input.h6
-rw-r--r--src/input_common/mouse/mouse_poller.cpp5
-rw-r--r--src/input_common/sdl/sdl_impl.cpp65
-rw-r--r--src/input_common/sdl/sdl_impl.h8
-rw-r--r--src/input_common/touch_from_button.cpp3
-rw-r--r--src/input_common/udp/client.cpp4
-rw-r--r--src/input_common/udp/protocol.h7
-rw-r--r--src/shader_recompiler/CMakeLists.txt268
-rw-r--r--src/shader_recompiler/backend/bindings.h19
-rw-r--r--src/shader_recompiler/backend/glasm/emit_context.cpp154
-rw-r--r--src/shader_recompiler/backend/glasm/emit_context.h80
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm.cpp492
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm.h25
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm_barriers.cpp0
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp91
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp244
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp346
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm_control_flow.cpp0
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm_convert.cpp231
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp414
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm_image.cpp850
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm_instructions.h625
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp294
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm_logical.cpp0
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp568
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp273
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm_select.cpp67
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp58
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm_special.cpp0
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm_undefined.cpp0
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp150
-rw-r--r--src/shader_recompiler/backend/glasm/reg_alloc.cpp186
-rw-r--r--src/shader_recompiler/backend/glasm/reg_alloc.h303
-rw-r--r--src/shader_recompiler/backend/glsl/emit_context.cpp715
-rw-r--r--src/shader_recompiler/backend/glsl/emit_context.h174
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl.cpp252
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl.h24
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp418
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_barriers.cpp21
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp94
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp219
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp456
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_control_flow.cpp21
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp230
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp456
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_image.cpp799
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_instructions.h702
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp253
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp28
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp202
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp105
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_select.cpp55
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp79
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_special.cpp111
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_undefined.cpp32
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp217
-rw-r--r--src/shader_recompiler/backend/glsl/var_alloc.cpp308
-rw-r--r--src/shader_recompiler/backend/glsl/var_alloc.h105
-rw-r--r--src/shader_recompiler/backend/spirv/emit_context.cpp1368
-rw-r--r--src/shader_recompiler/backend/spirv/emit_context.h307
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv.cpp541
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv.h27
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp448
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp38
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp66
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp155
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp505
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp28
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp269
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp396
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_image.cpp462
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp183
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_instructions.h581
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp270
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp26
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp275
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_select.cpp42
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp174
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_special.cpp150
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp30
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp203
-rw-r--r--src/shader_recompiler/environment.h53
-rw-r--r--src/shader_recompiler/exception.h66
-rw-r--r--src/shader_recompiler/frontend/ir/abstract_syntax_list.h58
-rw-r--r--src/shader_recompiler/frontend/ir/attribute.cpp454
-rw-r--r--src/shader_recompiler/frontend/ir/attribute.h250
-rw-r--r--src/shader_recompiler/frontend/ir/basic_block.cpp149
-rw-r--r--src/shader_recompiler/frontend/ir/basic_block.h185
-rw-r--r--src/shader_recompiler/frontend/ir/breadth_first_search.h56
-rw-r--r--src/shader_recompiler/frontend/ir/condition.cpp29
-rw-r--r--src/shader_recompiler/frontend/ir/condition.h60
-rw-r--r--src/shader_recompiler/frontend/ir/flow_test.cpp83
-rw-r--r--src/shader_recompiler/frontend/ir/flow_test.h62
-rw-r--r--src/shader_recompiler/frontend/ir/ir_emitter.cpp2017
-rw-r--r--src/shader_recompiler/frontend/ir/ir_emitter.h413
-rw-r--r--src/shader_recompiler/frontend/ir/microinstruction.cpp411
-rw-r--r--src/shader_recompiler/frontend/ir/modifiers.h49
-rw-r--r--src/shader_recompiler/frontend/ir/opcodes.cpp15
-rw-r--r--src/shader_recompiler/frontend/ir/opcodes.h110
-rw-r--r--src/shader_recompiler/frontend/ir/opcodes.inc550
-rw-r--r--src/shader_recompiler/frontend/ir/patch.cpp28
-rw-r--r--src/shader_recompiler/frontend/ir/patch.h149
-rw-r--r--src/shader_recompiler/frontend/ir/post_order.cpp46
-rw-r--r--src/shader_recompiler/frontend/ir/post_order.h14
-rw-r--r--src/shader_recompiler/frontend/ir/pred.h44
-rw-r--r--src/shader_recompiler/frontend/ir/program.cpp32
-rw-r--r--src/shader_recompiler/frontend/ir/program.h35
-rw-r--r--src/shader_recompiler/frontend/ir/reg.h332
-rw-r--r--src/shader_recompiler/frontend/ir/type.cpp38
-rw-r--r--src/shader_recompiler/frontend/ir/type.h61
-rw-r--r--src/shader_recompiler/frontend/ir/value.cpp99
-rw-r--r--src/shader_recompiler/frontend/ir/value.h398
-rw-r--r--src/shader_recompiler/frontend/maxwell/control_flow.cpp642
-rw-r--r--src/shader_recompiler/frontend/maxwell/control_flow.h169
-rw-r--r--src/shader_recompiler/frontend/maxwell/decode.cpp149
-rw-r--r--src/shader_recompiler/frontend/maxwell/decode.h14
-rw-r--r--src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp108
-rw-r--r--src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h28
-rw-r--r--src/shader_recompiler/frontend/maxwell/instruction.h63
-rw-r--r--src/shader_recompiler/frontend/maxwell/location.h112
-rw-r--r--src/shader_recompiler/frontend/maxwell/maxwell.inc286
-rw-r--r--src/shader_recompiler/frontend/maxwell/opcodes.cpp26
-rw-r--r--src/shader_recompiler/frontend/maxwell/opcodes.h30
-rw-r--r--src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp883
-rw-r--r--src/shader_recompiler/frontend/maxwell/structured_control_flow.h20
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp214
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp110
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp35
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp96
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp74
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp62
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp36
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h57
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp153
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h28
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp66
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp55
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp72
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp58
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp55
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp50
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp54
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp43
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp47
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp82
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp55
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp78
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp214
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp253
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp94
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp62
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp71
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp127
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp41
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp60
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp44
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp125
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp169
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp62
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h42
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp143
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp117
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp118
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp272
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/impl.h387
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp105
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp122
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp48
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp80
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp182
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp82
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp64
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp36
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp86
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp58
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp71
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp66
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp135
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp126
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp53
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp62
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h39
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp108
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp196
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp218
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp184
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp116
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp122
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp66
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp44
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp71
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp181
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp283
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp45
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp46
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp38
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp53
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp44
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp205
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp281
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp236
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp266
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp208
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp134
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp182
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp165
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp242
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp131
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp76
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp30
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h23
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp92
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp64
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp92
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp54
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp69
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/translate.cpp52
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/translate.h14
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate_program.cpp223
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate_program.h23
-rw-r--r--src/shader_recompiler/host_translate_info.h18
-rw-r--r--src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp928
-rw-r--r--src/shader_recompiler/ir_opt/constant_propagation_pass.cpp610
-rw-r--r--src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp26
-rw-r--r--src/shader_recompiler/ir_opt/dual_vertex_pass.cpp30
-rw-r--r--src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp526
-rw-r--r--src/shader_recompiler/ir_opt/identity_removal_pass.cpp38
-rw-r--r--src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp143
-rw-r--r--src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp218
-rw-r--r--src/shader_recompiler/ir_opt/passes.h32
-rw-r--r--src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp383
-rw-r--r--src/shader_recompiler/ir_opt/texture_pass.cpp523
-rw-r--r--src/shader_recompiler/ir_opt/verification_pass.cpp98
-rw-r--r--src/shader_recompiler/object_pool.h105
-rw-r--r--src/shader_recompiler/profile.h74
-rw-r--r--src/shader_recompiler/program_header.h219
-rw-r--r--src/shader_recompiler/runtime_info.h88
-rw-r--r--src/shader_recompiler/shader_info.h193
-rw-r--r--src/shader_recompiler/stage.h28
-rw-r--r--src/shader_recompiler/varying_state.h69
-rw-r--r--src/tests/CMakeLists.txt1
-rw-r--r--src/tests/common/host_memory.cpp4
-rw-r--r--src/tests/common/unique_function.cpp110
-rw-r--r--src/tests/video_core/buffer_base.cpp2
-rw-r--r--src/video_core/CMakeLists.txt84
-rw-r--r--src/video_core/buffer_cache/buffer_base.h30
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h763
-rw-r--r--src/video_core/cdma_pusher.cpp3
-rw-r--r--src/video_core/command_classes/codecs/codec.cpp85
-rw-r--r--src/video_core/command_classes/codecs/codec.h12
-rw-r--r--src/video_core/command_classes/codecs/h264.cpp207
-rw-r--r--src/video_core/command_classes/codecs/h264.h132
-rw-r--r--src/video_core/command_classes/codecs/vp9.cpp4
-rw-r--r--src/video_core/command_classes/codecs/vp9_types.h307
-rw-r--r--src/video_core/command_classes/nvdec.cpp17
-rw-r--r--src/video_core/command_classes/nvdec.h8
-rw-r--r--src/video_core/command_classes/nvdec_common.h103
-rw-r--r--src/video_core/command_classes/vic.cpp39
-rw-r--r--src/video_core/dirty_flags.cpp6
-rw-r--r--src/video_core/dirty_flags.h2
-rw-r--r--src/video_core/dma_pusher.cpp10
-rw-r--r--src/video_core/engines/const_buffer_engine_interface.h103
-rw-r--r--src/video_core/engines/fermi_2d.cpp22
-rw-r--r--src/video_core/engines/fermi_2d.h2
-rw-r--r--src/video_core/engines/kepler_compute.cpp45
-rw-r--r--src/video_core/engines/kepler_compute.h21
-rw-r--r--src/video_core/engines/maxwell_3d.cpp39
-rw-r--r--src/video_core/engines/maxwell_3d.h47
-rw-r--r--src/video_core/engines/maxwell_dma.cpp37
-rw-r--r--src/video_core/engines/maxwell_dma.h19
-rw-r--r--src/video_core/engines/shader_bytecode.h2298
-rw-r--r--src/video_core/engines/shader_header.h158
-rw-r--r--src/video_core/engines/shader_type.h21
-rw-r--r--src/video_core/fence_manager.h7
-rw-r--r--src/video_core/gpu.cpp5
-rw-r--r--src/video_core/guest_driver.cpp37
-rw-r--r--src/video_core/guest_driver.h46
-rw-r--r--src/video_core/host_shaders/CMakeLists.txt5
-rw-r--r--src/video_core/host_shaders/astc_decoder.comp46
-rw-r--r--src/video_core/memory_manager.cpp108
-rw-r--r--src/video_core/memory_manager.h22
-rw-r--r--src/video_core/rasterizer_interface.h31
-rw-r--r--src/video_core/renderer_base.cpp3
-rw-r--r--src/video_core/renderer_base.h5
-rw-r--r--src/video_core/renderer_opengl/gl_arb_decompiler.cpp2124
-rw-r--r--src/video_core/renderer_opengl/gl_arb_decompiler.h29
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp90
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h59
-rw-r--r--src/video_core/renderer_opengl/gl_compute_pipeline.cpp209
-rw-r--r--src/video_core/renderer_opengl/gl_compute_pipeline.h93
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp200
-rw-r--r--src/video_core/renderer_opengl/gl_device.h83
-rw-r--r--src/video_core/renderer_opengl/gl_graphics_pipeline.cpp572
-rw-r--r--src/video_core/renderer_opengl/gl_graphics_pipeline.h169
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp483
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h63
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.cpp27
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.h14
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp989
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h172
-rw-r--r--src/video_core/renderer_opengl/gl_shader_context.h33
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp2986
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.h69
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp482
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.h176
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.cpp146
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.h185
-rw-r--r--src/video_core/renderer_opengl/gl_shader_util.cpp123
-rw-r--r--src/video_core/renderer_opengl/gl_shader_util.h89
-rw-r--r--src/video_core/renderer_opengl/gl_state_tracker.cpp6
-rw-r--r--src/video_core/renderer_opengl/gl_state_tracker.h1
-rw-r--r--src/video_core/renderer_opengl/gl_stream_buffer.h5
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp392
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h62
-rw-r--r--src/video_core/renderer_opengl/maxwell_to_gl.h108
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp72
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h10
-rw-r--r--src/video_core/renderer_opengl/util_shaders.cpp58
-rw-r--r--src/video_core/renderer_vulkan/blit_image.cpp40
-rw-r--r--src/video_core/renderer_vulkan/blit_image.h2
-rw-r--r--src/video_core/renderer_vulkan/fixed_pipeline_state.cpp92
-rw-r--r--src/video_core/renderer_vulkan/fixed_pipeline_state.h79
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.cpp54
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.h7
-rw-r--r--src/video_core/renderer_vulkan/pipeline_helper.h154
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.cpp62
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.h4
-rw-r--r--src/video_core/renderer_vulkan/vk_blit_screen.cpp99
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp134
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.h30
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pass.cpp407
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pass.h34
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pipeline.cpp296
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pipeline.h72
-rw-r--r--src/video_core/renderer_vulkan/vk_descriptor_pool.cpp172
-rw-r--r--src/video_core/renderer_vulkan/vk_descriptor_pool.h70
-rw-r--r--src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp839
-rw-r--r--src/video_core/renderer_vulkan/vk_graphics_pipeline.h145
-rw-r--r--src/video_core/renderer_vulkan/vk_master_semaphore.cpp18
-rw-r--r--src/video_core/renderer_vulkan/vk_master_semaphore.h15
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp867
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.h176
-rw-r--r--src/video_core/renderer_vulkan/vk_query_cache.cpp8
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp506
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h76
-rw-r--r--src/video_core/renderer_vulkan/vk_render_pass_cache.cpp96
-rw-r--r--src/video_core/renderer_vulkan/vk_render_pass_cache.h55
-rw-r--r--src/video_core/renderer_vulkan/vk_resource_pool.cpp12
-rw-r--r--src/video_core/renderer_vulkan/vk_resource_pool.h12
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.cpp172
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.h38
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp3166
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.h99
-rw-r--r--src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp39
-rw-r--r--src/video_core/renderer_vulkan/vk_state_tracker.cpp56
-rw-r--r--src/video_core/renderer_vulkan/vk_state_tracker.h15
-rw-r--r--src/video_core/renderer_vulkan/vk_stream_buffer.cpp165
-rw-r--r--src/video_core/renderer_vulkan/vk_stream_buffer.h76
-rw-r--r--src/video_core/renderer_vulkan/vk_swapchain.cpp59
-rw-r--r--src/video_core/renderer_vulkan/vk_swapchain.h31
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.cpp311
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.h91
-rw-r--r--src/video_core/renderer_vulkan/vk_update_descriptor.cpp13
-rw-r--r--src/video_core/renderer_vulkan/vk_update_descriptor.h4
-rw-r--r--src/video_core/shader/ast.cpp752
-rw-r--r--src/video_core/shader/ast.h398
-rw-r--r--src/video_core/shader/async_shaders.cpp234
-rw-r--r--src/video_core/shader/async_shaders.h138
-rw-r--r--src/video_core/shader/compiler_settings.cpp26
-rw-r--r--src/video_core/shader/compiler_settings.h26
-rw-r--r--src/video_core/shader/control_flow.cpp751
-rw-r--r--src/video_core/shader/control_flow.h117
-rw-r--r--src/video_core/shader/decode.cpp368
-rw-r--r--src/video_core/shader/decode/arithmetic.cpp166
-rw-r--r--src/video_core/shader/decode/arithmetic_half.cpp101
-rw-r--r--src/video_core/shader/decode/arithmetic_half_immediate.cpp54
-rw-r--r--src/video_core/shader/decode/arithmetic_immediate.cpp53
-rw-r--r--src/video_core/shader/decode/arithmetic_integer.cpp375
-rw-r--r--src/video_core/shader/decode/arithmetic_integer_immediate.cpp99
-rw-r--r--src/video_core/shader/decode/bfe.cpp77
-rw-r--r--src/video_core/shader/decode/bfi.cpp45
-rw-r--r--src/video_core/shader/decode/conversion.cpp321
-rw-r--r--src/video_core/shader/decode/ffma.cpp62
-rw-r--r--src/video_core/shader/decode/float_set.cpp58
-rw-r--r--src/video_core/shader/decode/float_set_predicate.cpp57
-rw-r--r--src/video_core/shader/decode/half_set.cpp115
-rw-r--r--src/video_core/shader/decode/half_set_predicate.cpp80
-rw-r--r--src/video_core/shader/decode/hfma2.cpp73
-rw-r--r--src/video_core/shader/decode/image.cpp536
-rw-r--r--src/video_core/shader/decode/integer_set.cpp49
-rw-r--r--src/video_core/shader/decode/integer_set_predicate.cpp53
-rw-r--r--src/video_core/shader/decode/memory.cpp493
-rw-r--r--src/video_core/shader/decode/other.cpp322
-rw-r--r--src/video_core/shader/decode/predicate_set_predicate.cpp68
-rw-r--r--src/video_core/shader/decode/predicate_set_register.cpp46
-rw-r--r--src/video_core/shader/decode/register_set_predicate.cpp86
-rw-r--r--src/video_core/shader/decode/shift.cpp153
-rw-r--r--src/video_core/shader/decode/texture.cpp935
-rw-r--r--src/video_core/shader/decode/video.cpp169
-rw-r--r--src/video_core/shader/decode/warp.cpp117
-rw-r--r--src/video_core/shader/decode/xmad.cpp156
-rw-r--r--src/video_core/shader/expr.cpp93
-rw-r--r--src/video_core/shader/expr.h156
-rw-r--r--src/video_core/shader/memory_util.cpp76
-rw-r--r--src/video_core/shader/memory_util.h43
-rw-r--r--src/video_core/shader/node.h701
-rw-r--r--src/video_core/shader/node_helper.cpp115
-rw-r--r--src/video_core/shader/node_helper.h71
-rw-r--r--src/video_core/shader/registry.cpp181
-rw-r--r--src/video_core/shader/registry.h172
-rw-r--r--src/video_core/shader/shader_ir.cpp464
-rw-r--r--src/video_core/shader/shader_ir.h479
-rw-r--r--src/video_core/shader/track.cpp236
-rw-r--r--src/video_core/shader/transform_feedback.cpp115
-rw-r--r--src/video_core/shader/transform_feedback.h23
-rw-r--r--src/video_core/shader_cache.cpp250
-rw-r--r--src/video_core/shader_cache.h215
-rw-r--r--src/video_core/shader_environment.cpp460
-rw-r--r--src/video_core/shader_environment.h183
-rw-r--r--src/video_core/shader_notify.cpp51
-rw-r--r--src/video_core/shader_notify.h28
-rw-r--r--src/video_core/surface.cpp7
-rw-r--r--src/video_core/surface.h2
-rw-r--r--src/video_core/texture_cache/formatter.cpp4
-rw-r--r--src/video_core/texture_cache/formatter.h3
-rw-r--r--src/video_core/texture_cache/image_base.cpp42
-rw-r--r--src/video_core/texture_cache/image_base.h43
-rw-r--r--src/video_core/texture_cache/image_view_base.cpp9
-rw-r--r--src/video_core/texture_cache/image_view_base.h1
-rw-r--r--src/video_core/texture_cache/render_targets.h6
-rw-r--r--src/video_core/texture_cache/slot_vector.h70
-rw-r--r--src/video_core/texture_cache/texture_cache.h681
-rw-r--r--src/video_core/texture_cache/types.h5
-rw-r--r--src/video_core/texture_cache/util.cpp114
-rw-r--r--src/video_core/texture_cache/util.h4
-rw-r--r--src/video_core/textures/astc.cpp14
-rw-r--r--src/video_core/textures/astc.h11
-rw-r--r--src/video_core/textures/texture.h9
-rw-r--r--src/video_core/transform_feedback.cpp99
-rw-r--r--src/video_core/transform_feedback.h30
-rw-r--r--src/video_core/vulkan_common/nsight_aftermath_tracker.cpp9
-rw-r--r--src/video_core/vulkan_common/nsight_aftermath_tracker.h21
-rw-r--r--src/video_core/vulkan_common/vulkan_device.cpp391
-rw-r--r--src/video_core/vulkan_common/vulkan_device.h182
-rw-r--r--src/video_core/vulkan_common/vulkan_memory_allocator.cpp22
-rw-r--r--src/video_core/vulkan_common/vulkan_memory_allocator.h5
-rw-r--r--src/video_core/vulkan_common/vulkan_wrapper.cpp7
-rw-r--r--src/video_core/vulkan_common/vulkan_wrapper.h44
-rw-r--r--src/web_service/web_backend.cpp13
-rw-r--r--src/yuzu/CMakeLists.txt28
-rw-r--r--src/yuzu/about_dialog.cpp3
-rw-r--r--src/yuzu/applets/controller.cpp695
-rw-r--r--src/yuzu/applets/error.cpp63
-rw-r--r--src/yuzu/applets/profile_select.cpp163
-rw-r--r--src/yuzu/applets/qt_controller.cpp695
-rw-r--r--src/yuzu/applets/qt_controller.h (renamed from src/yuzu/applets/controller.h)0
-rw-r--r--src/yuzu/applets/qt_controller.ui (renamed from src/yuzu/applets/controller.ui)0
-rw-r--r--src/yuzu/applets/qt_error.cpp63
-rw-r--r--src/yuzu/applets/qt_error.h (renamed from src/yuzu/applets/error.h)0
-rw-r--r--src/yuzu/applets/qt_profile_select.cpp163
-rw-r--r--src/yuzu/applets/qt_profile_select.h (renamed from src/yuzu/applets/profile_select.h)0
-rw-r--r--src/yuzu/applets/qt_software_keyboard.cpp1620
-rw-r--r--src/yuzu/applets/qt_software_keyboard.h (renamed from src/yuzu/applets/software_keyboard.h)0
-rw-r--r--src/yuzu/applets/qt_software_keyboard.ui (renamed from src/yuzu/applets/software_keyboard.ui)0
-rw-r--r--src/yuzu/applets/qt_web_browser.cpp417
-rw-r--r--src/yuzu/applets/qt_web_browser.h (renamed from src/yuzu/applets/web_browser.h)0
-rw-r--r--src/yuzu/applets/qt_web_browser_scripts.h (renamed from src/yuzu/applets/web_browser_scripts.h)0
-rw-r--r--src/yuzu/applets/software_keyboard.cpp1620
-rw-r--r--src/yuzu/applets/web_browser.cpp417
-rw-r--r--src/yuzu/bootmanager.cpp38
-rw-r--r--src/yuzu/bootmanager.h6
-rw-r--r--src/yuzu/configuration/config.cpp709
-rw-r--r--src/yuzu/configuration/config.h80
-rw-r--r--src/yuzu/configuration/configure.ui50
-rw-r--r--src/yuzu/configuration/configure_audio.cpp21
-rw-r--r--src/yuzu/configuration/configure_cpu.cpp27
-rw-r--r--src/yuzu/configuration/configure_cpu.h2
-rw-r--r--src/yuzu/configuration/configure_cpu.ui26
-rw-r--r--src/yuzu/configuration/configure_cpu_debug.cpp21
-rw-r--r--src/yuzu/configuration/configure_cpu_debug.h1
-rw-r--r--src/yuzu/configuration/configure_cpu_debug.ui94
-rw-r--r--src/yuzu/configuration/configure_debug.cpp36
-rw-r--r--src/yuzu/configuration/configure_debug.ui89
-rw-r--r--src/yuzu/configuration/configure_debug_tab.cpp38
-rw-r--r--src/yuzu/configuration/configure_debug_tab.h32
-rw-r--r--src/yuzu/configuration/configure_debug_tab.ui52
-rw-r--r--src/yuzu/configuration/configure_dialog.cpp6
-rw-r--r--src/yuzu/configuration/configure_filesystem.cpp13
-rw-r--r--src/yuzu/configuration/configure_general.cpp58
-rw-r--r--src/yuzu/configuration/configure_general.h2
-rw-r--r--src/yuzu/configuration/configure_general.ui34
-rw-r--r--src/yuzu/configuration/configure_graphics.cpp94
-rw-r--r--src/yuzu/configuration/configure_graphics.h4
-rw-r--r--src/yuzu/configuration/configure_graphics.ui118
-rw-r--r--src/yuzu/configuration/configure_graphics_advanced.cpp13
-rw-r--r--src/yuzu/configuration/configure_graphics_advanced.h2
-rw-r--r--src/yuzu/configuration/configure_graphics_advanced.ui28
-rw-r--r--src/yuzu/configuration/configure_input_advanced.cpp12
-rw-r--r--src/yuzu/configuration/configure_input_advanced.ui15
-rw-r--r--src/yuzu/configuration/configure_input_player.cpp47
-rw-r--r--src/yuzu/configuration/configure_input_player.ui44
-rw-r--r--src/yuzu/configuration/configure_motion_touch.cpp11
-rw-r--r--src/yuzu/configuration/configure_per_game.cpp2
-rw-r--r--src/yuzu/configuration/configure_per_game.ui7
-rw-r--r--src/yuzu/configuration/configure_per_game_addons.cpp4
-rw-r--r--src/yuzu/configuration/configure_profile_manager.cpp11
-rw-r--r--src/yuzu/configuration/configure_service.cpp2
-rw-r--r--src/yuzu/configuration/configure_ui.cpp11
-rw-r--r--src/yuzu/configuration/configure_web.cpp12
-rw-r--r--src/yuzu/debugger/console.cpp4
-rw-r--r--src/yuzu/debugger/profiler.cpp9
-rw-r--r--src/yuzu/game_list.cpp37
-rw-r--r--src/yuzu/game_list.h16
-rw-r--r--src/yuzu/game_list_p.h15
-rw-r--r--src/yuzu/game_list_worker.cpp44
-rw-r--r--src/yuzu/main.cpp341
-rw-r--r--src/yuzu/main.h20
-rw-r--r--src/yuzu/uisettings.h43
-rw-r--r--src/yuzu_cmd/CMakeLists.txt5
-rw-r--r--src/yuzu_cmd/config.cpp218
-rw-r--r--src/yuzu_cmd/config.h12
-rw-r--r--src/yuzu_cmd/default_ini.h178
-rw-r--r--src/yuzu_cmd/emu_window/emu_window_sdl2.cpp52
-rw-r--r--src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp8
-rw-r--r--src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp34
-rw-r--r--src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h2
-rw-r--r--src/yuzu_cmd/yuzu.cpp12
742 files changed, 64426 insertions, 45609 deletions
diff --git a/.ci/scripts/windows/docker.sh b/.ci/scripts/windows/docker.sh
index feba3fd6e..155d8a5c8 100755
--- a/.ci/scripts/windows/docker.sh
+++ b/.ci/scripts/windows/docker.sh
@@ -18,19 +18,20 @@ cd ..
mkdir package
if [ -d "/usr/x86_64-w64-mingw32/lib/qt5/plugins/platforms/" ]; then
- QT_PLATFORM_DLL_PATH='/usr/x86_64-w64-mingw32/lib/qt5/plugins/platforms/'
+ QT_PLUGINS_PATH='/usr/x86_64-w64-mingw32/lib/qt5/plugins'
else
#fallback to qt
- QT_PLATFORM_DLL_PATH='/usr/x86_64-w64-mingw32/lib/qt/plugins/platforms/'
+ QT_PLUGINS_PATH='/usr/x86_64-w64-mingw32/lib/qt/plugins'
fi
find build/ -name "yuzu*.exe" -exec cp {} 'package' \;
# copy Qt plugins
mkdir package/platforms
-cp "${QT_PLATFORM_DLL_PATH}/qwindows.dll" package/platforms/
-cp -rv "${QT_PLATFORM_DLL_PATH}/../mediaservice/" package/
-cp -rv "${QT_PLATFORM_DLL_PATH}/../imageformats/" package/
+cp -v "${QT_PLUGINS_PATH}/platforms/qwindows.dll" package/platforms/
+cp -rv "${QT_PLUGINS_PATH}/mediaservice/" package/
+cp -rv "${QT_PLUGINS_PATH}/imageformats/" package/
+cp -rv "${QT_PLUGINS_PATH}/styles/" package/
rm -f package/mediaservice/*d.dll
for i in package/*.exe; do
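The revised packaging step above copies whole plugin directories (platforms, mediaservice, imageformats, styles) out of a single QT_PLUGINS_PATH instead of reaching back out of the platforms directory. A minimal standalone sketch of the same idea, assuming the MinGW Qt location used in the script; this is illustration only, not part of the CI pipeline:

# Sketch only: confirm the plugin directories the packaging step expects exist.
QT_PLUGINS_PATH='/usr/x86_64-w64-mingw32/lib/qt5/plugins'
for plugin in platforms/qwindows.dll mediaservice imageformats styles; do
    [ -e "${QT_PLUGINS_PATH}/${plugin}" ] || echo "missing Qt plugin: ${plugin}"
done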
diff --git a/.ci/yuzu-mainline-step2.yml b/.ci/yuzu-mainline-step2.yml
index a90041d28..3159ce3ed 100644
--- a/.ci/yuzu-mainline-step2.yml
+++ b/.ci/yuzu-mainline-step2.yml
@@ -19,6 +19,7 @@ stages:
displayName: 'build'
jobs:
- job: build
+ timeoutInMinutes: 120
displayName: 'standard'
pool:
vmImage: ubuntu-latest
@@ -43,6 +44,7 @@ stages:
displayName: 'build-windows'
jobs:
- job: build
+ timeoutInMinutes: 120
displayName: 'msvc'
pool:
vmImage: windows-2019
@@ -65,4 +67,4 @@ stages:
- job: github
displayName: 'github'
steps:
- - template: ./templates/release-github.yml
\ No newline at end of file
+ - template: ./templates/release-github.yml
diff --git a/.gitmodules b/.gitmodules
index d294e850d..749cd0408 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -43,3 +43,6 @@
[submodule "SDL"]
path = externals/SDL
url = https://github.com/libsdl-org/SDL.git
+[submodule "externals/cpp-httplib"]
+ path = externals/cpp-httplib
+ url = https://github.com/yhirose/cpp-httplib.git
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 01de9ac0d..d98ba7767 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -13,7 +13,7 @@ project(yuzu)
option(ENABLE_SDL2 "Enable the SDL2 frontend" ON)
CMAKE_DEPENDENT_OPTION(YUZU_USE_BUNDLED_SDL2 "Download bundled SDL2 binaries" ON "ENABLE_SDL2;MSVC" OFF)
# On Linux system SDL2 is likely to be lacking HIDAPI support which have drawbacks but is needed for SDL motion
-option(YUZU_ALLOW_SYSTEM_SDL2 "Try using system SDL2 before fallling back to one from externals" OFF)
+CMAKE_DEPENDENT_OPTION(YUZU_USE_EXTERNAL_SDL2 "Compile external SDL2" ON "ENABLE_SDL2;NOT MSVC" OFF)
option(ENABLE_QT "Enable the Qt frontend" ON)
option(ENABLE_QT_TRANSLATION "Enable translations for the Qt frontend" OFF)
@@ -47,9 +47,10 @@ if (NOT IS_MULTI_CONFIG AND NOT CMAKE_BUILD_TYPE)
endif()
if(EXISTS ${PROJECT_SOURCE_DIR}/hooks/pre-commit AND NOT EXISTS ${PROJECT_SOURCE_DIR}/.git/hooks/pre-commit)
- message(STATUS "Copying pre-commit hook")
- file(COPY hooks/pre-commit
- DESTINATION ${PROJECT_SOURCE_DIR}/.git/hooks)
+ if (EXISTS ${PROJECT_SOURCE_DIR}/.git/)
+ message(STATUS "Copying pre-commit hook")
+ file(COPY hooks/pre-commit DESTINATION ${PROJECT_SOURCE_DIR}/.git/hooks)
+ endif()
endif()
# Sanity check : Check that all submodules are present
@@ -172,7 +173,7 @@ macro(yuzu_find_packages)
set(REQUIRED_LIBS
# Cmake Pkg Prefix Version Conan Pkg
"Catch2 2.13 catch2/2.13.0"
- "fmt 7.1 fmt/7.1.2"
+ "fmt 8.0 fmt/8.0.0"
"lz4 1.8 lz4/1.9.2"
"nlohmann_json 3.8 nlohmann_json/3.8.0"
"ZLIB 1.2 zlib/1.2.11"
@@ -254,10 +255,83 @@ if(ENABLE_QT)
# Check for system Qt on Linux, fallback to bundled Qt
if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
if (NOT YUZU_USE_BUNDLED_QT)
- find_package(Qt5 ${QT_VERSION} COMPONENTS Widgets QUIET)
- if (NOT Qt5_FOUND)
- set(YUZU_USE_BUNDLED_QT ON CACHE BOOL "Download bundled Qt" FORCE)
+ find_package(Qt5 ${QT_VERSION} COMPONENTS Widgets)
+ endif()
+ if (NOT Qt5_FOUND OR YUZU_USE_BUNDLED_QT)
+ # Check for dependencies, then enable bundled Qt download
+
+ # Check that the system GLIBCXX version is compatible
+ find_program(OBJDUMP objdump)
+ if ("${OBJDUMP}" STREQUAL "OBJDUMP-NOTFOUND")
+ message(FATAL_ERROR "Required program `objdump` not found.")
+ endif()
+ find_library(LIBSTDCXX libstdc++.so.6)
+ execute_process(
+ COMMAND
+ ${OBJDUMP} -T ${LIBSTDCXX}
+ COMMAND
+ grep GLIBCXX_3.4.28
+ COMMAND
+ sed "s/[0-9a-f]*.* //"
+ COMMAND
+ sed "s/ .*//"
+ COMMAND
+ sort -u
+ OUTPUT_VARIABLE
+ GLIBCXX_MET
+ )
+ if (NOT GLIBCXX_MET)
+ message(FATAL_ERROR "Qt too old or not found, and bundled Qt package is not \
+ compatible with this system. Either install Qt ${QT_VERSION}, or provide the path \
+ to Qt by setting the variable Qt5_ROOT.")
+ endif()
+
+ # Check for headers
+ Include(FindPkgConfig REQUIRED)
+ pkg_check_modules(QT_DEP_GLU QUIET glu>=9.0.0)
+ if (NOT QT_DEP_GLU_FOUND)
+ message(FATAL_ERROR "Qt bundled pacakge dependency `glu` not found. \
+ Perhaps `libglu1-mesa-dev` needs to be installed?")
endif()
+ pkg_check_modules(QT_DEP_MESA QUIET dri>=20.0.8)
+ if (NOT QT_DEP_MESA_FOUND)
+ message(FATAL_ERROR "Qt bundled pacakge dependency `dri` not found. \
+ Perhaps `mesa-common-dev` needs to be installed?")
+ endif()
+
+ # Check for X libraries
+ set(BUNDLED_QT_REQUIREMENTS
+ libxcb-icccm.so.4
+ libxcb-image.so.0
+ libxcb-keysyms.so.1
+ libxcb-randr.so.0
+ libxcb-render-util.so.0
+ libxcb-render.so.0
+ libxcb-shape.so.0
+ libxcb-shm.so.0
+ libxcb-sync.so.1
+ libxcb-xfixes.so.0
+ libxcb-xinerama.so.0
+ libxcb-xkb.so.1
+ libxcb.so.1
+ libxkbcommon-x11.so.0
+ libxkbcommon.so.0
+ )
+ set(UNRESOLVED_QT_DEPS "")
+ foreach (REQUIREMENT ${BUNDLED_QT_REQUIREMENTS})
+ find_library(BUNDLED_QT_${REQUIREMENT} ${REQUIREMENT})
+ if ("${BUNDLED_QT_${REQUIREMENT}}" STREQUAL "BUNDLED_QT_${REQUIREMENT}-NOTFOUND")
+ set(UNRESOLVED_QT_DEPS ${UNRESOLVED_QT_DEPS} ${REQUIREMENT})
+ endif()
+ unset(BUNDLED_QT_${REQUIREMENT})
+ endforeach()
+ unset(BUNDLED_QT_REQUIREMENTS)
+
+ if (NOT "${UNRESOLVED_QT_DEPS}" STREQUAL "")
+ message(FATAL_ERROR "Bundled Qt package missing required dependencies: ${UNRESOLVED_QT_DEPS}")
+ endif()
+
+ set(YUZU_USE_BUNDLED_QT ON CACHE BOOL "Download bundled Qt" FORCE)
endif()
if (YUZU_USE_BUNDLED_QT)
# Binary package currently does not support Qt webengine, so make sure it's disabled
@@ -265,6 +339,8 @@ if(ENABLE_QT)
endif()
endif()
+ set(YUZU_QT_NO_CMAKE_SYSTEM_PATH)
+
# Workaround for an issue where conan tries to build Qt from scratch instead of download prebuilt binaries
set(QT_PREFIX_HINT)
@@ -282,8 +358,10 @@ if(ENABLE_QT)
endif()
set(QT_PREFIX_HINT HINTS "${QT_PREFIX}")
+
+ set(YUZU_QT_NO_CMAKE_SYSTEM_PATH "NO_CMAKE_SYSTEM_PATH")
endif()
- find_package(Qt5 ${QT_VERSION} REQUIRED COMPONENTS Widgets ${QT_PREFIX_HINT} NO_CMAKE_SYSTEM_PATH)
+ find_package(Qt5 ${QT_VERSION} REQUIRED COMPONENTS Widgets ${QT_PREFIX_HINT} ${YUZU_QT_NO_CMAKE_SYSTEM_PATH})
if (YUZU_USE_QT_WEB_ENGINE)
find_package(Qt5 COMPONENTS WebEngineCore WebEngineWidgets)
endif()
@@ -315,26 +393,20 @@ if (ENABLE_SDL2)
add_library(SDL2 INTERFACE)
target_link_libraries(SDL2 INTERFACE "${SDL2_LIBRARY}")
target_include_directories(SDL2 INTERFACE "${SDL2_INCLUDE_DIR}")
+ elseif (YUZU_USE_EXTERNAL_SDL2)
+ message(STATUS "Using SDL2 from externals.")
else()
- if (YUZU_ALLOW_SYSTEM_SDL2)
- find_package(SDL2 2.0.15 QUIET)
-
- if (SDL2_FOUND)
- # Some installations don't set SDL2_LIBRARIES
- if("${SDL2_LIBRARIES}" STREQUAL "")
- message(WARNING "SDL2_LIBRARIES wasn't set, manually setting to SDL2::SDL2")
- set(SDL2_LIBRARIES "SDL2::SDL2")
- endif()
+ find_package(SDL2 2.0.15 REQUIRED)
- include_directories(SYSTEM ${SDL2_INCLUDE_DIRS})
- add_library(SDL2 INTERFACE)
- target_link_libraries(SDL2 INTERFACE "${SDL2_LIBRARIES}")
- else()
- message(STATUS "SDL2 2.0.15 or newer not found, falling back to externals.")
- endif()
- else()
- message(STATUS "Using SDL2 from externals.")
+ # Some installations don't set SDL2_LIBRARIES
+ if("${SDL2_LIBRARIES}" STREQUAL "")
+ message(WARNING "SDL2_LIBRARIES wasn't set, manually setting to SDL2::SDL2")
+ set(SDL2_LIBRARIES "SDL2::SDL2")
endif()
+
+ include_directories(SYSTEM ${SDL2_INCLUDE_DIRS})
+ add_library(SDL2 INTERFACE)
+ target_link_libraries(SDL2 INTERFACE "${SDL2_LIBRARIES}")
endif()
endif()
@@ -473,7 +545,15 @@ if (YUZU_USE_BUNDLED_FFMPEG)
# FFmpeg has source that requires one of nasm or yasm to assemble it.
# REQUIRED throws an error if not found here during configuration rather than during compilation.
- find_program(ASSEMBLER NAMES nasm yasm REQUIRED)
+ find_program(ASSEMBLER NAMES nasm yasm)
+ if ("${ASSEMBLER}" STREQUAL "ASSEMBLER-NOTFOUND")
+ message(FATAL_ERROR "One of either `nasm` or `yasm` not found but is required.")
+ endif()
+
+ find_program(AUTOCONF autoconf)
+ if ("${AUTOCONF}" STREQUAL "AUTOCONF-NOTFOUND")
+ message(FATAL_ERROR "Required program `autoconf` not found.")
+ endif()
set(FFmpeg_PREFIX ${PROJECT_SOURCE_DIR}/externals/ffmpeg)
set(FFmpeg_BUILD_DIR ${PROJECT_BINARY_DIR}/externals/ffmpeg)
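The bundled-Qt fallback added above only proceeds when the host can actually run the prebuilt package: it scans the libstdc++ dynamic symbol table with objdump for GLIBCXX_3.4.28, asks pkg-config for glu and dri, and resolves a list of xcb/xkbcommon libraries before forcing YUZU_USE_BUNDLED_QT on. A rough shell equivalent of the GLIBCXX probe, assuming a typical libstdc++ path (the build itself locates the real library with find_library):

# Sketch only: does this libstdc++ export the symbol version the bundled Qt needs?
if objdump -T /usr/lib/x86_64-linux-gnu/libstdc++.so.6 | grep -q GLIBCXX_3.4.28; then
    echo "GLIBCXX_3.4.28 present; bundled Qt should load"
else
    echo "libstdc++ too old for the bundled Qt package"
fi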
diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake
index 311ba1c2e..43ca730ec 100644
--- a/CMakeModules/GenerateSCMRev.cmake
+++ b/CMakeModules/GenerateSCMRev.cmake
@@ -48,69 +48,6 @@ if (BUILD_REPOSITORY)
endif()
endif()
-# The variable SRC_DIR must be passed into the script (since it uses the current build directory for all values of CMAKE_*_DIR)
-set(VIDEO_CORE "${SRC_DIR}/src/video_core")
-set(HASH_FILES
- "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp"
- "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h"
- "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp"
- "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h"
- "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp"
- "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h"
- "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp"
- "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h"
- "${VIDEO_CORE}/shader/decode/arithmetic.cpp"
- "${VIDEO_CORE}/shader/decode/arithmetic_half.cpp"
- "${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp"
- "${VIDEO_CORE}/shader/decode/arithmetic_immediate.cpp"
- "${VIDEO_CORE}/shader/decode/arithmetic_integer.cpp"
- "${VIDEO_CORE}/shader/decode/arithmetic_integer_immediate.cpp"
- "${VIDEO_CORE}/shader/decode/bfe.cpp"
- "${VIDEO_CORE}/shader/decode/bfi.cpp"
- "${VIDEO_CORE}/shader/decode/conversion.cpp"
- "${VIDEO_CORE}/shader/decode/ffma.cpp"
- "${VIDEO_CORE}/shader/decode/float_set.cpp"
- "${VIDEO_CORE}/shader/decode/float_set_predicate.cpp"
- "${VIDEO_CORE}/shader/decode/half_set.cpp"
- "${VIDEO_CORE}/shader/decode/half_set_predicate.cpp"
- "${VIDEO_CORE}/shader/decode/hfma2.cpp"
- "${VIDEO_CORE}/shader/decode/image.cpp"
- "${VIDEO_CORE}/shader/decode/integer_set.cpp"
- "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
- "${VIDEO_CORE}/shader/decode/memory.cpp"
- "${VIDEO_CORE}/shader/decode/texture.cpp"
- "${VIDEO_CORE}/shader/decode/other.cpp"
- "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
- "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"
- "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp"
- "${VIDEO_CORE}/shader/decode/shift.cpp"
- "${VIDEO_CORE}/shader/decode/video.cpp"
- "${VIDEO_CORE}/shader/decode/warp.cpp"
- "${VIDEO_CORE}/shader/decode/xmad.cpp"
- "${VIDEO_CORE}/shader/ast.cpp"
- "${VIDEO_CORE}/shader/ast.h"
- "${VIDEO_CORE}/shader/compiler_settings.cpp"
- "${VIDEO_CORE}/shader/compiler_settings.h"
- "${VIDEO_CORE}/shader/control_flow.cpp"
- "${VIDEO_CORE}/shader/control_flow.h"
- "${VIDEO_CORE}/shader/decode.cpp"
- "${VIDEO_CORE}/shader/expr.cpp"
- "${VIDEO_CORE}/shader/expr.h"
- "${VIDEO_CORE}/shader/node.h"
- "${VIDEO_CORE}/shader/node_helper.cpp"
- "${VIDEO_CORE}/shader/node_helper.h"
- "${VIDEO_CORE}/shader/registry.cpp"
- "${VIDEO_CORE}/shader/registry.h"
- "${VIDEO_CORE}/shader/shader_ir.cpp"
- "${VIDEO_CORE}/shader/shader_ir.h"
- "${VIDEO_CORE}/shader/track.cpp"
- "${VIDEO_CORE}/shader/transform_feedback.cpp"
- "${VIDEO_CORE}/shader/transform_feedback.h"
-)
-set(COMBINED "")
-foreach (F IN LISTS HASH_FILES)
- file(READ ${F} TMP)
- set(COMBINED "${COMBINED}${TMP}")
-endforeach()
-string(MD5 SHADER_CACHE_VERSION "${COMBINED}")
+# The variable SRC_DIR must be passed into the script
+# (since it uses the current build directory for all values of CMAKE_*_DIR)
configure_file("${SRC_DIR}/src/common/scm_rev.cpp.in" "scm_rev.cpp" @ONLY)
diff --git a/README.md b/README.md
index cb1a64d8c..2cb030aed 100644
--- a/README.md
+++ b/README.md
@@ -1,43 +1,82 @@
-yuzu emulator
-=============
-[![Travis CI Build Status](https://travis-ci.com/yuzu-emu/yuzu.svg?branch=master)](https://travis-ci.com/yuzu-emu/yuzu)
-[![Azure Mainline CI Build Status](https://dev.azure.com/yuzu-emu/yuzu/_apis/build/status/yuzu%20mainline?branchName=master)](https://dev.azure.com/yuzu-emu/yuzu/)
-[![Discord](https://img.shields.io/discord/398318088170242053?color=%237289DA&label=yuzu&logo=discord&logoColor=white)](https://discord.com/invite/u77vRWY)
+<h1 align="center">
+ <br>
+ <a href="https://yuzu-emu.org/"><img src="https://raw.githubusercontent.com/yuzu-emu/yuzu-assets/master/icons/icon.png" alt="yuzu" width="200"></a>
+ <br>
+ <b>yuzu</b>
+ <br>
+</h1>
-yuzu is an experimental open-source emulator for the Nintendo Switch from the creators of [Citra](https://citra-emu.org/).
+<h4 align="center"><b>yuzu</b> is the world's most popular, open-source, Nintendo Switch emulator — started by the creators of <a href="https://citra-emu.org" target="_blank">Citra</a>.
+<br>
+It is written in C++ with portability in mind, and we actively maintain builds for Windows and Linux.
+</h4>
-It is written in C++ with portability in mind, with builds actively maintained for Windows and Linux. The emulator is capable of running several commercial games.
+<p align="center">
+ <a href="https://dev.azure.com/yuzu-emu/yuzu/">
+ <img src="https://dev.azure.com/yuzu-emu/yuzu/_apis/build/status/yuzu%20mainline?branchName=master"
+ alt="Azure Mainline CI Build Status">
+ </a>
+ <a href="https://discord.com/invite/u77vRWY">
+ <img src="https://img.shields.io/discord/398318088170242053?color=%237289DA&label=yuzu&logo=discord&logoColor=white"
+ alt="Discord">
+ </a>
+</p>
-yuzu only emulates a subset of Switch hardware and therefore most commercial games **do not** run at full speed or are not fully functional.
+<p align="center">
+ <a href="#compatibility">Compatibility</a> |
+ <a href="#development">Development</a> |
+ <a href="#building">Building</a> |
+ <a href="#download">Download</a> |
+ <a href="#support">Support</a> |
+ <a href="#license">License</a>
+</p>
-Do you want to check which games are compatible and which ones are not? Please visit our [Compatibility page](https://yuzu-emu.org/game/)!
+## Compatibility
-yuzu is licensed under the GPLv2 (or any later version). Refer to the license.txt file included.
+The emulator is capable of running most commercial games at full speed, provided you meet the [necessary hardware requirements](https://yuzu-emu.org/help/quickstart/#hardware-requirements).
-Check out our [website](https://yuzu-emu.org/)!
+For a full list of games that yuzu supports, please visit our [Compatibility page](https://yuzu-emu.org/game/).
-For development discussion, please join us on [Discord](https://discord.com/invite/u77vRWY).
+Check out our [website](https://yuzu-emu.org/) for the latest news on exciting features, monthly progress reports, and more!
-### Development
+## Development
-Most of the development happens on GitHub. It's also where [our central repository](https://github.com/yuzu-emu/yuzu) is hosted.
+Most of the development happens on GitHub. It's also where [our central repository](https://github.com/yuzu-emu/yuzu) is hosted. For development discussion, please join us on [Discord](https://discord.com/invite/u77vRWY).
-If you want to contribute please take a look at the [Contributor's Guide](https://github.com/yuzu-emu/yuzu/wiki/Contributing) and [Developer Information](https://github.com/yuzu-emu/yuzu/wiki/Developer-Information). You should also contact any of the developers on Discord in order to know about the current state of the emulator.
+If you want to contribute, please take a look at the [Contributor's Guide](https://github.com/yuzu-emu/yuzu/wiki/Contributing) and [Developer Information](https://github.com/yuzu-emu/yuzu/wiki/Developer-Information).
+You can also contact any of the developers on Discord in order to know about the current state of the emulator.
-If you want to contribute to the user interface translation, please check out the [yuzu project on transifex](https://www.transifex.com/yuzu-emulator/yuzu). We centralize translation work there, and periodically upstream translations.
+If you want to contribute to the user interface translation project, please check out the [yuzu project on transifex](https://www.transifex.com/yuzu-emulator/yuzu). We centralize translation work there, and periodically upstream translations.
-### Building
+## Building
* __Windows__: [Windows Build](https://github.com/yuzu-emu/yuzu/wiki/Building-For-Windows)
* __Linux__: [Linux Build](https://github.com/yuzu-emu/yuzu/wiki/Building-For-Linux)
+## Download
-### Support
-We happily accept monetary donations, or donated games and hardware. Please see our [donations page](https://yuzu-emu.org/donate/) for more information on how you can contribute to yuzu. Any donations received will go towards things like:
+You can download the latest releases automatically via the installer on our [downloads](https://yuzu-emu.org/downloads/) page.
+
+
+## Support
+
+If you enjoy the project and want to support us financially, check out our Patreon!
+
+<a href="https://www.patreon.com/yuzuteam">
+ <img src="https://c5.patreon.com/external/logo/become_a_patron_button@2x.png" width="160">
+</a>
+
+Any donations received will go towards things like:
* Switch consoles to explore and reverse-engineer the hardware
* Switch games for testing, reverse-engineering, and implementing new features
* Web hosting and infrastructure setup
* Software licenses (e.g. Visual Studio, IDA Pro, etc.)
* Additional hardware (e.g. GPUs as-needed to improve rendering support, other peripherals to add support for, etc.)
-We also more than gladly accept used Switch consoles, preferably ones with firmware 3.0.0 or lower! If you would like to give yours away, don't hesitate to join our [Discord](https://discord.gg/VXqngT3) and talk to bunnei. You may also contact: donations@yuzu-emu.org.
+If you wish to support us in a different way, please join our [Discord](https://discord.gg/u77vRWY) and talk to bunnei. You may also contact us at donations@yuzu-emu.org.
+
+## License
+
+yuzu is licensed under the GPLv2 (or any later version). Refer to the [license.txt](https://github.com/yuzu-emu/yuzu/blob/master/license.txt) file.
+
+The [Skyline-Emulator Team](https://github.com/skyline-emu/skyline) is exempt from the GPLv2 for the contributions from the following contributors: [FernandoS27](https://github.com/FernandoS27), [lioncash](https://github.com/lioncash), [bunnei](https://github.com/bunnei), [ReinUsesLisp](https://github.com/ReinUsesLisp), [Morph1984](https://github.com/Morph1984), [ogniK5377](https://github.com/ogniK5377), [german77](https://github.com/german77), [ameerj](https://github.com/ameerj), [Kelebek1](https://github.com/Kelebek1), and [lat9nq](https://github.com/lat9nq). They may use the code from these contributors only under the Mozilla Public License, version 2.0.
diff --git a/dist/qt_themes/default/style.qss b/dist/qt_themes/default/style.qss
index cee219374..9915a40ba 100644
--- a/dist/qt_themes/default/style.qss
+++ b/dist/qt_themes/default/style.qss
@@ -38,6 +38,26 @@ QPushButton#RendererStatusBarButton:!checked {
color: #0066ff;
}
+QPushButton#GPUStatusBarButton {
+ color: #656565;
+ border: 1px solid transparent;
+ background-color: transparent;
+ padding: 0px 3px 0px 3px;
+ text-align: center;
+}
+
+QPushButton#GPUStatusBarButton:hover {
+ border: 1px solid #76797C;
+}
+
+QPushButton#GPUStatusBarButton:checked {
+ color: #ff8040;
+}
+
+QPushButton#GPUStatusBarButton:!checked {
+ color: #40dd40;
+}
+
QPushButton#buttonRefreshDevices {
min-width: 21px;
min-height: 21px;
diff --git a/dist/qt_themes/qdarkstyle/style.qss b/dist/qt_themes/qdarkstyle/style.qss
index 3d0ccbb9e..dac2dba86 100644
--- a/dist/qt_themes/qdarkstyle/style.qss
+++ b/dist/qt_themes/qdarkstyle/style.qss
@@ -1283,6 +1283,27 @@ QPushButton#RendererStatusBarButton:!checked {
color: #00ccdd;
}
+QPushButton#GPUStatusBarButton {
+ min-width: 0px;
+ color: #656565;
+ border: 1px solid transparent;
+ background-color: transparent;
+ padding: 0px 3px 0px 3px;
+ text-align: center;
+}
+
+QPushButton#GPUStatusBarButton:hover {
+ border: 1px solid #76797C;
+}
+
+QPushButton#GPUStatusBarButton:checked {
+ color: #ff8040;
+}
+
+QPushButton#GPUStatusBarButton:!checked {
+ color: #40dd40;
+}
+
QPushButton#buttonRefreshDevices {
min-width: 23px;
min-height: 23px;
diff --git a/dist/qt_themes/qdarkstyle_midnight_blue/style.qss b/dist/qt_themes/qdarkstyle_midnight_blue/style.qss
index 51bec2fd7..032d05ec6 100644
--- a/dist/qt_themes/qdarkstyle_midnight_blue/style.qss
+++ b/dist/qt_themes/qdarkstyle_midnight_blue/style.qss
@@ -2186,6 +2186,27 @@ QPushButton#RendererStatusBarButton:!checked {
color: #00ccdd;
}
+QPushButton#GPUStatusBarButton {
+ min-width: 0px;
+ color: #656565;
+ border: 1px solid transparent;
+ background-color: transparent;
+ padding: 0px 3px 0px 3px;
+ text-align: center;
+}
+
+QPushButton#GPUStatusBarButton:hover {
+ border: 1px solid #76797C;
+}
+
+QPushButton#GPUStatusBarButton:checked {
+ color: #ff8040;
+}
+
+QPushButton#GPUStatusBarButton:!checked {
+ color: #40dd40;
+}
+
QPushButton#buttonRefreshDevices {
min-width: 19px;
min-height: 19px;
diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt
index d1d1436da..4b8d35548 100644
--- a/externals/CMakeLists.txt
+++ b/externals/CMakeLists.txt
@@ -51,7 +51,7 @@ if (NOT LIBUSB_FOUND OR YUZU_USE_BUNDLED_LIBUSB)
endif()
# SDL2
-if (NOT SDL2_FOUND AND ENABLE_SDL2)
+if (YUZU_USE_EXTERNAL_SDL2)
if (NOT WIN32)
# Yuzu itself needs: Events Joystick Haptic Sensor Timers Audio
# Yuzu-cmd also needs: Video (depends on Loadso/Dlopen)
@@ -115,7 +115,7 @@ if (ENABLE_WEB_SERVICE)
# httplib
add_library(httplib INTERFACE)
- target_include_directories(httplib INTERFACE ./httplib)
+ target_include_directories(httplib INTERFACE ./cpp-httplib)
target_compile_definitions(httplib INTERFACE -DCPPHTTPLIB_OPENSSL_SUPPORT)
target_link_libraries(httplib INTERFACE ${OPENSSL_LIBRARIES})
if (WIN32)
diff --git a/externals/Vulkan-Headers b/externals/Vulkan-Headers
-Subproject 8188e3fbbc105591064093440f88081fb957d4f
+Subproject 07c4a37bcf41ea50aef6e98236abdfe8089fb4c
diff --git a/externals/cpp-httplib b/externals/cpp-httplib
new file mode 160000
+Subproject 9648f950f5a8a41d18833cf4a85f5821b1bcac5
diff --git a/externals/dynarmic b/externals/dynarmic
-Subproject 0c12614d1a7a72d778609920dde96a4c63074ec
+Subproject 7946868af49d403fe54c92d2d60ef986513d1fe
diff --git a/externals/httplib/README.md b/externals/httplib/README.md
deleted file mode 100644
index 1940e446c..000000000
--- a/externals/httplib/README.md
+++ /dev/null
@@ -1,15 +0,0 @@
-From https://github.com/yhirose/cpp-httplib/tree/ff5677ad197947177c158fe857caff4f0e242045 with https://github.com/yhirose/cpp-httplib/pull/701
-
-MIT License
-
-===
-
-cpp-httplib
-
-A C++11 header-only HTTP library.
-
-It's extremely easy to setup. Just include httplib.h file in your code!
-
-Inspired by Sinatra and express.
-
-© 2017 Yuji Hirose
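The header removed below declares the client API that the code base continues to use through the new cpp-httplib submodule. A minimal usage sketch assembled only from declarations visible in this diff (`httplib::Client`, `Result`, `Response`); the host, port, and timeout are placeholder values:

    #include <httplib.h>

    int main() {
        // Plain-HTTP client, using the (host, port) constructor declared below.
        httplib::Client cli("example.com", 80);
        cli.set_connection_timeout(5);  // seconds; the usec argument defaults to 0

        auto res = cli.Get("/");        // returns an httplib::Result
        if (res && res->status == 200) {
            return 0;                   // res->body holds the response payload
        }
        return 1;                       // res.error() distinguishes the failure kinds
    }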
diff --git a/externals/httplib/httplib.h b/externals/httplib/httplib.h
deleted file mode 100644
index 8982054e2..000000000
--- a/externals/httplib/httplib.h
+++ /dev/null
@@ -1,6714 +0,0 @@
-//
-// httplib.h
-//
-// Copyright (c) 2020 Yuji Hirose. All rights reserved.
-// MIT License
-//
-
-#ifndef CPPHTTPLIB_HTTPLIB_H
-#define CPPHTTPLIB_HTTPLIB_H
-
-/*
- * Configuration
- */
-
-#ifndef CPPHTTPLIB_KEEPALIVE_TIMEOUT_SECOND
-#define CPPHTTPLIB_KEEPALIVE_TIMEOUT_SECOND 5
-#endif
-
-#ifndef CPPHTTPLIB_KEEPALIVE_MAX_COUNT
-#define CPPHTTPLIB_KEEPALIVE_MAX_COUNT 5
-#endif
-
-#ifndef CPPHTTPLIB_CONNECTION_TIMEOUT_SECOND
-#define CPPHTTPLIB_CONNECTION_TIMEOUT_SECOND 300
-#endif
-
-#ifndef CPPHTTPLIB_CONNECTION_TIMEOUT_USECOND
-#define CPPHTTPLIB_CONNECTION_TIMEOUT_USECOND 0
-#endif
-
-#ifndef CPPHTTPLIB_READ_TIMEOUT_SECOND
-#define CPPHTTPLIB_READ_TIMEOUT_SECOND 5
-#endif
-
-#ifndef CPPHTTPLIB_READ_TIMEOUT_USECOND
-#define CPPHTTPLIB_READ_TIMEOUT_USECOND 0
-#endif
-
-#ifndef CPPHTTPLIB_WRITE_TIMEOUT_SECOND
-#define CPPHTTPLIB_WRITE_TIMEOUT_SECOND 5
-#endif
-
-#ifndef CPPHTTPLIB_WRITE_TIMEOUT_USECOND
-#define CPPHTTPLIB_WRITE_TIMEOUT_USECOND 0
-#endif
-
-#ifndef CPPHTTPLIB_IDLE_INTERVAL_SECOND
-#define CPPHTTPLIB_IDLE_INTERVAL_SECOND 0
-#endif
-
-#ifndef CPPHTTPLIB_IDLE_INTERVAL_USECOND
-#ifdef _WIN32
-#define CPPHTTPLIB_IDLE_INTERVAL_USECOND 10000
-#else
-#define CPPHTTPLIB_IDLE_INTERVAL_USECOND 0
-#endif
-#endif
-
-#ifndef CPPHTTPLIB_REQUEST_URI_MAX_LENGTH
-#define CPPHTTPLIB_REQUEST_URI_MAX_LENGTH 8192
-#endif
-
-#ifndef CPPHTTPLIB_REDIRECT_MAX_COUNT
-#define CPPHTTPLIB_REDIRECT_MAX_COUNT 20
-#endif
-
-#ifndef CPPHTTPLIB_PAYLOAD_MAX_LENGTH
-#define CPPHTTPLIB_PAYLOAD_MAX_LENGTH ((std::numeric_limits<size_t>::max)())
-#endif
-
-#ifndef CPPHTTPLIB_TCP_NODELAY
-#define CPPHTTPLIB_TCP_NODELAY false
-#endif
-
-#ifndef CPPHTTPLIB_RECV_BUFSIZ
-#define CPPHTTPLIB_RECV_BUFSIZ size_t(4096u)
-#endif
-
-#ifndef CPPHTTPLIB_COMPRESSION_BUFSIZ
-#define CPPHTTPLIB_COMPRESSION_BUFSIZ size_t(16384u)
-#endif
-
-#ifndef CPPHTTPLIB_THREAD_POOL_COUNT
-#define CPPHTTPLIB_THREAD_POOL_COUNT \
- ((std::max)(8u, std::thread::hardware_concurrency() > 0 \
- ? std::thread::hardware_concurrency() - 1 \
- : 0))
-#endif
-
-/*
- * Headers
- */
-
-#ifdef _WIN32
-#ifndef _CRT_SECURE_NO_WARNINGS
-#define _CRT_SECURE_NO_WARNINGS
-#endif //_CRT_SECURE_NO_WARNINGS
-
-#ifndef _CRT_NONSTDC_NO_DEPRECATE
-#define _CRT_NONSTDC_NO_DEPRECATE
-#endif //_CRT_NONSTDC_NO_DEPRECATE
-
-#if defined(_MSC_VER)
-#ifdef _WIN64
-using ssize_t = __int64;
-#else
-using ssize_t = int;
-#endif
-
-#if _MSC_VER < 1900
-#define snprintf _snprintf_s
-#endif
-#endif // _MSC_VER
-
-#ifndef S_ISREG
-#define S_ISREG(m) (((m)&S_IFREG) == S_IFREG)
-#endif // S_ISREG
-
-#ifndef S_ISDIR
-#define S_ISDIR(m) (((m)&S_IFDIR) == S_IFDIR)
-#endif // S_ISDIR
-
-#ifndef NOMINMAX
-#define NOMINMAX
-#endif // NOMINMAX
-
-#include <io.h>
-#include <winsock2.h>
-
-#include <wincrypt.h>
-#include <ws2tcpip.h>
-
-#ifndef WSA_FLAG_NO_HANDLE_INHERIT
-#define WSA_FLAG_NO_HANDLE_INHERIT 0x80
-#endif
-
-#ifdef _MSC_VER
-#pragma comment(lib, "ws2_32.lib")
-#pragma comment(lib, "crypt32.lib")
-#pragma comment(lib, "cryptui.lib")
-#endif
-
-#ifndef strcasecmp
-#define strcasecmp _stricmp
-#endif // strcasecmp
-
-using socket_t = SOCKET;
-#ifdef CPPHTTPLIB_USE_POLL
-#define poll(fds, nfds, timeout) WSAPoll(fds, nfds, timeout)
-#endif
-
-#else // not _WIN32
-
-#include <arpa/inet.h>
-#include <cstring>
-#include <ifaddrs.h>
-#include <netdb.h>
-#include <netinet/in.h>
-#ifdef __linux__
-#include <resolv.h>
-#endif
-#include <netinet/tcp.h>
-#ifdef CPPHTTPLIB_USE_POLL
-#include <poll.h>
-#endif
-#include <csignal>
-#include <pthread.h>
-#include <sys/select.h>
-#include <sys/socket.h>
-#include <unistd.h>
-
-using socket_t = int;
-#define INVALID_SOCKET (-1)
-#endif //_WIN32
-
-#include <algorithm>
-#include <array>
-#include <atomic>
-#include <cassert>
-#include <cctype>
-#include <climits>
-#include <condition_variable>
-#include <errno.h>
-#include <fcntl.h>
-#include <fstream>
-#include <functional>
-#include <iostream>
-#include <list>
-#include <map>
-#include <memory>
-#include <mutex>
-#include <random>
-#include <regex>
-#include <sstream>
-#include <string>
-#include <sys/stat.h>
-#include <thread>
-
-#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
-#include <openssl/err.h>
-#include <openssl/md5.h>
-#include <openssl/ssl.h>
-#include <openssl/x509v3.h>
-
-#if defined(_WIN32) && defined(OPENSSL_USE_APPLINK)
-#include <openssl/applink.c>
-#endif
-
-#include <iomanip>
-#include <iostream>
-#include <sstream>
-
-#if OPENSSL_VERSION_NUMBER < 0x1010100fL
-#error Sorry, OpenSSL versions prior to 1.1.1 are not supported
-#endif
-
-#if OPENSSL_VERSION_NUMBER < 0x10100000L
-#include <openssl/crypto.h>
-inline const unsigned char *ASN1_STRING_get0_data(const ASN1_STRING *asn1) {
- return M_ASN1_STRING_data(asn1);
-}
-#endif
-#endif
-
-#ifdef CPPHTTPLIB_ZLIB_SUPPORT
-#include <zlib.h>
-#endif
-
-#ifdef CPPHTTPLIB_BROTLI_SUPPORT
-#include <brotli/decode.h>
-#include <brotli/encode.h>
-#endif
-
-/*
- * Declaration
- */
-namespace httplib {
-
-namespace detail {
-
-/*
- * Backport std::make_unique from C++14.
- *
- * NOTE: This code came up with the following stackoverflow post:
- * https://stackoverflow.com/questions/10149840/c-arrays-and-make-unique
- *
- */
-
-template <class T, class... Args>
-typename std::enable_if<!std::is_array<T>::value, std::unique_ptr<T>>::type
-make_unique(Args &&... args) {
- return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
-}
-
-template <class T>
-typename std::enable_if<std::is_array<T>::value, std::unique_ptr<T>>::type
-make_unique(std::size_t n) {
- typedef typename std::remove_extent<T>::type RT;
- return std::unique_ptr<T>(new RT[n]);
-}
-
-struct ci {
- bool operator()(const std::string &s1, const std::string &s2) const {
- return std::lexicographical_compare(
- s1.begin(), s1.end(), s2.begin(), s2.end(),
- [](char c1, char c2) { return ::tolower(c1) < ::tolower(c2); });
- }
-};
-
-} // namespace detail
-
-using Headers = std::multimap<std::string, std::string, detail::ci>;
-
-using Params = std::multimap<std::string, std::string>;
-using Match = std::smatch;
-
-using Progress = std::function<bool(uint64_t current, uint64_t total)>;
-
-struct Response;
-using ResponseHandler = std::function<bool(const Response &response)>;
-
-struct MultipartFormData {
- std::string name;
- std::string content;
- std::string filename;
- std::string content_type;
-};
-using MultipartFormDataItems = std::vector<MultipartFormData>;
-using MultipartFormDataMap = std::multimap<std::string, MultipartFormData>;
-
-class DataSink {
-public:
- DataSink() : os(&sb_), sb_(*this) {}
-
- DataSink(const DataSink &) = delete;
- DataSink &operator=(const DataSink &) = delete;
- DataSink(DataSink &&) = delete;
- DataSink &operator=(DataSink &&) = delete;
-
- std::function<void(const char *data, size_t data_len)> write;
- std::function<void()> done;
- std::function<bool()> is_writable;
- std::ostream os;
-
-private:
- class data_sink_streambuf : public std::streambuf {
- public:
- explicit data_sink_streambuf(DataSink &sink) : sink_(sink) {}
-
- protected:
- std::streamsize xsputn(const char *s, std::streamsize n) {
- sink_.write(s, static_cast<size_t>(n));
- return n;
- }
-
- private:
- DataSink &sink_;
- };
-
- data_sink_streambuf sb_;
-};
-
-using ContentProvider =
- std::function<bool(size_t offset, size_t length, DataSink &sink)>;
-
-using ContentProviderWithoutLength =
- std::function<bool(size_t offset, DataSink &sink)>;
-
-using ContentReceiverWithProgress =
- std::function<bool(const char *data, size_t data_length, uint64_t offset,
- uint64_t total_length)>;
-
-using ContentReceiver =
- std::function<bool(const char *data, size_t data_length)>;
-
-using MultipartContentHeader =
- std::function<bool(const MultipartFormData &file)>;
-
-class ContentReader {
-public:
- using Reader = std::function<bool(ContentReceiver receiver)>;
- using MultipartReader = std::function<bool(MultipartContentHeader header,
- ContentReceiver receiver)>;
-
- ContentReader(Reader reader, MultipartReader multipart_reader)
- : reader_(std::move(reader)),
- multipart_reader_(std::move(multipart_reader)) {}
-
- bool operator()(MultipartContentHeader header,
- ContentReceiver receiver) const {
- return multipart_reader_(std::move(header), std::move(receiver));
- }
-
- bool operator()(ContentReceiver receiver) const {
- return reader_(std::move(receiver));
- }
-
- Reader reader_;
- MultipartReader multipart_reader_;
-};
-
-using Range = std::pair<ssize_t, ssize_t>;
-using Ranges = std::vector<Range>;
-
-struct Request {
- std::string method;
- std::string path;
- Headers headers;
- std::string body;
-
- std::string remote_addr;
- int remote_port = -1;
-
- // for server
- std::string version;
- std::string target;
- Params params;
- MultipartFormDataMap files;
- Ranges ranges;
- Match matches;
-
- // for client
- size_t redirect_count = CPPHTTPLIB_REDIRECT_MAX_COUNT;
- ResponseHandler response_handler;
- ContentReceiverWithProgress content_receiver;
- size_t content_length = 0;
- ContentProvider content_provider;
- Progress progress;
-
-#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
- const SSL *ssl;
-#endif
-
- bool has_header(const char *key) const;
- std::string get_header_value(const char *key, size_t id = 0) const;
- template <typename T>
- T get_header_value(const char *key, size_t id = 0) const;
- size_t get_header_value_count(const char *key) const;
- void set_header(const char *key, const char *val);
- void set_header(const char *key, const std::string &val);
-
- bool has_param(const char *key) const;
- std::string get_param_value(const char *key, size_t id = 0) const;
- size_t get_param_value_count(const char *key) const;
-
- bool is_multipart_form_data() const;
-
- bool has_file(const char *key) const;
- MultipartFormData get_file_value(const char *key) const;
-
- // private members...
- size_t authorization_count_ = 0;
-};
-
-struct Response {
- std::string version;
- int status = -1;
- std::string reason;
- Headers headers;
- std::string body;
-
- bool has_header(const char *key) const;
- std::string get_header_value(const char *key, size_t id = 0) const;
- template <typename T>
- T get_header_value(const char *key, size_t id = 0) const;
- size_t get_header_value_count(const char *key) const;
- void set_header(const char *key, const char *val);
- void set_header(const char *key, const std::string &val);
-
- void set_redirect(const char *url, int status = 302);
- void set_redirect(const std::string &url, int status = 302);
- void set_content(const char *s, size_t n, const char *content_type);
- void set_content(std::string s, const char *content_type);
-
- void set_content_provider(
- size_t length, const char *content_type, ContentProvider provider,
- const std::function<void()> &resource_releaser = nullptr);
-
- void set_content_provider(
- const char *content_type, ContentProviderWithoutLength provider,
- const std::function<void()> &resource_releaser = nullptr);
-
- void set_chunked_content_provider(
- const char *content_type, ContentProviderWithoutLength provider,
- const std::function<void()> &resource_releaser = nullptr);
-
- Response() = default;
- Response(const Response &) = default;
- Response &operator=(const Response &) = default;
- Response(Response &&) = default;
- Response &operator=(Response &&) = default;
- ~Response() {
- if (content_provider_resource_releaser_) {
- content_provider_resource_releaser_();
- }
- }
-
- // private members...
- size_t content_length_ = 0;
- ContentProvider content_provider_;
- std::function<void()> content_provider_resource_releaser_;
- bool is_chunked_content_provider = false;
-};
-
-class Stream {
-public:
- virtual ~Stream() = default;
-
- virtual bool is_readable() const = 0;
- virtual bool is_writable() const = 0;
-
- virtual ssize_t read(char *ptr, size_t size) = 0;
- virtual ssize_t write(const char *ptr, size_t size) = 0;
- virtual void get_remote_ip_and_port(std::string &ip, int &port) const = 0;
-
- template <typename... Args>
- ssize_t write_format(const char *fmt, const Args &... args);
- ssize_t write(const char *ptr);
- ssize_t write(const std::string &s);
-};
-
-class TaskQueue {
-public:
- TaskQueue() = default;
- virtual ~TaskQueue() = default;
-
- virtual void enqueue(std::function<void()> fn) = 0;
- virtual void shutdown() = 0;
-
- virtual void on_idle(){};
-};
-
-class ThreadPool : public TaskQueue {
-public:
- explicit ThreadPool(size_t n) : shutdown_(false) {
- while (n) {
- threads_.emplace_back(worker(*this));
- n--;
- }
- }
-
- ThreadPool(const ThreadPool &) = delete;
- ~ThreadPool() override = default;
-
- void enqueue(std::function<void()> fn) override {
- std::unique_lock<std::mutex> lock(mutex_);
- jobs_.push_back(std::move(fn));
- cond_.notify_one();
- }
-
- void shutdown() override {
- // Stop all worker threads...
- {
- std::unique_lock<std::mutex> lock(mutex_);
- shutdown_ = true;
- }
-
- cond_.notify_all();
-
- // Join...
- for (auto &t : threads_) {
- t.join();
- }
- }
-
-private:
- struct worker {
- explicit worker(ThreadPool &pool) : pool_(pool) {}
-
- void operator()() {
- for (;;) {
- std::function<void()> fn;
- {
- std::unique_lock<std::mutex> lock(pool_.mutex_);
-
- pool_.cond_.wait(
- lock, [&] { return !pool_.jobs_.empty() || pool_.shutdown_; });
-
- if (pool_.shutdown_ && pool_.jobs_.empty()) { break; }
-
- fn = pool_.jobs_.front();
- pool_.jobs_.pop_front();
- }
-
- assert(true == static_cast<bool>(fn));
- fn();
- }
- }
-
- ThreadPool &pool_;
- };
- friend struct worker;
-
- std::vector<std::thread> threads_;
- std::list<std::function<void()>> jobs_;
-
- bool shutdown_;
-
- std::condition_variable cond_;
- std::mutex mutex_;
-};
-
-using Logger = std::function<void(const Request &, const Response &)>;
-
-using SocketOptions = std::function<void(socket_t sock)>;
-
-inline void default_socket_options(socket_t sock) {
- int yes = 1;
-#ifdef _WIN32
- setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, reinterpret_cast<char *>(&yes),
- sizeof(yes));
- setsockopt(sock, SOL_SOCKET, SO_EXCLUSIVEADDRUSE,
- reinterpret_cast<char *>(&yes), sizeof(yes));
-#else
-#ifdef SO_REUSEPORT
- setsockopt(sock, SOL_SOCKET, SO_REUSEPORT, reinterpret_cast<void *>(&yes),
- sizeof(yes));
-#else
- setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, reinterpret_cast<void *>(&yes),
- sizeof(yes));
-#endif
-#endif
-}
-
-class Server {
-public:
- using Handler = std::function<void(const Request &, Response &)>;
- using HandlerWithContentReader = std::function<void(
- const Request &, Response &, const ContentReader &content_reader)>;
- using Expect100ContinueHandler =
- std::function<int(const Request &, Response &)>;
-
- Server();
-
- virtual ~Server();
-
- virtual bool is_valid() const;
-
- Server &Get(const char *pattern, Handler handler);
- Server &Post(const char *pattern, Handler handler);
- Server &Post(const char *pattern, HandlerWithContentReader handler);
- Server &Put(const char *pattern, Handler handler);
- Server &Put(const char *pattern, HandlerWithContentReader handler);
- Server &Patch(const char *pattern, Handler handler);
- Server &Patch(const char *pattern, HandlerWithContentReader handler);
- Server &Delete(const char *pattern, Handler handler);
- Server &Delete(const char *pattern, HandlerWithContentReader handler);
- Server &Options(const char *pattern, Handler handler);
-
- bool set_base_dir(const char *dir, const char *mount_point = nullptr);
- bool set_mount_point(const char *mount_point, const char *dir,
- Headers headers = Headers());
- bool remove_mount_point(const char *mount_point);
- void set_file_extension_and_mimetype_mapping(const char *ext,
- const char *mime);
- void set_file_request_handler(Handler handler);
-
- void set_error_handler(Handler handler);
- void set_expect_100_continue_handler(Expect100ContinueHandler handler);
- void set_logger(Logger logger);
-
- void set_tcp_nodelay(bool on);
- void set_socket_options(SocketOptions socket_options);
-
- void set_keep_alive_max_count(size_t count);
- void set_keep_alive_timeout(time_t sec);
- void set_read_timeout(time_t sec, time_t usec = 0);
- void set_write_timeout(time_t sec, time_t usec = 0);
- void set_idle_interval(time_t sec, time_t usec = 0);
-
- void set_payload_max_length(size_t length);
-
- bool bind_to_port(const char *host, int port, int socket_flags = 0);
- int bind_to_any_port(const char *host, int socket_flags = 0);
- bool listen_after_bind();
-
- bool listen(const char *host, int port, int socket_flags = 0);
-
- bool is_running() const;
- void stop();
-
- std::function<TaskQueue *(void)> new_task_queue;
-
-protected:
- bool process_request(Stream &strm, bool close_connection,
- bool &connection_closed,
- const std::function<void(Request &)> &setup_request);
-
- std::atomic<socket_t> svr_sock_;
- size_t keep_alive_max_count_ = CPPHTTPLIB_KEEPALIVE_MAX_COUNT;
- time_t keep_alive_timeout_sec_ = CPPHTTPLIB_KEEPALIVE_TIMEOUT_SECOND;
- time_t read_timeout_sec_ = CPPHTTPLIB_READ_TIMEOUT_SECOND;
- time_t read_timeout_usec_ = CPPHTTPLIB_READ_TIMEOUT_USECOND;
- time_t write_timeout_sec_ = CPPHTTPLIB_WRITE_TIMEOUT_SECOND;
- time_t write_timeout_usec_ = CPPHTTPLIB_WRITE_TIMEOUT_USECOND;
- time_t idle_interval_sec_ = CPPHTTPLIB_IDLE_INTERVAL_SECOND;
- time_t idle_interval_usec_ = CPPHTTPLIB_IDLE_INTERVAL_USECOND;
- size_t payload_max_length_ = CPPHTTPLIB_PAYLOAD_MAX_LENGTH;
-
-private:
- using Handlers = std::vector<std::pair<std::regex, Handler>>;
- using HandlersForContentReader =
- std::vector<std::pair<std::regex, HandlerWithContentReader>>;
-
- socket_t create_server_socket(const char *host, int port, int socket_flags,
- SocketOptions socket_options) const;
- int bind_internal(const char *host, int port, int socket_flags);
- bool listen_internal();
-
- bool routing(Request &req, Response &res, Stream &strm);
- bool handle_file_request(Request &req, Response &res, bool head = false);
- bool dispatch_request(Request &req, Response &res, const Handlers &handlers);
- bool
- dispatch_request_for_content_reader(Request &req, Response &res,
- ContentReader content_reader,
- const HandlersForContentReader &handlers);
-
- bool parse_request_line(const char *s, Request &req);
- bool write_response(Stream &strm, bool close_connection, const Request &req,
- Response &res);
- bool write_content_with_provider(Stream &strm, const Request &req,
- Response &res, const std::string &boundary,
- const std::string &content_type);
- bool read_content(Stream &strm, Request &req, Response &res);
- bool
- read_content_with_content_receiver(Stream &strm, Request &req, Response &res,
- ContentReceiver receiver,
- MultipartContentHeader multipart_header,
- ContentReceiver multipart_receiver);
- bool read_content_core(Stream &strm, Request &req, Response &res,
- ContentReceiver receiver,
- MultipartContentHeader mulitpart_header,
- ContentReceiver multipart_receiver);
-
- virtual bool process_and_close_socket(socket_t sock);
-
- struct MountPointEntry {
- std::string mount_point;
- std::string base_dir;
- Headers headers;
- };
- std::vector<MountPointEntry> base_dirs_;
-
- std::atomic<bool> is_running_;
- std::map<std::string, std::string> file_extension_and_mimetype_map_;
- Handler file_request_handler_;
- Handlers get_handlers_;
- Handlers post_handlers_;
- HandlersForContentReader post_handlers_for_content_reader_;
- Handlers put_handlers_;
- HandlersForContentReader put_handlers_for_content_reader_;
- Handlers patch_handlers_;
- HandlersForContentReader patch_handlers_for_content_reader_;
- Handlers delete_handlers_;
- HandlersForContentReader delete_handlers_for_content_reader_;
- Handlers options_handlers_;
- Handler error_handler_;
- Logger logger_;
- Expect100ContinueHandler expect_100_continue_handler_;
-
- bool tcp_nodelay_ = CPPHTTPLIB_TCP_NODELAY;
- SocketOptions socket_options_ = default_socket_options;
-};
-
-enum Error {
- Success = 0,
- Unknown,
- Connection,
- BindIPAddress,
- Read,
- Write,
- ExceedRedirectCount,
- Canceled,
- SSLConnection,
- SSLLoadingCerts,
- SSLServerVerification,
- UnsupportedMultipartBoundaryChars
-};
-
-class Result {
-public:
- Result(std::unique_ptr<Response> res, Error err)
- : res_(std::move(res)), err_(err) {}
- operator bool() const { return res_ != nullptr; }
- bool operator==(std::nullptr_t) const { return res_ == nullptr; }
- bool operator!=(std::nullptr_t) const { return res_ != nullptr; }
- const Response &value() const { return *res_; }
- Response &value() { return *res_; }
- const Response &operator*() const { return *res_; }
- Response &operator*() { return *res_; }
- const Response *operator->() const { return res_.get(); }
- Response *operator->() { return res_.get(); }
- Error error() const { return err_; }
-
-private:
- std::unique_ptr<Response> res_;
- Error err_;
-};
-
-class ClientImpl {
-public:
- explicit ClientImpl(const std::string &host);
-
- explicit ClientImpl(const std::string &host, int port);
-
- explicit ClientImpl(const std::string &host, int port,
- const std::string &client_cert_path,
- const std::string &client_key_path);
-
- virtual ~ClientImpl();
-
- virtual bool is_valid() const;
-
- Result Get(const char *path);
- Result Get(const char *path, const Headers &headers);
- Result Get(const char *path, Progress progress);
- Result Get(const char *path, const Headers &headers, Progress progress);
- Result Get(const char *path, ContentReceiver content_receiver);
- Result Get(const char *path, const Headers &headers,
- ContentReceiver content_receiver);
- Result Get(const char *path, ContentReceiver content_receiver,
- Progress progress);
- Result Get(const char *path, const Headers &headers,
- ContentReceiver content_receiver, Progress progress);
- Result Get(const char *path, ResponseHandler response_handler,
- ContentReceiver content_receiver);
- Result Get(const char *path, const Headers &headers,
- ResponseHandler response_handler,
- ContentReceiver content_receiver);
- Result Get(const char *path, ResponseHandler response_handler,
- ContentReceiver content_receiver, Progress progress);
- Result Get(const char *path, const Headers &headers,
- ResponseHandler response_handler, ContentReceiver content_receiver,
- Progress progress);
-
- Result Head(const char *path);
- Result Head(const char *path, const Headers &headers);
-
- Result Post(const char *path);
- Result Post(const char *path, const std::string &body,
- const char *content_type);
- Result Post(const char *path, const Headers &headers, const std::string &body,
- const char *content_type);
- Result Post(const char *path, size_t content_length,
- ContentProvider content_provider, const char *content_type);
- Result Post(const char *path, const Headers &headers, size_t content_length,
- ContentProvider content_provider, const char *content_type);
- Result Post(const char *path, const Params &params);
- Result Post(const char *path, const Headers &headers, const Params &params);
- Result Post(const char *path, const MultipartFormDataItems &items);
- Result Post(const char *path, const Headers &headers,
- const MultipartFormDataItems &items);
- Result Post(const char *path, const Headers &headers,
- const MultipartFormDataItems &items, const std::string &boundary);
-
- Result Put(const char *path);
- Result Put(const char *path, const std::string &body,
- const char *content_type);
- Result Put(const char *path, const Headers &headers, const std::string &body,
- const char *content_type);
- Result Put(const char *path, size_t content_length,
- ContentProvider content_provider, const char *content_type);
- Result Put(const char *path, const Headers &headers, size_t content_length,
- ContentProvider content_provider, const char *content_type);
- Result Put(const char *path, const Params &params);
- Result Put(const char *path, const Headers &headers, const Params &params);
-
- Result Patch(const char *path, const std::string &body,
- const char *content_type);
- Result Patch(const char *path, const Headers &headers,
- const std::string &body, const char *content_type);
- Result Patch(const char *path, size_t content_length,
- ContentProvider content_provider, const char *content_type);
- Result Patch(const char *path, const Headers &headers, size_t content_length,
- ContentProvider content_provider, const char *content_type);
-
- Result Delete(const char *path);
- Result Delete(const char *path, const std::string &body,
- const char *content_type);
- Result Delete(const char *path, const Headers &headers);
- Result Delete(const char *path, const Headers &headers,
- const std::string &body, const char *content_type);
-
- Result Options(const char *path);
- Result Options(const char *path, const Headers &headers);
-
- bool send(const Request &req, Response &res);
-
- size_t is_socket_open() const;
-
- void stop();
-
- void set_default_headers(Headers headers);
-
- void set_tcp_nodelay(bool on);
- void set_socket_options(SocketOptions socket_options);
-
- void set_connection_timeout(time_t sec, time_t usec = 0);
- void set_read_timeout(time_t sec, time_t usec = 0);
- void set_write_timeout(time_t sec, time_t usec = 0);
-
- void set_basic_auth(const char *username, const char *password);
- void set_bearer_token_auth(const char *token);
-#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
- void set_digest_auth(const char *username, const char *password);
-#endif
-
- void set_keep_alive(bool on);
- void set_follow_location(bool on);
-
- void set_compress(bool on);
-
- void set_decompress(bool on);
-
- void set_interface(const char *intf);
-
- void set_proxy(const char *host, int port);
- void set_proxy_basic_auth(const char *username, const char *password);
- void set_proxy_bearer_token_auth(const char *token);
-#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
- void set_proxy_digest_auth(const char *username, const char *password);
-#endif
-
-#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
- void enable_server_certificate_verification(bool enabled);
-#endif
-
- void set_logger(Logger logger);
-
-protected:
- struct Socket {
- socket_t sock = INVALID_SOCKET;
-#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
- SSL *ssl = nullptr;
-#endif
-
- bool is_open() const { return sock != INVALID_SOCKET; }
- };
-
- virtual bool create_and_connect_socket(Socket &socket);
-
- // All of:
- // shutdown_ssl
- // shutdown_socket
- // close_socket
- // should ONLY be called when socket_mutex_ is locked.
- // Also, shutdown_ssl and close_socket should also NOT be called concurrently
- // with a DIFFERENT thread sending requests using that socket.
- virtual void shutdown_ssl(Socket &socket, bool shutdown_gracefully);
- void shutdown_socket(Socket &socket);
- void close_socket(Socket &socket);
-
- // Similar to shutdown_ssl and close_socket, this should NOT be called
- // concurrently with a DIFFERENT thread sending requests from the socket
- void lock_socket_and_shutdown_and_close();
-
- bool process_request(Stream &strm, const Request &req, Response &res,
- bool close_connection);
-
- Error get_last_error() const;
-
- void copy_settings(const ClientImpl &rhs);
-
- // Error state
- mutable std::atomic<Error> error_;
-
- // Socket endoint information
- const std::string host_;
- const int port_;
- const std::string host_and_port_;
-
- // Current open socket
- Socket socket_;
- mutable std::mutex socket_mutex_;
- std::recursive_mutex request_mutex_;
-
- // These are all protected under socket_mutex
- int socket_requests_in_flight_ = 0;
- std::thread::id socket_requests_are_from_thread_ = std::thread::id();
- bool socket_should_be_closed_when_request_is_done_ = false;
-
- // Default headers
- Headers default_headers_;
-
- // Settings
- std::string client_cert_path_;
- std::string client_key_path_;
-
- time_t connection_timeout_sec_ = CPPHTTPLIB_CONNECTION_TIMEOUT_SECOND;
- time_t connection_timeout_usec_ = CPPHTTPLIB_CONNECTION_TIMEOUT_USECOND;
- time_t read_timeout_sec_ = CPPHTTPLIB_READ_TIMEOUT_SECOND;
- time_t read_timeout_usec_ = CPPHTTPLIB_READ_TIMEOUT_USECOND;
- time_t write_timeout_sec_ = CPPHTTPLIB_WRITE_TIMEOUT_SECOND;
- time_t write_timeout_usec_ = CPPHTTPLIB_WRITE_TIMEOUT_USECOND;
-
- std::string basic_auth_username_;
- std::string basic_auth_password_;
- std::string bearer_token_auth_token_;
-#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
- std::string digest_auth_username_;
- std::string digest_auth_password_;
-#endif
-
- bool keep_alive_ = false;
- bool follow_location_ = false;
-
- bool tcp_nodelay_ = CPPHTTPLIB_TCP_NODELAY;
- SocketOptions socket_options_ = nullptr;
-
- bool compress_ = false;
- bool decompress_ = true;
-
- std::string interface_;
-
- std::string proxy_host_;
- int proxy_port_ = -1;
-
- std::string proxy_basic_auth_username_;
- std::string proxy_basic_auth_password_;
- std::string proxy_bearer_token_auth_token_;
-#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
- std::string proxy_digest_auth_username_;
- std::string proxy_digest_auth_password_;
-#endif
-
-#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
- bool server_certificate_verification_ = true;
-#endif
-
- Logger logger_;
-
-private:
- socket_t create_client_socket() const;
- bool read_response_line(Stream &strm, Response &res);
- bool write_request(Stream &strm, const Request &req, bool close_connection);
- bool redirect(const Request &req, Response &res);
- bool handle_request(Stream &strm, const Request &req, Response &res,
- bool close_connection);
- std::unique_ptr<Response> send_with_content_provider(
- const char *method, const char *path, const Headers &headers,
- const std::string &body, size_t content_length,
- ContentProvider content_provider, const char *content_type);
-
- // socket is const because this function is called when socket_mutex_ is not locked
- virtual bool process_socket(const Socket &socket,
- std::function<bool(Stream &strm)> callback);
- virtual bool is_ssl() const;
-};
-
-class Client {
-public:
- // Universal interface
- explicit Client(const char *scheme_host_port);
-
- explicit Client(const char *scheme_host_port,
- const std::string &client_cert_path,
- const std::string &client_key_path);
-
- // HTTP only interface
- explicit Client(const std::string &host, int port);
-
- explicit Client(const std::string &host, int port,
- const std::string &client_cert_path,
- const std::string &client_key_path);
-
- ~Client();
-
- bool is_valid() const;
-
- Result Get(const char *path);
- Result Get(const char *path, const Headers &headers);
- Result Get(const char *path, Progress progress);
- Result Get(const char *path, const Headers &headers, Progress progress);
- Result Get(const char *path, ContentReceiver content_receiver);
- Result Get(const char *path, const Headers &headers,
- ContentReceiver content_receiver);
- Result Get(const char *path, ContentReceiver content_receiver,
- Progress progress);
- Result Get(const char *path, const Headers &headers,
- ContentReceiver content_receiver, Progress progress);
- Result Get(const char *path, ResponseHandler response_handler,
- ContentReceiver content_receiver);
- Result Get(const char *path, const Headers &headers,
- ResponseHandler response_handler,
- ContentReceiver content_receiver);
- Result Get(const char *path, const Headers &headers,
- ResponseHandler response_handler, ContentReceiver content_receiver,
- Progress progress);
- Result Get(const char *path, ResponseHandler response_handler,
- ContentReceiver content_receiver, Progress progress);
-
- Result Head(const char *path);
- Result Head(const char *path, const Headers &headers);
-
- Result Post(const char *path);
- Result Post(const char *path, const std::string &body,
- const char *content_type);
- Result Post(const char *path, const Headers &headers, const std::string &body,
- const char *content_type);
- Result Post(const char *path, size_t content_length,
- ContentProvider content_provider, const char *content_type);
- Result Post(const char *path, const Headers &headers, size_t content_length,
- ContentProvider content_provider, const char *content_type);
- Result Post(const char *path, const Params &params);
- Result Post(const char *path, const Headers &headers, const Params &params);
- Result Post(const char *path, const MultipartFormDataItems &items);
- Result Post(const char *path, const Headers &headers,
- const MultipartFormDataItems &items);
- Result Post(const char *path, const Headers &headers,
- const MultipartFormDataItems &items, const std::string &boundary);
- Result Put(const char *path);
- Result Put(const char *path, const std::string &body,
- const char *content_type);
- Result Put(const char *path, const Headers &headers, const std::string &body,
- const char *content_type);
- Result Put(const char *path, size_t content_length,
- ContentProvider content_provider, const char *content_type);
- Result Put(const char *path, const Headers &headers, size_t content_length,
- ContentProvider content_provider, const char *content_type);
- Result Put(const char *path, const Params &params);
- Result Put(const char *path, const Headers &headers, const Params &params);
- Result Patch(const char *path, const std::string &body,
- const char *content_type);
- Result Patch(const char *path, const Headers &headers,
- const std::string &body, const char *content_type);
- Result Patch(const char *path, size_t content_length,
- ContentProvider content_provider, const char *content_type);
- Result Patch(const char *path, const Headers &headers, size_t content_length,
- ContentProvider content_provider, const char *content_type);
-
- Result Delete(const char *path);
- Result Delete(const char *path, const std::string &body,
- const char *content_type);
- Result Delete(const char *path, const Headers &headers);
- Result Delete(const char *path, const Headers &headers,
- const std::string &body, const char *content_type);
-
- Result Options(const char *path);
- Result Options(const char *path, const Headers &headers);
-
- bool send(const Request &req, Response &res);
-
- size_t is_socket_open() const;
-
- void stop();
-
- void set_default_headers(Headers headers);
-
- void set_tcp_nodelay(bool on);
- void set_socket_options(SocketOptions socket_options);
-
- void set_connection_timeout(time_t sec, time_t usec = 0);
- void set_read_timeout(time_t sec, time_t usec = 0);
- void set_write_timeout(time_t sec, time_t usec = 0);
-
- void set_basic_auth(const char *username, const char *password);
- void set_bearer_token_auth(const char *token);
-#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
- void set_digest_auth(const char *username, const char *password);
-#endif
-
- void set_keep_alive(bool on);
- void set_follow_location(bool on);
-
- void set_compress(bool on);
-
- void set_decompress(bool on);
-
- void set_interface(const char *intf);
-
- void set_proxy(const char *host, int port);
- void set_proxy_basic_auth(const char *username, const char *password);
- void set_proxy_bearer_token_auth(const char *token);
-#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
- void set_proxy_digest_auth(const char *username, const char *password);
-#endif
-
-#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
- void enable_server_certificate_verification(bool enabled);
-#endif
-
- void set_logger(Logger logger);
-
- // SSL
-#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
- void set_ca_cert_path(const char *ca_cert_file_path,
- const char *ca_cert_dir_path = nullptr);
-
- void set_ca_cert_store(X509_STORE *ca_cert_store);
-
- long get_openssl_verify_result() const;
-
- SSL_CTX *ssl_context() const;
-#endif
-
-private:
- std::unique_ptr<ClientImpl> cli_;
-
-#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
- bool is_ssl_ = false;
-#endif
-}; // namespace httplib
-
-#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
-class SSLServer : public Server {
-public:
- SSLServer(const char *cert_path, const char *private_key_path,
- const char *client_ca_cert_file_path = nullptr,
- const char *client_ca_cert_dir_path = nullptr);
-
- SSLServer(X509 *cert, EVP_PKEY *private_key,
- X509_STORE *client_ca_cert_store = nullptr);
-
- ~SSLServer() override;
-
- bool is_valid() const override;
-
-private:
- bool process_and_close_socket(socket_t sock) override;
-
- SSL_CTX *ctx_;
- std::mutex ctx_mutex_;
-};
-
-class SSLClient : public ClientImpl {
-public:
- explicit SSLClient(const std::string &host);
-
- explicit SSLClient(const std::string &host, int port);
-
- explicit SSLClient(const std::string &host, int port,
- const std::string &client_cert_path,
- const std::string &client_key_path);
-
- explicit SSLClient(const std::string &host, int port, X509 *client_cert,
- EVP_PKEY *client_key);
-
- ~SSLClient() override;
-
- bool is_valid() const override;
-
- void set_ca_cert_path(const char *ca_cert_file_path,
- const char *ca_cert_dir_path = nullptr);
-
- void set_ca_cert_store(X509_STORE *ca_cert_store);
-
- long get_openssl_verify_result() const;
-
- SSL_CTX *ssl_context() const;
-
-private:
- bool create_and_connect_socket(Socket &socket) override;
- void shutdown_ssl(Socket &socket, bool shutdown_gracefully) override;
-
- bool process_socket(const Socket &socket,
- std::function<bool(Stream &strm)> callback) override;
- bool is_ssl() const override;
-
- bool connect_with_proxy(Socket &sock, Response &res, bool &success);
- bool initialize_ssl(Socket &socket);
-
- bool load_certs();
-
- bool verify_host(X509 *server_cert) const;
- bool verify_host_with_subject_alt_name(X509 *server_cert) const;
- bool verify_host_with_common_name(X509 *server_cert) const;
- bool check_host_name(const char *pattern, size_t pattern_len) const;
-
- SSL_CTX *ctx_;
- std::mutex ctx_mutex_;
- std::once_flag initialize_cert_;
-
- std::vector<std::string> host_components_;
-
- std::string ca_cert_file_path_;
- std::string ca_cert_dir_path_;
- long verify_result_ = 0;
-
- friend class ClientImpl;
-};
-#endif
-
-// ----------------------------------------------------------------------------
-
-/*
- * Implementation
- */
-
-namespace detail {
-
-inline bool is_hex(char c, int &v) {
- if (0x20 <= c && isdigit(c)) {
- v = c - '0';
- return true;
- } else if ('A' <= c && c <= 'F') {
- v = c - 'A' + 10;
- return true;
- } else if ('a' <= c && c <= 'f') {
- v = c - 'a' + 10;
- return true;
- }
- return false;
-}
-
-inline bool from_hex_to_i(const std::string &s, size_t i, size_t cnt,
- int &val) {
- if (i >= s.size()) { return false; }
-
- val = 0;
- for (; cnt; i++, cnt--) {
- if (!s[i]) { return false; }
- int v = 0;
- if (is_hex(s[i], v)) {
- val = val * 16 + v;
- } else {
- return false;
- }
- }
- return true;
-}
-
-inline std::string from_i_to_hex(size_t n) {
- const char *charset = "0123456789abcdef";
- std::string ret;
- do {
- ret = charset[n & 15] + ret;
- n >>= 4;
- } while (n > 0);
- return ret;
-}
-
-inline bool start_with(const std::string &a, const std::string &b) {
- if (a.size() < b.size()) { return false; }
- for (size_t i = 0; i < b.size(); i++) {
- if (::tolower(a[i]) != ::tolower(b[i])) { return false; }
- }
- return true;
-}
-
-inline size_t to_utf8(int code, char *buff) {
- if (code < 0x0080) {
- buff[0] = (code & 0x7F);
- return 1;
- } else if (code < 0x0800) {
- buff[0] = static_cast<char>(0xC0 | ((code >> 6) & 0x1F));
- buff[1] = static_cast<char>(0x80 | (code & 0x3F));
- return 2;
- } else if (code < 0xD800) {
- buff[0] = static_cast<char>(0xE0 | ((code >> 12) & 0xF));
- buff[1] = static_cast<char>(0x80 | ((code >> 6) & 0x3F));
- buff[2] = static_cast<char>(0x80 | (code & 0x3F));
- return 3;
- } else if (code < 0xE000) { // D800 - DFFF is invalid...
- return 0;
- } else if (code < 0x10000) {
- buff[0] = static_cast<char>(0xE0 | ((code >> 12) & 0xF));
- buff[1] = static_cast<char>(0x80 | ((code >> 6) & 0x3F));
- buff[2] = static_cast<char>(0x80 | (code & 0x3F));
- return 3;
- } else if (code < 0x110000) {
- buff[0] = static_cast<char>(0xF0 | ((code >> 18) & 0x7));
- buff[1] = static_cast<char>(0x80 | ((code >> 12) & 0x3F));
- buff[2] = static_cast<char>(0x80 | ((code >> 6) & 0x3F));
- buff[3] = static_cast<char>(0x80 | (code & 0x3F));
- return 4;
- }
-
- // NOTREACHED
- return 0;
-}
-
-// NOTE: This code came up with the following stackoverflow post:
-// https://stackoverflow.com/questions/180947/base64-decode-snippet-in-c
-inline std::string base64_encode(const std::string &in) {
- static const auto lookup =
- "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
-
- std::string out;
- out.reserve(in.size());
-
- int val = 0;
- int valb = -6;
-
- for (auto c : in) {
- val = (val << 8) + static_cast<uint8_t>(c);
- valb += 8;
- while (valb >= 0) {
- out.push_back(lookup[(val >> valb) & 0x3F]);
- valb -= 6;
- }
- }
-
- if (valb > -6) { out.push_back(lookup[((val << 8) >> (valb + 8)) & 0x3F]); }
-
- while (out.size() % 4) {
- out.push_back('=');
- }
-
- return out;
-}
-
-inline bool is_file(const std::string &path) {
- struct stat st;
- return stat(path.c_str(), &st) >= 0 && S_ISREG(st.st_mode);
-}
-
-inline bool is_dir(const std::string &path) {
- struct stat st;
- return stat(path.c_str(), &st) >= 0 && S_ISDIR(st.st_mode);
-}
-
-inline bool is_valid_path(const std::string &path) {
- size_t level = 0;
- size_t i = 0;
-
- // Skip slash
- while (i < path.size() && path[i] == '/') {
- i++;
- }
-
- while (i < path.size()) {
- // Read component
- auto beg = i;
- while (i < path.size() && path[i] != '/') {
- i++;
- }
-
- auto len = i - beg;
- assert(len > 0);
-
- if (!path.compare(beg, len, ".")) {
- ;
- } else if (!path.compare(beg, len, "..")) {
- if (level == 0) { return false; }
- level--;
- } else {
- level++;
- }
-
- // Skip slash
- while (i < path.size() && path[i] == '/') {
- i++;
- }
- }
-
- return true;
-}
-
-inline std::string encode_url(const std::string &s) {
- std::string result;
-
- for (size_t i = 0; s[i]; i++) {
- switch (s[i]) {
- case ' ': result += "%20"; break;
- case '+': result += "%2B"; break;
- case '\r': result += "%0D"; break;
- case '\n': result += "%0A"; break;
- case '\'': result += "%27"; break;
- case ',': result += "%2C"; break;
- // case ':': result += "%3A"; break; // ok? probably...
- case ';': result += "%3B"; break;
- default:
- auto c = static_cast<uint8_t>(s[i]);
- if (c >= 0x80) {
- result += '%';
- char hex[4];
- auto len = snprintf(hex, sizeof(hex) - 1, "%02X", c);
- assert(len == 2);
- result.append(hex, static_cast<size_t>(len));
- } else {
- result += s[i];
- }
- break;
- }
- }
-
- return result;
-}
-
-inline std::string decode_url(const std::string &s,
- bool convert_plus_to_space) {
- std::string result;
-
- for (size_t i = 0; i < s.size(); i++) {
- if (s[i] == '%' && i + 1 < s.size()) {
- if (s[i + 1] == 'u') {
- int val = 0;
- if (from_hex_to_i(s, i + 2, 4, val)) {
- // 4 digits Unicode codes
- char buff[4];
- size_t len = to_utf8(val, buff);
- if (len > 0) { result.append(buff, len); }
- i += 5; // 'u0000'
- } else {
- result += s[i];
- }
- } else {
- int val = 0;
- if (from_hex_to_i(s, i + 1, 2, val)) {
- // 2 digits hex codes
- result += static_cast<char>(val);
- i += 2; // '00'
- } else {
- result += s[i];
- }
- }
- } else if (convert_plus_to_space && s[i] == '+') {
- result += ' ';
- } else {
- result += s[i];
- }
- }
-
- return result;
-}
-
-inline void read_file(const std::string &path, std::string &out) {
- std::ifstream fs(path, std::ios_base::binary);
- fs.seekg(0, std::ios_base::end);
- auto size = fs.tellg();
- fs.seekg(0);
- out.resize(static_cast<size_t>(size));
- fs.read(&out[0], static_cast<std::streamsize>(size));
-}
-
-inline std::string file_extension(const std::string &path) {
- std::smatch m;
- static auto re = std::regex("\\.([a-zA-Z0-9]+)$");
- if (std::regex_search(path, m, re)) { return m[1].str(); }
- return std::string();
-}
-
-inline bool is_space_or_tab(char c) { return c == ' ' || c == '\t'; }
-
-inline std::pair<size_t, size_t> trim(const char *b, const char *e, size_t left,
- size_t right) {
- while (b + left < e && is_space_or_tab(b[left])) {
- left++;
- }
- while (right > 0 && is_space_or_tab(b[right - 1])) {
- right--;
- }
- return std::make_pair(left, right);
-}
-
-inline std::string trim_copy(const std::string &s) {
- auto r = trim(s.data(), s.data() + s.size(), 0, s.size());
- return s.substr(r.first, r.second - r.first);
-}
-
-template <class Fn> void split(const char *b, const char *e, char d, Fn fn) {
- size_t i = 0;
- size_t beg = 0;
-
- while (e ? (b + i < e) : (b[i] != '\0')) {
- if (b[i] == d) {
- auto r = trim(b, e, beg, i);
- if (r.first < r.second) { fn(&b[r.first], &b[r.second]); }
- beg = i + 1;
- }
- i++;
- }
-
- if (i) {
- auto r = trim(b, e, beg, i);
- if (r.first < r.second) { fn(&b[r.first], &b[r.second]); }
- }
-}
-
-// NOTE: until the read size reaches `fixed_buffer_size`, use `fixed_buffer`
-// to store data. The call can set memory on stack for performance.
-class stream_line_reader {
-public:
- stream_line_reader(Stream &strm, char *fixed_buffer, size_t fixed_buffer_size)
- : strm_(strm), fixed_buffer_(fixed_buffer),
- fixed_buffer_size_(fixed_buffer_size) {}
-
- const char *ptr() const {
- if (glowable_buffer_.empty()) {
- return fixed_buffer_;
- } else {
- return glowable_buffer_.data();
- }
- }
-
- size_t size() const {
- if (glowable_buffer_.empty()) {
- return fixed_buffer_used_size_;
- } else {
- return glowable_buffer_.size();
- }
- }
-
- bool end_with_crlf() const {
- auto end = ptr() + size();
- return size() >= 2 && end[-2] == '\r' && end[-1] == '\n';
- }
-
- bool getline() {
- fixed_buffer_used_size_ = 0;
- glowable_buffer_.clear();
-
- for (size_t i = 0;; i++) {
- char byte;
- auto n = strm_.read(&byte, 1);
-
- if (n < 0) {
- return false;
- } else if (n == 0) {
- if (i == 0) {
- return false;
- } else {
- break;
- }
- }
-
- append(byte);
-
- if (byte == '\n') { break; }
- }
-
- return true;
- }
-
-private:
- void append(char c) {
- if (fixed_buffer_used_size_ < fixed_buffer_size_ - 1) {
- fixed_buffer_[fixed_buffer_used_size_++] = c;
- fixed_buffer_[fixed_buffer_used_size_] = '\0';
- } else {
- if (glowable_buffer_.empty()) {
- assert(fixed_buffer_[fixed_buffer_used_size_] == '\0');
- glowable_buffer_.assign(fixed_buffer_, fixed_buffer_used_size_);
- }
- glowable_buffer_ += c;
- }
- }
-
- Stream &strm_;
- char *fixed_buffer_;
- const size_t fixed_buffer_size_;
- size_t fixed_buffer_used_size_ = 0;
- std::string glowable_buffer_;
-};
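// Usage sketch for stream_line_reader (added for clarity; `strm` stands for
// any Stream implementation and `handle_line` is a hypothetical callback --
// read_headers() further below drives the reader the same way):
//
//   char buf[2048];
//   stream_line_reader reader(strm, buf, sizeof(buf));
//   while (reader.getline()) {
//     if (reader.end_with_crlf() && reader.size() == 2) { break; } // blank line
//     handle_line(reader.ptr(), reader.size());
//   }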
-
-inline int close_socket(socket_t sock) {
-#ifdef _WIN32
- return closesocket(sock);
-#else
- return close(sock);
-#endif
-}
-
-template <typename T> inline ssize_t handle_EINTR(T fn) {
-  ssize_t res = 0;
- while (true) {
- res = fn();
- if (res < 0 && errno == EINTR) { continue; }
- break;
- }
- return res;
-}
-
-inline ssize_t select_read(socket_t sock, time_t sec, time_t usec) {
-#ifdef CPPHTTPLIB_USE_POLL
- struct pollfd pfd_read;
- pfd_read.fd = sock;
- pfd_read.events = POLLIN;
-
- auto timeout = static_cast<int>(sec * 1000 + usec / 1000);
-
- return handle_EINTR([&]() { return poll(&pfd_read, 1, timeout); });
-#else
- fd_set fds;
- FD_ZERO(&fds);
- FD_SET(sock, &fds);
-
- timeval tv;
- tv.tv_sec = static_cast<long>(sec);
- tv.tv_usec = static_cast<decltype(tv.tv_usec)>(usec);
-
- return handle_EINTR([&]() {
- return select(static_cast<int>(sock + 1), &fds, nullptr, nullptr, &tv);
- });
-#endif
-}
-
-inline ssize_t select_write(socket_t sock, time_t sec, time_t usec) {
-#ifdef CPPHTTPLIB_USE_POLL
- struct pollfd pfd_read;
- pfd_read.fd = sock;
- pfd_read.events = POLLOUT;
-
- auto timeout = static_cast<int>(sec * 1000 + usec / 1000);
-
- return handle_EINTR([&]() { return poll(&pfd_read, 1, timeout); });
-#else
- fd_set fds;
- FD_ZERO(&fds);
- FD_SET(sock, &fds);
-
- timeval tv;
- tv.tv_sec = static_cast<long>(sec);
- tv.tv_usec = static_cast<decltype(tv.tv_usec)>(usec);
-
- return handle_EINTR([&]() {
- return select(static_cast<int>(sock + 1), nullptr, &fds, nullptr, &tv);
- });
-#endif
-}
-
-inline bool wait_until_socket_is_ready(socket_t sock, time_t sec, time_t usec) {
-#ifdef CPPHTTPLIB_USE_POLL
- struct pollfd pfd_read;
- pfd_read.fd = sock;
- pfd_read.events = POLLIN | POLLOUT;
-
- auto timeout = static_cast<int>(sec * 1000 + usec / 1000);
-
- auto poll_res = handle_EINTR([&]() { return poll(&pfd_read, 1, timeout); });
-
- if (poll_res > 0 && pfd_read.revents & (POLLIN | POLLOUT)) {
- int error = 0;
- socklen_t len = sizeof(error);
- auto res = getsockopt(sock, SOL_SOCKET, SO_ERROR,
- reinterpret_cast<char *>(&error), &len);
- return res >= 0 && !error;
- }
- return false;
-#else
- fd_set fdsr;
- FD_ZERO(&fdsr);
- FD_SET(sock, &fdsr);
-
- auto fdsw = fdsr;
- auto fdse = fdsr;
-
- timeval tv;
- tv.tv_sec = static_cast<long>(sec);
- tv.tv_usec = static_cast<decltype(tv.tv_usec)>(usec);
-
- auto ret = handle_EINTR([&]() {
- return select(static_cast<int>(sock + 1), &fdsr, &fdsw, &fdse, &tv);
- });
-
- if (ret > 0 && (FD_ISSET(sock, &fdsr) || FD_ISSET(sock, &fdsw))) {
- int error = 0;
- socklen_t len = sizeof(error);
- return getsockopt(sock, SOL_SOCKET, SO_ERROR,
- reinterpret_cast<char *>(&error), &len) >= 0 &&
- !error;
- }
- return false;
-#endif
-}
-
-class SocketStream : public Stream {
-public:
- SocketStream(socket_t sock, time_t read_timeout_sec, time_t read_timeout_usec,
- time_t write_timeout_sec, time_t write_timeout_usec);
- ~SocketStream() override;
-
- bool is_readable() const override;
- bool is_writable() const override;
- ssize_t read(char *ptr, size_t size) override;
- ssize_t write(const char *ptr, size_t size) override;
- void get_remote_ip_and_port(std::string &ip, int &port) const override;
-
-private:
- socket_t sock_;
- time_t read_timeout_sec_;
- time_t read_timeout_usec_;
- time_t write_timeout_sec_;
- time_t write_timeout_usec_;
-};
-
-#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
-class SSLSocketStream : public Stream {
-public:
- SSLSocketStream(socket_t sock, SSL *ssl, time_t read_timeout_sec,
- time_t read_timeout_usec, time_t write_timeout_sec,
- time_t write_timeout_usec);
- ~SSLSocketStream() override;
-
- bool is_readable() const override;
- bool is_writable() const override;
- ssize_t read(char *ptr, size_t size) override;
- ssize_t write(const char *ptr, size_t size) override;
- void get_remote_ip_and_port(std::string &ip, int &port) const override;
-
-private:
- socket_t sock_;
- SSL *ssl_;
- time_t read_timeout_sec_;
- time_t read_timeout_usec_;
- time_t write_timeout_sec_;
- time_t write_timeout_usec_;
-};
-#endif
-
-class BufferStream : public Stream {
-public:
- BufferStream() = default;
- ~BufferStream() override = default;
-
- bool is_readable() const override;
- bool is_writable() const override;
- ssize_t read(char *ptr, size_t size) override;
- ssize_t write(const char *ptr, size_t size) override;
- void get_remote_ip_and_port(std::string &ip, int &port) const override;
-
- const std::string &get_buffer() const;
-
-private:
- std::string buffer;
- size_t position = 0;
-};
-
-inline bool keep_alive(socket_t sock, time_t keep_alive_timeout_sec) {
- using namespace std::chrono;
- auto start = steady_clock::now();
- while (true) {
- auto val = select_read(sock, 0, 10000);
- if (val < 0) {
- return false;
- } else if (val == 0) {
- auto current = steady_clock::now();
- auto duration = duration_cast<milliseconds>(current - start);
- auto timeout = keep_alive_timeout_sec * 1000;
- if (duration.count() > timeout) { return false; }
- std::this_thread::sleep_for(std::chrono::milliseconds(1));
- } else {
- return true;
- }
- }
-}
-
-template <typename T>
-inline bool
-process_server_socket_core(socket_t sock, size_t keep_alive_max_count,
- time_t keep_alive_timeout_sec, T callback) {
- assert(keep_alive_max_count > 0);
- auto ret = false;
- auto count = keep_alive_max_count;
- while (count > 0 && keep_alive(sock, keep_alive_timeout_sec)) {
- auto close_connection = count == 1;
- auto connection_closed = false;
- ret = callback(close_connection, connection_closed);
- if (!ret || connection_closed) { break; }
- count--;
- }
- return ret;
-}
-
-template <typename T>
-inline bool
-process_server_socket(socket_t sock, size_t keep_alive_max_count,
- time_t keep_alive_timeout_sec, time_t read_timeout_sec,
- time_t read_timeout_usec, time_t write_timeout_sec,
- time_t write_timeout_usec, T callback) {
- return process_server_socket_core(
- sock, keep_alive_max_count, keep_alive_timeout_sec,
- [&](bool close_connection, bool &connection_closed) {
- SocketStream strm(sock, read_timeout_sec, read_timeout_usec,
- write_timeout_sec, write_timeout_usec);
- return callback(strm, close_connection, connection_closed);
- });
-}
-
-template <typename T>
-inline bool process_client_socket(socket_t sock, time_t read_timeout_sec,
- time_t read_timeout_usec,
- time_t write_timeout_sec,
- time_t write_timeout_usec, T callback) {
- SocketStream strm(sock, read_timeout_sec, read_timeout_usec,
- write_timeout_sec, write_timeout_usec);
- return callback(strm);
-}
-
-inline int shutdown_socket(socket_t sock) {
-#ifdef _WIN32
- return shutdown(sock, SD_BOTH);
-#else
- return shutdown(sock, SHUT_RDWR);
-#endif
-}
-
-template <typename BindOrConnect>
-socket_t create_socket(const char *host, int port, int socket_flags,
- bool tcp_nodelay, SocketOptions socket_options,
- BindOrConnect bind_or_connect) {
- // Get address info
- struct addrinfo hints;
- struct addrinfo *result;
-
- memset(&hints, 0, sizeof(struct addrinfo));
- hints.ai_family = AF_UNSPEC;
- hints.ai_socktype = SOCK_STREAM;
- hints.ai_flags = socket_flags;
- hints.ai_protocol = 0;
-
- auto service = std::to_string(port);
-
- if (getaddrinfo(host, service.c_str(), &hints, &result)) {
-#ifdef __linux__
- res_init();
-#endif
- return INVALID_SOCKET;
- }
-
- for (auto rp = result; rp; rp = rp->ai_next) {
- // Create a socket
-#ifdef _WIN32
- auto sock = WSASocketW(rp->ai_family, rp->ai_socktype, rp->ai_protocol,
- nullptr, 0, WSA_FLAG_NO_HANDLE_INHERIT);
-    /**
-     * WSA_FLAG_NO_HANDLE_INHERIT is only supported on Windows 7 SP1 /
-     * Windows Server 2008 R2 SP1 and later, so socket creation with that
-     * flag fails on older Windows systems.
-     *
-     * In that case, fall back to creating the socket the old way.
-     *
-     * Reference:
-     * https://docs.microsoft.com/en-us/windows/win32/api/winsock2/nf-winsock2-wsasocketa
-     */
- if (sock == INVALID_SOCKET) {
- sock = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol);
- }
-#else
- auto sock = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol);
-#endif
- if (sock == INVALID_SOCKET) { continue; }
-
-#ifndef _WIN32
- if (fcntl(sock, F_SETFD, FD_CLOEXEC) == -1) { continue; }
-#endif
-
- if (tcp_nodelay) {
- int yes = 1;
- setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, reinterpret_cast<char *>(&yes),
- sizeof(yes));
- }
-
- if (socket_options) { socket_options(sock); }
-
- if (rp->ai_family == AF_INET6) {
- int no = 0;
- setsockopt(sock, IPPROTO_IPV6, IPV6_V6ONLY, reinterpret_cast<char *>(&no),
- sizeof(no));
- }
-
- // bind or connect
- if (bind_or_connect(sock, *rp)) {
- freeaddrinfo(result);
- return sock;
- }
-
- close_socket(sock);
- }
-
- freeaddrinfo(result);
- return INVALID_SOCKET;
-}
-
-inline void set_nonblocking(socket_t sock, bool nonblocking) {
-#ifdef _WIN32
- auto flags = nonblocking ? 1UL : 0UL;
- ioctlsocket(sock, FIONBIO, &flags);
-#else
- auto flags = fcntl(sock, F_GETFL, 0);
- fcntl(sock, F_SETFL,
- nonblocking ? (flags | O_NONBLOCK) : (flags & (~O_NONBLOCK)));
-#endif
-}
-
-inline bool is_connection_error() {
-#ifdef _WIN32
- return WSAGetLastError() != WSAEWOULDBLOCK;
-#else
- return errno != EINPROGRESS;
-#endif
-}
-
-inline bool bind_ip_address(socket_t sock, const char *host) {
- struct addrinfo hints;
- struct addrinfo *result;
-
- memset(&hints, 0, sizeof(struct addrinfo));
- hints.ai_family = AF_UNSPEC;
- hints.ai_socktype = SOCK_STREAM;
- hints.ai_protocol = 0;
-
- if (getaddrinfo(host, "0", &hints, &result)) { return false; }
-
- auto ret = false;
- for (auto rp = result; rp; rp = rp->ai_next) {
- const auto &ai = *rp;
- if (!::bind(sock, ai.ai_addr, static_cast<socklen_t>(ai.ai_addrlen))) {
- ret = true;
- break;
- }
- }
-
- freeaddrinfo(result);
- return ret;
-}
-
-#if !defined _WIN32 && !defined ANDROID
-#define USE_IF2IP
-#endif
-
-#ifdef USE_IF2IP
-inline std::string if2ip(const std::string &ifn) {
- struct ifaddrs *ifap;
- getifaddrs(&ifap);
- for (auto ifa = ifap; ifa; ifa = ifa->ifa_next) {
- if (ifa->ifa_addr && ifn == ifa->ifa_name) {
- if (ifa->ifa_addr->sa_family == AF_INET) {
- auto sa = reinterpret_cast<struct sockaddr_in *>(ifa->ifa_addr);
- char buf[INET_ADDRSTRLEN];
- if (inet_ntop(AF_INET, &sa->sin_addr, buf, INET_ADDRSTRLEN)) {
- freeifaddrs(ifap);
-          return std::string(buf);
- }
- }
- }
- }
- freeifaddrs(ifap);
- return std::string();
-}
-#endif
-
-inline socket_t create_client_socket(const char *host, int port,
- bool tcp_nodelay,
- SocketOptions socket_options,
- time_t timeout_sec, time_t timeout_usec,
- const std::string &intf, std::atomic<Error> &error) {
- auto sock = create_socket(
- host, port, 0, tcp_nodelay, std::move(socket_options),
- [&](socket_t sock, struct addrinfo &ai) -> bool {
- if (!intf.empty()) {
-#ifdef USE_IF2IP
- auto ip = if2ip(intf);
- if (ip.empty()) { ip = intf; }
- if (!bind_ip_address(sock, ip.c_str())) {
- error = Error::BindIPAddress;
- return false;
- }
-#endif
- }
-
- set_nonblocking(sock, true);
-
- auto ret =
- ::connect(sock, ai.ai_addr, static_cast<socklen_t>(ai.ai_addrlen));
-
- if (ret < 0) {
- if (is_connection_error() ||
- !wait_until_socket_is_ready(sock, timeout_sec, timeout_usec)) {
- close_socket(sock);
- error = Error::Connection;
- return false;
- }
- }
-
- set_nonblocking(sock, false);
- error = Error::Success;
- return true;
- });
-
- if (sock != INVALID_SOCKET) {
- error = Error::Success;
- } else {
- if (error == Error::Success) { error = Error::Connection; }
- }
-
- return sock;
-}
-
-inline void get_remote_ip_and_port(const struct sockaddr_storage &addr,
- socklen_t addr_len, std::string &ip,
- int &port) {
- if (addr.ss_family == AF_INET) {
- port = ntohs(reinterpret_cast<const struct sockaddr_in *>(&addr)->sin_port);
- } else if (addr.ss_family == AF_INET6) {
- port =
- ntohs(reinterpret_cast<const struct sockaddr_in6 *>(&addr)->sin6_port);
- }
-
- std::array<char, NI_MAXHOST> ipstr{};
- if (!getnameinfo(reinterpret_cast<const struct sockaddr *>(&addr), addr_len,
- ipstr.data(), static_cast<socklen_t>(ipstr.size()), nullptr,
- 0, NI_NUMERICHOST)) {
- ip = ipstr.data();
- }
-}
-
-inline void get_remote_ip_and_port(socket_t sock, std::string &ip, int &port) {
- struct sockaddr_storage addr;
- socklen_t addr_len = sizeof(addr);
-
- if (!getpeername(sock, reinterpret_cast<struct sockaddr *>(&addr),
- &addr_len)) {
- get_remote_ip_and_port(addr, addr_len, ip, port);
- }
-}
-
-inline const char *
-find_content_type(const std::string &path,
- const std::map<std::string, std::string> &user_data) {
- auto ext = file_extension(path);
-
- auto it = user_data.find(ext);
- if (it != user_data.end()) { return it->second.c_str(); }
-
- if (ext == "txt") {
- return "text/plain";
- } else if (ext == "html" || ext == "htm") {
- return "text/html";
- } else if (ext == "css") {
- return "text/css";
- } else if (ext == "jpeg" || ext == "jpg") {
- return "image/jpg";
- } else if (ext == "png") {
- return "image/png";
- } else if (ext == "gif") {
- return "image/gif";
- } else if (ext == "svg") {
- return "image/svg+xml";
- } else if (ext == "ico") {
- return "image/x-icon";
- } else if (ext == "json") {
- return "application/json";
- } else if (ext == "pdf") {
- return "application/pdf";
- } else if (ext == "js") {
- return "application/javascript";
- } else if (ext == "wasm") {
- return "application/wasm";
- } else if (ext == "xml") {
- return "application/xml";
- } else if (ext == "xhtml") {
- return "application/xhtml+xml";
- }
- return nullptr;
-}
-
-inline const char *status_message(int status) {
- switch (status) {
- case 100: return "Continue";
- case 101: return "Switching Protocol";
- case 102: return "Processing";
- case 103: return "Early Hints";
- case 200: return "OK";
- case 201: return "Created";
- case 202: return "Accepted";
- case 203: return "Non-Authoritative Information";
- case 204: return "No Content";
- case 205: return "Reset Content";
- case 206: return "Partial Content";
- case 207: return "Multi-Status";
- case 208: return "Already Reported";
- case 226: return "IM Used";
- case 300: return "Multiple Choice";
- case 301: return "Moved Permanently";
- case 302: return "Found";
- case 303: return "See Other";
- case 304: return "Not Modified";
- case 305: return "Use Proxy";
- case 306: return "unused";
- case 307: return "Temporary Redirect";
- case 308: return "Permanent Redirect";
- case 400: return "Bad Request";
- case 401: return "Unauthorized";
- case 402: return "Payment Required";
- case 403: return "Forbidden";
- case 404: return "Not Found";
- case 405: return "Method Not Allowed";
- case 406: return "Not Acceptable";
- case 407: return "Proxy Authentication Required";
- case 408: return "Request Timeout";
- case 409: return "Conflict";
- case 410: return "Gone";
- case 411: return "Length Required";
- case 412: return "Precondition Failed";
- case 413: return "Payload Too Large";
- case 414: return "URI Too Long";
- case 415: return "Unsupported Media Type";
- case 416: return "Range Not Satisfiable";
- case 417: return "Expectation Failed";
- case 418: return "I'm a teapot";
- case 421: return "Misdirected Request";
- case 422: return "Unprocessable Entity";
- case 423: return "Locked";
- case 424: return "Failed Dependency";
- case 425: return "Too Early";
- case 426: return "Upgrade Required";
- case 428: return "Precondition Required";
- case 429: return "Too Many Requests";
- case 431: return "Request Header Fields Too Large";
- case 451: return "Unavailable For Legal Reasons";
- case 501: return "Not Implemented";
- case 502: return "Bad Gateway";
- case 503: return "Service Unavailable";
- case 504: return "Gateway Timeout";
- case 505: return "HTTP Version Not Supported";
- case 506: return "Variant Also Negotiates";
- case 507: return "Insufficient Storage";
- case 508: return "Loop Detected";
- case 510: return "Not Extended";
- case 511: return "Network Authentication Required";
-
- default:
- case 500: return "Internal Server Error";
- }
-}
-
-inline bool can_compress_content_type(const std::string &content_type) {
- return (!content_type.find("text/") && content_type != "text/event-stream") ||
- content_type == "image/svg+xml" ||
- content_type == "application/javascript" ||
- content_type == "application/json" ||
- content_type == "application/xml" ||
- content_type == "application/xhtml+xml";
-}
-
-enum class EncodingType { None = 0, Gzip, Brotli };
-
-inline EncodingType encoding_type(const Request &req, const Response &res) {
- auto ret =
- detail::can_compress_content_type(res.get_header_value("Content-Type"));
- if (!ret) { return EncodingType::None; }
-
- const auto &s = req.get_header_value("Accept-Encoding");
- (void)(s);
-
-#ifdef CPPHTTPLIB_BROTLI_SUPPORT
-  // TODO: check that 'Accept-Encoding' actually lists br (and not br;q=0)
- ret = s.find("br") != std::string::npos;
- if (ret) { return EncodingType::Brotli; }
-#endif
-
-#ifdef CPPHTTPLIB_ZLIB_SUPPORT
-  // TODO: check that 'Accept-Encoding' actually lists gzip (and not gzip;q=0)
- ret = s.find("gzip") != std::string::npos;
- if (ret) { return EncodingType::Gzip; }
-#endif
-
- return EncodingType::None;
-}
-
-class compressor {
-public:
- virtual ~compressor(){};
-
- typedef std::function<bool(const char *data, size_t data_len)> Callback;
- virtual bool compress(const char *data, size_t data_length, bool last,
- Callback callback) = 0;
-};
-
-class decompressor {
-public:
- virtual ~decompressor() {}
-
- virtual bool is_valid() const = 0;
-
- typedef std::function<bool(const char *data, size_t data_len)> Callback;
- virtual bool decompress(const char *data, size_t data_length,
- Callback callback) = 0;
-};
-
-class nocompressor : public compressor {
-public:
- ~nocompressor(){};
-
- bool compress(const char *data, size_t data_length, bool /*last*/,
- Callback callback) override {
- if (!data_length) { return true; }
- return callback(data, data_length);
- }
-};
-
-#ifdef CPPHTTPLIB_ZLIB_SUPPORT
-class gzip_compressor : public compressor {
-public:
- gzip_compressor() {
- std::memset(&strm_, 0, sizeof(strm_));
- strm_.zalloc = Z_NULL;
- strm_.zfree = Z_NULL;
- strm_.opaque = Z_NULL;
-
- is_valid_ = deflateInit2(&strm_, Z_DEFAULT_COMPRESSION, Z_DEFLATED, 31, 8,
- Z_DEFAULT_STRATEGY) == Z_OK;
- }
-
- ~gzip_compressor() { deflateEnd(&strm_); }
-
- bool compress(const char *data, size_t data_length, bool last,
- Callback callback) override {
- assert(is_valid_);
-
- auto flush = last ? Z_FINISH : Z_NO_FLUSH;
-
- strm_.avail_in = static_cast<decltype(strm_.avail_in)>(data_length);
- strm_.next_in = const_cast<Bytef *>(reinterpret_cast<const Bytef *>(data));
-
- int ret = Z_OK;
-
- std::array<char, CPPHTTPLIB_COMPRESSION_BUFSIZ> buff{};
- do {
- strm_.avail_out = buff.size();
- strm_.next_out = reinterpret_cast<Bytef *>(buff.data());
-
- ret = deflate(&strm_, flush);
- assert(ret != Z_STREAM_ERROR);
-
- if (!callback(buff.data(), buff.size() - strm_.avail_out)) {
- return false;
- }
- } while (strm_.avail_out == 0);
-
- assert((last && ret == Z_STREAM_END) || (!last && ret == Z_OK));
- assert(strm_.avail_in == 0);
- return true;
- }
-
-private:
- bool is_valid_ = false;
- z_stream strm_;
-};
-
-class gzip_decompressor : public decompressor {
-public:
- gzip_decompressor() {
- std::memset(&strm_, 0, sizeof(strm_));
- strm_.zalloc = Z_NULL;
- strm_.zfree = Z_NULL;
- strm_.opaque = Z_NULL;
-
-    // 15 is the maximum window size (wbits), which ensures that any gzip
-    // stream can be decoded. Adding 32 enables automatic detection of the
-    // stream type (gzip or zlib/deflate).
- is_valid_ = inflateInit2(&strm_, 32 + 15) == Z_OK;
- }
-
- ~gzip_decompressor() { inflateEnd(&strm_); }
-
- bool is_valid() const override { return is_valid_; }
-
- bool decompress(const char *data, size_t data_length,
- Callback callback) override {
- assert(is_valid_);
-
- int ret = Z_OK;
-
- strm_.avail_in = static_cast<decltype(strm_.avail_in)>(data_length);
- strm_.next_in = const_cast<Bytef *>(reinterpret_cast<const Bytef *>(data));
-
- std::array<char, CPPHTTPLIB_COMPRESSION_BUFSIZ> buff{};
- while (strm_.avail_in > 0) {
- strm_.avail_out = buff.size();
- strm_.next_out = reinterpret_cast<Bytef *>(buff.data());
-
- ret = inflate(&strm_, Z_NO_FLUSH);
- assert(ret != Z_STREAM_ERROR);
- switch (ret) {
- case Z_NEED_DICT:
- case Z_DATA_ERROR:
- case Z_MEM_ERROR: inflateEnd(&strm_); return false;
- }
-
- if (!callback(buff.data(), buff.size() - strm_.avail_out)) {
- return false;
- }
- }
-
- return ret == Z_OK || ret == Z_STREAM_END;
- }
-
-private:
- bool is_valid_ = false;
- z_stream strm_;
-};
-#endif
-
-#ifdef CPPHTTPLIB_BROTLI_SUPPORT
-class brotli_compressor : public compressor {
-public:
- brotli_compressor() {
- state_ = BrotliEncoderCreateInstance(nullptr, nullptr, nullptr);
- }
-
- ~brotli_compressor() { BrotliEncoderDestroyInstance(state_); }
-
- bool compress(const char *data, size_t data_length, bool last,
- Callback callback) override {
- std::array<uint8_t, CPPHTTPLIB_COMPRESSION_BUFSIZ> buff{};
-
- auto operation = last ? BROTLI_OPERATION_FINISH : BROTLI_OPERATION_PROCESS;
- auto available_in = data_length;
- auto next_in = reinterpret_cast<const uint8_t *>(data);
-
- for (;;) {
- if (last) {
- if (BrotliEncoderIsFinished(state_)) { break; }
- } else {
- if (!available_in) { break; }
- }
-
- auto available_out = buff.size();
- auto next_out = buff.data();
-
- if (!BrotliEncoderCompressStream(state_, operation, &available_in,
- &next_in, &available_out, &next_out,
- nullptr)) {
- return false;
- }
-
- auto output_bytes = buff.size() - available_out;
- if (output_bytes) {
- callback(reinterpret_cast<const char *>(buff.data()), output_bytes);
- }
- }
-
- return true;
- }
-
-private:
- BrotliEncoderState *state_ = nullptr;
-};
-
-class brotli_decompressor : public decompressor {
-public:
- brotli_decompressor() {
- decoder_s = BrotliDecoderCreateInstance(0, 0, 0);
- decoder_r = decoder_s ? BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT
- : BROTLI_DECODER_RESULT_ERROR;
- }
-
- ~brotli_decompressor() {
- if (decoder_s) { BrotliDecoderDestroyInstance(decoder_s); }
- }
-
- bool is_valid() const override { return decoder_s; }
-
- bool decompress(const char *data, size_t data_length,
- Callback callback) override {
- if (decoder_r == BROTLI_DECODER_RESULT_SUCCESS ||
- decoder_r == BROTLI_DECODER_RESULT_ERROR) {
-      return false;
- }
-
- const uint8_t *next_in = (const uint8_t *)data;
- size_t avail_in = data_length;
- size_t total_out;
-
- decoder_r = BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT;
-
- std::array<char, CPPHTTPLIB_COMPRESSION_BUFSIZ> buff{};
- while (decoder_r == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) {
- char *next_out = buff.data();
- size_t avail_out = buff.size();
-
- decoder_r = BrotliDecoderDecompressStream(
- decoder_s, &avail_in, &next_in, &avail_out,
- reinterpret_cast<uint8_t **>(&next_out), &total_out);
-
- if (decoder_r == BROTLI_DECODER_RESULT_ERROR) { return false; }
-
- if (!callback(buff.data(), buff.size() - avail_out)) { return false; }
- }
-
- return decoder_r == BROTLI_DECODER_RESULT_SUCCESS ||
- decoder_r == BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT;
- }
-
-private:
- BrotliDecoderResult decoder_r;
- BrotliDecoderState *decoder_s = nullptr;
-};
-#endif
-
-inline bool has_header(const Headers &headers, const char *key) {
- return headers.find(key) != headers.end();
-}
-
-inline const char *get_header_value(const Headers &headers, const char *key,
- size_t id = 0, const char *def = nullptr) {
- auto rng = headers.equal_range(key);
- auto it = rng.first;
- std::advance(it, static_cast<ssize_t>(id));
- if (it != rng.second) { return it->second.c_str(); }
- return def;
-}
-
-template <typename T>
-inline T get_header_value(const Headers & /*headers*/, const char * /*key*/,
- size_t /*id*/ = 0, uint64_t /*def*/ = 0) {}
-
-template <>
-inline uint64_t get_header_value<uint64_t>(const Headers &headers,
- const char *key, size_t id,
- uint64_t def) {
- auto rng = headers.equal_range(key);
- auto it = rng.first;
- std::advance(it, static_cast<ssize_t>(id));
- if (it != rng.second) {
- return std::strtoull(it->second.data(), nullptr, 10);
- }
- return def;
-}
-
-template <typename T>
-inline bool parse_header(const char *beg, const char *end, T fn) {
- // Skip trailing spaces and tabs.
- while (beg < end && is_space_or_tab(end[-1])) {
- end--;
- }
-
- auto p = beg;
- while (p < end && *p != ':') {
- p++;
- }
-
- if (p == end) { return false; }
-
- auto key_end = p;
-
- if (*p++ != ':') { return false; }
-
- while (p < end && is_space_or_tab(*p)) {
- p++;
- }
-
- if (p < end) {
- fn(std::string(beg, key_end), decode_url(std::string(p, end), false));
- return true;
- }
-
- return false;
-}
-
-inline bool read_headers(Stream &strm, Headers &headers) {
- const auto bufsiz = 2048;
- char buf[bufsiz];
- stream_line_reader line_reader(strm, buf, bufsiz);
-
- for (;;) {
- if (!line_reader.getline()) { return false; }
-
- // Check if the line ends with CRLF.
- if (line_reader.end_with_crlf()) {
- // Blank line indicates end of headers.
- if (line_reader.size() == 2) { break; }
- } else {
- continue; // Skip invalid line.
- }
-
- // Exclude CRLF
- auto end = line_reader.ptr() + line_reader.size() - 2;
-
- parse_header(line_reader.ptr(), end,
- [&](std::string &&key, std::string &&val) {
- headers.emplace(std::move(key), std::move(val));
- });
- }
-
- return true;
-}
-
-inline bool read_content_with_length(Stream &strm, uint64_t len,
- Progress progress,
- ContentReceiverWithProgress out) {
- char buf[CPPHTTPLIB_RECV_BUFSIZ];
-
- uint64_t r = 0;
- while (r < len) {
- auto read_len = static_cast<size_t>(len - r);
- auto n = strm.read(buf, (std::min)(read_len, CPPHTTPLIB_RECV_BUFSIZ));
- if (n <= 0) { return false; }
-
- if (!out(buf, static_cast<size_t>(n), r, len)) { return false; }
- r += static_cast<uint64_t>(n);
-
- if (progress) {
- if (!progress(r, len)) { return false; }
- }
- }
-
- return true;
-}
-
-inline void skip_content_with_length(Stream &strm, uint64_t len) {
- char buf[CPPHTTPLIB_RECV_BUFSIZ];
- uint64_t r = 0;
- while (r < len) {
- auto read_len = static_cast<size_t>(len - r);
- auto n = strm.read(buf, (std::min)(read_len, CPPHTTPLIB_RECV_BUFSIZ));
- if (n <= 0) { return; }
- r += static_cast<uint64_t>(n);
- }
-}
-
-inline bool read_content_without_length(Stream &strm,
- ContentReceiverWithProgress out) {
- char buf[CPPHTTPLIB_RECV_BUFSIZ];
- uint64_t r = 0;
- for (;;) {
- auto n = strm.read(buf, CPPHTTPLIB_RECV_BUFSIZ);
- if (n < 0) {
- return false;
- } else if (n == 0) {
- return true;
- }
-
- if (!out(buf, static_cast<size_t>(n), r, 0)) { return false; }
- r += static_cast<uint64_t>(n);
- }
-
- return true;
-}
-
-inline bool read_content_chunked(Stream &strm,
- ContentReceiverWithProgress out) {
- const auto bufsiz = 16;
- char buf[bufsiz];
-
- stream_line_reader line_reader(strm, buf, bufsiz);
-
- if (!line_reader.getline()) { return false; }
-
- unsigned long chunk_len;
- while (true) {
- char *end_ptr;
-
- chunk_len = std::strtoul(line_reader.ptr(), &end_ptr, 16);
-
- if (end_ptr == line_reader.ptr()) { return false; }
- if (chunk_len == ULONG_MAX) { return false; }
-
- if (chunk_len == 0) { break; }
-
- if (!read_content_with_length(strm, chunk_len, nullptr, out)) {
- return false;
- }
-
- if (!line_reader.getline()) { return false; }
-
- if (strcmp(line_reader.ptr(), "\r\n")) { break; }
-
- if (!line_reader.getline()) { return false; }
- }
-
- if (chunk_len == 0) {
-    // Read the terminating line (CRLF) after the last chunk
- if (!line_reader.getline() || strcmp(line_reader.ptr(), "\r\n"))
- return false;
- }
-
- return true;
-}
-
-inline bool is_chunked_transfer_encoding(const Headers &headers) {
- return !strcasecmp(get_header_value(headers, "Transfer-Encoding", 0, ""),
- "chunked");
-}
-
-template <typename T, typename U>
-bool prepare_content_receiver(T &x, int &status,
- ContentReceiverWithProgress receiver,
- bool decompress, U callback) {
- if (decompress) {
- std::string encoding = x.get_header_value("Content-Encoding");
- std::unique_ptr<decompressor> decompressor;
-
- if (encoding.find("gzip") != std::string::npos ||
- encoding.find("deflate") != std::string::npos) {
-#ifdef CPPHTTPLIB_ZLIB_SUPPORT
- decompressor = detail::make_unique<gzip_decompressor>();
-#else
- status = 415;
- return false;
-#endif
- } else if (encoding.find("br") != std::string::npos) {
-#ifdef CPPHTTPLIB_BROTLI_SUPPORT
- decompressor = detail::make_unique<brotli_decompressor>();
-#else
- status = 415;
- return false;
-#endif
- }
-
- if (decompressor) {
- if (decompressor->is_valid()) {
- ContentReceiverWithProgress out = [&](const char *buf, size_t n,
- uint64_t off, uint64_t len) {
- return decompressor->decompress(buf, n,
- [&](const char *buf, size_t n) {
- return receiver(buf, n, off, len);
- });
- };
- return callback(std::move(out));
- } else {
- status = 500;
- return false;
- }
- }
- }
-
- ContentReceiverWithProgress out = [&](const char *buf, size_t n, uint64_t off,
- uint64_t len) {
- return receiver(buf, n, off, len);
- };
- return callback(std::move(out));
-}
-
-template <typename T>
-bool read_content(Stream &strm, T &x, size_t payload_max_length, int &status,
- Progress progress, ContentReceiverWithProgress receiver,
- bool decompress) {
- return prepare_content_receiver(
- x, status, std::move(receiver), decompress,
- [&](const ContentReceiverWithProgress &out) {
- auto ret = true;
- auto exceed_payload_max_length = false;
-
- if (is_chunked_transfer_encoding(x.headers)) {
- ret = read_content_chunked(strm, out);
- } else if (!has_header(x.headers, "Content-Length")) {
- ret = read_content_without_length(strm, out);
- } else {
- auto len = get_header_value<uint64_t>(x.headers, "Content-Length");
- if (len > payload_max_length) {
- exceed_payload_max_length = true;
- skip_content_with_length(strm, len);
- ret = false;
- } else if (len > 0) {
- ret = read_content_with_length(strm, len, std::move(progress), out);
- }
- }
-
- if (!ret) { status = exceed_payload_max_length ? 413 : 400; }
- return ret;
- });
-}
-
-template <typename T>
-inline ssize_t write_headers(Stream &strm, const T &info,
- const Headers &headers) {
- ssize_t write_len = 0;
- for (const auto &x : info.headers) {
- if (x.first == "EXCEPTION_WHAT") { continue; }
- auto len =
- strm.write_format("%s: %s\r\n", x.first.c_str(), x.second.c_str());
- if (len < 0) { return len; }
- write_len += len;
- }
- for (const auto &x : headers) {
- auto len =
- strm.write_format("%s: %s\r\n", x.first.c_str(), x.second.c_str());
- if (len < 0) { return len; }
- write_len += len;
- }
- auto len = strm.write("\r\n");
- if (len < 0) { return len; }
- write_len += len;
- return write_len;
-}
-
-inline bool write_data(Stream &strm, const char *d, size_t l) {
- size_t offset = 0;
- while (offset < l) {
- auto length = strm.write(d + offset, l - offset);
- if (length < 0) { return false; }
- offset += static_cast<size_t>(length);
- }
- return true;
-}
-
-template <typename T>
-inline ssize_t write_content(Stream &strm, ContentProvider content_provider,
- size_t offset, size_t length, T is_shutting_down) {
- size_t begin_offset = offset;
- size_t end_offset = offset + length;
- auto ok = true;
- DataSink data_sink;
-
- data_sink.write = [&](const char *d, size_t l) {
- if (ok) {
- offset += l;
- if (!write_data(strm, d, l)) { ok = false; }
- }
- };
-
- data_sink.is_writable = [&](void) { return ok && strm.is_writable(); };
-
- while (offset < end_offset && !is_shutting_down()) {
- if (!content_provider(offset, end_offset - offset, data_sink)) {
- return -1;
- }
- if (!ok) { return -1; }
- }
-
- return static_cast<ssize_t>(offset - begin_offset);
-}
-
-template <typename T>
-inline ssize_t write_content_without_length(Stream &strm,
- ContentProvider content_provider,
- T is_shutting_down) {
- size_t offset = 0;
- auto data_available = true;
- auto ok = true;
- DataSink data_sink;
-
- data_sink.write = [&](const char *d, size_t l) {
- if (ok) {
- offset += l;
- if (!write_data(strm, d, l)) { ok = false; }
- }
- };
-
- data_sink.done = [&](void) { data_available = false; };
-
- data_sink.is_writable = [&](void) { return ok && strm.is_writable(); };
-
- while (data_available && !is_shutting_down()) {
- if (!content_provider(offset, 0, data_sink)) { return -1; }
- if (!ok) { return -1; }
- }
-
- return static_cast<ssize_t>(offset);
-}
-
-template <typename T, typename U>
-inline ssize_t write_content_chunked(Stream &strm,
- ContentProvider content_provider,
- T is_shutting_down, U &compressor) {
- size_t offset = 0;
- auto data_available = true;
- ssize_t total_written_length = 0;
- auto ok = true;
- DataSink data_sink;
-
- data_sink.write = [&](const char *d, size_t l) {
- if (!ok) { return; }
-
- data_available = l > 0;
- offset += l;
-
- std::string payload;
- if (!compressor.compress(d, l, false,
- [&](const char *data, size_t data_len) {
- payload.append(data, data_len);
- return true;
- })) {
- ok = false;
- return;
- }
-
- if (!payload.empty()) {
-      // Emit the chunk: size in hex, CRLF, payload, CRLF
- auto chunk = from_i_to_hex(payload.size()) + "\r\n" + payload + "\r\n";
- if (write_data(strm, chunk.data(), chunk.size())) {
- total_written_length += chunk.size();
- } else {
- ok = false;
- return;
- }
- }
- };
-
- data_sink.done = [&](void) {
- if (!ok) { return; }
-
- data_available = false;
-
- std::string payload;
- if (!compressor.compress(nullptr, 0, true,
- [&](const char *data, size_t data_len) {
- payload.append(data, data_len);
- return true;
- })) {
- ok = false;
- return;
- }
-
- if (!payload.empty()) {
-      // Emit the chunk: size in hex, CRLF, payload, CRLF
- auto chunk = from_i_to_hex(payload.size()) + "\r\n" + payload + "\r\n";
- if (write_data(strm, chunk.data(), chunk.size())) {
- total_written_length += chunk.size();
- } else {
- ok = false;
- return;
- }
- }
-
- static const std::string done_marker("0\r\n\r\n");
- if (write_data(strm, done_marker.data(), done_marker.size())) {
- total_written_length += done_marker.size();
- } else {
- ok = false;
- }
- };
-
- data_sink.is_writable = [&](void) { return ok && strm.is_writable(); };
-
- while (data_available && !is_shutting_down()) {
- if (!content_provider(offset, 0, data_sink)) { return -1; }
- if (!ok) { return -1; }
- }
-
- return total_written_length;
-}
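// For reference, the chunks emitted above follow the HTTP/1.1 chunked framing
// (chunk size in hex, CRLF, payload, CRLF), terminated by a zero-length chunk:
//
//   4\r\n
//   Wiki\r\n
//   5\r\n
//   pedia\r\n
//   0\r\n
//   \r\n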
-
-template <typename T>
-inline bool redirect(T &cli, const Request &req, Response &res,
- const std::string &path) {
- Request new_req = req;
- new_req.path = path;
- new_req.redirect_count -= 1;
-
- if (res.status == 303 && (req.method != "GET" && req.method != "HEAD")) {
- new_req.method = "GET";
- new_req.body.clear();
- new_req.headers.clear();
- }
-
- Response new_res;
-
- auto ret = cli.send(new_req, new_res);
- if (ret) { res = new_res; }
- return ret;
-}
-
-inline std::string params_to_query_str(const Params &params) {
- std::string query;
-
- for (auto it = params.begin(); it != params.end(); ++it) {
- if (it != params.begin()) { query += "&"; }
- query += it->first;
- query += "=";
- query += encode_url(it->second);
- }
- return query;
-}
-
-inline void parse_query_text(const std::string &s, Params &params) {
- split(s.data(), s.data() + s.size(), '&', [&](const char *b, const char *e) {
- std::string key;
- std::string val;
- split(b, e, '=', [&](const char *b2, const char *e2) {
- if (key.empty()) {
- key.assign(b2, e2);
- } else {
- val.assign(b2, e2);
- }
- });
-
- if (!key.empty()) {
- params.emplace(decode_url(key, true), decode_url(val, true));
- }
- });
-}
-
-inline bool parse_multipart_boundary(const std::string &content_type,
- std::string &boundary) {
- auto pos = content_type.find("boundary=");
- if (pos == std::string::npos) { return false; }
- boundary = content_type.substr(pos + 9);
- if (boundary.length() >= 2 && boundary.front() == '"' &&
- boundary.back() == '"') {
- boundary = boundary.substr(1, boundary.size() - 2);
- }
- return !boundary.empty();
-}
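// Illustrative inputs for parse_multipart_boundary() above (examples added for
// clarity):
//
//   "multipart/form-data; boundary=----FormBoundaryXyz" -> "----FormBoundaryXyz"
//   "multipart/form-data; boundary=\"simple boundary\"" -> "simple boundary"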
-
-inline bool parse_range_header(const std::string &s, Ranges &ranges) try {
- static auto re_first_range = std::regex(R"(bytes=(\d*-\d*(?:,\s*\d*-\d*)*))");
- std::smatch m;
- if (std::regex_match(s, m, re_first_range)) {
- auto pos = static_cast<size_t>(m.position(1));
- auto len = static_cast<size_t>(m.length(1));
- bool all_valid_ranges = true;
- split(&s[pos], &s[pos + len], ',', [&](const char *b, const char *e) {
- if (!all_valid_ranges) return;
- static auto re_another_range = std::regex(R"(\s*(\d*)-(\d*))");
- std::cmatch cm;
- if (std::regex_match(b, e, cm, re_another_range)) {
- ssize_t first = -1;
- if (!cm.str(1).empty()) {
- first = static_cast<ssize_t>(std::stoll(cm.str(1)));
- }
-
- ssize_t last = -1;
- if (!cm.str(2).empty()) {
- last = static_cast<ssize_t>(std::stoll(cm.str(2)));
- }
-
- if (first != -1 && last != -1 && first > last) {
- all_valid_ranges = false;
- return;
- }
- ranges.emplace_back(std::make_pair(first, last));
- }
- });
- return all_valid_ranges;
- }
- return false;
-} catch (...) { return false; }
-
-class MultipartFormDataParser {
-public:
- MultipartFormDataParser() = default;
-
-  void set_boundary(std::string &&boundary) {
-    boundary_ = std::move(boundary);
-  }
-
- bool is_valid() const { return is_valid_; }
-
- template <typename T, typename U>
- bool parse(const char *buf, size_t n, const T &content_callback,
- const U &header_callback) {
-
- static const std::regex re_content_disposition(
- "^Content-Disposition:\\s*form-data;\\s*name=\"(.*?)\"(?:;\\s*filename="
- "\"(.*?)\")?\\s*$",
- std::regex_constants::icase);
- static const std::string dash_ = "--";
- static const std::string crlf_ = "\r\n";
-
- buf_.append(buf, n); // TODO: performance improvement
-
- while (!buf_.empty()) {
- switch (state_) {
- case 0: { // Initial boundary
- auto pattern = dash_ + boundary_ + crlf_;
- if (pattern.size() > buf_.size()) { return true; }
- auto pos = buf_.find(pattern);
- if (pos != 0) { return false; }
- buf_.erase(0, pattern.size());
- off_ += pattern.size();
- state_ = 1;
- break;
- }
- case 1: { // New entry
- clear_file_info();
- state_ = 2;
- break;
- }
- case 2: { // Headers
- auto pos = buf_.find(crlf_);
- while (pos != std::string::npos) {
- // Empty line
- if (pos == 0) {
- if (!header_callback(file_)) {
- is_valid_ = false;
- return false;
- }
- buf_.erase(0, crlf_.size());
- off_ += crlf_.size();
- state_ = 3;
- break;
- }
-
- static const std::string header_name = "content-type:";
- const auto header = buf_.substr(0, pos);
- if (start_with(header, header_name)) {
- file_.content_type = trim_copy(header.substr(header_name.size()));
- } else {
- std::smatch m;
- if (std::regex_match(header, m, re_content_disposition)) {
- file_.name = m[1];
- file_.filename = m[2];
- }
- }
-
- buf_.erase(0, pos + crlf_.size());
- off_ += pos + crlf_.size();
- pos = buf_.find(crlf_);
- }
- if (state_ != 3) { return true; }
- break;
- }
- case 3: { // Body
- {
- auto pattern = crlf_ + dash_;
- if (pattern.size() > buf_.size()) { return true; }
-
- auto pos = buf_.find(pattern);
- if (pos == std::string::npos) {
- pos = buf_.size();
- while (pos > 0) {
- auto c = buf_[pos - 1];
- if (c != '\r' && c != '\n' && c != '-') { break; }
- pos--;
- }
- }
-
- if (!content_callback(buf_.data(), pos)) {
- is_valid_ = false;
- return false;
- }
-
- off_ += pos;
- buf_.erase(0, pos);
- }
-
- {
- auto pattern = crlf_ + dash_ + boundary_;
- if (pattern.size() > buf_.size()) { return true; }
-
- auto pos = buf_.find(pattern);
- if (pos != std::string::npos) {
- if (!content_callback(buf_.data(), pos)) {
- is_valid_ = false;
- return false;
- }
-
- off_ += pos + pattern.size();
- buf_.erase(0, pos + pattern.size());
- state_ = 4;
- } else {
- if (!content_callback(buf_.data(), pattern.size())) {
- is_valid_ = false;
- return false;
- }
-
- off_ += pattern.size();
- buf_.erase(0, pattern.size());
- }
- }
- break;
- }
- case 4: { // Boundary
- if (crlf_.size() > buf_.size()) { return true; }
- if (buf_.compare(0, crlf_.size(), crlf_) == 0) {
- buf_.erase(0, crlf_.size());
- off_ += crlf_.size();
- state_ = 1;
- } else {
- auto pattern = dash_ + crlf_;
- if (pattern.size() > buf_.size()) { return true; }
- if (buf_.compare(0, pattern.size(), pattern) == 0) {
- buf_.erase(0, pattern.size());
- off_ += pattern.size();
- is_valid_ = true;
- state_ = 5;
- } else {
- return true;
- }
- }
- break;
- }
- case 5: { // Done
- is_valid_ = false;
- return false;
- }
- }
- }
-
- return true;
- }
-
-private:
- void clear_file_info() {
- file_.name.clear();
- file_.filename.clear();
- file_.content_type.clear();
- }
-
- std::string boundary_;
-
- std::string buf_;
- size_t state_ = 0;
- bool is_valid_ = false;
- size_t off_ = 0;
- MultipartFormData file_;
-};
-
-inline std::string to_lower(const char *beg, const char *end) {
- std::string out;
- auto it = beg;
- while (it != end) {
- out += static_cast<char>(::tolower(*it));
- it++;
- }
- return out;
-}
-
-inline std::string make_multipart_data_boundary() {
- static const char data[] =
- "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
-
- // std::random_device might actually be deterministic on some
-  // platforms, but due to lack of support in the C++ standard library,
- // doing better requires either some ugly hacks or breaking portability.
- std::random_device seed_gen;
- // Request 128 bits of entropy for initialization
- std::seed_seq seed_sequence{seed_gen(), seed_gen(), seed_gen(), seed_gen()};
- std::mt19937 engine(seed_sequence);
-
- std::string result = "--cpp-httplib-multipart-data-";
-
- for (auto i = 0; i < 16; i++) {
- result += data[engine() % (sizeof(data) - 1)];
- }
-
- return result;
-}
-
-inline std::pair<size_t, size_t>
-get_range_offset_and_length(const Request &req, size_t content_length,
- size_t index) {
- auto r = req.ranges[index];
-
- if (r.first == -1 && r.second == -1) {
- return std::make_pair(0, content_length);
- }
-
- auto slen = static_cast<ssize_t>(content_length);
-
- if (r.first == -1) {
- r.first = (std::max)(static_cast<ssize_t>(0), slen - r.second);
- r.second = slen - 1;
- }
-
- if (r.second == -1) { r.second = slen - 1; }
-
- return std::make_pair(r.first, r.second - r.first + 1);
-}
-
-inline std::string make_content_range_header_field(size_t offset, size_t length,
- size_t content_length) {
- std::string field = "bytes ";
- field += std::to_string(offset);
- field += "-";
- field += std::to_string(offset + length - 1);
- field += "/";
- field += std::to_string(content_length);
- return field;
-}
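// For example (annotation, not part of the original header):
//
//   make_content_range_header_field(0, 100, 1000) -> "bytes 0-99/1000"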
-
-template <typename SToken, typename CToken, typename Content>
-bool process_multipart_ranges_data(const Request &req, Response &res,
- const std::string &boundary,
- const std::string &content_type,
- SToken stoken, CToken ctoken,
- Content content) {
- for (size_t i = 0; i < req.ranges.size(); i++) {
- ctoken("--");
- stoken(boundary);
- ctoken("\r\n");
- if (!content_type.empty()) {
- ctoken("Content-Type: ");
- stoken(content_type);
- ctoken("\r\n");
- }
-
- auto offsets = get_range_offset_and_length(req, res.body.size(), i);
- auto offset = offsets.first;
- auto length = offsets.second;
-
- ctoken("Content-Range: ");
- stoken(make_content_range_header_field(offset, length, res.body.size()));
- ctoken("\r\n");
- ctoken("\r\n");
- if (!content(offset, length)) { return false; }
- ctoken("\r\n");
- }
-
- ctoken("--");
- stoken(boundary);
- ctoken("--\r\n");
-
- return true;
-}
-
-inline std::string make_multipart_ranges_data(const Request &req, Response &res,
- const std::string &boundary,
- const std::string &content_type) {
- std::string data;
-
- process_multipart_ranges_data(
- req, res, boundary, content_type,
- [&](const std::string &token) { data += token; },
- [&](const char *token) { data += token; },
- [&](size_t offset, size_t length) {
- data += res.body.substr(offset, length);
- return true;
- });
-
- return data;
-}
-
-inline size_t
-get_multipart_ranges_data_length(const Request &req, Response &res,
- const std::string &boundary,
- const std::string &content_type) {
- size_t data_length = 0;
-
- process_multipart_ranges_data(
- req, res, boundary, content_type,
- [&](const std::string &token) { data_length += token.size(); },
- [&](const char *token) { data_length += strlen(token); },
- [&](size_t /*offset*/, size_t length) {
- data_length += length;
- return true;
- });
-
- return data_length;
-}
-
-template <typename T>
-inline bool write_multipart_ranges_data(Stream &strm, const Request &req,
- Response &res,
- const std::string &boundary,
- const std::string &content_type,
- T is_shutting_down) {
- return process_multipart_ranges_data(
- req, res, boundary, content_type,
- [&](const std::string &token) { strm.write(token); },
- [&](const char *token) { strm.write(token); },
- [&](size_t offset, size_t length) {
- return write_content(strm, res.content_provider_, offset, length,
- is_shutting_down) >= 0;
- });
-}
-
-inline std::pair<size_t, size_t>
-get_range_offset_and_length(const Request &req, const Response &res,
- size_t index) {
- auto r = req.ranges[index];
-
- if (r.second == -1) {
- r.second = static_cast<ssize_t>(res.content_length_) - 1;
- }
-
- return std::make_pair(r.first, r.second - r.first + 1);
-}
-
-inline bool expect_content(const Request &req) {
- if (req.method == "POST" || req.method == "PUT" || req.method == "PATCH" ||
- req.method == "PRI" || req.method == "DELETE") {
- return true;
- }
- // TODO: check if Content-Length is set
- return false;
-}
-
-inline bool has_crlf(const char *s) {
- auto p = s;
- while (*p) {
- if (*p == '\r' || *p == '\n') { return true; }
- p++;
- }
- return false;
-}
-
-#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
-template <typename CTX, typename Init, typename Update, typename Final>
-inline std::string message_digest(const std::string &s, Init init,
- Update update, Final final,
- size_t digest_length) {
- using namespace std;
-
- std::vector<unsigned char> md(digest_length, 0);
- CTX ctx;
- init(&ctx);
- update(&ctx, s.data(), s.size());
- final(md.data(), &ctx);
-
- stringstream ss;
- for (auto c : md) {
- ss << setfill('0') << setw(2) << hex << (unsigned int)c;
- }
- return ss.str();
-}
-
-inline std::string MD5(const std::string &s) {
- return message_digest<MD5_CTX>(s, MD5_Init, MD5_Update, MD5_Final,
- MD5_DIGEST_LENGTH);
-}
-
-inline std::string SHA_256(const std::string &s) {
- return message_digest<SHA256_CTX>(s, SHA256_Init, SHA256_Update, SHA256_Final,
- SHA256_DIGEST_LENGTH);
-}
-
-inline std::string SHA_512(const std::string &s) {
- return message_digest<SHA512_CTX>(s, SHA512_Init, SHA512_Update, SHA512_Final,
- SHA512_DIGEST_LENGTH);
-}
-#endif
-
-#ifdef _WIN32
-#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
-// NOTE: This code is based on the following Stack Overflow post:
-// https://stackoverflow.com/questions/9507184/can-openssl-on-windows-use-the-system-certificate-store
-inline bool load_system_certs_on_windows(X509_STORE *store) {
- auto hStore = CertOpenSystemStoreW((HCRYPTPROV_LEGACY)NULL, L"ROOT");
-
- if (!hStore) { return false; }
-
- PCCERT_CONTEXT pContext = NULL;
- while (pContext = CertEnumCertificatesInStore(hStore, pContext)) {
- auto encoded_cert =
- static_cast<const unsigned char *>(pContext->pbCertEncoded);
-
- auto x509 = d2i_X509(NULL, &encoded_cert, pContext->cbCertEncoded);
- if (x509) {
- X509_STORE_add_cert(store, x509);
- X509_free(x509);
- }
- }
-
- CertFreeCertificateContext(pContext);
- CertCloseStore(hStore, 0);
-
- return true;
-}
-#endif
-
-class WSInit {
-public:
- WSInit() {
- WSADATA wsaData;
- WSAStartup(0x0002, &wsaData);
- }
-
- ~WSInit() { WSACleanup(); }
-};
-
-static WSInit wsinit_;
-#endif
-
-#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
-inline std::pair<std::string, std::string> make_digest_authentication_header(
- const Request &req, const std::map<std::string, std::string> &auth,
- size_t cnonce_count, const std::string &cnonce, const std::string &username,
- const std::string &password, bool is_proxy = false) {
- using namespace std;
-
- string nc;
- {
- stringstream ss;
- ss << setfill('0') << setw(8) << hex << cnonce_count;
- nc = ss.str();
- }
-
- auto qop = auth.at("qop");
- if (qop.find("auth-int") != std::string::npos) {
- qop = "auth-int";
- } else {
- qop = "auth";
- }
-
- std::string algo = "MD5";
- if (auth.find("algorithm") != auth.end()) { algo = auth.at("algorithm"); }
-
- string response;
- {
- auto H = algo == "SHA-256"
- ? detail::SHA_256
- : algo == "SHA-512" ? detail::SHA_512 : detail::MD5;
-
- auto A1 = username + ":" + auth.at("realm") + ":" + password;
-
- auto A2 = req.method + ":" + req.path;
- if (qop == "auth-int") { A2 += ":" + H(req.body); }
-
- response = H(H(A1) + ":" + auth.at("nonce") + ":" + nc + ":" + cnonce +
- ":" + qop + ":" + H(A2));
- }
-
- auto field = "Digest username=\"" + username + "\", realm=\"" +
- auth.at("realm") + "\", nonce=\"" + auth.at("nonce") +
- "\", uri=\"" + req.path + "\", algorithm=" + algo +
- ", qop=" + qop + ", nc=\"" + nc + "\", cnonce=\"" + cnonce +
- "\", response=\"" + response + "\"";
-
- auto key = is_proxy ? "Proxy-Authorization" : "Authorization";
- return std::make_pair(key, field);
-}
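// The response value built above follows the Digest scheme (RFC 7616):
//
//   A1       = username ":" realm ":" password
//   A2       = method ":" uri            (":" H(body) is appended for auth-int)
//   response = H( H(A1) ":" nonce ":" nc ":" cnonce ":" qop ":" H(A2) )
//
// where H is MD5, SHA-256 or SHA-512, depending on the advertised algorithm.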
-#endif
-
-inline bool parse_www_authenticate(const Response &res,
- std::map<std::string, std::string> &auth,
- bool is_proxy) {
- auto auth_key = is_proxy ? "Proxy-Authenticate" : "WWW-Authenticate";
- if (res.has_header(auth_key)) {
- static auto re = std::regex(R"~((?:(?:,\s*)?(.+?)=(?:"(.*?)"|([^,]*))))~");
- auto s = res.get_header_value(auth_key);
- auto pos = s.find(' ');
- if (pos != std::string::npos) {
- auto type = s.substr(0, pos);
- if (type == "Basic") {
- return false;
- } else if (type == "Digest") {
- s = s.substr(pos + 1);
- auto beg = std::sregex_iterator(s.begin(), s.end(), re);
- for (auto i = beg; i != std::sregex_iterator(); ++i) {
- auto m = *i;
- auto key = s.substr(static_cast<size_t>(m.position(1)),
- static_cast<size_t>(m.length(1)));
- auto val = m.length(2) > 0
- ? s.substr(static_cast<size_t>(m.position(2)),
- static_cast<size_t>(m.length(2)))
- : s.substr(static_cast<size_t>(m.position(3)),
- static_cast<size_t>(m.length(3)));
- auth[key] = val;
- }
- return true;
- }
- }
- }
- return false;
-}
-
-// https://stackoverflow.com/questions/440133/how-do-i-create-a-random-alpha-numeric-string-in-c/440240#answer-440240
-inline std::string random_string(size_t length) {
- auto randchar = []() -> char {
- const char charset[] = "0123456789"
- "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
- "abcdefghijklmnopqrstuvwxyz";
- const size_t max_index = (sizeof(charset) - 1);
- return charset[static_cast<size_t>(rand()) % max_index];
- };
- std::string str(length, 0);
- std::generate_n(str.begin(), length, randchar);
- return str;
-}
-
-class ContentProviderAdapter {
-public:
- explicit ContentProviderAdapter(
- ContentProviderWithoutLength &&content_provider)
- : content_provider_(content_provider) {}
-
- bool operator()(size_t offset, size_t, DataSink &sink) {
- return content_provider_(offset, sink);
- }
-
-private:
- ContentProviderWithoutLength content_provider_;
-};
-
-} // namespace detail
-
-// Header utilities
-inline std::pair<std::string, std::string> make_range_header(Ranges ranges) {
- std::string field = "bytes=";
- auto i = 0;
- for (auto r : ranges) {
- if (i != 0) { field += ", "; }
- if (r.first != -1) { field += std::to_string(r.first); }
- field += '-';
- if (r.second != -1) { field += std::to_string(r.second); }
- i++;
- }
- return std::make_pair("Range", std::move(field));
-}
-
-inline std::pair<std::string, std::string>
-make_basic_authentication_header(const std::string &username,
- const std::string &password,
- bool is_proxy = false) {
- auto field = "Basic " + detail::base64_encode(username + ":" + password);
- auto key = is_proxy ? "Proxy-Authorization" : "Authorization";
- return std::make_pair(key, std::move(field));
-}
-
-inline std::pair<std::string, std::string>
-make_bearer_token_authentication_header(const std::string &token,
- bool is_proxy = false) {
- auto field = "Bearer " + token;
- auto key = is_proxy ? "Proxy-Authorization" : "Authorization";
- return std::make_pair(key, std::move(field));
-}
-
-// Request implementation
-inline bool Request::has_header(const char *key) const {
- return detail::has_header(headers, key);
-}
-
-inline std::string Request::get_header_value(const char *key, size_t id) const {
- return detail::get_header_value(headers, key, id, "");
-}
-
-template <typename T>
-inline T Request::get_header_value(const char *key, size_t id) const {
- return detail::get_header_value<T>(headers, key, id, 0);
-}
-
-inline size_t Request::get_header_value_count(const char *key) const {
- auto r = headers.equal_range(key);
- return static_cast<size_t>(std::distance(r.first, r.second));
-}
-
-inline void Request::set_header(const char *key, const char *val) {
- if (!detail::has_crlf(key) && !detail::has_crlf(val)) {
- headers.emplace(key, val);
- }
-}
-
-inline void Request::set_header(const char *key, const std::string &val) {
- if (!detail::has_crlf(key) && !detail::has_crlf(val.c_str())) {
- headers.emplace(key, val);
- }
-}
-
-inline bool Request::has_param(const char *key) const {
- return params.find(key) != params.end();
-}
-
-inline std::string Request::get_param_value(const char *key, size_t id) const {
- auto rng = params.equal_range(key);
- auto it = rng.first;
- std::advance(it, static_cast<ssize_t>(id));
- if (it != rng.second) { return it->second; }
- return std::string();
-}
-
-inline size_t Request::get_param_value_count(const char *key) const {
- auto r = params.equal_range(key);
- return static_cast<size_t>(std::distance(r.first, r.second));
-}
-
-inline bool Request::is_multipart_form_data() const {
- const auto &content_type = get_header_value("Content-Type");
- return !content_type.find("multipart/form-data");
-}
-
-inline bool Request::has_file(const char *key) const {
- return files.find(key) != files.end();
-}
-
-inline MultipartFormData Request::get_file_value(const char *key) const {
- auto it = files.find(key);
- if (it != files.end()) { return it->second; }
- return MultipartFormData();
-}
-
-// Response implementation
-inline bool Response::has_header(const char *key) const {
- return headers.find(key) != headers.end();
-}
-
-inline std::string Response::get_header_value(const char *key,
- size_t id) const {
- return detail::get_header_value(headers, key, id, "");
-}
-
-template <typename T>
-inline T Response::get_header_value(const char *key, size_t id) const {
- return detail::get_header_value<T>(headers, key, id, 0);
-}
-
-inline size_t Response::get_header_value_count(const char *key) const {
- auto r = headers.equal_range(key);
- return static_cast<size_t>(std::distance(r.first, r.second));
-}
-
-inline void Response::set_header(const char *key, const char *val) {
- if (!detail::has_crlf(key) && !detail::has_crlf(val)) {
- headers.emplace(key, val);
- }
-}
-
-inline void Response::set_header(const char *key, const std::string &val) {
- if (!detail::has_crlf(key) && !detail::has_crlf(val.c_str())) {
- headers.emplace(key, val);
- }
-}
-
-inline void Response::set_redirect(const char *url, int stat) {
- if (!detail::has_crlf(url)) {
- set_header("Location", url);
- if (300 <= stat && stat < 400) {
- this->status = stat;
- } else {
- this->status = 302;
- }
- }
-}
-
-inline void Response::set_redirect(const std::string &url, int stat) {
- set_redirect(url.c_str(), stat);
-}
-
-inline void Response::set_content(const char *s, size_t n,
- const char *content_type) {
- body.assign(s, n);
- set_header("Content-Type", content_type);
-}
-
-inline void Response::set_content(std::string s, const char *content_type) {
- body = std::move(s);
- set_header("Content-Type", content_type);
-}
-
-inline void
-Response::set_content_provider(size_t in_length, const char *content_type,
- ContentProvider provider,
- const std::function<void()> &resource_releaser) {
- assert(in_length > 0);
- set_header("Content-Type", content_type);
- content_length_ = in_length;
- content_provider_ = std::move(provider);
- content_provider_resource_releaser_ = resource_releaser;
- is_chunked_content_provider = false;
-}
-
-inline void
-Response::set_content_provider(const char *content_type,
- ContentProviderWithoutLength provider,
- const std::function<void()> &resource_releaser) {
- set_header("Content-Type", content_type);
- content_length_ = 0;
- content_provider_ = detail::ContentProviderAdapter(std::move(provider));
- content_provider_resource_releaser_ = resource_releaser;
- is_chunked_content_provider = false;
-}
-
-inline void Response::set_chunked_content_provider(
- const char *content_type, ContentProviderWithoutLength provider,
- const std::function<void()> &resource_releaser) {
- set_header("Content-Type", content_type);
- content_length_ = 0;
- content_provider_ = detail::ContentProviderAdapter(std::move(provider));
- content_provider_resource_releaser_ = resource_releaser;
- is_chunked_content_provider = true;
-}
-
-// Stream implementation
-inline ssize_t Stream::write(const char *ptr) {
- return write(ptr, strlen(ptr));
-}
-
-inline ssize_t Stream::write(const std::string &s) {
- return write(s.data(), s.size());
-}
-
-template <typename... Args>
-inline ssize_t Stream::write_format(const char *fmt, const Args &... args) {
- const auto bufsiz = 2048;
- std::array<char, bufsiz> buf;
-
-#if defined(_MSC_VER) && _MSC_VER < 1900
- auto sn = _snprintf_s(buf.data(), bufsiz - 1, buf.size() - 1, fmt, args...);
-#else
- auto sn = snprintf(buf.data(), buf.size() - 1, fmt, args...);
-#endif
- if (sn <= 0) { return sn; }
-
- auto n = static_cast<size_t>(sn);
-
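-  // If the stack buffer was too small, retry with a heap buffer that doubles until the output fits.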
- if (n >= buf.size() - 1) {
-    std::vector<char> growable_buf(buf.size());
-
-    while (n >= growable_buf.size() - 1) {
-      growable_buf.resize(growable_buf.size() * 2);
-#if defined(_MSC_VER) && _MSC_VER < 1900
-      n = static_cast<size_t>(_snprintf_s(&growable_buf[0], growable_buf.size(),
-                                          growable_buf.size() - 1, fmt,
-                                          args...));
-#else
-      n = static_cast<size_t>(
-          snprintf(&growable_buf[0], growable_buf.size() - 1, fmt, args...));
-#endif
-    }
-    return write(&growable_buf[0], n);
- } else {
- return write(buf.data(), n);
- }
-}
-
-namespace detail {
-
-// Socket stream implementation
-inline SocketStream::SocketStream(socket_t sock, time_t read_timeout_sec,
- time_t read_timeout_usec,
- time_t write_timeout_sec,
- time_t write_timeout_usec)
- : sock_(sock), read_timeout_sec_(read_timeout_sec),
- read_timeout_usec_(read_timeout_usec),
- write_timeout_sec_(write_timeout_sec),
- write_timeout_usec_(write_timeout_usec) {}
-
-inline SocketStream::~SocketStream() {}
-
-inline bool SocketStream::is_readable() const {
- return select_read(sock_, read_timeout_sec_, read_timeout_usec_) > 0;
-}
-
-inline bool SocketStream::is_writable() const {
- return select_write(sock_, write_timeout_sec_, write_timeout_usec_) > 0;
-}
-
-inline ssize_t SocketStream::read(char *ptr, size_t size) {
- if (!is_readable()) { return -1; }
-
-#ifdef _WIN32
- if (size > static_cast<size_t>((std::numeric_limits<int>::max)())) {
- return -1;
- }
- return recv(sock_, ptr, static_cast<int>(size), 0);
-#else
- return handle_EINTR([&]() { return recv(sock_, ptr, size, 0); });
-#endif
-}
-
-inline ssize_t SocketStream::write(const char *ptr, size_t size) {
- if (!is_writable()) { return -1; }
-
-#ifdef _WIN32
- if (size > static_cast<size_t>((std::numeric_limits<int>::max)())) {
- return -1;
- }
- return send(sock_, ptr, static_cast<int>(size), 0);
-#else
- return handle_EINTR([&]() { return send(sock_, ptr, size, 0); });
-#endif
-}
-
-inline void SocketStream::get_remote_ip_and_port(std::string &ip,
- int &port) const {
- return detail::get_remote_ip_and_port(sock_, ip, port);
-}
-
-// Buffer stream implementation
-inline bool BufferStream::is_readable() const { return true; }
-
-inline bool BufferStream::is_writable() const { return true; }
-
-inline ssize_t BufferStream::read(char *ptr, size_t size) {
-#if defined(_MSC_VER) && _MSC_VER <= 1900
- auto len_read = buffer._Copy_s(ptr, size, size, position);
-#else
- auto len_read = buffer.copy(ptr, size, position);
-#endif
- position += static_cast<size_t>(len_read);
- return static_cast<ssize_t>(len_read);
-}
-
-inline ssize_t BufferStream::write(const char *ptr, size_t size) {
- buffer.append(ptr, size);
- return static_cast<ssize_t>(size);
-}
-
-inline void BufferStream::get_remote_ip_and_port(std::string & /*ip*/,
- int & /*port*/) const {}
-
-inline const std::string &BufferStream::get_buffer() const { return buffer; }
-
-} // namespace detail
-
-// HTTP server implementation
-inline Server::Server()
- : new_task_queue(
- [] { return new ThreadPool(CPPHTTPLIB_THREAD_POOL_COUNT); }),
- svr_sock_(INVALID_SOCKET), is_running_(false) {
-#ifndef _WIN32
- signal(SIGPIPE, SIG_IGN);
-#endif
-}
-
-inline Server::~Server() {}
-
-inline Server &Server::Get(const char *pattern, Handler handler) {
- get_handlers_.push_back(
- std::make_pair(std::regex(pattern), std::move(handler)));
- return *this;
-}
-
-inline Server &Server::Post(const char *pattern, Handler handler) {
- post_handlers_.push_back(
- std::make_pair(std::regex(pattern), std::move(handler)));
- return *this;
-}
-
-inline Server &Server::Post(const char *pattern,
- HandlerWithContentReader handler) {
- post_handlers_for_content_reader_.push_back(
- std::make_pair(std::regex(pattern), std::move(handler)));
- return *this;
-}
-
-inline Server &Server::Put(const char *pattern, Handler handler) {
- put_handlers_.push_back(
- std::make_pair(std::regex(pattern), std::move(handler)));
- return *this;
-}
-
-inline Server &Server::Put(const char *pattern,
- HandlerWithContentReader handler) {
- put_handlers_for_content_reader_.push_back(
- std::make_pair(std::regex(pattern), std::move(handler)));
- return *this;
-}
-
-inline Server &Server::Patch(const char *pattern, Handler handler) {
- patch_handlers_.push_back(
- std::make_pair(std::regex(pattern), std::move(handler)));
- return *this;
-}
-
-inline Server &Server::Patch(const char *pattern,
- HandlerWithContentReader handler) {
- patch_handlers_for_content_reader_.push_back(
- std::make_pair(std::regex(pattern), std::move(handler)));
- return *this;
-}
-
-inline Server &Server::Delete(const char *pattern, Handler handler) {
- delete_handlers_.push_back(
- std::make_pair(std::regex(pattern), std::move(handler)));
- return *this;
-}
-
-inline Server &Server::Delete(const char *pattern,
- HandlerWithContentReader handler) {
- delete_handlers_for_content_reader_.push_back(
- std::make_pair(std::regex(pattern), std::move(handler)));
- return *this;
-}
-
-inline Server &Server::Options(const char *pattern, Handler handler) {
- options_handlers_.push_back(
- std::make_pair(std::regex(pattern), std::move(handler)));
- return *this;
-}
-
-inline bool Server::set_base_dir(const char *dir, const char *mount_point) {
- return set_mount_point(mount_point, dir);
-}
-
-inline bool Server::set_mount_point(const char *mount_point, const char *dir,
- Headers headers) {
- if (detail::is_dir(dir)) {
- std::string mnt = mount_point ? mount_point : "/";
- if (!mnt.empty() && mnt[0] == '/') {
- base_dirs_.push_back({mnt, dir, std::move(headers)});
- return true;
- }
- }
- return false;
-}
-
-inline bool Server::remove_mount_point(const char *mount_point) {
- for (auto it = base_dirs_.begin(); it != base_dirs_.end(); ++it) {
- if (it->mount_point == mount_point) {
- base_dirs_.erase(it);
- return true;
- }
- }
- return false;
-}
-
-inline void Server::set_file_extension_and_mimetype_mapping(const char *ext,
- const char *mime) {
- file_extension_and_mimetype_map_[ext] = mime;
-}
-
-inline void Server::set_file_request_handler(Handler handler) {
- file_request_handler_ = std::move(handler);
-}
-
-inline void Server::set_error_handler(Handler handler) {
- error_handler_ = std::move(handler);
-}
-
-inline void Server::set_tcp_nodelay(bool on) { tcp_nodelay_ = on; }
-
-inline void Server::set_socket_options(SocketOptions socket_options) {
- socket_options_ = std::move(socket_options);
-}
-
-inline void Server::set_logger(Logger logger) { logger_ = std::move(logger); }
-
-inline void
-Server::set_expect_100_continue_handler(Expect100ContinueHandler handler) {
- expect_100_continue_handler_ = std::move(handler);
-}
-
-inline void Server::set_keep_alive_max_count(size_t count) {
- keep_alive_max_count_ = count;
-}
-
-inline void Server::set_keep_alive_timeout(time_t sec) {
- keep_alive_timeout_sec_ = sec;
-}
-
-inline void Server::set_read_timeout(time_t sec, time_t usec) {
- read_timeout_sec_ = sec;
- read_timeout_usec_ = usec;
-}
-
-inline void Server::set_write_timeout(time_t sec, time_t usec) {
- write_timeout_sec_ = sec;
- write_timeout_usec_ = usec;
-}
-
-inline void Server::set_idle_interval(time_t sec, time_t usec) {
- idle_interval_sec_ = sec;
- idle_interval_usec_ = usec;
-}
-
-inline void Server::set_payload_max_length(size_t length) {
- payload_max_length_ = length;
-}
-
-inline bool Server::bind_to_port(const char *host, int port, int socket_flags) {
- if (bind_internal(host, port, socket_flags) < 0) return false;
- return true;
-}
-
-inline int Server::bind_to_any_port(const char *host, int socket_flags) {
- return bind_internal(host, 0, socket_flags);
-}
-
-inline bool Server::listen_after_bind() { return listen_internal(); }
-
-inline bool Server::listen(const char *host, int port, int socket_flags) {
- return bind_to_port(host, port, socket_flags) && listen_internal();
-}
-
-inline bool Server::is_running() const { return is_running_; }
-
-inline void Server::stop() {
- if (is_running_) {
- assert(svr_sock_ != INVALID_SOCKET);
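-    // Swap the listening socket out atomically so the accept loop in listen_internal() exits.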
- std::atomic<socket_t> sock(svr_sock_.exchange(INVALID_SOCKET));
- detail::shutdown_socket(sock);
- detail::close_socket(sock);
- }
-}
-
-inline bool Server::parse_request_line(const char *s, Request &req) {
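-  // Capture groups: 1 = method, 2 = request-target, 3 = path, 4 = query string, 5 = HTTP version.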
- const static std::regex re(
- "(GET|HEAD|POST|PUT|DELETE|CONNECT|OPTIONS|TRACE|PATCH|PRI) "
- "(([^?]+)(?:\\?(.*?))?) (HTTP/1\\.[01])\r\n");
-
- std::cmatch m;
- if (std::regex_match(s, m, re)) {
- req.version = std::string(m[5]);
- req.method = std::string(m[1]);
- req.target = std::string(m[2]);
- req.path = detail::decode_url(m[3], false);
-
- // Parse query text
- auto len = std::distance(m[4].first, m[4].second);
- if (len > 0) { detail::parse_query_text(m[4], req.params); }
-
- return true;
- }
-
- return false;
-}
-
-inline bool Server::write_response(Stream &strm, bool close_connection,
- const Request &req, Response &res) {
- assert(res.status != -1);
-
- if (400 <= res.status && error_handler_) { error_handler_(req, res); }
-
- detail::BufferStream bstrm;
-
- // Response line
- if (!bstrm.write_format("HTTP/1.1 %d %s\r\n", res.status,
- detail::status_message(res.status))) {
- return false;
- }
-
- // Headers
- if (close_connection || req.get_header_value("Connection") == "close") {
- res.set_header("Connection", "close");
- } else {
- std::stringstream ss;
- ss << "timeout=" << keep_alive_timeout_sec_
- << ", max=" << keep_alive_max_count_;
- res.set_header("Keep-Alive", ss.str());
- }
-
- if (!res.has_header("Content-Type") &&
- (!res.body.empty() || res.content_length_ > 0 || res.content_provider_)) {
- res.set_header("Content-Type", "text/plain");
- }
-
- if (!res.has_header("Accept-Ranges") && req.method == "HEAD") {
- res.set_header("Accept-Ranges", "bytes");
- }
-
- std::string content_type;
- std::string boundary;
-
- if (req.ranges.size() > 1) {
- boundary = detail::make_multipart_data_boundary();
-
- auto it = res.headers.find("Content-Type");
- if (it != res.headers.end()) {
- content_type = it->second;
- res.headers.erase(it);
- }
-
- res.headers.emplace("Content-Type",
- "multipart/byteranges; boundary=" + boundary);
- }
-
- auto type = detail::encoding_type(req, res);
-
- if (res.body.empty()) {
- if (res.content_length_ > 0) {
- size_t length = 0;
- if (req.ranges.empty()) {
- length = res.content_length_;
- } else if (req.ranges.size() == 1) {
- auto offsets =
- detail::get_range_offset_and_length(req, res.content_length_, 0);
- auto offset = offsets.first;
- length = offsets.second;
- auto content_range = detail::make_content_range_header_field(
- offset, length, res.content_length_);
- res.set_header("Content-Range", content_range);
- } else {
- length = detail::get_multipart_ranges_data_length(req, res, boundary,
- content_type);
- }
- res.set_header("Content-Length", std::to_string(length));
- } else {
- if (res.content_provider_) {
- if (res.is_chunked_content_provider) {
- res.set_header("Transfer-Encoding", "chunked");
- if (type == detail::EncodingType::Gzip) {
- res.set_header("Content-Encoding", "gzip");
- } else if (type == detail::EncodingType::Brotli) {
- res.set_header("Content-Encoding", "br");
- }
- }
- } else {
- res.set_header("Content-Length", "0");
- }
- }
- } else {
- if (req.ranges.empty()) {
- ;
- } else if (req.ranges.size() == 1) {
- auto offsets =
- detail::get_range_offset_and_length(req, res.body.size(), 0);
- auto offset = offsets.first;
- auto length = offsets.second;
- auto content_range = detail::make_content_range_header_field(
- offset, length, res.body.size());
- res.set_header("Content-Range", content_range);
- res.body = res.body.substr(offset, length);
- } else {
- res.body =
- detail::make_multipart_ranges_data(req, res, boundary, content_type);
- }
-
- if (type != detail::EncodingType::None) {
- std::unique_ptr<detail::compressor> compressor;
-
- if (type == detail::EncodingType::Gzip) {
-#ifdef CPPHTTPLIB_ZLIB_SUPPORT
- compressor = detail::make_unique<detail::gzip_compressor>();
- res.set_header("Content-Encoding", "gzip");
-#endif
- } else if (type == detail::EncodingType::Brotli) {
-#ifdef CPPHTTPLIB_BROTLI_SUPPORT
- compressor = detail::make_unique<detail::brotli_compressor>();
-        res.set_header("Content-Encoding", "br");
-#endif
- }
-
- if (compressor) {
- std::string compressed;
-
- if (!compressor->compress(res.body.data(), res.body.size(), true,
- [&](const char *data, size_t data_len) {
- compressed.append(data, data_len);
- return true;
- })) {
- return false;
- }
-
- res.body.swap(compressed);
- }
- }
-
- auto length = std::to_string(res.body.size());
- res.set_header("Content-Length", length);
- }
-
- if (!detail::write_headers(bstrm, res, Headers())) { return false; }
-
- // Flush buffer
- auto &data = bstrm.get_buffer();
- strm.write(data.data(), data.size());
-
- // Body
- auto ret = true;
- if (req.method != "HEAD") {
- if (!res.body.empty()) {
- if (!strm.write(res.body)) { ret = false; }
- } else if (res.content_provider_) {
- if (!write_content_with_provider(strm, req, res, boundary,
- content_type)) {
- ret = false;
- }
- }
- }
-
- // Log
- if (logger_) { logger_(req, res); }
-
- return ret;
-}
-
-inline bool
-Server::write_content_with_provider(Stream &strm, const Request &req,
- Response &res, const std::string &boundary,
- const std::string &content_type) {
- auto is_shutting_down = [this]() {
- return this->svr_sock_ == INVALID_SOCKET;
- };
-
- if (res.content_length_ > 0) {
- if (req.ranges.empty()) {
- if (detail::write_content(strm, res.content_provider_, 0,
- res.content_length_, is_shutting_down) < 0) {
- return false;
- }
- } else if (req.ranges.size() == 1) {
- auto offsets =
- detail::get_range_offset_and_length(req, res.content_length_, 0);
- auto offset = offsets.first;
- auto length = offsets.second;
- if (detail::write_content(strm, res.content_provider_, offset, length,
- is_shutting_down) < 0) {
- return false;
- }
- } else {
- if (!detail::write_multipart_ranges_data(
- strm, req, res, boundary, content_type, is_shutting_down)) {
- return false;
- }
- }
- } else {
- if (res.is_chunked_content_provider) {
- auto type = detail::encoding_type(req, res);
-
- std::unique_ptr<detail::compressor> compressor;
- if (type == detail::EncodingType::Gzip) {
-#ifdef CPPHTTPLIB_ZLIB_SUPPORT
- compressor = detail::make_unique<detail::gzip_compressor>();
-#endif
- } else if (type == detail::EncodingType::Brotli) {
-#ifdef CPPHTTPLIB_BROTLI_SUPPORT
- compressor = detail::make_unique<detail::brotli_compressor>();
-#endif
- } else {
- compressor = detail::make_unique<detail::nocompressor>();
- }
- assert(compressor != nullptr);
-
- if (detail::write_content_chunked(strm, res.content_provider_,
- is_shutting_down, *compressor) < 0) {
- return false;
- }
- } else {
- if (detail::write_content_without_length(strm, res.content_provider_,
- is_shutting_down) < 0) {
- return false;
- }
- }
- }
- return true;
-}
-
-inline bool Server::read_content(Stream &strm, Request &req, Response &res) {
- MultipartFormDataMap::iterator cur;
- if (read_content_core(
- strm, req, res,
- // Regular
- [&](const char *buf, size_t n) {
- if (req.body.size() + n > req.body.max_size()) { return false; }
- req.body.append(buf, n);
- return true;
- },
- // Multipart
- [&](const MultipartFormData &file) {
- cur = req.files.emplace(file.name, file);
- return true;
- },
- [&](const char *buf, size_t n) {
- auto &content = cur->second.content;
- if (content.size() + n > content.max_size()) { return false; }
- content.append(buf, n);
- return true;
- })) {
- const auto &content_type = req.get_header_value("Content-Type");
- if (!content_type.find("application/x-www-form-urlencoded")) {
- detail::parse_query_text(req.body, req.params);
- }
- return true;
- }
- return false;
-}
-
-inline bool Server::read_content_with_content_receiver(
- Stream &strm, Request &req, Response &res, ContentReceiver receiver,
- MultipartContentHeader multipart_header,
- ContentReceiver multipart_receiver) {
- return read_content_core(strm, req, res, std::move(receiver),
- std::move(multipart_header),
- std::move(multipart_receiver));
-}
-
-inline bool Server::read_content_core(Stream &strm, Request &req, Response &res,
- ContentReceiver receiver,
-                                      MultipartContentHeader multipart_header,
- ContentReceiver multipart_receiver) {
- detail::MultipartFormDataParser multipart_form_data_parser;
- ContentReceiverWithProgress out;
-
- if (req.is_multipart_form_data()) {
- const auto &content_type = req.get_header_value("Content-Type");
- std::string boundary;
- if (!detail::parse_multipart_boundary(content_type, boundary)) {
- res.status = 400;
- return false;
- }
-
- multipart_form_data_parser.set_boundary(std::move(boundary));
- out = [&](const char *buf, size_t n, uint64_t /*off*/, uint64_t /*len*/) {
- /* For debug
- size_t pos = 0;
- while (pos < n) {
- auto read_size = std::min<size_t>(1, n - pos);
- auto ret = multipart_form_data_parser.parse(
-            buf + pos, read_size, multipart_receiver, multipart_header);
- if (!ret) { return false; }
- pos += read_size;
- }
- return true;
- */
- return multipart_form_data_parser.parse(buf, n, multipart_receiver,
-                                            multipart_header);
- };
- } else {
- out = [receiver](const char *buf, size_t n, uint64_t /*off*/,
- uint64_t /*len*/) { return receiver(buf, n); };
- }
-
- if (req.method == "DELETE" && !req.has_header("Content-Length")) {
- return true;
- }
-
- if (!detail::read_content(strm, req, payload_max_length_, res.status, nullptr,
- out, true)) {
- return false;
- }
-
- if (req.is_multipart_form_data()) {
- if (!multipart_form_data_parser.is_valid()) {
- res.status = 400;
- return false;
- }
- }
-
- return true;
-}
-
-inline bool Server::handle_file_request(Request &req, Response &res,
- bool head) {
- for (const auto &entry : base_dirs_) {
- // Prefix match
- if (!req.path.compare(0, entry.mount_point.size(), entry.mount_point)) {
- std::string sub_path = "/" + req.path.substr(entry.mount_point.size());
- if (detail::is_valid_path(sub_path)) {
- auto path = entry.base_dir + sub_path;
- if (path.back() == '/') { path += "index.html"; }
-
- if (detail::is_file(path)) {
- detail::read_file(path, res.body);
- auto type =
- detail::find_content_type(path, file_extension_and_mimetype_map_);
- if (type) { res.set_header("Content-Type", type); }
- for (const auto &kv : entry.headers) {
- res.set_header(kv.first.c_str(), kv.second);
- }
- res.status = 200;
- if (!head && file_request_handler_) {
- file_request_handler_(req, res);
- }
- return true;
- }
- }
- }
- }
- return false;
-}
-
-inline socket_t
-Server::create_server_socket(const char *host, int port, int socket_flags,
- SocketOptions socket_options) const {
- return detail::create_socket(
- host, port, socket_flags, tcp_nodelay_, std::move(socket_options),
- [](socket_t sock, struct addrinfo &ai) -> bool {
- if (::bind(sock, ai.ai_addr, static_cast<socklen_t>(ai.ai_addrlen))) {
- return false;
- }
-        if (::listen(sock, 5)) { // Backlog of 5 pending connections
- return false;
- }
- return true;
- });
-}
-
-inline int Server::bind_internal(const char *host, int port, int socket_flags) {
- if (!is_valid()) { return -1; }
-
- svr_sock_ = create_server_socket(host, port, socket_flags, socket_options_);
- if (svr_sock_ == INVALID_SOCKET) { return -1; }
-
- if (port == 0) {
- struct sockaddr_storage addr;
- socklen_t addr_len = sizeof(addr);
- if (getsockname(svr_sock_, reinterpret_cast<struct sockaddr *>(&addr),
- &addr_len) == -1) {
- return -1;
- }
- if (addr.ss_family == AF_INET) {
- return ntohs(reinterpret_cast<struct sockaddr_in *>(&addr)->sin_port);
- } else if (addr.ss_family == AF_INET6) {
- return ntohs(reinterpret_cast<struct sockaddr_in6 *>(&addr)->sin6_port);
- } else {
- return -1;
- }
- } else {
- return port;
- }
-}
-
-inline bool Server::listen_internal() {
- auto ret = true;
- is_running_ = true;
-
- {
- std::unique_ptr<TaskQueue> task_queue(new_task_queue());
-
- while (svr_sock_ != INVALID_SOCKET) {
-#ifndef _WIN32
- if (idle_interval_sec_ > 0 || idle_interval_usec_ > 0) {
-#endif
- auto val = detail::select_read(svr_sock_, idle_interval_sec_,
- idle_interval_usec_);
- if (val == 0) { // Timeout
- task_queue->on_idle();
- continue;
- }
-#ifndef _WIN32
- }
-#endif
- socket_t sock = accept(svr_sock_, nullptr, nullptr);
-
- if (sock == INVALID_SOCKET) {
- if (errno == EMFILE) {
- // The per-process limit of open file descriptors has been reached.
- // Try to accept new connections after a short sleep.
- std::this_thread::sleep_for(std::chrono::milliseconds(1));
- continue;
- }
- if (svr_sock_ != INVALID_SOCKET) {
- detail::close_socket(svr_sock_);
- ret = false;
- } else {
- ; // The server socket was closed by user.
- }
- break;
- }
-
-#if __cplusplus > 201703L
- task_queue->enqueue([=, this]() { process_and_close_socket(sock); });
-#else
- task_queue->enqueue([=]() { process_and_close_socket(sock); });
-#endif
- }
-
- task_queue->shutdown();
- }
-
- is_running_ = false;
- return ret;
-}
-
-inline bool Server::routing(Request &req, Response &res, Stream &strm) {
- // File handler
- bool is_head_request = req.method == "HEAD";
- if ((req.method == "GET" || is_head_request) &&
- handle_file_request(req, res, is_head_request)) {
- return true;
- }
-
- if (detail::expect_content(req)) {
- // Content reader handler
- {
- ContentReader reader(
- [&](ContentReceiver receiver) {
- return read_content_with_content_receiver(
- strm, req, res, std::move(receiver), nullptr, nullptr);
- },
- [&](MultipartContentHeader header, ContentReceiver receiver) {
- return read_content_with_content_receiver(strm, req, res, nullptr,
- std::move(header),
- std::move(receiver));
- });
-
- if (req.method == "POST") {
- if (dispatch_request_for_content_reader(
- req, res, std::move(reader),
- post_handlers_for_content_reader_)) {
- return true;
- }
- } else if (req.method == "PUT") {
- if (dispatch_request_for_content_reader(
- req, res, std::move(reader),
- put_handlers_for_content_reader_)) {
- return true;
- }
- } else if (req.method == "PATCH") {
- if (dispatch_request_for_content_reader(
- req, res, std::move(reader),
- patch_handlers_for_content_reader_)) {
- return true;
- }
- } else if (req.method == "DELETE") {
- if (dispatch_request_for_content_reader(
- req, res, std::move(reader),
- delete_handlers_for_content_reader_)) {
- return true;
- }
- }
- }
-
- // Read content into `req.body`
- if (!read_content(strm, req, res)) { return false; }
- }
-
- // Regular handler
- if (req.method == "GET" || req.method == "HEAD") {
- return dispatch_request(req, res, get_handlers_);
- } else if (req.method == "POST") {
- return dispatch_request(req, res, post_handlers_);
- } else if (req.method == "PUT") {
- return dispatch_request(req, res, put_handlers_);
- } else if (req.method == "DELETE") {
- return dispatch_request(req, res, delete_handlers_);
- } else if (req.method == "OPTIONS") {
- return dispatch_request(req, res, options_handlers_);
- } else if (req.method == "PATCH") {
- return dispatch_request(req, res, patch_handlers_);
- }
-
- res.status = 400;
- return false;
-}
-
-inline bool Server::dispatch_request(Request &req, Response &res,
- const Handlers &handlers) {
- try {
- for (const auto &x : handlers) {
- const auto &pattern = x.first;
- const auto &handler = x.second;
-
- if (std::regex_match(req.path, req.matches, pattern)) {
- handler(req, res);
- return true;
- }
- }
- } catch (const std::exception &ex) {
- res.status = 500;
- res.set_header("EXCEPTION_WHAT", ex.what());
- } catch (...) {
- res.status = 500;
- res.set_header("EXCEPTION_WHAT", "UNKNOWN");
- }
- return false;
-}
-
-inline bool Server::dispatch_request_for_content_reader(
- Request &req, Response &res, ContentReader content_reader,
- const HandlersForContentReader &handlers) {
- for (const auto &x : handlers) {
- const auto &pattern = x.first;
- const auto &handler = x.second;
-
- if (std::regex_match(req.path, req.matches, pattern)) {
- handler(req, res, content_reader);
- return true;
- }
- }
- return false;
-}
-
-inline bool
-Server::process_request(Stream &strm, bool close_connection,
- bool &connection_closed,
- const std::function<void(Request &)> &setup_request) {
- std::array<char, 2048> buf{};
-
- detail::stream_line_reader line_reader(strm, buf.data(), buf.size());
-
- // Connection has been closed on client
- if (!line_reader.getline()) { return false; }
-
- Request req;
- Response res;
-
- res.version = "HTTP/1.1";
-
- // Check if the request URI doesn't exceed the limit
- if (line_reader.size() > CPPHTTPLIB_REQUEST_URI_MAX_LENGTH) {
- Headers dummy;
- detail::read_headers(strm, dummy);
- res.status = 414;
- return write_response(strm, close_connection, req, res);
- }
-
- // Request line and headers
- if (!parse_request_line(line_reader.ptr(), req) ||
- !detail::read_headers(strm, req.headers)) {
- res.status = 400;
- return write_response(strm, close_connection, req, res);
- }
-
- if (req.get_header_value("Connection") == "close") {
- connection_closed = true;
- }
-
- if (req.version == "HTTP/1.0" &&
- req.get_header_value("Connection") != "Keep-Alive") {
- connection_closed = true;
- }
-
- strm.get_remote_ip_and_port(req.remote_addr, req.remote_port);
- req.set_header("REMOTE_ADDR", req.remote_addr);
- req.set_header("REMOTE_PORT", std::to_string(req.remote_port));
-
- if (req.has_header("Range")) {
- const auto &range_header_value = req.get_header_value("Range");
- if (!detail::parse_range_header(range_header_value, req.ranges)) {
- res.status = 416;
- return write_response(strm, close_connection, req, res);
- }
- }
-
- if (setup_request) { setup_request(req); }
-
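-  // Handle "Expect: 100-continue": write an interim 100/417 status line, or send the handler's response and stop.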
- if (req.get_header_value("Expect") == "100-continue") {
- auto status = 100;
- if (expect_100_continue_handler_) {
- status = expect_100_continue_handler_(req, res);
- }
- switch (status) {
- case 100:
- case 417:
- strm.write_format("HTTP/1.1 %d %s\r\n\r\n", status,
- detail::status_message(status));
- break;
- default: return write_response(strm, close_connection, req, res);
- }
- }
-
-  // Routing
- if (routing(req, res, strm)) {
- if (res.status == -1) { res.status = req.ranges.empty() ? 200 : 206; }
- } else {
- if (res.status == -1) { res.status = 404; }
- }
-
- return write_response(strm, close_connection, req, res);
-}
-
-inline bool Server::is_valid() const { return true; }
-
-inline bool Server::process_and_close_socket(socket_t sock) {
- auto ret = detail::process_server_socket(
- sock, keep_alive_max_count_, keep_alive_timeout_sec_, read_timeout_sec_,
- read_timeout_usec_, write_timeout_sec_, write_timeout_usec_,
- [this](Stream &strm, bool close_connection, bool &connection_closed) {
- return process_request(strm, close_connection, connection_closed,
- nullptr);
- });
-
- detail::shutdown_socket(sock);
- detail::close_socket(sock);
- return ret;
-}
-
-// HTTP client implementation
-inline ClientImpl::ClientImpl(const std::string &host)
- : ClientImpl(host, 80, std::string(), std::string()) {}
-
-inline ClientImpl::ClientImpl(const std::string &host, int port)
- : ClientImpl(host, port, std::string(), std::string()) {}
-
-inline ClientImpl::ClientImpl(const std::string &host, int port,
- const std::string &client_cert_path,
- const std::string &client_key_path)
- : error_(Error::Success), host_(host), port_(port),
- host_and_port_(host_ + ":" + std::to_string(port_)),
- client_cert_path_(client_cert_path), client_key_path_(client_key_path) {}
-
-inline ClientImpl::~ClientImpl() { lock_socket_and_shutdown_and_close(); }
-
-inline bool ClientImpl::is_valid() const { return true; }
-
-inline Error ClientImpl::get_last_error() const { return error_; }
-
-inline void ClientImpl::copy_settings(const ClientImpl &rhs) {
- client_cert_path_ = rhs.client_cert_path_;
- client_key_path_ = rhs.client_key_path_;
- connection_timeout_sec_ = rhs.connection_timeout_sec_;
- read_timeout_sec_ = rhs.read_timeout_sec_;
- read_timeout_usec_ = rhs.read_timeout_usec_;
- write_timeout_sec_ = rhs.write_timeout_sec_;
- write_timeout_usec_ = rhs.write_timeout_usec_;
- basic_auth_username_ = rhs.basic_auth_username_;
- basic_auth_password_ = rhs.basic_auth_password_;
- bearer_token_auth_token_ = rhs.bearer_token_auth_token_;
-#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
- digest_auth_username_ = rhs.digest_auth_username_;
- digest_auth_password_ = rhs.digest_auth_password_;
-#endif
- keep_alive_ = rhs.keep_alive_;
- follow_location_ = rhs.follow_location_;
- tcp_nodelay_ = rhs.tcp_nodelay_;
- socket_options_ = rhs.socket_options_;
- compress_ = rhs.compress_;
- decompress_ = rhs.decompress_;
- interface_ = rhs.interface_;
- proxy_host_ = rhs.proxy_host_;
- proxy_port_ = rhs.proxy_port_;
- proxy_basic_auth_username_ = rhs.proxy_basic_auth_username_;
- proxy_basic_auth_password_ = rhs.proxy_basic_auth_password_;
- proxy_bearer_token_auth_token_ = rhs.proxy_bearer_token_auth_token_;
-#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
- proxy_digest_auth_username_ = rhs.proxy_digest_auth_username_;
- proxy_digest_auth_password_ = rhs.proxy_digest_auth_password_;
-#endif
-#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
- server_certificate_verification_ = rhs.server_certificate_verification_;
-#endif
- logger_ = rhs.logger_;
-}
-
-inline socket_t ClientImpl::create_client_socket() const {
- if (!proxy_host_.empty() && proxy_port_ != -1) {
- return detail::create_client_socket(
- proxy_host_.c_str(), proxy_port_, tcp_nodelay_, socket_options_,
- connection_timeout_sec_, connection_timeout_usec_, interface_, error_);
- }
- return detail::create_client_socket(
- host_.c_str(), port_, tcp_nodelay_, socket_options_,
- connection_timeout_sec_, connection_timeout_usec_, interface_, error_);
-}
-
-inline bool ClientImpl::create_and_connect_socket(Socket &socket) {
- auto sock = create_client_socket();
- if (sock == INVALID_SOCKET) { return false; }
- socket.sock = sock;
- return true;
-}
-
-inline void ClientImpl::shutdown_ssl(Socket &socket, bool shutdown_gracefully) {
- (void)socket;
- (void)shutdown_gracefully;
-  // If there are any requests in flight from other threads, this is a data
-  // race, because individual SSL* objects are not thread-safe.
- assert(socket_requests_in_flight_ == 0 ||
- socket_requests_are_from_thread_ == std::this_thread::get_id());
-}
-
-inline void ClientImpl::shutdown_socket(Socket &socket) {
- if (socket.sock == INVALID_SOCKET)
- return;
- detail::shutdown_socket(socket.sock);
-}
-
-inline void ClientImpl::close_socket(Socket &socket) {
- // If there are requests in flight in another thread, usually closing
- // the socket will be fine and they will simply receive an error when
- // using the closed socket, but it is still a bug since rarely the OS
- // may reassign the socket id to be used for a new socket, and then
- // suddenly they will be operating on a live socket that is different
- // than the one they intended!
- assert(socket_requests_in_flight_ == 0 ||
- socket_requests_are_from_thread_ == std::this_thread::get_id());
- // It is also a bug if this happens while SSL is still active
-#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
- assert(socket.ssl == nullptr);
-#endif
- if (socket.sock == INVALID_SOCKET)
- return;
- detail::close_socket(socket.sock);
- socket.sock = INVALID_SOCKET;
-}
-
-inline void ClientImpl::lock_socket_and_shutdown_and_close() {
- std::lock_guard<std::mutex> guard(socket_mutex_);
- shutdown_ssl(socket_, true);
- shutdown_socket(socket_);
- close_socket(socket_);
-}
-
-inline bool ClientImpl::read_response_line(Stream &strm, Response &res) {
- std::array<char, 2048> buf;
-
- detail::stream_line_reader line_reader(strm, buf.data(), buf.size());
-
- if (!line_reader.getline()) { return false; }
-
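-  // Capture groups: 1 = HTTP version, 2 = status code, 3 = reason phrase.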
- const static std::regex re("(HTTP/1\\.[01]) (\\d+) (.*?)\r\n");
-
- std::cmatch m;
- if (!std::regex_match(line_reader.ptr(), m, re)) { return false; }
- res.version = std::string(m[1]);
- res.status = std::stoi(std::string(m[2]));
- res.reason = std::string(m[3]);
-
- // Ignore '100 Continue'
- while (res.status == 100) {
- if (!line_reader.getline()) { return false; } // CRLF
- if (!line_reader.getline()) { return false; } // next response line
-
- if (!std::regex_match(line_reader.ptr(), m, re)) { return false; }
- res.version = std::string(m[1]);
- res.status = std::stoi(std::string(m[2]));
- res.reason = std::string(m[3]);
- }
-
- return true;
-}
-
-inline bool ClientImpl::send(const Request &req, Response &res) {
- std::lock_guard<std::recursive_mutex> request_mutex_guard(request_mutex_);
-
- {
- std::lock_guard<std::mutex> guard(socket_mutex_);
- // Set this to false immediately - if it ever gets set to true by the end of the
- // request, we know another thread instructed us to close the socket.
- socket_should_be_closed_when_request_is_done_ = false;
-
- auto is_alive = false;
- if (socket_.is_open()) {
- is_alive = detail::select_write(socket_.sock, 0, 0) > 0;
- if (!is_alive) {
- // Attempt to avoid sigpipe by shutting down nongracefully if it seems like
- // the other side has already closed the connection
- // Also, there cannot be any requests in flight from other threads since we locked
- // request_mutex_, so safe to close everything immediately
- const bool shutdown_gracefully = false;
- shutdown_ssl(socket_, shutdown_gracefully);
- shutdown_socket(socket_);
- close_socket(socket_);
- }
- }
-
- if (!is_alive) {
- if (!create_and_connect_socket(socket_)) { return false; }
-
-#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
- // TODO: refactoring
- if (is_ssl()) {
- auto &scli = static_cast<SSLClient &>(*this);
- if (!proxy_host_.empty() && proxy_port_ != -1) {
- bool success = false;
- if (!scli.connect_with_proxy(socket_, res, success)) {
- return success;
- }
- }
-
- if (!scli.initialize_ssl(socket_)) { return false; }
- }
-#endif
- }
-
- // Mark the current socket as being in use so that it cannot be closed by anyone
- // else while this request is ongoing, even though we will be releasing the mutex.
- if (socket_requests_in_flight_ > 1) {
- assert(socket_requests_are_from_thread_ == std::this_thread::get_id());
- }
- socket_requests_in_flight_ += 1;
- socket_requests_are_from_thread_ = std::this_thread::get_id();
- }
-
- auto close_connection = !keep_alive_;
- auto ret = process_socket(socket_, [&](Stream &strm) {
- return handle_request(strm, req, res, close_connection);
- });
-
-  // Briefly lock the mutex in order to mark that a request is no longer ongoing.
- {
- std::lock_guard<std::mutex> guard(socket_mutex_);
- socket_requests_in_flight_ -= 1;
- if (socket_requests_in_flight_ <= 0) {
- assert(socket_requests_in_flight_ == 0);
- socket_requests_are_from_thread_ = std::thread::id();
- }
-
-    if (socket_should_be_closed_when_request_is_done_ || close_connection ||
-        !ret) {
- shutdown_ssl(socket_, true);
- shutdown_socket(socket_);
- close_socket(socket_);
- }
- }
-
- if (!ret) {
- if (error_ == Error::Success) { error_ = Error::Unknown; }
- }
-
- return ret;
-}
-
-inline bool ClientImpl::handle_request(Stream &strm, const Request &req,
- Response &res, bool close_connection) {
- if (req.path.empty()) {
- error_ = Error::Connection;
- return false;
- }
-
- bool ret;
-
- if (!is_ssl() && !proxy_host_.empty() && proxy_port_ != -1) {
- auto req2 = req;
- req2.path = "http://" + host_and_port_ + req.path;
- ret = process_request(strm, req2, res, close_connection);
- } else {
- ret = process_request(strm, req, res, close_connection);
- }
-
- if (!ret) { return false; }
-
- if (300 < res.status && res.status < 400 && follow_location_) {
- ret = redirect(req, res);
- }
-
-#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
- if ((res.status == 401 || res.status == 407) &&
- req.authorization_count_ < 5) {
- auto is_proxy = res.status == 407;
- const auto &username =
- is_proxy ? proxy_digest_auth_username_ : digest_auth_username_;
- const auto &password =
- is_proxy ? proxy_digest_auth_password_ : digest_auth_password_;
-
- if (!username.empty() && !password.empty()) {
- std::map<std::string, std::string> auth;
- if (detail::parse_www_authenticate(res, auth, is_proxy)) {
- Request new_req = req;
- new_req.authorization_count_ += 1;
- auto key = is_proxy ? "Proxy-Authorization" : "Authorization";
- new_req.headers.erase(key);
- new_req.headers.insert(detail::make_digest_authentication_header(
- req, auth, new_req.authorization_count_, detail::random_string(10),
- username, password, is_proxy));
-
- Response new_res;
-
- ret = send(new_req, new_res);
- if (ret) { res = new_res; }
- }
- }
- }
-#endif
-
- return ret;
-}
-
-inline bool ClientImpl::redirect(const Request &req, Response &res) {
- if (req.redirect_count == 0) {
- error_ = Error::ExceedRedirectCount;
- return false;
- }
-
- auto location = detail::decode_url(res.get_header_value("location"), true);
- if (location.empty()) { return false; }
-
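-  // Capture groups: 1 = scheme, 2 = host, 3 = port, 4 = path and optional query.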
- const static std::regex re(
- R"(^(?:(https?):)?(?://([^:/?#]*)(?::(\d+))?)?([^?#]*(?:\?[^#]*)?)(?:#.*)?)");
-
- std::smatch m;
- if (!std::regex_match(location, m, re)) { return false; }
-
- auto scheme = is_ssl() ? "https" : "http";
-
- auto next_scheme = m[1].str();
- auto next_host = m[2].str();
- auto port_str = m[3].str();
- auto next_path = m[4].str();
-
- auto next_port = port_;
- if (!port_str.empty()) {
- next_port = std::stoi(port_str);
- } else if (!next_scheme.empty()) {
- next_port = next_scheme == "https" ? 443 : 80;
- }
-
- if (next_scheme.empty()) { next_scheme = scheme; }
- if (next_host.empty()) { next_host = host_; }
- if (next_path.empty()) { next_path = "/"; }
-
- if (next_scheme == scheme && next_host == host_ && next_port == port_) {
- return detail::redirect(*this, req, res, next_path);
- } else {
- if (next_scheme == "https") {
-#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
- SSLClient cli(next_host.c_str(), next_port);
- cli.copy_settings(*this);
- auto ret = detail::redirect(cli, req, res, next_path);
- if (!ret) { error_ = cli.get_last_error(); }
- return ret;
-#else
- return false;
-#endif
- } else {
- ClientImpl cli(next_host.c_str(), next_port);
- cli.copy_settings(*this);
- auto ret = detail::redirect(cli, req, res, next_path);
- if (!ret) { error_ = cli.get_last_error(); }
- return ret;
- }
- }
-}
-
-inline bool ClientImpl::write_request(Stream &strm, const Request &req,
- bool close_connection) {
- detail::BufferStream bstrm;
-
- // Request line
- const auto &path = detail::encode_url(req.path);
-
- bstrm.write_format("%s %s HTTP/1.1\r\n", req.method.c_str(), path.c_str());
-
-  // Additional headers
- Headers headers;
- if (close_connection) { headers.emplace("Connection", "close"); }
-
- if (!req.has_header("Host")) {
- if (is_ssl()) {
- if (port_ == 443) {
- headers.emplace("Host", host_);
- } else {
- headers.emplace("Host", host_and_port_);
- }
- } else {
- if (port_ == 80) {
- headers.emplace("Host", host_);
- } else {
- headers.emplace("Host", host_and_port_);
- }
- }
- }
-
- if (!req.has_header("Accept")) { headers.emplace("Accept", "*/*"); }
-
- if (!req.has_header("User-Agent")) {
- headers.emplace("User-Agent", "cpp-httplib/0.7");
- }
-
- if (req.body.empty()) {
- if (req.content_provider) {
- auto length = std::to_string(req.content_length);
- headers.emplace("Content-Length", length);
- } else {
- if (req.method == "POST" || req.method == "PUT" ||
- req.method == "PATCH") {
- headers.emplace("Content-Length", "0");
- }
- }
- } else {
- if (!req.has_header("Content-Type")) {
- headers.emplace("Content-Type", "text/plain");
- }
-
- if (!req.has_header("Content-Length")) {
- auto length = std::to_string(req.body.size());
- headers.emplace("Content-Length", length);
- }
- }
-
- if (!basic_auth_password_.empty()) {
- headers.insert(make_basic_authentication_header(
- basic_auth_username_, basic_auth_password_, false));
- }
-
- if (!proxy_basic_auth_username_.empty() &&
- !proxy_basic_auth_password_.empty()) {
- headers.insert(make_basic_authentication_header(
- proxy_basic_auth_username_, proxy_basic_auth_password_, true));
- }
-
- if (!bearer_token_auth_token_.empty()) {
- headers.insert(make_bearer_token_authentication_header(
- bearer_token_auth_token_, false));
- }
-
- if (!proxy_bearer_token_auth_token_.empty()) {
- headers.insert(make_bearer_token_authentication_header(
- proxy_bearer_token_auth_token_, true));
- }
-
- detail::write_headers(bstrm, req, headers);
-
- // Flush buffer
- auto &data = bstrm.get_buffer();
- if (!detail::write_data(strm, data.data(), data.size())) {
- error_ = Error::Write;
- return false;
- }
-
- // Body
- if (req.body.empty()) {
- if (req.content_provider) {
- size_t offset = 0;
- size_t end_offset = req.content_length;
-
- bool ok = true;
-
- DataSink data_sink;
- data_sink.write = [&](const char *d, size_t l) {
- if (ok) {
- if (detail::write_data(strm, d, l)) {
- offset += l;
- } else {
- ok = false;
- }
- }
- };
- data_sink.is_writable = [&](void) { return ok && strm.is_writable(); };
-
- while (offset < end_offset) {
- if (!req.content_provider(offset, end_offset - offset, data_sink)) {
- error_ = Error::Canceled;
- return false;
- }
- if (!ok) {
- error_ = Error::Write;
- return false;
- }
- }
- }
- } else {
- return detail::write_data(strm, req.body.data(), req.body.size());
- }
-
- return true;
-}
-
-inline std::unique_ptr<Response> ClientImpl::send_with_content_provider(
- const char *method, const char *path, const Headers &headers,
- const std::string &body, size_t content_length,
- ContentProvider content_provider, const char *content_type) {
-
- Request req;
- req.method = method;
- req.headers = default_headers_;
- req.headers.insert(headers.begin(), headers.end());
- req.path = path;
-
- if (content_type) { req.headers.emplace("Content-Type", content_type); }
-
-#ifdef CPPHTTPLIB_ZLIB_SUPPORT
- if (compress_) {
- detail::gzip_compressor compressor;
-
- if (content_provider) {
- auto ok = true;
- size_t offset = 0;
-
- DataSink data_sink;
- data_sink.write = [&](const char *data, size_t data_len) {
- if (ok) {
- auto last = offset + data_len == content_length;
-
- auto ret = compressor.compress(
- data, data_len, last, [&](const char *data, size_t data_len) {
- req.body.append(data, data_len);
- return true;
- });
-
- if (ret) {
- offset += data_len;
- } else {
- ok = false;
- }
- }
- };
-      data_sink.is_writable = [&](void) { return ok; };
-
- while (ok && offset < content_length) {
- if (!content_provider(offset, content_length - offset, data_sink)) {
- error_ = Error::Canceled;
- return nullptr;
- }
- }
- } else {
- if (!compressor.compress(body.data(), body.size(), true,
- [&](const char *data, size_t data_len) {
- req.body.append(data, data_len);
- return true;
- })) {
- return nullptr;
- }
- }
-
- req.headers.emplace("Content-Encoding", "gzip");
- } else
-#endif
- {
- if (content_provider) {
- req.content_length = content_length;
- req.content_provider = std::move(content_provider);
- } else {
- req.body = body;
- }
- }
-
- auto res = detail::make_unique<Response>();
-
- return send(req, *res) ? std::move(res) : nullptr;
-}
-
-inline bool ClientImpl::process_request(Stream &strm, const Request &req,
- Response &res, bool close_connection) {
- // Send request
- if (!write_request(strm, req, close_connection)) { return false; }
-
- // Receive response and headers
- if (!read_response_line(strm, res) ||
- !detail::read_headers(strm, res.headers)) {
- error_ = Error::Read;
- return false;
- }
-
- if (req.response_handler) {
- if (!req.response_handler(res)) {
- error_ = Error::Canceled;
- return false;
- }
- }
-
- // Body
- if (req.method != "HEAD" && req.method != "CONNECT") {
- auto out =
- req.content_receiver
- ? static_cast<ContentReceiverWithProgress>(
- [&](const char *buf, size_t n, uint64_t off, uint64_t len) {
- auto ret = req.content_receiver(buf, n, off, len);
- if (!ret) { error_ = Error::Canceled; }
- return ret;
- })
- : static_cast<ContentReceiverWithProgress>(
- [&](const char *buf, size_t n, uint64_t /*off*/,
- uint64_t /*len*/) {
- if (res.body.size() + n > res.body.max_size()) {
- return false;
- }
- res.body.append(buf, n);
- return true;
- });
-
- auto progress = [&](uint64_t current, uint64_t total) {
- if (!req.progress) { return true; }
- auto ret = req.progress(current, total);
- if (!ret) { error_ = Error::Canceled; }
- return ret;
- };
-
- int dummy_status;
- if (!detail::read_content(strm, res, (std::numeric_limits<size_t>::max)(),
- dummy_status, std::move(progress), std::move(out),
- decompress_)) {
- if (error_ != Error::Canceled) { error_ = Error::Read; }
- return false;
- }
- }
-
- if (res.get_header_value("Connection") == "close" ||
- (res.version == "HTTP/1.0" && res.reason != "Connection established")) {
-    // TODO: safety here relies on a not-entirely-obvious chain of calls. A
-    // refactor (such as moving this out to the send function and getting rid
-    // of the recursive mutex) could make it more obvious.
-
- // This is safe to call because process_request is only called by handle_request
- // which is only called by send, which locks the request mutex during the process.
- // It would be a bug to call it from a different thread since it's a thread-safety
- // issue to do these things to the socket if another thread is using the socket.
- lock_socket_and_shutdown_and_close();
- }
-
- // Log
- if (logger_) { logger_(req, res); }
-
- return true;
-}
-
-inline bool
-ClientImpl::process_socket(const Socket &socket,
- std::function<bool(Stream &strm)> callback) {
- return detail::process_client_socket(
- socket.sock, read_timeout_sec_, read_timeout_usec_, write_timeout_sec_,
- write_timeout_usec_, std::move(callback));
-}
-
-inline bool ClientImpl::is_ssl() const { return false; }
-
-inline Result ClientImpl::Get(const char *path) {
- return Get(path, Headers(), Progress());
-}
-
-inline Result ClientImpl::Get(const char *path, Progress progress) {
- return Get(path, Headers(), std::move(progress));
-}
-
-inline Result ClientImpl::Get(const char *path, const Headers &headers) {
- return Get(path, headers, Progress());
-}
-
-inline Result ClientImpl::Get(const char *path, const Headers &headers,
- Progress progress) {
- Request req;
- req.method = "GET";
- req.path = path;
- req.headers = default_headers_;
- req.headers.insert(headers.begin(), headers.end());
- req.progress = std::move(progress);
-
- auto res = detail::make_unique<Response>();
- auto ret = send(req, *res);
- return Result{ret ? std::move(res) : nullptr, get_last_error()};
-}
-
-inline Result ClientImpl::Get(const char *path,
- ContentReceiver content_receiver) {
- return Get(path, Headers(), nullptr, std::move(content_receiver), nullptr);
-}
-
-inline Result ClientImpl::Get(const char *path,
- ContentReceiver content_receiver,
- Progress progress) {
- return Get(path, Headers(), nullptr, std::move(content_receiver),
- std::move(progress));
-}
-
-inline Result ClientImpl::Get(const char *path, const Headers &headers,
- ContentReceiver content_receiver) {
- return Get(path, headers, nullptr, std::move(content_receiver), nullptr);
-}
-
-inline Result ClientImpl::Get(const char *path, const Headers &headers,
- ContentReceiver content_receiver,
- Progress progress) {
- return Get(path, headers, nullptr, std::move(content_receiver),
- std::move(progress));
-}
-
-inline Result ClientImpl::Get(const char *path,
- ResponseHandler response_handler,
- ContentReceiver content_receiver) {
- return Get(path, Headers(), std::move(response_handler),
- std::move(content_receiver), nullptr);
-}
-
-inline Result ClientImpl::Get(const char *path, const Headers &headers,
- ResponseHandler response_handler,
- ContentReceiver content_receiver) {
- return Get(path, headers, std::move(response_handler),
- std::move(content_receiver), nullptr);
-}
-
-inline Result ClientImpl::Get(const char *path,
- ResponseHandler response_handler,
- ContentReceiver content_receiver,
- Progress progress) {
- return Get(path, Headers(), std::move(response_handler),
- std::move(content_receiver), std::move(progress));
-}
-
-inline Result ClientImpl::Get(const char *path, const Headers &headers,
- ResponseHandler response_handler,
- ContentReceiver content_receiver,
- Progress progress) {
- Request req;
- req.method = "GET";
- req.path = path;
- req.headers = default_headers_;
- req.headers.insert(headers.begin(), headers.end());
- req.response_handler = std::move(response_handler);
- req.content_receiver =
- [content_receiver](const char *data, size_t data_length,
- uint64_t /*offset*/, uint64_t /*total_length*/) {
- return content_receiver(data, data_length);
- };
- req.progress = std::move(progress);
-
- auto res = detail::make_unique<Response>();
- auto ret = send(req, *res);
- return Result{ret ? std::move(res) : nullptr, get_last_error()};
-}
-
-inline Result ClientImpl::Head(const char *path) {
- return Head(path, Headers());
-}
-
-inline Result ClientImpl::Head(const char *path, const Headers &headers) {
- Request req;
- req.method = "HEAD";
- req.headers = default_headers_;
- req.headers.insert(headers.begin(), headers.end());
- req.path = path;
-
- auto res = detail::make_unique<Response>();
- auto ret = send(req, *res);
- return Result{ret ? std::move(res) : nullptr, get_last_error()};
-}
-
-inline Result ClientImpl::Post(const char *path) {
- return Post(path, std::string(), nullptr);
-}
-
-inline Result ClientImpl::Post(const char *path, const std::string &body,
- const char *content_type) {
- return Post(path, Headers(), body, content_type);
-}
-
-inline Result ClientImpl::Post(const char *path, const Headers &headers,
- const std::string &body,
- const char *content_type) {
- auto ret = send_with_content_provider("POST", path, headers, body, 0, nullptr,
- content_type);
- return Result{std::move(ret), get_last_error()};
-}
-
-inline Result ClientImpl::Post(const char *path, const Params &params) {
- return Post(path, Headers(), params);
-}
-
-inline Result ClientImpl::Post(const char *path, size_t content_length,
- ContentProvider content_provider,
- const char *content_type) {
- return Post(path, Headers(), content_length, std::move(content_provider),
- content_type);
-}
-
-inline Result ClientImpl::Post(const char *path, const Headers &headers,
- size_t content_length,
- ContentProvider content_provider,
- const char *content_type) {
- auto ret = send_with_content_provider(
- "POST", path, headers, std::string(), content_length,
- std::move(content_provider), content_type);
- return Result{std::move(ret), get_last_error()};
-}
-
-inline Result ClientImpl::Post(const char *path, const Headers &headers,
- const Params &params) {
- auto query = detail::params_to_query_str(params);
- return Post(path, headers, query, "application/x-www-form-urlencoded");
-}
-
-inline Result ClientImpl::Post(const char *path,
- const MultipartFormDataItems &items) {
- return Post(path, Headers(), items);
-}
-
-inline Result ClientImpl::Post(const char *path, const Headers &headers,
- const MultipartFormDataItems &items) {
- return Post(path, headers, items, detail::make_multipart_data_boundary());
-}
-inline Result ClientImpl::Post(const char *path, const Headers &headers,
- const MultipartFormDataItems &items,
- const std::string &boundary) {
- for (size_t i = 0; i < boundary.size(); i++) {
- char c = boundary[i];
- if (!std::isalnum(c) && c != '-' && c != '_') {
- error_ = Error::UnsupportedMultipartBoundaryChars;
- return Result{nullptr, error_};
- }
- }
-
- std::string body;
-
- for (const auto &item : items) {
- body += "--" + boundary + "\r\n";
- body += "Content-Disposition: form-data; name=\"" + item.name + "\"";
- if (!item.filename.empty()) {
- body += "; filename=\"" + item.filename + "\"";
- }
- body += "\r\n";
- if (!item.content_type.empty()) {
- body += "Content-Type: " + item.content_type + "\r\n";
- }
- body += "\r\n";
- body += item.content + "\r\n";
- }
-
- body += "--" + boundary + "--\r\n";
-
- std::string content_type = "multipart/form-data; boundary=" + boundary;
- return Post(path, headers, body, content_type.c_str());
-}
-
-inline Result ClientImpl::Put(const char *path) {
- return Put(path, std::string(), nullptr);
-}
-
-inline Result ClientImpl::Put(const char *path, const std::string &body,
- const char *content_type) {
- return Put(path, Headers(), body, content_type);
-}
-
-inline Result ClientImpl::Put(const char *path, const Headers &headers,
- const std::string &body,
- const char *content_type) {
- auto ret = send_with_content_provider("PUT", path, headers, body, 0, nullptr,
- content_type);
- return Result{std::move(ret), get_last_error()};
-}
-
-inline Result ClientImpl::Put(const char *path, size_t content_length,
- ContentProvider content_provider,
- const char *content_type) {
- return Put(path, Headers(), content_length, std::move(content_provider),
- content_type);
-}
-
-inline Result ClientImpl::Put(const char *path, const Headers &headers,
- size_t content_length,
- ContentProvider content_provider,
- const char *content_type) {
- auto ret = send_with_content_provider(
- "PUT", path, headers, std::string(), content_length,
- std::move(content_provider), content_type);
- return Result{std::move(ret), get_last_error()};
-}
-
-inline Result ClientImpl::Put(const char *path, const Params &params) {
- return Put(path, Headers(), params);
-}
-
-inline Result ClientImpl::Put(const char *path, const Headers &headers,
- const Params &params) {
- auto query = detail::params_to_query_str(params);
- return Put(path, headers, query, "application/x-www-form-urlencoded");
-}
-
-inline Result ClientImpl::Patch(const char *path, const std::string &body,
- const char *content_type) {
- return Patch(path, Headers(), body, content_type);
-}
-
-inline Result ClientImpl::Patch(const char *path, const Headers &headers,
- const std::string &body,
- const char *content_type) {
- auto ret = send_with_content_provider("PATCH", path, headers, body, 0,
- nullptr, content_type);
- return Result{std::move(ret), get_last_error()};
-}
-
-inline Result ClientImpl::Patch(const char *path, size_t content_length,
- ContentProvider content_provider,
- const char *content_type) {
- return Patch(path, Headers(), content_length, std::move(content_provider),
- content_type);
-}
-
-inline Result ClientImpl::Patch(const char *path, const Headers &headers,
- size_t content_length,
- ContentProvider content_provider,
- const char *content_type) {
- auto ret = send_with_content_provider(
- "PATCH", path, headers, std::string(), content_length,
- std::move(content_provider), content_type);
- return Result{std::move(ret), get_last_error()};
-}
-
-inline Result ClientImpl::Delete(const char *path) {
- return Delete(path, Headers(), std::string(), nullptr);
-}
-
-inline Result ClientImpl::Delete(const char *path, const std::string &body,
- const char *content_type) {
- return Delete(path, Headers(), body, content_type);
-}
-
-inline Result ClientImpl::Delete(const char *path, const Headers &headers) {
- return Delete(path, headers, std::string(), nullptr);
-}
-
-inline Result ClientImpl::Delete(const char *path, const Headers &headers,
- const std::string &body,
- const char *content_type) {
- Request req;
- req.method = "DELETE";
- req.headers = default_headers_;
- req.headers.insert(headers.begin(), headers.end());
- req.path = path;
-
- if (content_type) { req.headers.emplace("Content-Type", content_type); }
- req.body = body;
-
- auto res = detail::make_unique<Response>();
- auto ret = send(req, *res);
- return Result{ret ? std::move(res) : nullptr, get_last_error()};
-}
-
-inline Result ClientImpl::Options(const char *path) {
- return Options(path, Headers());
-}
-
-inline Result ClientImpl::Options(const char *path, const Headers &headers) {
- Request req;
- req.method = "OPTIONS";
- req.headers = default_headers_;
- req.headers.insert(headers.begin(), headers.end());
- req.path = path;
-
- auto res = detail::make_unique<Response>();
- auto ret = send(req, *res);
- return Result{ret ? std::move(res) : nullptr, get_last_error()};
-}
-
-inline size_t ClientImpl::is_socket_open() const {
- std::lock_guard<std::mutex> guard(socket_mutex_);
- return socket_.is_open();
-}
-
-inline void ClientImpl::stop() {
- std::lock_guard<std::mutex> guard(socket_mutex_);
- // There is no guarantee that this doesn't get overwritten later, but set it so that
- // there is a good chance that any threads stopping as a result pick up this error.
- error_ = Error::Canceled;
-
- // If there is anything ongoing right now, the ONLY thread-safe thing we can do
- // is to shutdown_socket, so that threads using this socket suddenly discover
- // they can't read/write any more and error out.
- // Everything else (closing the socket, shutting ssl down) is unsafe because these
- // actions are not thread-safe.
- if (socket_requests_in_flight_ > 0) {
- shutdown_socket(socket_);
- // Aside from that, we set a flag for the socket to be closed when we're done.
- socket_should_be_closed_when_request_is_done_ = true;
- return;
- }
-
- // Otherwise, still holding the mutex, we can shut everything down ourselves
- shutdown_ssl(socket_, true);
- shutdown_socket(socket_);
- close_socket(socket_);
-}
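ClientImpl::stop() above documents a two-phase cancellation: while a request is in flight only shutdown_socket() runs, and the actual close is deferred until the request finishes. Below is a minimal usage sketch of that behaviour from the caller's side; the host, path, and include path are hypothetical, and the exact error reported to the interrupted request may vary.

#include <chrono>
#include <thread>

#include "httplib.h"  // include path is illustrative

int main() {
    httplib::Client cli("http://example.com");  // hypothetical endpoint
    std::thread worker([&cli] {
        // Expected to be interrupted: stop() shuts the socket down, so the
        // blocked read errors out and the Result carries an error (e.g. Canceled).
        auto res = cli.Get("/slow");
        (void)res;
    });
    std::this_thread::sleep_for(std::chrono::milliseconds(100));
    cli.stop();  // safe from another thread; closing is deferred while a request is in flight
    worker.join();
}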
-
-inline void ClientImpl::set_connection_timeout(time_t sec, time_t usec) {
- connection_timeout_sec_ = sec;
- connection_timeout_usec_ = usec;
-}
-
-inline void ClientImpl::set_read_timeout(time_t sec, time_t usec) {
- read_timeout_sec_ = sec;
- read_timeout_usec_ = usec;
-}
-
-inline void ClientImpl::set_write_timeout(time_t sec, time_t usec) {
- write_timeout_sec_ = sec;
- write_timeout_usec_ = usec;
-}
-
-inline void ClientImpl::set_basic_auth(const char *username,
- const char *password) {
- basic_auth_username_ = username;
- basic_auth_password_ = password;
-}
-
-inline void ClientImpl::set_bearer_token_auth(const char *token) {
- bearer_token_auth_token_ = token;
-}
-
-#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
-inline void ClientImpl::set_digest_auth(const char *username,
- const char *password) {
- digest_auth_username_ = username;
- digest_auth_password_ = password;
-}
-#endif
-
-inline void ClientImpl::set_keep_alive(bool on) { keep_alive_ = on; }
-
-inline void ClientImpl::set_follow_location(bool on) { follow_location_ = on; }
-
-inline void ClientImpl::set_default_headers(Headers headers) {
- default_headers_ = std::move(headers);
-}
-
-inline void ClientImpl::set_tcp_nodelay(bool on) { tcp_nodelay_ = on; }
-
-inline void ClientImpl::set_socket_options(SocketOptions socket_options) {
- socket_options_ = std::move(socket_options);
-}
-
-inline void ClientImpl::set_compress(bool on) { compress_ = on; }
-
-inline void ClientImpl::set_decompress(bool on) { decompress_ = on; }
-
-inline void ClientImpl::set_interface(const char *intf) { interface_ = intf; }
-
-inline void ClientImpl::set_proxy(const char *host, int port) {
- proxy_host_ = host;
- proxy_port_ = port;
-}
-
-inline void ClientImpl::set_proxy_basic_auth(const char *username,
- const char *password) {
- proxy_basic_auth_username_ = username;
- proxy_basic_auth_password_ = password;
-}
-
-inline void ClientImpl::set_proxy_bearer_token_auth(const char *token) {
- proxy_bearer_token_auth_token_ = token;
-}
-
-#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
-inline void ClientImpl::set_proxy_digest_auth(const char *username,
- const char *password) {
- proxy_digest_auth_username_ = username;
- proxy_digest_auth_password_ = password;
-}
-#endif
-
-#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
-inline void ClientImpl::enable_server_certificate_verification(bool enabled) {
- server_certificate_verification_ = enabled;
-}
-#endif
-
-inline void ClientImpl::set_logger(Logger logger) {
- logger_ = std::move(logger);
-}
-
-/*
- * SSL Implementation
- */
-#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
-namespace detail {
-
-template <typename U, typename V>
-inline SSL *ssl_new(socket_t sock, SSL_CTX *ctx, std::mutex &ctx_mutex,
- U SSL_connect_or_accept, V setup) {
- SSL *ssl = nullptr;
- {
- std::lock_guard<std::mutex> guard(ctx_mutex);
- ssl = SSL_new(ctx);
- }
-
- if (ssl) {
- auto bio = BIO_new_socket(static_cast<int>(sock), BIO_NOCLOSE);
- SSL_set_bio(ssl, bio, bio);
-
- if (!setup(ssl) || SSL_connect_or_accept(ssl) != 1) {
- SSL_shutdown(ssl);
- {
- std::lock_guard<std::mutex> guard(ctx_mutex);
- SSL_free(ssl);
- }
- return nullptr;
- }
- }
-
- return ssl;
-}
-
-inline void ssl_delete(std::mutex &ctx_mutex, SSL *ssl,
- bool shutdown_gracefully) {
- // sometimes we may want to skip this to try to avoid SIGPIPE if we know
- // the remote has closed the network connection
- // Note that it is not always possible to avoid SIGPIPE; this is merely best-effort.
- if (shutdown_gracefully) {
- SSL_shutdown(ssl);
- }
-
- std::lock_guard<std::mutex> guard(ctx_mutex);
- SSL_free(ssl);
-}
-
-template <typename T>
-inline bool
-process_server_socket_ssl(SSL *ssl, socket_t sock, size_t keep_alive_max_count,
- time_t keep_alive_timeout_sec,
- time_t read_timeout_sec, time_t read_timeout_usec,
- time_t write_timeout_sec, time_t write_timeout_usec,
- T callback) {
- return process_server_socket_core(
- sock, keep_alive_max_count, keep_alive_timeout_sec,
- [&](bool close_connection, bool &connection_closed) {
- SSLSocketStream strm(sock, ssl, read_timeout_sec, read_timeout_usec,
- write_timeout_sec, write_timeout_usec);
- return callback(strm, close_connection, connection_closed);
- });
-}
-
-template <typename T>
-inline bool
-process_client_socket_ssl(SSL *ssl, socket_t sock, time_t read_timeout_sec,
- time_t read_timeout_usec, time_t write_timeout_sec,
- time_t write_timeout_usec, T callback) {
- SSLSocketStream strm(sock, ssl, read_timeout_sec, read_timeout_usec,
- write_timeout_sec, write_timeout_usec);
- return callback(strm);
-}
-
-#if OPENSSL_VERSION_NUMBER < 0x10100000L
-static std::shared_ptr<std::vector<std::mutex>> openSSL_locks_;
-
-class SSLThreadLocks {
-public:
- SSLThreadLocks() {
- openSSL_locks_ =
- std::make_shared<std::vector<std::mutex>>(CRYPTO_num_locks());
- CRYPTO_set_locking_callback(locking_callback);
- }
-
- ~SSLThreadLocks() { CRYPTO_set_locking_callback(nullptr); }
-
-private:
- static void locking_callback(int mode, int type, const char * /*file*/,
- int /*line*/) {
- auto &lk = (*openSSL_locks_)[static_cast<size_t>(type)];
- if (mode & CRYPTO_LOCK) {
- lk.lock();
- } else {
- lk.unlock();
- }
- }
-};
-
-#endif
-
-class SSLInit {
-public:
- SSLInit() {
-#if OPENSSL_VERSION_NUMBER < 0x1010001fL
- SSL_load_error_strings();
- SSL_library_init();
-#else
- OPENSSL_init_ssl(
- OPENSSL_INIT_LOAD_SSL_STRINGS | OPENSSL_INIT_LOAD_CRYPTO_STRINGS, NULL);
-#endif
- }
-
- ~SSLInit() {
-#if OPENSSL_VERSION_NUMBER < 0x1010001fL
- ERR_free_strings();
-#endif
- }
-
-private:
-#if OPENSSL_VERSION_NUMBER < 0x10100000L
- SSLThreadLocks thread_init_;
-#endif
-};
-
-// SSL socket stream implementation
-inline SSLSocketStream::SSLSocketStream(socket_t sock, SSL *ssl,
- time_t read_timeout_sec,
- time_t read_timeout_usec,
- time_t write_timeout_sec,
- time_t write_timeout_usec)
- : sock_(sock), ssl_(ssl), read_timeout_sec_(read_timeout_sec),
- read_timeout_usec_(read_timeout_usec),
- write_timeout_sec_(write_timeout_sec),
- write_timeout_usec_(write_timeout_usec) {
- SSL_clear_mode(ssl, SSL_MODE_AUTO_RETRY);
-}
-
-inline SSLSocketStream::~SSLSocketStream() {}
-
-inline bool SSLSocketStream::is_readable() const {
- return detail::select_read(sock_, read_timeout_sec_, read_timeout_usec_) > 0;
-}
-
-inline bool SSLSocketStream::is_writable() const {
- return detail::select_write(sock_, write_timeout_sec_, write_timeout_usec_) >
- 0;
-}
-
-inline ssize_t SSLSocketStream::read(char *ptr, size_t size) {
- if (SSL_pending(ssl_) > 0) {
- return SSL_read(ssl_, ptr, static_cast<int>(size));
- } else if (is_readable()) {
- auto ret = SSL_read(ssl_, ptr, static_cast<int>(size));
- if (ret < 0) {
- auto err = SSL_get_error(ssl_, ret);
- while (err == SSL_ERROR_WANT_READ) {
- if (SSL_pending(ssl_) > 0) {
- return SSL_read(ssl_, ptr, static_cast<int>(size));
- } else if (is_readable()) {
- ret = SSL_read(ssl_, ptr, static_cast<int>(size));
- if (ret >= 0) {
- return ret;
- }
- err = SSL_get_error(ssl_, ret);
- } else {
- return -1;
- }
- }
- }
- return ret;
- }
- return -1;
-}
-
-inline ssize_t SSLSocketStream::write(const char *ptr, size_t size) {
- if (is_writable()) { return SSL_write(ssl_, ptr, static_cast<int>(size)); }
- return -1;
-}
-
-inline void SSLSocketStream::get_remote_ip_and_port(std::string &ip,
- int &port) const {
- detail::get_remote_ip_and_port(sock_, ip, port);
-}
-
-static SSLInit sslinit_;
-
-} // namespace detail
-
-// SSL HTTP server implementation
-inline SSLServer::SSLServer(const char *cert_path, const char *private_key_path,
- const char *client_ca_cert_file_path,
- const char *client_ca_cert_dir_path) {
- ctx_ = SSL_CTX_new(SSLv23_server_method());
-
- if (ctx_) {
- SSL_CTX_set_options(ctx_,
- SSL_OP_ALL | SSL_OP_NO_SSLv2 | SSL_OP_NO_SSLv3 |
- SSL_OP_NO_COMPRESSION |
- SSL_OP_NO_SESSION_RESUMPTION_ON_RENEGOTIATION);
-
- // auto ecdh = EC_KEY_new_by_curve_name(NID_X9_62_prime256v1);
- // SSL_CTX_set_tmp_ecdh(ctx_, ecdh);
- // EC_KEY_free(ecdh);
-
- if (SSL_CTX_use_certificate_chain_file(ctx_, cert_path) != 1 ||
- SSL_CTX_use_PrivateKey_file(ctx_, private_key_path, SSL_FILETYPE_PEM) !=
- 1) {
- SSL_CTX_free(ctx_);
- ctx_ = nullptr;
- } else if (client_ca_cert_file_path || client_ca_cert_dir_path) {
- // if (client_ca_cert_file_path) {
- // auto list = SSL_load_client_CA_file(client_ca_cert_file_path);
- // SSL_CTX_set_client_CA_list(ctx_, list);
- // }
-
- SSL_CTX_load_verify_locations(ctx_, client_ca_cert_file_path,
- client_ca_cert_dir_path);
-
- SSL_CTX_set_verify(
- ctx_,
- SSL_VERIFY_PEER |
- SSL_VERIFY_FAIL_IF_NO_PEER_CERT, // SSL_VERIFY_CLIENT_ONCE,
- nullptr);
- }
- }
-}
-
-inline SSLServer::SSLServer(X509 *cert, EVP_PKEY *private_key,
- X509_STORE *client_ca_cert_store) {
- ctx_ = SSL_CTX_new(SSLv23_server_method());
-
- if (ctx_) {
- SSL_CTX_set_options(ctx_,
- SSL_OP_ALL | SSL_OP_NO_SSLv2 | SSL_OP_NO_SSLv3 |
- SSL_OP_NO_COMPRESSION |
- SSL_OP_NO_SESSION_RESUMPTION_ON_RENEGOTIATION);
-
- if (SSL_CTX_use_certificate(ctx_, cert) != 1 ||
- SSL_CTX_use_PrivateKey(ctx_, private_key) != 1) {
- SSL_CTX_free(ctx_);
- ctx_ = nullptr;
- } else if (client_ca_cert_store) {
-
- SSL_CTX_set_cert_store(ctx_, client_ca_cert_store);
-
- SSL_CTX_set_verify(
- ctx_,
- SSL_VERIFY_PEER |
- SSL_VERIFY_FAIL_IF_NO_PEER_CERT, // SSL_VERIFY_CLIENT_ONCE,
- nullptr);
- }
- }
-}
-
-inline SSLServer::~SSLServer() {
- if (ctx_) { SSL_CTX_free(ctx_); }
-}
-
-inline bool SSLServer::is_valid() const { return ctx_; }
-
-inline bool SSLServer::process_and_close_socket(socket_t sock) {
- auto ssl = detail::ssl_new(sock, ctx_, ctx_mutex_, SSL_accept,
- [](SSL * /*ssl*/) { return true; });
-
- if (ssl) {
- auto ret = detail::process_server_socket_ssl(
- ssl, sock, keep_alive_max_count_, keep_alive_timeout_sec_,
- read_timeout_sec_, read_timeout_usec_, write_timeout_sec_,
- write_timeout_usec_,
- [this, ssl](Stream &strm, bool close_connection,
- bool &connection_closed) {
- return process_request(strm, close_connection, connection_closed,
- [&](Request &req) { req.ssl = ssl; });
- });
-
- detail::ssl_delete(ctx_mutex_, ssl, ret);
- detail::shutdown_socket(sock);
- detail::close_socket(sock);
- return ret;
- }
-
- detail::shutdown_socket(sock);
- detail::close_socket(sock);
- return false;
-}
-
-// SSL HTTP client implementation
-inline SSLClient::SSLClient(const std::string &host)
- : SSLClient(host, 443, std::string(), std::string()) {}
-
-inline SSLClient::SSLClient(const std::string &host, int port)
- : SSLClient(host, port, std::string(), std::string()) {}
-
-inline SSLClient::SSLClient(const std::string &host, int port,
- const std::string &client_cert_path,
- const std::string &client_key_path)
- : ClientImpl(host, port, client_cert_path, client_key_path) {
- ctx_ = SSL_CTX_new(SSLv23_client_method());
-
- detail::split(&host_[0], &host_[host_.size()], '.',
- [&](const char *b, const char *e) {
- host_components_.emplace_back(std::string(b, e));
- });
- if (!client_cert_path.empty() && !client_key_path.empty()) {
- if (SSL_CTX_use_certificate_file(ctx_, client_cert_path.c_str(),
- SSL_FILETYPE_PEM) != 1 ||
- SSL_CTX_use_PrivateKey_file(ctx_, client_key_path.c_str(),
- SSL_FILETYPE_PEM) != 1) {
- SSL_CTX_free(ctx_);
- ctx_ = nullptr;
- }
- }
-}
-
-inline SSLClient::SSLClient(const std::string &host, int port,
- X509 *client_cert, EVP_PKEY *client_key)
- : ClientImpl(host, port) {
- ctx_ = SSL_CTX_new(SSLv23_client_method());
-
- detail::split(&host_[0], &host_[host_.size()], '.',
- [&](const char *b, const char *e) {
- host_components_.emplace_back(std::string(b, e));
- });
- if (client_cert != nullptr && client_key != nullptr) {
- if (SSL_CTX_use_certificate(ctx_, client_cert) != 1 ||
- SSL_CTX_use_PrivateKey(ctx_, client_key) != 1) {
- SSL_CTX_free(ctx_);
- ctx_ = nullptr;
- }
- }
-}
-
-inline SSLClient::~SSLClient() {
- if (ctx_) { SSL_CTX_free(ctx_); }
- // Make sure to shut down SSL since shutdown_ssl will resolve to the
- // base function rather than the derived function once we get to the
- // base class destructor, and won't free the SSL (causing a leak).
- SSLClient::shutdown_ssl(socket_, true);
-}
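The comment in ~SSLClient() above relies on a general C++ rule: once control reaches a base-class destructor, virtual calls dispatch to the base implementation, so the derived cleanup must be invoked explicitly (here as SSLClient::shutdown_ssl) before that point. A minimal standalone illustration of the rule, unrelated to httplib's actual classes:

#include <iostream>

struct Base {
    virtual void shutdown() { std::cout << "Base::shutdown\n"; }
    virtual ~Base() { shutdown(); }  // dispatches to Base::shutdown, never to an override
};

struct Derived : Base {
    void shutdown() override { std::cout << "Derived::shutdown\n"; }
    ~Derived() { shutdown(); }  // still prints Derived::shutdown; the dynamic type is still Derived here
};

int main() {
    Derived d;
}  // prints "Derived::shutdown" followed by "Base::shutdown"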
-
-inline bool SSLClient::is_valid() const { return ctx_; }
-
-inline void SSLClient::set_ca_cert_path(const char *ca_cert_file_path,
- const char *ca_cert_dir_path) {
- if (ca_cert_file_path) { ca_cert_file_path_ = ca_cert_file_path; }
- if (ca_cert_dir_path) { ca_cert_dir_path_ = ca_cert_dir_path; }
-}
-
-inline void SSLClient::set_ca_cert_store(X509_STORE *ca_cert_store) {
- if (ca_cert_store) {
- if (ctx_) {
- if (SSL_CTX_get_cert_store(ctx_) != ca_cert_store) {
- // Free memory allocated for old cert and use new store `ca_cert_store`
- SSL_CTX_set_cert_store(ctx_, ca_cert_store);
- }
- } else {
- X509_STORE_free(ca_cert_store);
- }
- }
-}
-
-inline long SSLClient::get_openssl_verify_result() const {
- return verify_result_;
-}
-
-inline SSL_CTX *SSLClient::ssl_context() const { return ctx_; }
-
-inline bool SSLClient::create_and_connect_socket(Socket &socket) {
- return is_valid() && ClientImpl::create_and_connect_socket(socket);
-}
-
-// Assumes that socket_mutex_ is locked and that there are no requests in flight
-inline bool SSLClient::connect_with_proxy(Socket &socket, Response &res,
- bool &success) {
- success = true;
- Response res2;
- if (!detail::process_client_socket(
- socket.sock, read_timeout_sec_, read_timeout_usec_,
- write_timeout_sec_, write_timeout_usec_, [&](Stream &strm) {
- Request req2;
- req2.method = "CONNECT";
- req2.path = host_and_port_;
- return process_request(strm, req2, res2, false);
- })) {
- // Thread-safe to close everything because we are assuming there are no requests in flight
- shutdown_ssl(socket, true);
- shutdown_socket(socket);
- close_socket(socket);
- success = false;
- return false;
- }
-
- if (res2.status == 407) {
- if (!proxy_digest_auth_username_.empty() &&
- !proxy_digest_auth_password_.empty()) {
- std::map<std::string, std::string> auth;
- if (detail::parse_www_authenticate(res2, auth, true)) {
- Response res3;
- if (!detail::process_client_socket(
- socket.sock, read_timeout_sec_, read_timeout_usec_,
- write_timeout_sec_, write_timeout_usec_, [&](Stream &strm) {
- Request req3;
- req3.method = "CONNECT";
- req3.path = host_and_port_;
- req3.headers.insert(detail::make_digest_authentication_header(
- req3, auth, 1, detail::random_string(10),
- proxy_digest_auth_username_, proxy_digest_auth_password_,
- true));
- return process_request(strm, req3, res3, false);
- })) {
- // Thread-safe to close everything because we are assuming there are no requests in flight
- shutdown_ssl(socket, true);
- shutdown_socket(socket);
- close_socket(socket);
- success = false;
- return false;
- }
- }
- } else {
- res = res2;
- return false;
- }
- }
-
- return true;
-}
-
-inline bool SSLClient::load_certs() {
- bool ret = true;
-
- std::call_once(initialize_cert_, [&]() {
- std::lock_guard<std::mutex> guard(ctx_mutex_);
- if (!ca_cert_file_path_.empty()) {
- if (!SSL_CTX_load_verify_locations(ctx_, ca_cert_file_path_.c_str(),
- nullptr)) {
- ret = false;
- }
- } else if (!ca_cert_dir_path_.empty()) {
- if (!SSL_CTX_load_verify_locations(ctx_, nullptr,
- ca_cert_dir_path_.c_str())) {
- ret = false;
- }
- } else {
-#ifdef _WIN32
- detail::load_system_certs_on_windows(SSL_CTX_get_cert_store(ctx_));
-#else
- SSL_CTX_set_default_verify_paths(ctx_);
-#endif
- }
- });
-
- return ret;
-}
-
-inline bool SSLClient::initialize_ssl(Socket &socket) {
- auto ssl = detail::ssl_new(
- socket.sock, ctx_, ctx_mutex_,
- [&](SSL *ssl) {
- if (server_certificate_verification_) {
- if (!load_certs()) {
- error_ = Error::SSLLoadingCerts;
- return false;
- }
- SSL_set_verify(ssl, SSL_VERIFY_NONE, nullptr);
- }
-
- if (SSL_connect(ssl) != 1) {
- error_ = Error::SSLConnection;
- return false;
- }
-
- if (server_certificate_verification_) {
- verify_result_ = SSL_get_verify_result(ssl);
-
- if (verify_result_ != X509_V_OK) {
- error_ = Error::SSLServerVerification;
- return false;
- }
-
- auto server_cert = SSL_get_peer_certificate(ssl);
-
- if (server_cert == nullptr) {
- error_ = Error::SSLServerVerification;
- return false;
- }
-
- if (!verify_host(server_cert)) {
- X509_free(server_cert);
- error_ = Error::SSLServerVerification;
- return false;
- }
- X509_free(server_cert);
- }
-
- return true;
- },
- [&](SSL *ssl) {
- SSL_set_tlsext_host_name(ssl, host_.c_str());
- return true;
- });
-
- if (ssl) {
- socket.ssl = ssl;
- return true;
- }
-
- shutdown_socket(socket);
- close_socket(socket);
- return false;
-}
-
-inline void SSLClient::shutdown_ssl(Socket &socket, bool shutdown_gracefully) {
- if (socket.sock == INVALID_SOCKET) {
- assert(socket.ssl == nullptr);
- return;
- }
- if (socket.ssl) {
- detail::ssl_delete(ctx_mutex_, socket.ssl, shutdown_gracefully);
- socket.ssl = nullptr;
- }
- assert(socket.ssl == nullptr);
-}
-
-inline bool
-SSLClient::process_socket(const Socket &socket,
- std::function<bool(Stream &strm)> callback) {
- assert(socket.ssl);
- return detail::process_client_socket_ssl(
- socket.ssl, socket.sock, read_timeout_sec_, read_timeout_usec_,
- write_timeout_sec_, write_timeout_usec_, std::move(callback));
-}
-
-inline bool SSLClient::is_ssl() const { return true; }
-
-inline bool SSLClient::verify_host(X509 *server_cert) const {
- /* Quote from RFC2818 section 3.1 "Server Identity"
-
- If a subjectAltName extension of type dNSName is present, that MUST
- be used as the identity. Otherwise, the (most specific) Common Name
- field in the Subject field of the certificate MUST be used. Although
- the use of the Common Name is existing practice, it is deprecated and
- Certification Authorities are encouraged to use the dNSName instead.
-
- Matching is performed using the matching rules specified by
- [RFC2459]. If more than one identity of a given type is present in
- the certificate (e.g., more than one dNSName name, a match in any one
- of the set is considered acceptable.) Names may contain the wildcard
- character * which is considered to match any single domain name
- component or component fragment. E.g., *.a.com matches foo.a.com but
- not bar.foo.a.com. f*.com matches foo.com but not bar.com.
-
- In some cases, the URI is specified as an IP address rather than a
- hostname. In this case, the iPAddress subjectAltName must be present
- in the certificate and must exactly match the IP in the URI.
-
- */
- return verify_host_with_subject_alt_name(server_cert) ||
- verify_host_with_common_name(server_cert);
-}
-
-inline bool
-SSLClient::verify_host_with_subject_alt_name(X509 *server_cert) const {
- auto ret = false;
-
- auto type = GEN_DNS;
-
- struct in6_addr addr6;
- struct in_addr addr;
- size_t addr_len = 0;
-
-#ifndef __MINGW32__
- if (inet_pton(AF_INET6, host_.c_str(), &addr6)) {
- type = GEN_IPADD;
- addr_len = sizeof(struct in6_addr);
- } else if (inet_pton(AF_INET, host_.c_str(), &addr)) {
- type = GEN_IPADD;
- addr_len = sizeof(struct in_addr);
- }
-#endif
-
- auto alt_names = static_cast<const struct stack_st_GENERAL_NAME *>(
- X509_get_ext_d2i(server_cert, NID_subject_alt_name, nullptr, nullptr));
-
- if (alt_names) {
- auto dsn_matched = false;
- auto ip_mached = false;
-
- auto count = sk_GENERAL_NAME_num(alt_names);
-
- for (decltype(count) i = 0; i < count && !dsn_matched; i++) {
- auto val = sk_GENERAL_NAME_value(alt_names, i);
- if (val->type == type) {
- auto name = (const char *)ASN1_STRING_get0_data(val->d.ia5);
- auto name_len = (size_t)ASN1_STRING_length(val->d.ia5);
-
- if (strlen(name) == name_len) {
- switch (type) {
- case GEN_DNS: dsn_matched = check_host_name(name, name_len); break;
-
- case GEN_IPADD:
- if (!memcmp(&addr6, name, addr_len) ||
- !memcmp(&addr, name, addr_len)) {
- ip_mached = true;
- }
- break;
- }
- }
- }
- }
-
- if (dsn_matched || ip_mached) { ret = true; }
- }
-
- GENERAL_NAMES_free((STACK_OF(GENERAL_NAME) *)alt_names);
- return ret;
-}
-
-inline bool SSLClient::verify_host_with_common_name(X509 *server_cert) const {
- const auto subject_name = X509_get_subject_name(server_cert);
-
- if (subject_name != nullptr) {
- char name[BUFSIZ];
- auto name_len = X509_NAME_get_text_by_NID(subject_name, NID_commonName,
- name, sizeof(name));
-
- if (name_len != -1) {
- return check_host_name(name, static_cast<size_t>(name_len));
- }
- }
-
- return false;
-}
-
-inline bool SSLClient::check_host_name(const char *pattern,
- size_t pattern_len) const {
- if (host_.size() == pattern_len && host_ == pattern) { return true; }
-
- // Wildcard match
- // https://bugs.launchpad.net/ubuntu/+source/firefox-3.0/+bug/376484
- std::vector<std::string> pattern_components;
- detail::split(&pattern[0], &pattern[pattern_len], '.',
- [&](const char *b, const char *e) {
- pattern_components.emplace_back(std::string(b, e));
- });
-
- if (host_components_.size() != pattern_components.size()) { return false; }
-
- auto itr = pattern_components.begin();
- for (const auto &h : host_components_) {
- auto &p = *itr;
- if (p != h && p != "*") {
- auto partial_match = (p.size() > 0 && p[p.size() - 1] == '*' &&
- !p.compare(0, p.size() - 1, h));
- if (!partial_match) { return false; }
- }
- ++itr;
- }
-
- return true;
-}
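check_host_name() above applies the RFC 2818 rule quoted in verify_host(): a '*' pattern label matches exactly one host label, with an additional trailing-'*' prefix form ("f*.com") handled by the partial_match branch. A simplified standalone sketch of the whole-label case only, using made-up hostnames; it is not a drop-in replacement for the library's implementation:

#include <cassert>
#include <cstddef>
#include <sstream>
#include <string>
#include <vector>

static std::vector<std::string> split_labels(const std::string &name) {
    std::vector<std::string> labels;
    std::stringstream ss(name);
    std::string label;
    while (std::getline(ss, label, '.')) { labels.push_back(label); }
    return labels;
}

static bool wildcard_match(const std::string &host, const std::string &pattern) {
    const auto h = split_labels(host);
    const auto p = split_labels(pattern);
    if (h.size() != p.size()) { return false; }  // '*' never spans multiple labels
    for (std::size_t i = 0; i < h.size(); ++i) {
        if (p[i] != h[i] && p[i] != "*") { return false; }
    }
    return true;
}

int main() {
    assert(wildcard_match("foo.a.com", "*.a.com"));
    assert(!wildcard_match("bar.foo.a.com", "*.a.com"));  // too many labels
    assert(wildcard_match("example.com", "example.com"));
}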
-#endif
-
-// Universal client implementation
-inline Client::Client(const char *scheme_host_port)
- : Client(scheme_host_port, std::string(), std::string()) {}
-
-inline Client::Client(const char *scheme_host_port,
- const std::string &client_cert_path,
- const std::string &client_key_path) {
- const static std::regex re(R"(^(?:([a-z]+)://)?([^:/?#]+)(?::(\d+))?)");
-
- std::cmatch m;
- if (std::regex_match(scheme_host_port, m, re)) {
- auto scheme = m[1].str();
-
-#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
- if (!scheme.empty() && (scheme != "http" && scheme != "https")) {
-#else
- if (!scheme.empty() && scheme != "http") {
-#endif
- std::string msg = "'" + scheme + "' scheme is not supported.";
- throw std::invalid_argument(msg);
- return;
- }
-
- auto is_ssl = scheme == "https";
-
- auto host = m[2].str();
-
- auto port_str = m[3].str();
- auto port = !port_str.empty() ? std::stoi(port_str) : (is_ssl ? 443 : 80);
-
- if (is_ssl) {
-#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
- cli_ = detail::make_unique<SSLClient>(host.c_str(), port,
- client_cert_path, client_key_path);
- is_ssl_ = is_ssl;
-#endif
- } else {
- cli_ = detail::make_unique<ClientImpl>(host.c_str(), port,
- client_cert_path, client_key_path);
- }
- } else {
- cli_ = detail::make_unique<ClientImpl>(scheme_host_port, 80,
- client_cert_path, client_key_path);
- }
-}
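The constructor above splits a "scheme://host:port" string with a single regular expression and defaults the port to 443 for https and 80 otherwise. A small standalone sketch of that decomposition using the same pattern, with made-up inputs:

#include <iostream>
#include <regex>
#include <string>

int main() {
    const std::regex re(R"(^(?:([a-z]+)://)?([^:/?#]+)(?::(\d+))?)");
    for (const std::string url : {"https://example.com:8443", "http://example.com", "localhost:1234"}) {
        std::smatch m;
        if (!std::regex_match(url, m, re)) { continue; }
        const auto scheme = m[1].str();  // may be empty when no scheme is given
        const auto host = m[2].str();
        const auto port = m[3].str().empty() ? (scheme == "https" ? 443 : 80)
                                             : std::stoi(m[3].str());
        std::cout << scheme << " | " << host << " | " << port << "\n";
    }
}

For these inputs it prints "https | example.com | 8443", "http | example.com | 80", and " | localhost | 1234".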
-
-inline Client::Client(const std::string &host, int port)
- : cli_(detail::make_unique<ClientImpl>(host, port)) {}
-
-inline Client::Client(const std::string &host, int port,
- const std::string &client_cert_path,
- const std::string &client_key_path)
- : cli_(detail::make_unique<ClientImpl>(host, port, client_cert_path,
- client_key_path)) {}
-
-inline Client::~Client() {}
-
-inline bool Client::is_valid() const {
- return cli_ != nullptr && cli_->is_valid();
-}
-
-inline Result Client::Get(const char *path) { return cli_->Get(path); }
-inline Result Client::Get(const char *path, const Headers &headers) {
- return cli_->Get(path, headers);
-}
-inline Result Client::Get(const char *path, Progress progress) {
- return cli_->Get(path, std::move(progress));
-}
-inline Result Client::Get(const char *path, const Headers &headers,
- Progress progress) {
- return cli_->Get(path, headers, std::move(progress));
-}
-inline Result Client::Get(const char *path, ContentReceiver content_receiver) {
- return cli_->Get(path, std::move(content_receiver));
-}
-inline Result Client::Get(const char *path, const Headers &headers,
- ContentReceiver content_receiver) {
- return cli_->Get(path, headers, std::move(content_receiver));
-}
-inline Result Client::Get(const char *path, ContentReceiver content_receiver,
- Progress progress) {
- return cli_->Get(path, std::move(content_receiver), std::move(progress));
-}
-inline Result Client::Get(const char *path, const Headers &headers,
- ContentReceiver content_receiver, Progress progress) {
- return cli_->Get(path, headers, std::move(content_receiver),
- std::move(progress));
-}
-inline Result Client::Get(const char *path, ResponseHandler response_handler,
- ContentReceiver content_receiver) {
- return cli_->Get(path, std::move(response_handler),
- std::move(content_receiver));
-}
-inline Result Client::Get(const char *path, const Headers &headers,
- ResponseHandler response_handler,
- ContentReceiver content_receiver) {
- return cli_->Get(path, headers, std::move(response_handler),
- std::move(content_receiver));
-}
-inline Result Client::Get(const char *path, ResponseHandler response_handler,
- ContentReceiver content_receiver, Progress progress) {
- return cli_->Get(path, std::move(response_handler),
- std::move(content_receiver), std::move(progress));
-}
-inline Result Client::Get(const char *path, const Headers &headers,
- ResponseHandler response_handler,
- ContentReceiver content_receiver, Progress progress) {
- return cli_->Get(path, headers, std::move(response_handler),
- std::move(content_receiver), std::move(progress));
-}
-
-inline Result Client::Head(const char *path) { return cli_->Head(path); }
-inline Result Client::Head(const char *path, const Headers &headers) {
- return cli_->Head(path, headers);
-}
-
-inline Result Client::Post(const char *path) { return cli_->Post(path); }
-inline Result Client::Post(const char *path, const std::string &body,
- const char *content_type) {
- return cli_->Post(path, body, content_type);
-}
-inline Result Client::Post(const char *path, const Headers &headers,
- const std::string &body, const char *content_type) {
- return cli_->Post(path, headers, body, content_type);
-}
-inline Result Client::Post(const char *path, size_t content_length,
- ContentProvider content_provider,
- const char *content_type) {
- return cli_->Post(path, content_length, std::move(content_provider),
- content_type);
-}
-inline Result Client::Post(const char *path, const Headers &headers,
- size_t content_length,
- ContentProvider content_provider,
- const char *content_type) {
- return cli_->Post(path, headers, content_length, std::move(content_provider),
- content_type);
-}
-inline Result Client::Post(const char *path, const Params &params) {
- return cli_->Post(path, params);
-}
-inline Result Client::Post(const char *path, const Headers &headers,
- const Params &params) {
- return cli_->Post(path, headers, params);
-}
-inline Result Client::Post(const char *path,
- const MultipartFormDataItems &items) {
- return cli_->Post(path, items);
-}
-inline Result Client::Post(const char *path, const Headers &headers,
- const MultipartFormDataItems &items) {
- return cli_->Post(path, headers, items);
-}
-inline Result Client::Post(const char *path, const Headers &headers,
- const MultipartFormDataItems &items,
- const std::string &boundary) {
- return cli_->Post(path, headers, items, boundary);
-}
-inline Result Client::Put(const char *path) { return cli_->Put(path); }
-inline Result Client::Put(const char *path, const std::string &body,
- const char *content_type) {
- return cli_->Put(path, body, content_type);
-}
-inline Result Client::Put(const char *path, const Headers &headers,
- const std::string &body, const char *content_type) {
- return cli_->Put(path, headers, body, content_type);
-}
-inline Result Client::Put(const char *path, size_t content_length,
- ContentProvider content_provider,
- const char *content_type) {
- return cli_->Put(path, content_length, std::move(content_provider),
- content_type);
-}
-inline Result Client::Put(const char *path, const Headers &headers,
- size_t content_length,
- ContentProvider content_provider,
- const char *content_type) {
- return cli_->Put(path, headers, content_length, std::move(content_provider),
- content_type);
-}
-inline Result Client::Put(const char *path, const Params &params) {
- return cli_->Put(path, params);
-}
-inline Result Client::Put(const char *path, const Headers &headers,
- const Params &params) {
- return cli_->Put(path, headers, params);
-}
-inline Result Client::Patch(const char *path, const std::string &body,
- const char *content_type) {
- return cli_->Patch(path, body, content_type);
-}
-inline Result Client::Patch(const char *path, const Headers &headers,
- const std::string &body, const char *content_type) {
- return cli_->Patch(path, headers, body, content_type);
-}
-inline Result Client::Patch(const char *path, size_t content_length,
- ContentProvider content_provider,
- const char *content_type) {
- return cli_->Patch(path, content_length, std::move(content_provider),
- content_type);
-}
-inline Result Client::Patch(const char *path, const Headers &headers,
- size_t content_length,
- ContentProvider content_provider,
- const char *content_type) {
- return cli_->Patch(path, headers, content_length, std::move(content_provider),
- content_type);
-}
-inline Result Client::Delete(const char *path) { return cli_->Delete(path); }
-inline Result Client::Delete(const char *path, const std::string &body,
- const char *content_type) {
- return cli_->Delete(path, body, content_type);
-}
-inline Result Client::Delete(const char *path, const Headers &headers) {
- return cli_->Delete(path, headers);
-}
-inline Result Client::Delete(const char *path, const Headers &headers,
- const std::string &body,
- const char *content_type) {
- return cli_->Delete(path, headers, body, content_type);
-}
-inline Result Client::Options(const char *path) { return cli_->Options(path); }
-inline Result Client::Options(const char *path, const Headers &headers) {
- return cli_->Options(path, headers);
-}
-
-inline bool Client::send(const Request &req, Response &res) {
- return cli_->send(req, res);
-}
-
-inline size_t Client::is_socket_open() const { return cli_->is_socket_open(); }
-
-inline void Client::stop() { cli_->stop(); }
-
-inline void Client::set_default_headers(Headers headers) {
- cli_->set_default_headers(std::move(headers));
-}
-
-inline void Client::set_tcp_nodelay(bool on) { cli_->set_tcp_nodelay(on); }
-inline void Client::set_socket_options(SocketOptions socket_options) {
- cli_->set_socket_options(std::move(socket_options));
-}
-
-inline void Client::set_connection_timeout(time_t sec, time_t usec) {
- cli_->set_connection_timeout(sec, usec);
-}
-inline void Client::set_read_timeout(time_t sec, time_t usec) {
- cli_->set_read_timeout(sec, usec);
-}
-inline void Client::set_write_timeout(time_t sec, time_t usec) {
- cli_->set_write_timeout(sec, usec);
-}
-
-inline void Client::set_basic_auth(const char *username, const char *password) {
- cli_->set_basic_auth(username, password);
-}
-inline void Client::set_bearer_token_auth(const char *token) {
- cli_->set_bearer_token_auth(token);
-}
-#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
-inline void Client::set_digest_auth(const char *username,
- const char *password) {
- cli_->set_digest_auth(username, password);
-}
-#endif
-
-inline void Client::set_keep_alive(bool on) { cli_->set_keep_alive(on); }
-inline void Client::set_follow_location(bool on) {
- cli_->set_follow_location(on);
-}
-
-inline void Client::set_compress(bool on) { cli_->set_compress(on); }
-
-inline void Client::set_decompress(bool on) { cli_->set_decompress(on); }
-
-inline void Client::set_interface(const char *intf) {
- cli_->set_interface(intf);
-}
-
-inline void Client::set_proxy(const char *host, int port) {
- cli_->set_proxy(host, port);
-}
-inline void Client::set_proxy_basic_auth(const char *username,
- const char *password) {
- cli_->set_proxy_basic_auth(username, password);
-}
-inline void Client::set_proxy_bearer_token_auth(const char *token) {
- cli_->set_proxy_bearer_token_auth(token);
-}
-#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
-inline void Client::set_proxy_digest_auth(const char *username,
- const char *password) {
- cli_->set_proxy_digest_auth(username, password);
-}
-#endif
-
-#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
-inline void Client::enable_server_certificate_verification(bool enabled) {
- cli_->enable_server_certificate_verification(enabled);
-}
-#endif
-
-inline void Client::set_logger(Logger logger) { cli_->set_logger(logger); }
-
-#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
-inline void Client::set_ca_cert_path(const char *ca_cert_file_path,
- const char *ca_cert_dir_path) {
- if (is_ssl_) {
- static_cast<SSLClient &>(*cli_).set_ca_cert_path(ca_cert_file_path,
- ca_cert_dir_path);
- }
-}
-
-inline void Client::set_ca_cert_store(X509_STORE *ca_cert_store) {
- if (is_ssl_) {
- static_cast<SSLClient &>(*cli_).set_ca_cert_store(ca_cert_store);
- }
-}
-
-inline long Client::get_openssl_verify_result() const {
- if (is_ssl_) {
- return static_cast<SSLClient &>(*cli_).get_openssl_verify_result();
- }
- return -1; // NOTE: -1 doesn't match any of X509_V_ERR_???
-}
-
-inline SSL_CTX *Client::ssl_context() const {
- if (is_ssl_) { return static_cast<SSLClient &>(*cli_).ssl_context(); }
- return nullptr;
-}
-#endif
-
-// ----------------------------------------------------------------------------
-
-} // namespace httplib
-
-#endif // CPPHTTPLIB_HTTPLIB_H
diff --git a/externals/libusb/CMakeLists.txt b/externals/libusb/CMakeLists.txt
index 7180fd42a..151ddc462 100644
--- a/externals/libusb/CMakeLists.txt
+++ b/externals/libusb/CMakeLists.txt
@@ -1,10 +1,21 @@
-if (MINGW OR (${CMAKE_SYSTEM_NAME} MATCHES "Linux"))
+if (MINGW OR (${CMAKE_SYSTEM_NAME} MATCHES "Linux") OR APPLE)
set(LIBUSB_FOUND ON CACHE BOOL "libusb is present" FORCE)
set(LIBUSB_VERSION "1.0.24" CACHE STRING "libusb version string" FORCE)
# GNU toolchains for some reason don't work with the latter half of this CMakeLists after
# updating to 1.0.24, so we do it the old-fashioned way for now.
+ # Require autoconf and libtoolize here, rather than crashing during compilation
+ find_program(AUTOCONF autoconf)
+ if ("${AUTOCONF}" STREQUAL "AUTOCONF-NOTFOUND")
+ message(FATAL_ERROR "Required program `autoconf` not found.")
+ endif()
+
+ find_program(LIBTOOLIZE libtoolize)
+ if ("${LIBTOOLIZE}" STREQUAL "LIBTOOLIZE-NOTFOUND")
+ message(FATAL_ERROR "Required program `libtoolize` not found.")
+ endif()
+
set(LIBUSB_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/libusb")
set(LIBUSB_SRC_DIR "${CMAKE_CURRENT_SOURCE_DIR}/libusb")
diff --git a/externals/sirit b/externals/sirit
-Subproject eefca56afd49379bdebc97ded8b480839f93088
+Subproject a39596358a3a5488c06554c0c15184a6af71e43
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index f30dd49a3..6e66dc1df 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -45,13 +45,23 @@ if (MSVC)
/Zc:inline
/Zc:throwingNew
+ # External headers diagnostics
+ /experimental:external # Enables the external headers options. This option isn't required in Visual Studio 2019 version 16.10 and later
+ /external:anglebrackets # Treats all headers included by #include <header>, where the header file is enclosed in angle brackets (< >), as external headers
+ /external:W0 # Sets the default warning level to 0 for external headers, effectively turning off warnings for external headers
+
# Warnings
/W3
- /we4062 # enumerator 'identifier' in a switch of enum 'enumeration' is not handled
+ /we4018 # 'expression': signed/unsigned mismatch
+ /we4062 # Enumerator 'identifier' in a switch of enum 'enumeration' is not handled
/we4101 # 'identifier': unreferenced local variable
+ /we4189 # 'identifier': local variable is initialized but not referenced
/we4265 # 'class': class has virtual functions, but destructor is not virtual
- /we4388 # signed/unsigned mismatch
- /we4547 # 'operator' : operator before comma has no effect; expected operator with side-effect
+ /we4267 # 'var': conversion from 'size_t' to 'type', possible loss of data
+ /we4305 # 'context': truncation from 'type1' to 'type2'
+ /we4388 # 'expression': signed/unsigned mismatch
+ /we4389 # 'operator': signed/unsigned mismatch
+ /we4547 # 'operator': operator before comma has no effect; expected operator with side-effect
/we4549 # 'operator1': operator before comma has no effect; did you intend 'operator2'?
/we4555 # Expression has no effect; expected expression with side-effect
/we4715 # 'function': not all control paths return a value
@@ -72,6 +82,7 @@ else()
-Werror=missing-declarations
-Werror=missing-field-initializers
-Werror=reorder
+ -Werror=sign-compare
-Werror=switch
-Werror=uninitialized
-Werror=unused-function
@@ -131,6 +142,7 @@ add_subdirectory(core)
add_subdirectory(audio_core)
add_subdirectory(video_core)
add_subdirectory(input_common)
+add_subdirectory(shader_recompiler)
add_subdirectory(tests)
if (ENABLE_SDL2)
diff --git a/src/audio_core/CMakeLists.txt b/src/audio_core/CMakeLists.txt
index d25a1a645..090dd19b1 100644
--- a/src/audio_core/CMakeLists.txt
+++ b/src/audio_core/CMakeLists.txt
@@ -51,9 +51,6 @@ if (NOT MSVC)
target_compile_options(audio_core PRIVATE
-Werror=conversion
-Werror=ignored-qualifiers
- -Werror=implicit-fallthrough
- -Werror=reorder
- -Werror=sign-compare
-Werror=shadow
-Werror=unused-parameter
-Werror=unused-variable
diff --git a/src/audio_core/audio_out.cpp b/src/audio_core/audio_out.cpp
index 20a756dce..44a899d08 100644
--- a/src/audio_core/audio_out.cpp
+++ b/src/audio_core/audio_out.cpp
@@ -30,7 +30,8 @@ StreamPtr AudioOut::OpenStream(Core::Timing::CoreTiming& core_timing, u32 sample
u32 num_channels, std::string&& name,
Stream::ReleaseCallback&& release_callback) {
if (!sink) {
- sink = CreateSinkFromID(Settings::values.sink_id, Settings::values.audio_device_id);
+ sink = CreateSinkFromID(Settings::values.sink_id.GetValue(),
+ Settings::values.audio_device_id.GetValue());
}
return std::make_shared<Stream>(
diff --git a/src/audio_core/audio_renderer.cpp b/src/audio_core/audio_renderer.cpp
index 80ffddb10..7dba739b4 100644
--- a/src/audio_core/audio_renderer.cpp
+++ b/src/audio_core/audio_renderer.cpp
@@ -12,6 +12,7 @@
#include "audio_core/voice_context.h"
#include "common/logging/log.h"
#include "common/settings.h"
+#include "core/core_timing.h"
#include "core/memory.h"
namespace {
@@ -28,10 +29,9 @@ namespace {
(static_cast<float>(r_channel) * r_mix_amount)));
}
-[[nodiscard]] static constexpr std::tuple<s16, s16> Mix6To2(s16 fl_channel, s16 fr_channel,
- s16 fc_channel,
- [[maybe_unused]] s16 lf_channel,
- s16 bl_channel, s16 br_channel) {
+[[maybe_unused, nodiscard]] static constexpr std::tuple<s16, s16> Mix6To2(
+ s16 fl_channel, s16 fr_channel, s16 fc_channel, [[maybe_unused]] s16 lf_channel, s16 bl_channel,
+ s16 br_channel) {
// Front channels are mixed at 36.94%, the center channel at 26.12%, and the back channels
// are mixed at 36.94%
@@ -56,11 +56,11 @@ namespace {
const std::array<float_le, 4>& coeff) {
const auto left =
static_cast<float>(fl_channel) * coeff[0] + static_cast<float>(fc_channel) * coeff[1] +
- static_cast<float>(lf_channel) * coeff[2] + static_cast<float>(bl_channel) * coeff[0];
+ static_cast<float>(lf_channel) * coeff[2] + static_cast<float>(bl_channel) * coeff[3];
const auto right =
static_cast<float>(fr_channel) * coeff[0] + static_cast<float>(fc_channel) * coeff[1] +
- static_cast<float>(lf_channel) * coeff[2] + static_cast<float>(br_channel) * coeff[0];
+ static_cast<float>(lf_channel) * coeff[2] + static_cast<float>(br_channel) * coeff[3];
return {ClampToS16(static_cast<s32>(left)), ClampToS16(static_cast<s32>(right))};
}
@@ -68,7 +68,9 @@ namespace {
} // namespace
namespace AudioCore {
-AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, Core::Memory::Memory& memory_,
+constexpr s32 NUM_BUFFERS = 2;
+
+AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing_, Core::Memory::Memory& memory_,
AudioCommon::AudioRendererParameter params,
Stream::ReleaseCallback&& release_callback,
std::size_t instance_number)
@@ -77,7 +79,8 @@ AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, Core::Memory
sink_context(params.sink_count), splitter_context(),
voices(params.voice_count), memory{memory_},
command_generator(worker_params, voice_context, mix_context, splitter_context, effect_context,
- memory) {
+ memory),
+ core_timing{core_timing_} {
behavior_info.SetUserRevision(params.revision);
splitter_context.Initialize(behavior_info, params.splitter_count,
params.num_splitter_send_channels);
@@ -86,16 +89,27 @@ AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, Core::Memory
stream = audio_out->OpenStream(
core_timing, params.sample_rate, AudioCommon::STREAM_NUM_CHANNELS,
fmt::format("AudioRenderer-Instance{}", instance_number), std::move(release_callback));
- audio_out->StartStream(stream);
-
- QueueMixedBuffer(0);
- QueueMixedBuffer(1);
- QueueMixedBuffer(2);
- QueueMixedBuffer(3);
+ process_event = Core::Timing::CreateEvent(
+ fmt::format("AudioRenderer-Instance{}-Process", instance_number),
+ [this](std::uintptr_t, std::chrono::nanoseconds) { ReleaseAndQueueBuffers(); });
+ for (s32 i = 0; i < NUM_BUFFERS; ++i) {
+ QueueMixedBuffer(i);
+ }
}
AudioRenderer::~AudioRenderer() = default;
+ResultCode AudioRenderer::Start() {
+ audio_out->StartStream(stream);
+ ReleaseAndQueueBuffers();
+ return ResultSuccess;
+}
+
+ResultCode AudioRenderer::Stop() {
+ audio_out->StopStream(stream);
+ return ResultSuccess;
+}
+
u32 AudioRenderer::GetSampleRate() const {
return worker_params.sample_rate;
}
@@ -114,7 +128,7 @@ Stream::State AudioRenderer::GetStreamState() const {
ResultCode AudioRenderer::UpdateAudioRenderer(const std::vector<u8>& input_params,
std::vector<u8>& output_params) {
-
+ std::scoped_lock lock{mutex};
InfoUpdater info_updater{input_params, output_params, behavior_info};
if (!info_updater.UpdateBehaviorInfo(behavior_info)) {
@@ -194,9 +208,6 @@ ResultCode AudioRenderer::UpdateAudioRenderer(const std::vector<u8>& input_param
LOG_ERROR(Audio, "Audio buffers were not consumed!");
return AudioCommon::Audren::ERR_INVALID_PARAMETERS;
}
-
- ReleaseAndQueueBuffers();
-
return ResultSuccess;
}
@@ -220,10 +231,8 @@ void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) {
command_generator.PostCommand();
// Base sample size
std::size_t BUFFER_SIZE{worker_params.sample_count};
- // Samples
- std::vector<s16> buffer(BUFFER_SIZE * stream->GetNumChannels());
- // Make sure to clear our samples
- std::memset(buffer.data(), 0, buffer.size() * sizeof(s16));
+ // Samples, making sure to clear
+ std::vector<s16> buffer(BUFFER_SIZE * stream->GetNumChannels(), 0);
if (sink_context.InUse()) {
const auto stream_channel_count = stream->GetNumChannels();
@@ -231,7 +240,7 @@ void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) {
const auto channel_count = buffer_offsets.size();
const auto& final_mix = mix_context.GetFinalMixInfo();
const auto& in_params = final_mix.GetInParams();
- std::vector<s32*> mix_buffers(channel_count);
+ std::vector<std::span<s32>> mix_buffers(channel_count);
for (std::size_t i = 0; i < channel_count; i++) {
mix_buffers[i] =
command_generator.GetMixBuffer(in_params.buffer_offset + buffer_offsets[i]);
@@ -284,18 +293,11 @@ void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) {
buffer[i * stream_channel_count + 0] = Mix2To1(fl_sample, fr_sample);
} else if (stream_channel_count == 2) {
// Mix all channels into 2 channels
- if (sink_context.HasDownMixingCoefficients()) {
- const auto [left, right] = Mix6To2WithCoefficients(
- fl_sample, fr_sample, fc_sample, lf_sample, bl_sample, br_sample,
- sink_context.GetDownmixCoefficients());
- buffer[i * stream_channel_count + 0] = left;
- buffer[i * stream_channel_count + 1] = right;
- } else {
- const auto [left, right] = Mix6To2(fl_sample, fr_sample, fc_sample,
- lf_sample, bl_sample, br_sample);
- buffer[i * stream_channel_count + 0] = left;
- buffer[i * stream_channel_count + 1] = right;
- }
+ const auto [left, right] = Mix6To2WithCoefficients(
+ fl_sample, fr_sample, fc_sample, lf_sample, bl_sample, br_sample,
+ sink_context.GetDownmixCoefficients());
+ buffer[i * stream_channel_count + 0] = left;
+ buffer[i * stream_channel_count + 1] = right;
} else if (stream_channel_count == 6) {
// Pass through
buffer[i * stream_channel_count + 0] = fl_sample;
@@ -315,10 +317,24 @@ void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) {
}
void AudioRenderer::ReleaseAndQueueBuffers() {
- const auto released_buffers{audio_out->GetTagsAndReleaseBuffers(stream)};
- for (const auto& tag : released_buffers) {
- QueueMixedBuffer(tag);
+ if (!stream->IsPlaying()) {
+ return;
}
+
+ {
+ std::scoped_lock lock{mutex};
+ const auto released_buffers{audio_out->GetTagsAndReleaseBuffers(stream)};
+ for (const auto& tag : released_buffers) {
+ QueueMixedBuffer(tag);
+ }
+ }
+
+ const f32 sample_rate = static_cast<f32>(GetSampleRate());
+ const f32 sample_count = static_cast<f32>(GetSampleCount());
+ const f32 consume_rate = sample_rate / (sample_count * (sample_count / 240));
+ const s32 ms = (1000 / static_cast<s32>(consume_rate)) - 1;
+ const std::chrono::milliseconds next_event_time(std::max(ms / NUM_BUFFERS, 1));
+ core_timing.ScheduleEvent(next_event_time, process_event, {});
}
} // namespace AudioCore
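The rescheduling added to ReleaseAndQueueBuffers() above derives the next core-timing event from the stream's consume rate. A worked numeric sketch of that arithmetic, assuming illustrative values of a 48 kHz sample rate and 240 samples per frame (neither value is stated in this diff):

#include <algorithm>
#include <iostream>

int main() {
    constexpr float sample_rate = 48000.0f;  // assumed
    constexpr float sample_count = 240.0f;   // assumed
    constexpr int num_buffers = 2;           // NUM_BUFFERS in the diff above

    const float consume_rate = sample_rate / (sample_count * (sample_count / 240.0f));  // 200 buffers/s
    const int ms = (1000 / static_cast<int>(consume_rate)) - 1;                         // 4 ms
    const int next_event_ms = std::max(ms / num_buffers, 1);                            // 2 ms

    std::cout << "reschedule in " << next_event_ms << " ms\n";
}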
diff --git a/src/audio_core/audio_renderer.h b/src/audio_core/audio_renderer.h
index 18567f618..88fdd13dd 100644
--- a/src/audio_core/audio_renderer.h
+++ b/src/audio_core/audio_renderer.h
@@ -6,6 +6,7 @@
#include <array>
#include <memory>
+#include <mutex>
#include <vector>
#include "audio_core/behavior_info.h"
@@ -45,6 +46,8 @@ public:
[[nodiscard]] ResultCode UpdateAudioRenderer(const std::vector<u8>& input_params,
std::vector<u8>& output_params);
+ [[nodiscard]] ResultCode Start();
+ [[nodiscard]] ResultCode Stop();
void QueueMixedBuffer(Buffer::Tag tag);
void ReleaseAndQueueBuffers();
[[nodiscard]] u32 GetSampleRate() const;
@@ -68,6 +71,9 @@ private:
Core::Memory::Memory& memory;
CommandGenerator command_generator;
std::size_t elapsed_frame_count{};
+ Core::Timing::CoreTiming& core_timing;
+ std::shared_ptr<Core::Timing::EventType> process_event;
+ std::mutex mutex;
};
} // namespace AudioCore
diff --git a/src/audio_core/command_generator.cpp b/src/audio_core/command_generator.cpp
index 437cc5ccd..45b2eef52 100644
--- a/src/audio_core/command_generator.cpp
+++ b/src/audio_core/command_generator.cpp
@@ -31,7 +31,7 @@ constexpr std::array<f32, AudioCommon::I3DL2REVERB_TAPS> EARLY_GAIN{
0.72867f, 0.69794f, 0.5464f, 0.24563f, 0.45214f, 0.44042f};
template <std::size_t N>
-void ApplyMix(s32* output, const s32* input, s32 gain, s32 sample_count) {
+void ApplyMix(std::span<s32> output, std::span<const s32> input, s32 gain, s32 sample_count) {
for (std::size_t i = 0; i < static_cast<std::size_t>(sample_count); i += N) {
for (std::size_t j = 0; j < N; j++) {
output[i + j] +=
@@ -40,7 +40,17 @@ void ApplyMix(s32* output, const s32* input, s32 gain, s32 sample_count) {
}
}
-s32 ApplyMixRamp(s32* output, const s32* input, float gain, float delta, s32 sample_count) {
+s32 ApplyMixRamp(std::span<s32> output, std::span<const s32> input, float gain, float delta,
+ s32 sample_count) {
+ // XC2 passes in NaN mix volumes, causing further issues as we handle everything as s32 rather
+ // than float, so the NaN propagation is lost. As the samples get further modified for
+ // volume etc, they can get out of NaN range, so a later heuristic for catching this is
+ // more difficult. Handle it here by setting these samples to silence.
+ if (std::isnan(gain)) {
+ gain = 0.0f;
+ delta = 0.0f;
+ }
+
s32 x = 0;
for (s32 i = 0; i < sample_count; i++) {
x = static_cast<s32>(static_cast<float>(input[i]) * gain);
@@ -50,20 +60,22 @@ s32 ApplyMixRamp(s32* output, const s32* input, float gain, float delta, s32 sam
return x;
}
-void ApplyGain(s32* output, const s32* input, s32 gain, s32 delta, s32 sample_count) {
+void ApplyGain(std::span<s32> output, std::span<const s32> input, s32 gain, s32 delta,
+ s32 sample_count) {
for (s32 i = 0; i < sample_count; i++) {
output[i] = static_cast<s32>((static_cast<s64>(input[i]) * gain + 0x4000) >> 15);
gain += delta;
}
}
-void ApplyGainWithoutDelta(s32* output, const s32* input, s32 gain, s32 sample_count) {
+void ApplyGainWithoutDelta(std::span<s32> output, std::span<const s32> input, s32 gain,
+ s32 sample_count) {
for (s32 i = 0; i < sample_count; i++) {
output[i] = static_cast<s32>((static_cast<s64>(input[i]) * gain + 0x4000) >> 15);
}
}
-s32 ApplyMixDepop(s32* output, s32 first_sample, s32 delta, s32 sample_count) {
+s32 ApplyMixDepop(std::span<s32> output, s32 first_sample, s32 delta, s32 sample_count) {
const bool positive = first_sample > 0;
auto final_sample = std::abs(first_sample);
for (s32 i = 0; i < sample_count; i++) {
@@ -128,10 +140,10 @@ constexpr std::array<std::size_t, 20> REVERB_TAP_INDEX_6CH{4, 0, 0, 1, 1, 1, 1,
1, 1, 1, 0, 0, 0, 0, 3, 3, 3};
template <std::size_t CHANNEL_COUNT>
-void ApplyReverbGeneric(I3dl2ReverbState& state,
- const std::array<const s32*, AudioCommon::MAX_CHANNEL_COUNT>& input,
- const std::array<s32*, AudioCommon::MAX_CHANNEL_COUNT>& output,
- s32 sample_count) {
+void ApplyReverbGeneric(
+ I3dl2ReverbState& state,
+ const std::array<std::span<const s32>, AudioCommon::MAX_CHANNEL_COUNT>& input,
+ const std::array<std::span<s32>, AudioCommon::MAX_CHANNEL_COUNT>& output, s32 sample_count) {
auto GetTapLookup = []() {
if constexpr (CHANNEL_COUNT == 1) {
@@ -400,7 +412,10 @@ void CommandGenerator::GenerateDataSourceCommand(ServerVoiceInfo& voice_info, Vo
}
} else {
switch (in_params.sample_format) {
+ case SampleFormat::Pcm8:
case SampleFormat::Pcm16:
+ case SampleFormat::Pcm32:
+ case SampleFormat::PcmFloat:
DecodeFromWaveBuffers(voice_info, GetChannelMixBuffer(channel), dsp_state, channel,
worker_params.sample_rate, worker_params.sample_count,
in_params.node_id);
@@ -454,8 +469,8 @@ void CommandGenerator::GenerateBiquadFilterCommand([[maybe_unused]] s32 mix_buff
"input_mix_buffer={}, output_mix_buffer={}",
node_id, input_offset, output_offset);
}
- const auto* input = GetMixBuffer(input_offset);
- auto* output = GetMixBuffer(output_offset);
+ std::span<const s32> input = GetMixBuffer(input_offset);
+ std::span<s32> output = GetMixBuffer(output_offset);
// Biquad filter parameters
const auto [n0, n1, n2] = params.numerator;
@@ -548,8 +563,8 @@ void CommandGenerator::GenerateI3dl2ReverbEffectCommand(s32 mix_buffer_offset, E
return;
}
- std::array<const s32*, AudioCommon::MAX_CHANNEL_COUNT> input{};
- std::array<s32*, AudioCommon::MAX_CHANNEL_COUNT> output{};
+ std::array<std::span<const s32>, AudioCommon::MAX_CHANNEL_COUNT> input{};
+ std::array<std::span<s32>, AudioCommon::MAX_CHANNEL_COUNT> output{};
const auto status = params.status;
for (s32 i = 0; i < channel_count; i++) {
@@ -584,7 +599,8 @@ void CommandGenerator::GenerateI3dl2ReverbEffectCommand(s32 mix_buffer_offset, E
for (s32 i = 0; i < channel_count; i++) {
// Only copy if the buffer input and output do not match!
if ((mix_buffer_offset + params.input[i]) != (mix_buffer_offset + params.output[i])) {
- std::memcpy(output[i], input[i], worker_params.sample_count * sizeof(s32));
+ std::memcpy(output[i].data(), input[i].data(),
+ worker_params.sample_count * sizeof(s32));
}
}
}
@@ -600,8 +616,8 @@ void CommandGenerator::GenerateBiquadFilterEffectCommand(s32 mix_buffer_offset,
for (s32 i = 0; i < channel_count; i++) {
// TODO(ogniK): Actually implement biquad filter
if (params.input[i] != params.output[i]) {
- const auto* input = GetMixBuffer(mix_buffer_offset + params.input[i]);
- auto* output = GetMixBuffer(mix_buffer_offset + params.output[i]);
+ std::span<const s32> input = GetMixBuffer(mix_buffer_offset + params.input[i]);
+ std::span<s32> output = GetMixBuffer(mix_buffer_offset + params.output[i]);
ApplyMix<1>(output, input, 32768, worker_params.sample_count);
}
}
@@ -640,14 +656,15 @@ void CommandGenerator::GenerateAuxCommand(s32 mix_buffer_offset, EffectBase* inf
if (samples_read != static_cast<int>(worker_params.sample_count) &&
samples_read <= params.sample_count) {
- std::memset(GetMixBuffer(output_index), 0, params.sample_count - samples_read);
+ std::memset(GetMixBuffer(output_index).data(), 0,
+ params.sample_count - samples_read);
}
} else {
AuxInfoDSP empty{};
memory.WriteBlock(aux->GetSendInfo(), &empty, sizeof(AuxInfoDSP));
memory.WriteBlock(aux->GetRecvInfo(), &empty, sizeof(AuxInfoDSP));
if (output_index != input_index) {
- std::memcpy(GetMixBuffer(output_index), GetMixBuffer(input_index),
+ std::memcpy(GetMixBuffer(output_index).data(), GetMixBuffer(input_index).data(),
worker_params.sample_count * sizeof(s32));
}
}
@@ -665,7 +682,7 @@ ServerSplitterDestinationData* CommandGenerator::GetDestinationData(s32 splitter
}
s32 CommandGenerator::WriteAuxBuffer(AuxInfoDSP& dsp_info, VAddr send_buffer, u32 max_samples,
- const s32* data, u32 sample_count, u32 write_offset,
+ std::span<const s32> data, u32 sample_count, u32 write_offset,
u32 write_count) {
if (max_samples == 0) {
return 0;
@@ -675,14 +692,14 @@ s32 CommandGenerator::WriteAuxBuffer(AuxInfoDSP& dsp_info, VAddr send_buffer, u3
return 0;
}
- std::size_t data_offset{};
+ s32 data_offset{};
u32 remaining = sample_count;
while (remaining > 0) {
// Get position in buffer
const auto base = send_buffer + (offset * sizeof(u32));
const auto samples_to_grab = std::min(max_samples - offset, remaining);
// Write to output
- memory.WriteBlock(base, (data + data_offset), samples_to_grab * sizeof(u32));
+ memory.WriteBlock(base, (data.data() + data_offset), samples_to_grab * sizeof(u32));
offset = (offset + samples_to_grab) % max_samples;
remaining -= samples_to_grab;
data_offset += samples_to_grab;
@@ -695,7 +712,7 @@ s32 CommandGenerator::WriteAuxBuffer(AuxInfoDSP& dsp_info, VAddr send_buffer, u3
}
s32 CommandGenerator::ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u32 max_samples,
- s32* out_data, u32 sample_count, u32 read_offset,
+ std::span<s32> out_data, u32 sample_count, u32 read_offset,
u32 read_count) {
if (max_samples == 0) {
return 0;
@@ -707,15 +724,16 @@ s32 CommandGenerator::ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u3
}
u32 remaining = sample_count;
+ s32 data_offset{};
while (remaining > 0) {
const auto base = recv_buffer + (offset * sizeof(u32));
const auto samples_to_grab = std::min(max_samples - offset, remaining);
std::vector<s32> buffer(samples_to_grab);
memory.ReadBlock(base, buffer.data(), buffer.size() * sizeof(u32));
- std::memcpy(out_data, buffer.data(), buffer.size() * sizeof(u32));
- out_data += samples_to_grab;
+ std::memcpy(out_data.data() + data_offset, buffer.data(), buffer.size() * sizeof(u32));
offset = (offset + samples_to_grab) % max_samples;
remaining -= samples_to_grab;
+ data_offset += samples_to_grab;
}
if (read_count != 0) {
@@ -795,7 +813,7 @@ void CommandGenerator::UpdateI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbSta
state.lowpass_1 = 0.0f;
} else {
const auto a = 1.0f - hf_gain;
- const auto b = 2.0f * (1.0f - hf_gain * CosD(256.0f * info.hf_reference /
+ const auto b = 2.0f * (2.0f - hf_gain * CosD(256.0f * info.hf_reference /
static_cast<f32>(info.sample_rate)));
const auto c = std::sqrt(b * b - 4.0f * a * a);
@@ -843,7 +861,7 @@ void CommandGenerator::UpdateI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbSta
}
const auto max_early_delay = state.early_delay_line.GetMaxDelay();
- const auto reflection_time = 1000.0f * (0.0098f * info.reverb_delay + 0.02f);
+ const auto reflection_time = 1000.0f * (0.9998f * info.reverb_delay + 0.02f);
for (std::size_t tap = 0; tap < AudioCommon::I3DL2REVERB_TAPS; tap++) {
const auto length = AudioCommon::CalculateDelaySamples(
sample_rate, 1000.0f * info.reflection_delay + reflection_time * EARLY_TAP_TIMES[tap]);
@@ -962,8 +980,8 @@ void CommandGenerator::GenerateMixCommand(std::size_t output_offset, std::size_t
node_id, input_offset, output_offset, volume);
}
- auto* output = GetMixBuffer(output_offset);
- const auto* input = GetMixBuffer(input_offset);
+ std::span<s32> output = GetMixBuffer(output_offset);
+ std::span<const s32> input = GetMixBuffer(input_offset);
const s32 gain = static_cast<s32>(volume * 32768.0f);
// Mix with loop unrolling
@@ -1003,8 +1021,10 @@ void CommandGenerator::GenerateFinalMixCommand() {
}
}
-s32 CommandGenerator::DecodePcm16(ServerVoiceInfo& voice_info, VoiceState& dsp_state,
- s32 sample_count, s32 channel, std::size_t mix_offset) {
+template <typename T>
+s32 CommandGenerator::DecodePcm(ServerVoiceInfo& voice_info, VoiceState& dsp_state,
+ s32 sample_start_offset, s32 sample_end_offset, s32 sample_count,
+ s32 channel, std::size_t mix_offset) {
const auto& in_params = voice_info.GetInParams();
const auto& wave_buffer = in_params.wave_buffer[dsp_state.wave_buffer_index];
if (wave_buffer.buffer_address == 0) {
@@ -1013,39 +1033,50 @@ s32 CommandGenerator::DecodePcm16(ServerVoiceInfo& voice_info, VoiceState& dsp_s
if (wave_buffer.buffer_size == 0) {
return 0;
}
- if (wave_buffer.end_sample_offset < wave_buffer.start_sample_offset) {
+ if (sample_end_offset < sample_start_offset) {
return 0;
}
- const auto samples_remaining =
- (wave_buffer.end_sample_offset - wave_buffer.start_sample_offset) - dsp_state.offset;
+ const auto samples_remaining = (sample_end_offset - sample_start_offset) - dsp_state.offset;
const auto start_offset =
- ((wave_buffer.start_sample_offset + dsp_state.offset) * in_params.channel_count) *
- sizeof(s16);
+ ((dsp_state.offset + sample_start_offset) * in_params.channel_count) * sizeof(T);
const auto buffer_pos = wave_buffer.buffer_address + start_offset;
const auto samples_processed = std::min(sample_count, samples_remaining);
- if (in_params.channel_count == 1) {
- std::vector<s16> buffer(samples_processed);
- memory.ReadBlock(buffer_pos, buffer.data(), buffer.size() * sizeof(s16));
- for (std::size_t i = 0; i < buffer.size(); i++) {
- sample_buffer[mix_offset + i] = buffer[i];
- }
- } else {
- const auto channel_count = in_params.channel_count;
- std::vector<s16> buffer(samples_processed * channel_count);
- memory.ReadBlock(buffer_pos, buffer.data(), buffer.size() * sizeof(s16));
+ const auto channel_count = in_params.channel_count;
+ std::vector<T> buffer(samples_processed * channel_count);
+ memory.ReadBlock(buffer_pos, buffer.data(), buffer.size() * sizeof(T));
+ if constexpr (std::is_floating_point_v<T>) {
+ for (std::size_t i = 0; i < static_cast<std::size_t>(samples_processed); i++) {
+ sample_buffer[mix_offset + i] = static_cast<s32>(buffer[i * channel_count + channel] *
+ std::numeric_limits<s16>::max());
+ }
+ } else if constexpr (sizeof(T) == 1) {
+ for (std::size_t i = 0; i < static_cast<std::size_t>(samples_processed); i++) {
+ sample_buffer[mix_offset + i] =
+ static_cast<s32>(static_cast<f32>(buffer[i * channel_count + channel] /
+ std::numeric_limits<s8>::max()) *
+ std::numeric_limits<s16>::max());
+ }
+ } else if constexpr (sizeof(T) == 2) {
for (std::size_t i = 0; i < static_cast<std::size_t>(samples_processed); i++) {
sample_buffer[mix_offset + i] = buffer[i * channel_count + channel];
}
+ } else {
+ for (std::size_t i = 0; i < static_cast<std::size_t>(samples_processed); i++) {
+ sample_buffer[mix_offset + i] =
+ static_cast<s32>(static_cast<f32>(buffer[i * channel_count + channel] /
+ std::numeric_limits<s32>::max()) *
+ std::numeric_limits<s16>::max());
+ }
}
return samples_processed;
}
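// Illustrative arithmetic (not part of this change): each source format is normalized into the
// signed 16-bit range used by the mix buffers. For example, a PcmFloat sample of 0.5f is
// converted as static_cast<s32>(0.5f * 32767) == 16383, while Pcm16 samples are copied through
// unchanged.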
s32 CommandGenerator::DecodeAdpcm(ServerVoiceInfo& voice_info, VoiceState& dsp_state,
- s32 sample_count, [[maybe_unused]] s32 channel,
- std::size_t mix_offset) {
+ s32 sample_start_offset, s32 sample_end_offset, s32 sample_count,
+ [[maybe_unused]] s32 channel, std::size_t mix_offset) {
const auto& in_params = voice_info.GetInParams();
const auto& wave_buffer = in_params.wave_buffer[dsp_state.wave_buffer_index];
if (wave_buffer.buffer_address == 0) {
@@ -1054,7 +1085,7 @@ s32 CommandGenerator::DecodeAdpcm(ServerVoiceInfo& voice_info, VoiceState& dsp_s
if (wave_buffer.buffer_size == 0) {
return 0;
}
- if (wave_buffer.end_sample_offset < wave_buffer.start_sample_offset) {
+ if (sample_end_offset < sample_start_offset) {
return 0;
}
@@ -1079,10 +1110,9 @@ s32 CommandGenerator::DecodeAdpcm(ServerVoiceInfo& voice_info, VoiceState& dsp_s
s32 coef1 = coeffs[idx * 2];
s32 coef2 = coeffs[idx * 2 + 1];
- const auto samples_remaining =
- (wave_buffer.end_sample_offset - wave_buffer.start_sample_offset) - dsp_state.offset;
+ const auto samples_remaining = (sample_end_offset - sample_start_offset) - dsp_state.offset;
const auto samples_processed = std::min(sample_count, samples_remaining);
- const auto sample_pos = wave_buffer.start_sample_offset + dsp_state.offset;
+ const auto sample_pos = dsp_state.offset + sample_start_offset;
const auto samples_remaining_in_frame = sample_pos % SAMPLES_PER_FRAME;
auto position_in_frame = ((sample_pos / SAMPLES_PER_FRAME) * NIBBLES_PER_SAMPLE) +
@@ -1157,12 +1187,14 @@ s32 CommandGenerator::DecodeAdpcm(ServerVoiceInfo& voice_info, VoiceState& dsp_s
return samples_processed;
}
-s32* CommandGenerator::GetMixBuffer(std::size_t index) {
- return mix_buffer.data() + (index * worker_params.sample_count);
+std::span<s32> CommandGenerator::GetMixBuffer(std::size_t index) {
+ return std::span<s32>(mix_buffer.data() + (index * worker_params.sample_count),
+ worker_params.sample_count);
}
-const s32* CommandGenerator::GetMixBuffer(std::size_t index) const {
- return mix_buffer.data() + (index * worker_params.sample_count);
+std::span<const s32> CommandGenerator::GetMixBuffer(std::size_t index) const {
+ return std::span<const s32>(mix_buffer.data() + (index * worker_params.sample_count),
+ worker_params.sample_count);
}
std::size_t CommandGenerator::GetMixChannelBufferOffset(s32 channel) const {
@@ -1173,15 +1205,15 @@ std::size_t CommandGenerator::GetTotalMixBufferCount() const {
return worker_params.mix_buffer_count + AudioCommon::MAX_CHANNEL_COUNT;
}
-s32* CommandGenerator::GetChannelMixBuffer(s32 channel) {
+std::span<s32> CommandGenerator::GetChannelMixBuffer(s32 channel) {
return GetMixBuffer(worker_params.mix_buffer_count + channel);
}
-const s32* CommandGenerator::GetChannelMixBuffer(s32 channel) const {
+std::span<const s32> CommandGenerator::GetChannelMixBuffer(s32 channel) const {
return GetMixBuffer(worker_params.mix_buffer_count + channel);
}
-void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* output,
+void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, std::span<s32> output,
VoiceState& dsp_state, s32 channel,
s32 target_sample_rate, s32 sample_count,
s32 node_id) {
@@ -1193,7 +1225,7 @@ void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* o
node_id, channel, in_params.sample_format, sample_count, in_params.sample_rate,
in_params.mix_id, in_params.splitter_info_id);
}
- ASSERT_OR_EXECUTE(output != nullptr, { return; });
+ ASSERT_OR_EXECUTE(output.data() != nullptr, { return; });
const auto resample_rate = static_cast<s32>(
static_cast<float>(in_params.sample_rate) / static_cast<float>(target_sample_rate) *
@@ -1210,9 +1242,9 @@ void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* o
}
std::size_t temp_mix_offset{};
- bool is_buffer_completed{false};
+ s32 samples_output{};
auto samples_remaining = sample_count;
- while (samples_remaining > 0 && !is_buffer_completed) {
+ while (samples_remaining > 0) {
const auto samples_to_output = std::min(samples_remaining, min_required_samples);
const auto samples_to_read = (samples_to_output * resample_rate + dsp_state.fraction) >> 15;
@@ -1229,24 +1261,53 @@ void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* o
const auto& wave_buffer = in_params.wave_buffer[dsp_state.wave_buffer_index];
// No more data can be read
if (!dsp_state.is_wave_buffer_valid[dsp_state.wave_buffer_index]) {
- is_buffer_completed = true;
break;
}
if (in_params.sample_format == SampleFormat::Adpcm && dsp_state.offset == 0 &&
wave_buffer.context_address != 0 && wave_buffer.context_size != 0) {
- // TODO(ogniK): ADPCM loop context
+ memory.ReadBlock(wave_buffer.context_address, &dsp_state.context,
+ sizeof(ADPCMContext));
+ }
+
+ s32 samples_offset_start;
+ s32 samples_offset_end;
+ if (dsp_state.loop_count > 0 && wave_buffer.loop_start_sample != 0 &&
+ wave_buffer.loop_end_sample != 0 &&
+ wave_buffer.loop_start_sample <= wave_buffer.loop_end_sample) {
+ samples_offset_start = wave_buffer.loop_start_sample;
+ samples_offset_end = wave_buffer.loop_end_sample;
+ } else {
+ samples_offset_start = wave_buffer.start_sample_offset;
+ samples_offset_end = wave_buffer.end_sample_offset;
}
s32 samples_decoded{0};
switch (in_params.sample_format) {
+ case SampleFormat::Pcm8:
+ samples_decoded =
+ DecodePcm<s8>(voice_info, dsp_state, samples_offset_start, samples_offset_end,
+ samples_to_read - samples_read, channel, temp_mix_offset);
+ break;
case SampleFormat::Pcm16:
- samples_decoded = DecodePcm16(voice_info, dsp_state, samples_to_read - samples_read,
- channel, temp_mix_offset);
+ samples_decoded =
+ DecodePcm<s16>(voice_info, dsp_state, samples_offset_start, samples_offset_end,
+ samples_to_read - samples_read, channel, temp_mix_offset);
+ break;
+ case SampleFormat::Pcm32:
+ samples_decoded =
+ DecodePcm<s32>(voice_info, dsp_state, samples_offset_start, samples_offset_end,
+ samples_to_read - samples_read, channel, temp_mix_offset);
+ break;
+ case SampleFormat::PcmFloat:
+ samples_decoded =
+ DecodePcm<f32>(voice_info, dsp_state, samples_offset_start, samples_offset_end,
+ samples_to_read - samples_read, channel, temp_mix_offset);
break;
case SampleFormat::Adpcm:
- samples_decoded = DecodeAdpcm(voice_info, dsp_state, samples_to_read - samples_read,
- channel, temp_mix_offset);
+ samples_decoded =
+ DecodeAdpcm(voice_info, dsp_state, samples_offset_start, samples_offset_end,
+ samples_to_read - samples_read, channel, temp_mix_offset);
break;
default:
UNREACHABLE_MSG("Unimplemented sample format={}", in_params.sample_format);
@@ -1257,15 +1318,19 @@ void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* o
dsp_state.offset += samples_decoded;
dsp_state.played_sample_count += samples_decoded;
- if (dsp_state.offset >=
- (wave_buffer.end_sample_offset - wave_buffer.start_sample_offset) ||
+ if (dsp_state.offset >= (samples_offset_end - samples_offset_start) ||
samples_decoded == 0) {
// Reset our sample offset
dsp_state.offset = 0;
if (wave_buffer.is_looping) {
- if (samples_decoded == 0) {
+ dsp_state.loop_count++;
+ if (wave_buffer.loop_count > 0 &&
+ (dsp_state.loop_count > wave_buffer.loop_count || samples_decoded == 0)) {
// End of our buffer
- is_buffer_completed = true;
+ voice_info.SetWaveBufferCompleted(dsp_state, wave_buffer);
+ }
+
+ if (samples_decoded == 0) {
break;
}
@@ -1273,35 +1338,29 @@ void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* o
dsp_state.played_sample_count = 0;
}
} else {
-
// Update our wave buffer states
- dsp_state.is_wave_buffer_valid[dsp_state.wave_buffer_index] = false;
- dsp_state.wave_buffer_consumed++;
- dsp_state.wave_buffer_index =
- (dsp_state.wave_buffer_index + 1) % AudioCommon::MAX_WAVE_BUFFERS;
- if (wave_buffer.end_of_stream) {
- dsp_state.played_sample_count = 0;
- }
+ voice_info.SetWaveBufferCompleted(dsp_state, wave_buffer);
}
}
}
if (in_params.behavior_flags.is_pitch_and_src_skipped.Value()) {
// No need to resample
- std::memcpy(output, sample_buffer.data(), samples_read * sizeof(s32));
+ std::memcpy(output.data() + samples_output, sample_buffer.data(),
+ samples_read * sizeof(s32));
} else {
std::fill(sample_buffer.begin() + temp_mix_offset,
sample_buffer.begin() + temp_mix_offset + (samples_to_read - samples_read),
0);
- AudioCore::Resample(output, sample_buffer.data(), resample_rate, dsp_state.fraction,
- samples_to_output);
+ AudioCore::Resample(output.data() + samples_output, sample_buffer.data(), resample_rate,
+ dsp_state.fraction, samples_to_output);
// Resample
for (std::size_t i = 0; i < AudioCommon::MAX_SAMPLE_HISTORY; i++) {
dsp_state.sample_history[i] = sample_buffer[samples_to_read + i];
}
}
- output += samples_to_output;
samples_remaining -= samples_to_output;
+ samples_output += samples_to_output;
}
}
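// Illustrative sketch (not part of this change): returning std::span instead of a raw s32*
// keeps the mix-buffer extent (worker_params.sample_count) attached to the pointer, so a
// hypothetical caller can bound its copies without repeating the size:
//
//   std::span<s32> out = GetMixBuffer(output_index);
//   std::span<const s32> in = GetMixBuffer(input_index);
//   std::memcpy(out.data(), in.data(), out.size() * sizeof(s32));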
diff --git a/src/audio_core/command_generator.h b/src/audio_core/command_generator.h
index 2ebb755b0..59a33ba76 100644
--- a/src/audio_core/command_generator.h
+++ b/src/audio_core/command_generator.h
@@ -5,6 +5,7 @@
#pragma once
#include <array>
+#include <span>
#include "audio_core/common.h"
#include "audio_core/voice_context.h"
#include "common/common_types.h"
@@ -41,10 +42,10 @@ public:
void PreCommand();
void PostCommand();
- [[nodiscard]] s32* GetChannelMixBuffer(s32 channel);
- [[nodiscard]] const s32* GetChannelMixBuffer(s32 channel) const;
- [[nodiscard]] s32* GetMixBuffer(std::size_t index);
- [[nodiscard]] const s32* GetMixBuffer(std::size_t index) const;
+ [[nodiscard]] std::span<s32> GetChannelMixBuffer(s32 channel);
+ [[nodiscard]] std::span<const s32> GetChannelMixBuffer(s32 channel) const;
+ [[nodiscard]] std::span<s32> GetMixBuffer(std::size_t index);
+ [[nodiscard]] std::span<const s32> GetMixBuffer(std::size_t index) const;
[[nodiscard]] std::size_t GetMixChannelBufferOffset(s32 channel) const;
[[nodiscard]] std::size_t GetTotalMixBufferCount() const;
@@ -77,21 +78,24 @@ private:
void GenerateAuxCommand(s32 mix_buffer_offset, EffectBase* info, bool enabled);
[[nodiscard]] ServerSplitterDestinationData* GetDestinationData(s32 splitter_id, s32 index);
- s32 WriteAuxBuffer(AuxInfoDSP& dsp_info, VAddr send_buffer, u32 max_samples, const s32* data,
- u32 sample_count, u32 write_offset, u32 write_count);
- s32 ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u32 max_samples, s32* out_data,
- u32 sample_count, u32 read_offset, u32 read_count);
+ s32 WriteAuxBuffer(AuxInfoDSP& dsp_info, VAddr send_buffer, u32 max_samples,
+ std::span<const s32> data, u32 sample_count, u32 write_offset,
+ u32 write_count);
+ s32 ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u32 max_samples,
+ std::span<s32> out_data, u32 sample_count, u32 read_offset, u32 read_count);
void InitializeI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbState& state,
std::vector<u8>& work_buffer);
void UpdateI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbState& state, bool should_clear);
// DSP Code
- s32 DecodePcm16(ServerVoiceInfo& voice_info, VoiceState& dsp_state, s32 sample_count,
- s32 channel, std::size_t mix_offset);
- s32 DecodeAdpcm(ServerVoiceInfo& voice_info, VoiceState& dsp_state, s32 sample_count,
- s32 channel, std::size_t mix_offset);
- void DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* output, VoiceState& dsp_state,
- s32 channel, s32 target_sample_rate, s32 sample_count, s32 node_id);
+ template <typename T>
+ s32 DecodePcm(ServerVoiceInfo& voice_info, VoiceState& dsp_state, s32 sample_start_offset,
+ s32 sample_end_offset, s32 sample_count, s32 channel, std::size_t mix_offset);
+ s32 DecodeAdpcm(ServerVoiceInfo& voice_info, VoiceState& dsp_state, s32 sample_start_offset,
+ s32 sample_end_offset, s32 sample_count, s32 channel, std::size_t mix_offset);
+ void DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, std::span<s32> output,
+ VoiceState& dsp_state, s32 channel, s32 target_sample_rate,
+ s32 sample_count, s32 node_id);
AudioCommon::AudioRendererParameter& worker_params;
VoiceContext& voice_context;
diff --git a/src/audio_core/common.h b/src/audio_core/common.h
index fe546c55d..1ab537588 100644
--- a/src/audio_core/common.h
+++ b/src/audio_core/common.h
@@ -15,7 +15,7 @@ constexpr ResultCode ERR_INVALID_PARAMETERS{ErrorModule::Audio, 41};
constexpr ResultCode ERR_SPLITTER_SORT_FAILED{ErrorModule::Audio, 43};
} // namespace Audren
-constexpr u32_le CURRENT_PROCESS_REVISION = Common::MakeMagic('R', 'E', 'V', '8');
+constexpr u32_le CURRENT_PROCESS_REVISION = Common::MakeMagic('R', 'E', 'V', '9');
constexpr std::size_t MAX_MIX_BUFFERS = 24;
constexpr std::size_t MAX_BIQUAD_FILTERS = 2;
constexpr std::size_t MAX_CHANNEL_COUNT = 6;
diff --git a/src/audio_core/info_updater.cpp b/src/audio_core/info_updater.cpp
index 4a5b1b4ab..9b4ca1851 100644
--- a/src/audio_core/info_updater.cpp
+++ b/src/audio_core/info_updater.cpp
@@ -189,9 +189,6 @@ bool InfoUpdater::UpdateVoices(VoiceContext& voice_context,
if (voice_in_params.is_new) {
// Default our values for our voice
voice_info.Initialize();
- if (channel_count == 0 || channel_count > AudioCommon::MAX_CHANNEL_COUNT) {
- continue;
- }
// Zero out our voice states
for (std::size_t channel = 0; channel < channel_count; channel++) {
diff --git a/src/audio_core/sink_context.cpp b/src/audio_core/sink_context.cpp
index a69543696..cc55b290c 100644
--- a/src/audio_core/sink_context.cpp
+++ b/src/audio_core/sink_context.cpp
@@ -15,10 +15,17 @@ std::size_t SinkContext::GetCount() const {
void SinkContext::UpdateMainSink(const SinkInfo::InParams& in) {
ASSERT(in.type == SinkTypes::Device);
- has_downmix_coefs = in.device.down_matrix_enabled;
- if (has_downmix_coefs) {
+ if (in.device.down_matrix_enabled) {
downmix_coefficients = in.device.down_matrix_coef;
+ } else {
+ downmix_coefficients = {
+ 1.0f, // front
+ 0.707f, // center
+ 0.0f, // lfe
+ 0.707f, // back
+ };
}
+
in_use = in.in_use;
use_count = in.device.input_count;
buffers = in.device.input;
@@ -34,10 +41,6 @@ std::vector<u8> SinkContext::OutputBuffers() const {
return buffer_ret;
}
-bool SinkContext::HasDownMixingCoefficients() const {
- return has_downmix_coefs;
-}
-
const DownmixCoefficients& SinkContext::GetDownmixCoefficients() const {
return downmix_coefficients;
}
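// Illustrative note (not part of this change): the fallback values match the conventional
// stereo fold-down weights, i.e. something like L = FL + 0.707 * C + 0.707 * BL with the LFE
// dropped, assuming the renderer applies them per front/center/lfe/back channel group.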
diff --git a/src/audio_core/sink_context.h b/src/audio_core/sink_context.h
index 66ee4e8a0..254961fe2 100644
--- a/src/audio_core/sink_context.h
+++ b/src/audio_core/sink_context.h
@@ -4,6 +4,8 @@
#pragma once
+#include <array>
+#include <vector>
#include "audio_core/common.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
@@ -82,7 +84,6 @@ public:
[[nodiscard]] bool InUse() const;
[[nodiscard]] std::vector<u8> OutputBuffers() const;
- [[nodiscard]] bool HasDownMixingCoefficients() const;
[[nodiscard]] const DownmixCoefficients& GetDownmixCoefficients() const;
private:
@@ -90,7 +91,6 @@ private:
s32 use_count{};
std::array<u8, AudioCommon::MAX_CHANNEL_COUNT> buffers{};
std::size_t sink_count{};
- bool has_downmix_coefs{false};
DownmixCoefficients downmix_coefficients{};
};
} // namespace AudioCore
diff --git a/src/audio_core/stream.cpp b/src/audio_core/stream.cpp
index ad6c587c2..5a30f55a7 100644
--- a/src/audio_core/stream.cpp
+++ b/src/audio_core/stream.cpp
@@ -107,9 +107,12 @@ void Stream::PlayNextBuffer(std::chrono::nanoseconds ns_late) {
active_buffer = queued_buffers.front();
queued_buffers.pop();
- VolumeAdjustSamples(active_buffer->GetSamples(), game_volume);
+ auto& samples = active_buffer->GetSamples();
- sink_stream.EnqueueSamples(GetNumChannels(), active_buffer->GetSamples());
+ VolumeAdjustSamples(samples, game_volume);
+
+ sink_stream.EnqueueSamples(GetNumChannels(), samples);
+ played_samples += samples.size();
const auto buffer_release_ns = GetBufferReleaseNS(*active_buffer);
diff --git a/src/audio_core/stream.h b/src/audio_core/stream.h
index 559844b9b..dbd97ec9c 100644
--- a/src/audio_core/stream.h
+++ b/src/audio_core/stream.h
@@ -89,6 +89,11 @@ public:
return sample_rate;
}
+ /// Gets the number of samples played so far
+ [[nodiscard]] u64 GetPlayedSampleCount() const {
+ return played_samples;
+ }
+
/// Gets the number of channels
[[nodiscard]] u32 GetNumChannels() const;
@@ -106,6 +111,7 @@ private:
[[nodiscard]] std::chrono::nanoseconds GetBufferReleaseNS(const Buffer& buffer) const;
u32 sample_rate; ///< Sample rate of the stream
+ u64 played_samples{}; ///< The current played sample count
Format format; ///< Format of the stream
float game_volume = 1.0f; ///< The volume the game currently has set
ReleaseCallback release_callback; ///< Buffer release callback for the stream
diff --git a/src/audio_core/voice_context.cpp b/src/audio_core/voice_context.cpp
index 867b8fc6b..d8c954b60 100644
--- a/src/audio_core/voice_context.cpp
+++ b/src/audio_core/voice_context.cpp
@@ -66,7 +66,7 @@ void ServerVoiceInfo::Initialize() {
in_params.last_volume = 0.0f;
in_params.biquad_filter.fill({});
in_params.wave_buffer_count = 0;
- in_params.wave_bufffer_head = 0;
+ in_params.wave_buffer_head = 0;
in_params.mix_id = AudioCommon::NO_MIX;
in_params.splitter_info_id = AudioCommon::NO_SPLITTER;
in_params.additional_params_address = 0;
@@ -75,7 +75,7 @@ void ServerVoiceInfo::Initialize() {
out_params.played_sample_count = 0;
out_params.wave_buffer_consumed = 0;
in_params.voice_drop_flag = false;
- in_params.buffer_mapped = false;
+ in_params.buffer_mapped = true;
in_params.wave_buffer_flush_request_count = 0;
in_params.was_biquad_filter_enabled.fill(false);
@@ -126,7 +126,7 @@ void ServerVoiceInfo::UpdateParameters(const VoiceInfo::InParams& voice_in,
in_params.volume = voice_in.volume;
in_params.biquad_filter = voice_in.biquad_filter;
in_params.wave_buffer_count = voice_in.wave_buffer_count;
- in_params.wave_bufffer_head = voice_in.wave_buffer_head;
+ in_params.wave_buffer_head = voice_in.wave_buffer_head;
if (behavior_info.IsFlushVoiceWaveBuffersSupported()) {
const auto in_request_count = in_params.wave_buffer_flush_request_count;
const auto voice_request_count = voice_in.wave_buffer_flush_request_count;
@@ -185,14 +185,16 @@ void ServerVoiceInfo::UpdateWaveBuffers(
wave_buffer.buffer_size = 0;
wave_buffer.context_address = 0;
wave_buffer.context_size = 0;
+ wave_buffer.loop_start_sample = 0;
+ wave_buffer.loop_end_sample = 0;
wave_buffer.sent_to_dsp = true;
}
// Mark all our wave buffers as invalid
for (std::size_t channel = 0; channel < static_cast<std::size_t>(in_params.channel_count);
channel++) {
- for (auto& is_valid : voice_states[channel]->is_wave_buffer_valid) {
- is_valid = false;
+ for (std::size_t i = 0; i < AudioCommon::MAX_WAVE_BUFFERS; ++i) {
+ voice_states[channel]->is_wave_buffer_valid[i] = false;
}
}
}
@@ -211,7 +213,7 @@ void ServerVoiceInfo::UpdateWaveBuffer(ServerWaveBuffer& out_wavebuffer,
const WaveBuffer& in_wave_buffer, SampleFormat sample_format,
bool is_buffer_valid,
[[maybe_unused]] BehaviorInfo& behavior_info) {
- if (!is_buffer_valid && out_wavebuffer.sent_to_dsp) {
+ if (!is_buffer_valid && out_wavebuffer.sent_to_dsp && out_wavebuffer.buffer_address != 0) {
out_wavebuffer.buffer_address = 0;
out_wavebuffer.buffer_size = 0;
}
@@ -219,11 +221,40 @@ void ServerVoiceInfo::UpdateWaveBuffer(ServerWaveBuffer& out_wavebuffer,
if (!in_wave_buffer.sent_to_server || !in_params.buffer_mapped) {
// Validate sample offset sizings
if (sample_format == SampleFormat::Pcm16) {
- const auto buffer_size = in_wave_buffer.buffer_size;
- if (in_wave_buffer.start_sample_offset < 0 || in_wave_buffer.end_sample_offset < 0 ||
- (buffer_size < (sizeof(s16) * in_wave_buffer.start_sample_offset)) ||
- (buffer_size < (sizeof(s16) * in_wave_buffer.end_sample_offset))) {
+ const s64 buffer_size = static_cast<s64>(in_wave_buffer.buffer_size);
+ const s64 start = sizeof(s16) * in_wave_buffer.start_sample_offset;
+ const s64 end = sizeof(s16) * in_wave_buffer.end_sample_offset;
+ if (0 > start || start > buffer_size || 0 > end || end > buffer_size) {
// TODO(ogniK): Write error info
+ LOG_ERROR(Audio,
+ "PCM16 wavebuffer has an invalid size. Buffer has size 0x{:08X}, but "
+ "offsets were "
+ "{:08X} - 0x{:08X}",
+ buffer_size, sizeof(s16) * in_wave_buffer.start_sample_offset,
+ sizeof(s16) * in_wave_buffer.end_sample_offset);
+ return;
+ }
+ } else if (sample_format == SampleFormat::Adpcm) {
+ const s64 buffer_size = static_cast<s64>(in_wave_buffer.buffer_size);
+ const s64 start_frames = in_wave_buffer.start_sample_offset / 14;
+ const s64 start_extra = in_wave_buffer.start_sample_offset % 14 == 0
+ ? 0
+ : (in_wave_buffer.start_sample_offset % 14) / 2 + 1 +
+ (in_wave_buffer.start_sample_offset % 2);
+ const s64 start = start_frames * 8 + start_extra;
+ const s64 end_frames = in_wave_buffer.end_sample_offset / 14;
+ const s64 end_extra = in_wave_buffer.end_sample_offset % 14 == 0
+ ? 0
+ : (in_wave_buffer.end_sample_offset % 14) / 2 + 1 +
+ (in_wave_buffer.end_sample_offset % 2);
+ const s64 end = end_frames * 8 + end_extra;
+ if (in_wave_buffer.start_sample_offset < 0 || start > buffer_size ||
+ in_wave_buffer.end_sample_offset < 0 || end > buffer_size) {
+ LOG_ERROR(Audio,
+ "ADPMC wavebuffer has an invalid size. Buffer has size 0x{:08X}, but "
+ "offsets were "
+ "{:08X} - 0x{:08X}",
+ in_wave_buffer.buffer_size, start, end);
return;
}
}
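// Illustrative arithmetic (not part of this change): ADPCM packs 14 samples into each 8-byte
// frame, so a start_sample_offset of 30 maps to 30 / 14 = 2 full frames (16 bytes) plus
// (30 % 14) / 2 + 1 + (30 % 2) = 2 extra bytes, giving start = 18, which is then validated
// against buffer_size.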
@@ -239,29 +270,34 @@ void ServerVoiceInfo::UpdateWaveBuffer(ServerWaveBuffer& out_wavebuffer,
out_wavebuffer.buffer_size = in_wave_buffer.buffer_size;
out_wavebuffer.context_address = in_wave_buffer.context_address;
out_wavebuffer.context_size = in_wave_buffer.context_size;
+ out_wavebuffer.loop_start_sample = in_wave_buffer.loop_start_sample;
+ out_wavebuffer.loop_end_sample = in_wave_buffer.loop_end_sample;
in_params.buffer_mapped =
in_wave_buffer.buffer_address != 0 && in_wave_buffer.buffer_size != 0;
// TODO(ogniK): Pool mapper attachment
// TODO(ogniK): IsAdpcmLoopContextBugFixed
+ if (sample_format == SampleFormat::Adpcm && in_wave_buffer.context_address != 0 &&
+ in_wave_buffer.context_size != 0 && behavior_info.IsAdpcmLoopContextBugFixed()) {
+ } else {
+ out_wavebuffer.context_address = 0;
+ out_wavebuffer.context_size = 0;
+ }
}
}
void ServerVoiceInfo::WriteOutStatus(
VoiceInfo::OutParams& voice_out, VoiceInfo::InParams& voice_in,
std::array<VoiceState*, AudioCommon::MAX_CHANNEL_COUNT>& voice_states) {
- if (voice_in.is_new) {
+ if (voice_in.is_new || in_params.is_new) {
in_params.is_new = true;
voice_out.wave_buffer_consumed = 0;
voice_out.played_sample_count = 0;
voice_out.voice_dropped = false;
- } else if (!in_params.is_new) {
- voice_out.wave_buffer_consumed = voice_states[0]->wave_buffer_consumed;
- voice_out.played_sample_count = voice_states[0]->played_sample_count;
- voice_out.voice_dropped = in_params.voice_drop_flag;
} else {
- voice_out.wave_buffer_consumed = 0;
- voice_out.played_sample_count = 0;
- voice_out.voice_dropped = false;
+ const auto& state = voice_states[0];
+ voice_out.wave_buffer_consumed = state->wave_buffer_consumed;
+ voice_out.played_sample_count = state->played_sample_count;
+ voice_out.voice_dropped = state->voice_dropped;
}
}
@@ -283,7 +319,8 @@ ServerVoiceInfo::OutParams& ServerVoiceInfo::GetOutParams() {
bool ServerVoiceInfo::ShouldSkip() const {
// TODO(ogniK): Handle unmapped wave buffers or parameters
- return !in_params.in_use || (in_params.wave_buffer_count == 0) || in_params.voice_drop_flag;
+ return !in_params.in_use || in_params.wave_buffer_count == 0 || !in_params.buffer_mapped ||
+ in_params.voice_drop_flag;
}
bool ServerVoiceInfo::UpdateForCommandGeneration(VoiceContext& voice_context) {
@@ -381,7 +418,7 @@ bool ServerVoiceInfo::UpdateParametersForCommandGeneration(
void ServerVoiceInfo::FlushWaveBuffers(
u8 flush_count, std::array<VoiceState*, AudioCommon::MAX_CHANNEL_COUNT>& dsp_voice_states,
s32 channel_count) {
- auto wave_head = in_params.wave_bufffer_head;
+ auto wave_head = in_params.wave_buffer_head;
for (u8 i = 0; i < flush_count; i++) {
in_params.wave_buffer[wave_head].sent_to_dsp = true;
@@ -401,6 +438,17 @@ bool ServerVoiceInfo::HasValidWaveBuffer(const VoiceState* state) const {
return std::find(valid_wb.begin(), valid_wb.end(), true) != valid_wb.end();
}
+void ServerVoiceInfo::SetWaveBufferCompleted(VoiceState& dsp_state,
+ const ServerWaveBuffer& wave_buffer) {
+ dsp_state.is_wave_buffer_valid[dsp_state.wave_buffer_index] = false;
+ dsp_state.wave_buffer_consumed++;
+ dsp_state.wave_buffer_index = (dsp_state.wave_buffer_index + 1) % AudioCommon::MAX_WAVE_BUFFERS;
+ dsp_state.loop_count = 0;
+ if (wave_buffer.end_of_stream) {
+ dsp_state.played_sample_count = 0;
+ }
+}
+
VoiceContext::VoiceContext(std::size_t voice_count_) : voice_count{voice_count_} {
for (std::size_t i = 0; i < voice_count; i++) {
voice_channel_resources.emplace_back(static_cast<s32>(i));
diff --git a/src/audio_core/voice_context.h b/src/audio_core/voice_context.h
index 70359cadb..e1050897b 100644
--- a/src/audio_core/voice_context.h
+++ b/src/audio_core/voice_context.h
@@ -60,10 +60,12 @@ struct WaveBuffer {
u8 is_looping{};
u8 end_of_stream{};
u8 sent_to_server{};
- INSERT_PADDING_BYTES(5);
+ INSERT_PADDING_BYTES(1);
+ s32 loop_count{};
u64 context_address{};
u64 context_size{};
- INSERT_PADDING_BYTES(8);
+ u32 loop_start_sample{};
+ u32 loop_end_sample{};
};
static_assert(sizeof(WaveBuffer) == 0x38, "WaveBuffer is an invalid size");
@@ -76,6 +78,9 @@ struct ServerWaveBuffer {
bool end_of_stream{};
VAddr context_address{};
std::size_t context_size{};
+ s32 loop_count{};
+ u32 loop_start_sample{};
+ u32 loop_end_sample{};
bool sent_to_dsp{true};
};
@@ -108,6 +113,7 @@ struct VoiceState {
u32 external_context_size;
bool is_external_context_used;
bool voice_dropped;
+ s32 loop_count;
};
class VoiceChannelResource {
@@ -206,7 +212,7 @@ public:
float last_volume{};
std::array<BiquadFilterParameter, AudioCommon::MAX_BIQUAD_FILTERS> biquad_filter{};
s32 wave_buffer_count{};
- s16 wave_bufffer_head{};
+ s16 wave_buffer_head{};
INSERT_PADDING_BYTES(2);
BehaviorFlags behavior_flags{};
VAddr additional_params_address{};
@@ -252,6 +258,7 @@ public:
void FlushWaveBuffers(u8 flush_count,
std::array<VoiceState*, AudioCommon::MAX_CHANNEL_COUNT>& dsp_voice_states,
s32 channel_count);
+ void SetWaveBufferCompleted(VoiceState& dsp_state, const ServerWaveBuffer& wave_buffer);
private:
std::vector<s16> stored_samples;
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 7534eb8f1..57922b51c 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -1,8 +1,3 @@
-# Add a custom command to generate a new shader_cache_version hash when any of the following files change
-# NOTE: This is an approximation of what files affect shader generation, its possible something else
-# could affect the result, but much more unlikely than the following files. Keeping a list of files
-# like this allows for much better caching since it doesn't force the user to recompile binary shaders every update
-set(VIDEO_CORE "${CMAKE_SOURCE_DIR}/src/video_core")
if (DEFINED ENV{AZURECIREPO})
set(BUILD_REPOSITORY $ENV{AZURECIREPO})
endif()
@@ -30,64 +25,7 @@ add_custom_command(OUTPUT scm_rev.cpp
-DGIT_EXECUTABLE=${GIT_EXECUTABLE}
-P ${CMAKE_SOURCE_DIR}/CMakeModules/GenerateSCMRev.cmake
DEPENDS
- # WARNING! It was too much work to try and make a common location for this list,
- # so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well
- "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp"
- "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h"
- "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp"
- "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h"
- "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp"
- "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h"
- "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp"
- "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h"
- "${VIDEO_CORE}/shader/decode/arithmetic.cpp"
- "${VIDEO_CORE}/shader/decode/arithmetic_half.cpp"
- "${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp"
- "${VIDEO_CORE}/shader/decode/arithmetic_immediate.cpp"
- "${VIDEO_CORE}/shader/decode/arithmetic_integer.cpp"
- "${VIDEO_CORE}/shader/decode/arithmetic_integer_immediate.cpp"
- "${VIDEO_CORE}/shader/decode/bfe.cpp"
- "${VIDEO_CORE}/shader/decode/bfi.cpp"
- "${VIDEO_CORE}/shader/decode/conversion.cpp"
- "${VIDEO_CORE}/shader/decode/ffma.cpp"
- "${VIDEO_CORE}/shader/decode/float_set.cpp"
- "${VIDEO_CORE}/shader/decode/float_set_predicate.cpp"
- "${VIDEO_CORE}/shader/decode/half_set.cpp"
- "${VIDEO_CORE}/shader/decode/half_set_predicate.cpp"
- "${VIDEO_CORE}/shader/decode/hfma2.cpp"
- "${VIDEO_CORE}/shader/decode/image.cpp"
- "${VIDEO_CORE}/shader/decode/integer_set.cpp"
- "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
- "${VIDEO_CORE}/shader/decode/memory.cpp"
- "${VIDEO_CORE}/shader/decode/texture.cpp"
- "${VIDEO_CORE}/shader/decode/other.cpp"
- "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
- "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"
- "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp"
- "${VIDEO_CORE}/shader/decode/shift.cpp"
- "${VIDEO_CORE}/shader/decode/video.cpp"
- "${VIDEO_CORE}/shader/decode/warp.cpp"
- "${VIDEO_CORE}/shader/decode/xmad.cpp"
- "${VIDEO_CORE}/shader/ast.cpp"
- "${VIDEO_CORE}/shader/ast.h"
- "${VIDEO_CORE}/shader/compiler_settings.cpp"
- "${VIDEO_CORE}/shader/compiler_settings.h"
- "${VIDEO_CORE}/shader/control_flow.cpp"
- "${VIDEO_CORE}/shader/control_flow.h"
- "${VIDEO_CORE}/shader/decode.cpp"
- "${VIDEO_CORE}/shader/expr.cpp"
- "${VIDEO_CORE}/shader/expr.h"
- "${VIDEO_CORE}/shader/node.h"
- "${VIDEO_CORE}/shader/node_helper.cpp"
- "${VIDEO_CORE}/shader/node_helper.h"
- "${VIDEO_CORE}/shader/registry.cpp"
- "${VIDEO_CORE}/shader/registry.h"
- "${VIDEO_CORE}/shader/shader_ir.cpp"
- "${VIDEO_CORE}/shader/shader_ir.h"
- "${VIDEO_CORE}/shader/track.cpp"
- "${VIDEO_CORE}/shader/transform_feedback.cpp"
- "${VIDEO_CORE}/shader/transform_feedback.h"
- # and also check that the scm_rev files haven't changed
+ # Check that the scm_rev files haven't changed
"${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in"
"${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.h"
# technically we should regenerate if the git version changed, but its not worth the effort imo
@@ -110,7 +48,6 @@ add_library(common STATIC
cityhash.cpp
cityhash.h
common_funcs.h
- common_sizes.h
common_types.h
concepts.h
div_ceil.h
@@ -134,6 +71,7 @@ add_library(common STATIC
host_memory.cpp
host_memory.h
intrusive_red_black_tree.h
+ literals.h
logging/backend.cpp
logging/backend.h
logging/filter.cpp
@@ -180,7 +118,6 @@ add_library(common STATIC
thread.cpp
thread.h
thread_queue_list.h
- thread_worker.cpp
thread_worker.h
threadsafe_queue.h
time_zone.cpp
@@ -188,6 +125,7 @@ add_library(common STATIC
tiny_mt.h
tree.h
uint128.h
+ unique_function.h
uuid.cpp
uuid.h
vector_math.h
@@ -231,7 +169,7 @@ endif()
create_target_directory_groups(common)
-target_link_libraries(common PUBLIC ${Boost_LIBRARIES} fmt::fmt microprofile)
+target_link_libraries(common PUBLIC ${Boost_LIBRARIES} fmt::fmt microprofile Threads::Threads)
target_link_libraries(common PRIVATE lz4::lz4 xbyak)
if (MSVC)
target_link_libraries(common PRIVATE zstd::zstd)
diff --git a/src/common/common_sizes.h b/src/common/common_sizes.h
deleted file mode 100644
index 7e9fd968b..000000000
--- a/src/common/common_sizes.h
+++ /dev/null
@@ -1,43 +0,0 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <limits>
-
-#include "common/common_types.h"
-
-namespace Common {
-
-enum : u64 {
- Size_1_KB = 0x400ULL,
- Size_64_KB = 64ULL * Size_1_KB,
- Size_128_KB = 128ULL * Size_1_KB,
- Size_1_MB = 0x100000ULL,
- Size_2_MB = 2ULL * Size_1_MB,
- Size_4_MB = 4ULL * Size_1_MB,
- Size_5_MB = 5ULL * Size_1_MB,
- Size_14_MB = 14ULL * Size_1_MB,
- Size_32_MB = 32ULL * Size_1_MB,
- Size_33_MB = 33ULL * Size_1_MB,
- Size_128_MB = 128ULL * Size_1_MB,
- Size_448_MB = 448ULL * Size_1_MB,
- Size_507_MB = 507ULL * Size_1_MB,
- Size_562_MB = 562ULL * Size_1_MB,
- Size_1554_MB = 1554ULL * Size_1_MB,
- Size_2048_MB = 2048ULL * Size_1_MB,
- Size_2193_MB = 2193ULL * Size_1_MB,
- Size_3285_MB = 3285ULL * Size_1_MB,
- Size_4916_MB = 4916ULL * Size_1_MB,
- Size_1_GB = 0x40000000ULL,
- Size_2_GB = 2ULL * Size_1_GB,
- Size_4_GB = 4ULL * Size_1_GB,
- Size_6_GB = 6ULL * Size_1_GB,
- Size_8_GB = 8ULL * Size_1_GB,
- Size_64_GB = 64ULL * Size_1_GB,
- Size_512_GB = 512ULL * Size_1_GB,
- Size_Invalid = std::numeric_limits<u64>::max(),
-};
-
-} // namespace Common
diff --git a/src/common/detached_tasks.cpp b/src/common/detached_tasks.cpp
index f2b4939df..c1362631e 100644
--- a/src/common/detached_tasks.cpp
+++ b/src/common/detached_tasks.cpp
@@ -21,6 +21,8 @@ void DetachedTasks::WaitForAllTasks() {
}
DetachedTasks::~DetachedTasks() {
+ WaitForAllTasks();
+
std::unique_lock lock{mutex};
ASSERT(count == 0);
instance = nullptr;
diff --git a/src/common/fs/file.cpp b/src/common/fs/file.cpp
index 710e88b39..274f57659 100644
--- a/src/common/fs/file.cpp
+++ b/src/common/fs/file.cpp
@@ -172,7 +172,7 @@ std::string ReadStringFromFile(const std::filesystem::path& path, FileType type)
size_t WriteStringToFile(const std::filesystem::path& path, FileType type,
std::string_view string) {
- if (!IsFile(path)) {
+ if (Exists(path) && !IsFile(path)) {
return 0;
}
@@ -183,7 +183,7 @@ size_t WriteStringToFile(const std::filesystem::path& path, FileType type,
size_t AppendStringToFile(const std::filesystem::path& path, FileType type,
std::string_view string) {
- if (!IsFile(path)) {
+ if (Exists(path) && !IsFile(path)) {
return 0;
}
@@ -306,9 +306,9 @@ bool IOFile::Flush() const {
errno = 0;
#ifdef _WIN32
- const auto flush_result = std::fflush(file) == 0 && _commit(fileno(file)) == 0;
+ const auto flush_result = std::fflush(file) == 0;
#else
- const auto flush_result = std::fflush(file) == 0 && fsync(fileno(file)) == 0;
+ const auto flush_result = std::fflush(file) == 0;
#endif
if (!flush_result) {
@@ -320,6 +320,28 @@ bool IOFile::Flush() const {
return flush_result;
}
+bool IOFile::Commit() const {
+ if (!IsOpen()) {
+ return false;
+ }
+
+ errno = 0;
+
+#ifdef _WIN32
+ const auto commit_result = std::fflush(file) == 0 && _commit(fileno(file)) == 0;
+#else
+ const auto commit_result = std::fflush(file) == 0 && fsync(fileno(file)) == 0;
+#endif
+
+ if (!commit_result) {
+ const auto ec = std::error_code{errno, std::generic_category()};
+ LOG_ERROR(Common_Filesystem, "Failed to commit the file at path={}, ec_message={}",
+ PathToUTF8String(file_path), ec.message());
+ }
+
+ return commit_result;
+}
+
bool IOFile::SetSize(u64 size) const {
if (!IsOpen()) {
return false;
@@ -347,6 +369,9 @@ u64 IOFile::GetSize() const {
return 0;
}
+ // Flush any unwritten buffered data into the file prior to retrieving the file size.
+ std::fflush(file);
+
std::error_code ec;
const auto file_size = fs::file_size(file_path, ec);
diff --git a/src/common/fs/file.h b/src/common/fs/file.h
index 0f10b6003..2c4ab4332 100644
--- a/src/common/fs/file.h
+++ b/src/common/fs/file.h
@@ -49,7 +49,7 @@ void OpenFileStream(FileStream& file_stream, const Path& path, std::ios_base::op
/**
* Reads an entire file at path and returns a string of the contents read from the file.
- * If the filesystem object at path is not a file, this function returns an empty string.
+ * If the filesystem object at path is not a regular file, this function returns an empty string.
*
* @param path Filesystem path
* @param type File type
@@ -72,7 +72,8 @@ template <typename Path>
/**
* Writes a string to a file at path and returns the number of characters successfully written.
* If a file already exists at path, its contents will be erased.
- * If the filesystem object at path is not a file, this function returns 0.
+ * If a file does not exist at path, it creates and opens a new empty file for writing.
+ * If the filesystem object at path exists and is not a regular file, this function returns 0.
*
* @param path Filesystem path
* @param type File type
@@ -95,7 +96,8 @@ template <typename Path>
/**
* Appends a string to a file at path and returns the number of characters successfully written.
- * If the filesystem object at path is not a file, this function returns 0.
+ * If a file does not exist at path, it creates and opens a new empty file for appending.
+ * If the filesystem object at path exists and is not a regular file, this function returns 0.
*
* @param path Filesystem path
* @param type File type
@@ -394,11 +396,20 @@ public:
[[nodiscard]] size_t WriteString(std::span<const char> string) const;
/**
- * Flushes any unwritten buffered data into the file.
+ * Attempts to flush any unwritten buffered data into the file.
*
* @returns True if the flush was successful, false otherwise.
*/
- [[nodiscard]] bool Flush() const;
+ bool Flush() const;
+
+ /**
+ * Attempts to commit the file to disk.
+ * Note that this is an expensive operation, as it forces the operating system to write
+ * the contents of the file associated with the file descriptor to disk.
+ *
+ * @returns True if the commit was successful, false otherwise.
+ */
+ bool Commit() const;
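// Illustrative sketch (not part of this change), assuming the IOFile constructor and the
// FileAccessMode/FileType enums from this filesystem module: a hypothetical caller that
// needs durability would pair the two calls, e.g.
//
//   Common::FS::IOFile file{path, FileAccessMode::Write, FileType::TextFile};
//   void(file.WriteString(data));
//   file.Flush();   // cheap: drain the user-space stdio buffer
//   file.Commit();  // expensive: additionally ask the OS to write the file to disk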
/**
* Resizes the file to a given size.
diff --git a/src/common/fs/fs.cpp b/src/common/fs/fs.cpp
index d3159e908..9089cad67 100644
--- a/src/common/fs/fs.cpp
+++ b/src/common/fs/fs.cpp
@@ -135,8 +135,9 @@ std::shared_ptr<IOFile> FileOpen(const fs::path& path, FileAccessMode mode, File
return nullptr;
}
- if (!IsFile(path)) {
- LOG_ERROR(Common_Filesystem, "Filesystem object at path={} is not a file",
+ if (Exists(path) && !IsFile(path)) {
+ LOG_ERROR(Common_Filesystem,
+ "Filesystem object at path={} exists and is not a regular file",
PathToUTF8String(path));
return nullptr;
}
diff --git a/src/common/fs/fs.h b/src/common/fs/fs.h
index f6f256349..183126de3 100644
--- a/src/common/fs/fs.h
+++ b/src/common/fs/fs.h
@@ -48,18 +48,18 @@ template <typename Path>
*
* Failures occur when:
* - Input path is not valid
- * - Filesystem object at path is not a file
+ * - Filesystem object at path is not a regular file
* - Filesystem at path is read only
*
* @param path Filesystem path
*
* @returns True if file removal succeeds or file does not exist, false otherwise.
*/
-[[nodiscard]] bool RemoveFile(const std::filesystem::path& path);
+bool RemoveFile(const std::filesystem::path& path);
#ifdef _WIN32
template <typename Path>
-[[nodiscard]] bool RemoveFile(const Path& path) {
+bool RemoveFile(const Path& path) {
if constexpr (IsChar<typename Path::value_type>) {
return RemoveFile(ToU8String(path));
} else {
@@ -74,7 +74,7 @@ template <typename Path>
* Failures occur when:
* - One or both input path(s) is not valid
* - Filesystem object at old_path does not exist
- * - Filesystem object at old_path is not a file
+ * - Filesystem object at old_path is not a regular file
* - Filesystem object at new_path exists
* - Filesystem at either path is read only
*
@@ -110,8 +110,8 @@ template <typename Path1, typename Path2>
*
* Failures occur when:
* - Input path is not valid
- * - Filesystem object at path is not a file
- * - The file is not opened
+ * - Filesystem object at path exists and is not a regular file
+ * - The file is not open
*
* @param path Filesystem path
* @param mode File access mode
@@ -251,11 +251,11 @@ template <typename Path>
*
* @returns True if directory removal succeeds or directory does not exist, false otherwise.
*/
-[[nodiscard]] bool RemoveDir(const std::filesystem::path& path);
+bool RemoveDir(const std::filesystem::path& path);
#ifdef _WIN32
template <typename Path>
-[[nodiscard]] bool RemoveDir(const Path& path) {
+bool RemoveDir(const Path& path) {
if constexpr (IsChar<typename Path::value_type>) {
return RemoveDir(ToU8String(path));
} else {
@@ -276,11 +276,11 @@ template <typename Path>
*
* @returns True if the directory and all of its contents are removed successfully, false otherwise.
*/
-[[nodiscard]] bool RemoveDirRecursively(const std::filesystem::path& path);
+bool RemoveDirRecursively(const std::filesystem::path& path);
#ifdef _WIN32
template <typename Path>
-[[nodiscard]] bool RemoveDirRecursively(const Path& path) {
+bool RemoveDirRecursively(const Path& path) {
if constexpr (IsChar<typename Path::value_type>) {
return RemoveDirRecursively(ToU8String(path));
} else {
@@ -301,11 +301,11 @@ template <typename Path>
*
* @returns True if all of the directory's contents are removed successfully, false otherwise.
*/
-[[nodiscard]] bool RemoveDirContentsRecursively(const std::filesystem::path& path);
+bool RemoveDirContentsRecursively(const std::filesystem::path& path);
#ifdef _WIN32
template <typename Path>
-[[nodiscard]] bool RemoveDirContentsRecursively(const Path& path) {
+bool RemoveDirContentsRecursively(const Path& path) {
if constexpr (IsChar<typename Path::value_type>) {
return RemoveDirContentsRecursively(ToU8String(path));
} else {
@@ -435,11 +435,13 @@ template <typename Path>
#endif
/**
- * Returns whether a filesystem object at path is a file.
+ * Returns whether a filesystem object at path is a regular file.
+ * A regular file is a file that stores text or binary data.
+ * It is not a directory, symlink, FIFO, socket, block device, or character device.
*
* @param path Filesystem path
*
- * @returns True if a filesystem object at path is a file, false otherwise.
+ * @returns True if a filesystem object at path is a regular file, false otherwise.
*/
[[nodiscard]] bool IsFile(const std::filesystem::path& path);
diff --git a/src/common/fs/fs_util.cpp b/src/common/fs/fs_util.cpp
index 357cf5855..9f8671982 100644
--- a/src/common/fs/fs_util.cpp
+++ b/src/common/fs/fs_util.cpp
@@ -20,6 +20,10 @@ std::string ToUTF8String(std::u8string_view u8_string) {
return std::string{u8_string.begin(), u8_string.end()};
}
+std::string BufferToUTF8String(std::span<const u8> buffer) {
+ return std::string{buffer.begin(), std::ranges::find(buffer, u8{0})};
+}
+
std::string PathToUTF8String(const std::filesystem::path& path) {
return ToUTF8String(path.u8string());
}
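// Illustrative sketch (not part of this change): BufferToUTF8String stops at the first null
// byte, which suits fixed-size, zero-padded name fields. Hypothetical use:
//
//   constexpr std::array<u8, 8> raw{'y', 'u', 'z', 'u', 0, 0, 0, 0};
//   const std::string name = Common::FS::BufferToUTF8String(raw);  // "yuzu"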
diff --git a/src/common/fs/fs_util.h b/src/common/fs/fs_util.h
index ec9950ee7..1ec82eb35 100644
--- a/src/common/fs/fs_util.h
+++ b/src/common/fs/fs_util.h
@@ -47,6 +47,17 @@ concept IsChar = std::same_as<T, char>;
[[nodiscard]] std::string ToUTF8String(std::u8string_view u8_string);
/**
+ * Converts a buffer of bytes to a UTF8-encoded std::string.
+ * This converts from the start of the buffer until the first encountered null-terminator.
+ * If no null-terminator is found, this converts the entire buffer instead.
+ *
+ * @param buffer Buffer of bytes
+ *
+ * @returns UTF-8 encoded std::string.
+ */
+[[nodiscard]] std::string BufferToUTF8String(std::span<const u8> buffer);
+
+/**
* Converts a filesystem path to a UTF-8 encoded std::string.
*
* @param path Filesystem path
diff --git a/src/common/hex_util.h b/src/common/hex_util.h
index a8d414fb8..f5f9e4507 100644
--- a/src/common/hex_util.h
+++ b/src/common/hex_util.h
@@ -53,8 +53,9 @@ template <typename ContiguousContainer>
std::string out;
out.reserve(std::size(data) * pad_width);
+ const auto format_str = fmt::runtime(upper ? "{:02X}" : "{:02x}");
for (const u8 c : data) {
- out += fmt::format(upper ? "{:02X}" : "{:02x}", c);
+ out += fmt::format(format_str, c);
}
return out;
diff --git a/src/common/host_memory.cpp b/src/common/host_memory.cpp
index 8bd70abc7..2a5a7596c 100644
--- a/src/common/host_memory.cpp
+++ b/src/common/host_memory.cpp
@@ -34,7 +34,7 @@ constexpr size_t HugePageSize = 0x200000;
// Manually imported for MinGW compatibility
#ifndef MEM_RESERVE_PLACEHOLDER
-#define MEM_RESERVE_PLACEHOLDER 0x0004000
+#define MEM_RESERVE_PLACEHOLDER 0x00040000
#endif
#ifndef MEM_REPLACE_PLACEHOLDER
#define MEM_REPLACE_PLACEHOLDER 0x00004000
diff --git a/src/common/literals.h b/src/common/literals.h
new file mode 100644
index 000000000..d55fed40b
--- /dev/null
+++ b/src/common/literals.h
@@ -0,0 +1,31 @@
+// Copyright 2021 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+
+namespace Common::Literals {
+
+constexpr u64 operator""_KiB(unsigned long long int x) {
+ return 1024ULL * x;
+}
+
+constexpr u64 operator""_MiB(unsigned long long int x) {
+ return 1024_KiB * x;
+}
+
+constexpr u64 operator""_GiB(unsigned long long int x) {
+ return 1024_MiB * x;
+}
+
+constexpr u64 operator""_TiB(unsigned long long int x) {
+ return 1024_GiB * x;
+}
+
+constexpr u64 operator""_PiB(unsigned long long int x) {
+ return 1024_TiB * x;
+}
+
+} // namespace Common::Literals
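// Illustrative sketch (not part of this change): the literals make byte counts self-describing,
// as used by the logging backend below:
//
//   using namespace Common::Literals;
//   static_assert(100_MiB == 100ULL * 1024 * 1024);
//   static_assert(1_GiB == 1024_MiB);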
diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp
index d5cff400f..61dddab3f 100644
--- a/src/common/logging/backend.cpp
+++ b/src/common/logging/backend.cpp
@@ -19,6 +19,8 @@
#include "common/assert.h"
#include "common/fs/file.h"
#include "common/fs/fs.h"
+#include "common/literals.h"
+
#include "common/logging/backend.h"
#include "common/logging/log.h"
#include "common/logging/text_formatter.h"
@@ -98,8 +100,8 @@ private:
write_logs(entry);
}
- // Drain the logging queue. Only writes out up to MAX_LOGS_TO_WRITE to prevent a case
- // where a system is repeatedly spamming logs even on close.
+ // Drain the logging queue. Only writes out up to MAX_LOGS_TO_WRITE to prevent a
+ // case where a system is repeatedly spamming logs even on close.
const int MAX_LOGS_TO_WRITE = filter.IsDebug() ? INT_MAX : 100;
int logs_written = 0;
while (logs_written++ < MAX_LOGS_TO_WRITE && message_queue.Pop(entry)) {
@@ -159,7 +161,7 @@ FileBackend::FileBackend(const std::filesystem::path& filename) {
// Existence checks are done within the functions themselves.
// We don't particularly care if these succeed or not.
- void(FS::RemoveFile(old_filename));
+ FS::RemoveFile(old_filename);
void(FS::RenameFile(filename, old_filename));
file =
@@ -169,24 +171,28 @@ FileBackend::FileBackend(const std::filesystem::path& filename) {
FileBackend::~FileBackend() = default;
void FileBackend::Write(const Entry& entry) {
- // prevent logs from going over the maximum size (in case its spamming and the user doesn't
- // know)
- constexpr std::size_t MAX_BYTES_WRITTEN = 100 * 1024 * 1024;
- constexpr std::size_t MAX_BYTES_WRITTEN_EXTENDED = 1024 * 1024 * 1024;
-
if (!file->IsOpen()) {
return;
}
- if (Settings::values.extended_logging && bytes_written > MAX_BYTES_WRITTEN_EXTENDED) {
- return;
- } else if (!Settings::values.extended_logging && bytes_written > MAX_BYTES_WRITTEN) {
+ using namespace Common::Literals;
+ // Prevent logs from exceeding a set maximum size in the event that log entries are spammed.
+ constexpr std::size_t MAX_BYTES_WRITTEN = 100_MiB;
+ constexpr std::size_t MAX_BYTES_WRITTEN_EXTENDED = 1_GiB;
+
+ const bool write_limit_exceeded =
+ bytes_written > MAX_BYTES_WRITTEN_EXTENDED ||
+ (bytes_written > MAX_BYTES_WRITTEN && !Settings::values.extended_logging);
+
+ // Close the file after the write limit is exceeded.
+ if (write_limit_exceeded) {
+ file->Close();
return;
}
bytes_written += file->WriteString(FormatLogMessage(entry).append(1, '\n'));
if (entry.log_level >= Level::Error) {
- void(file->Flush());
+ file->Flush();
}
}
diff --git a/src/common/logging/filter.cpp b/src/common/logging/filter.cpp
index 4f2cc29e1..f055f0e11 100644
--- a/src/common/logging/filter.cpp
+++ b/src/common/logging/filter.cpp
@@ -144,6 +144,10 @@ bool ParseFilterRule(Filter& instance, Iterator begin, Iterator end) {
SUB(Render, Software) \
SUB(Render, OpenGL) \
SUB(Render, Vulkan) \
+ CLS(Shader) \
+ SUB(Shader, SPIRV) \
+ SUB(Shader, GLASM) \
+ SUB(Shader, GLSL) \
CLS(Audio) \
SUB(Audio, DSP) \
SUB(Audio, Sink) \
diff --git a/src/common/logging/types.h b/src/common/logging/types.h
index ee9a1ed84..7ad0334fc 100644
--- a/src/common/logging/types.h
+++ b/src/common/logging/types.h
@@ -2,6 +2,8 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#pragma once
+
#include <chrono>
#include "common/common_types.h"
@@ -112,6 +114,10 @@ enum class Class : u8 {
Render_Software, ///< Software renderer backend
Render_OpenGL, ///< OpenGL backend
Render_Vulkan, ///< Vulkan backend
+ Shader, ///< Shader recompiler
+ Shader_SPIRV, ///< Shader SPIR-V code generation
+ Shader_GLASM, ///< Shader GLASM code generation
+ Shader_GLSL, ///< Shader GLSL code generation
Audio, ///< Audio emulation
Audio_DSP, ///< The HLE implementation of the DSP
Audio_Sink, ///< Emulator audio output backend
diff --git a/src/common/scm_rev.cpp.in b/src/common/scm_rev.cpp.in
index 5f126f324..cc88994c6 100644
--- a/src/common/scm_rev.cpp.in
+++ b/src/common/scm_rev.cpp.in
@@ -14,7 +14,6 @@
#define BUILD_ID "@BUILD_ID@"
#define TITLE_BAR_FORMAT_IDLE "@TITLE_BAR_FORMAT_IDLE@"
#define TITLE_BAR_FORMAT_RUNNING "@TITLE_BAR_FORMAT_RUNNING@"
-#define SHADER_CACHE_VERSION "@SHADER_CACHE_VERSION@"
namespace Common {
@@ -28,7 +27,6 @@ const char g_build_version[] = BUILD_VERSION;
const char g_build_id[] = BUILD_ID;
const char g_title_bar_format_idle[] = TITLE_BAR_FORMAT_IDLE;
const char g_title_bar_format_running[] = TITLE_BAR_FORMAT_RUNNING;
-const char g_shader_cache_version[] = SHADER_CACHE_VERSION;
} // namespace
diff --git a/src/common/settings.cpp b/src/common/settings.cpp
index 9ec71eced..996315999 100644
--- a/src/common/settings.cpp
+++ b/src/common/settings.cpp
@@ -41,15 +41,15 @@ void LogSettings() {
LOG_INFO(Config, "yuzu Configuration:");
log_setting("Controls_UseDockedMode", values.use_docked_mode.GetValue());
log_setting("System_RngSeed", values.rng_seed.GetValue().value_or(0));
- log_setting("System_CurrentUser", values.current_user);
+ log_setting("System_CurrentUser", values.current_user.GetValue());
log_setting("System_LanguageIndex", values.language_index.GetValue());
log_setting("System_RegionIndex", values.region_index.GetValue());
log_setting("System_TimeZoneIndex", values.time_zone_index.GetValue());
log_setting("Core_UseMultiCore", values.use_multi_core.GetValue());
log_setting("CPU_Accuracy", values.cpu_accuracy.GetValue());
log_setting("Renderer_UseResolutionFactor", values.resolution_factor.GetValue());
- log_setting("Renderer_UseFrameLimit", values.use_frame_limit.GetValue());
- log_setting("Renderer_FrameLimit", values.frame_limit.GetValue());
+ log_setting("Renderer_UseSpeedLimit", values.use_speed_limit.GetValue());
+ log_setting("Renderer_SpeedLimit", values.speed_limit.GetValue());
log_setting("Renderer_UseDiskShaderCache", values.use_disk_shader_cache.GetValue());
log_setting("Renderer_GPUAccuracyLevel", values.gpu_accuracy.GetValue());
log_setting("Renderer_UseAsynchronousGpuEmulation",
@@ -57,21 +57,22 @@ void LogSettings() {
log_setting("Renderer_UseNvdecEmulation", values.use_nvdec_emulation.GetValue());
log_setting("Renderer_AccelerateASTC", values.accelerate_astc.GetValue());
log_setting("Renderer_UseVsync", values.use_vsync.GetValue());
- log_setting("Renderer_UseAssemblyShaders", values.use_assembly_shaders.GetValue());
+ log_setting("Renderer_ShaderBackend", values.shader_backend.GetValue());
log_setting("Renderer_UseAsynchronousShaders", values.use_asynchronous_shaders.GetValue());
+ log_setting("Renderer_UseGarbageCollection", values.use_caches_gc.GetValue());
log_setting("Renderer_AnisotropicFilteringLevel", values.max_anisotropy.GetValue());
- log_setting("Audio_OutputEngine", values.sink_id);
+ log_setting("Audio_OutputEngine", values.sink_id.GetValue());
log_setting("Audio_EnableAudioStretching", values.enable_audio_stretching.GetValue());
- log_setting("Audio_OutputDevice", values.audio_device_id);
- log_setting("DataStorage_UseVirtualSd", values.use_virtual_sd);
+ log_setting("Audio_OutputDevice", values.audio_device_id.GetValue());
+ log_setting("DataStorage_UseVirtualSd", values.use_virtual_sd.GetValue());
log_path("DataStorage_CacheDir", Common::FS::GetYuzuPath(Common::FS::YuzuPath::CacheDir));
log_path("DataStorage_ConfigDir", Common::FS::GetYuzuPath(Common::FS::YuzuPath::ConfigDir));
log_path("DataStorage_LoadDir", Common::FS::GetYuzuPath(Common::FS::YuzuPath::LoadDir));
log_path("DataStorage_NANDDir", Common::FS::GetYuzuPath(Common::FS::YuzuPath::NANDDir));
log_path("DataStorage_SDMCDir", Common::FS::GetYuzuPath(Common::FS::YuzuPath::SDMCDir));
- log_setting("Debugging_ProgramArgs", values.program_args);
- log_setting("Services_BCATBackend", values.bcat_backend);
- log_setting("Services_BCATBoxcatLocal", values.bcat_boxcat_local);
+ log_setting("Debugging_ProgramArgs", values.program_args.GetValue());
+ log_setting("Services_BCATBackend", values.bcat_backend.GetValue());
+ log_setting("Services_BCATBoxcatLocal", values.bcat_boxcat_local.GetValue());
}
bool IsConfiguringGlobal() {
@@ -92,8 +93,8 @@ bool IsGPULevelHigh() {
}
bool IsFastmemEnabled() {
- if (values.cpu_accuracy.GetValue() == CPUAccuracy::DebugMode) {
- return values.cpuopt_fastmem;
+ if (values.cpu_debug_mode) {
+ return static_cast<bool>(values.cpuopt_fastmem);
}
return true;
}
@@ -102,7 +103,7 @@ float Volume() {
if (values.audio_muted) {
return 0.0f;
}
- return values.volume.GetValue();
+ return values.volume.GetValue() / 100.0f;
}
void RestoreGlobalState(bool is_powered_on) {
@@ -122,6 +123,7 @@ void RestoreGlobalState(bool is_powered_on) {
values.cpu_accuracy.SetGlobal(true);
values.cpuopt_unsafe_unfuse_fma.SetGlobal(true);
values.cpuopt_unsafe_reduce_fp_error.SetGlobal(true);
+ values.cpuopt_unsafe_ignore_standard_fpcr.SetGlobal(true);
values.cpuopt_unsafe_inaccurate_nan.SetGlobal(true);
values.cpuopt_unsafe_fastmem_check.SetGlobal(true);
@@ -130,17 +132,18 @@ void RestoreGlobalState(bool is_powered_on) {
values.vulkan_device.SetGlobal(true);
values.aspect_ratio.SetGlobal(true);
values.max_anisotropy.SetGlobal(true);
- values.use_frame_limit.SetGlobal(true);
- values.frame_limit.SetGlobal(true);
+ values.use_speed_limit.SetGlobal(true);
+ values.speed_limit.SetGlobal(true);
values.use_disk_shader_cache.SetGlobal(true);
values.gpu_accuracy.SetGlobal(true);
values.use_asynchronous_gpu_emulation.SetGlobal(true);
values.use_nvdec_emulation.SetGlobal(true);
values.accelerate_astc.SetGlobal(true);
values.use_vsync.SetGlobal(true);
- values.use_assembly_shaders.SetGlobal(true);
+ values.shader_backend.SetGlobal(true);
values.use_asynchronous_shaders.SetGlobal(true);
values.use_fast_gpu_time.SetGlobal(true);
+ values.use_caches_gc.SetGlobal(true);
values.bg_red.SetGlobal(true);
values.bg_green.SetGlobal(true);
values.bg_blue.SetGlobal(true);
diff --git a/src/common/settings.h b/src/common/settings.h
index 6198f2d9f..d8730f515 100644
--- a/src/common/settings.h
+++ b/src/common/settings.h
@@ -10,10 +10,12 @@
#include <map>
#include <optional>
#include <string>
+#include <utility>
#include <vector>
#include "common/common_types.h"
#include "common/settings_input.h"
+#include "input_common/udp/client.h"
namespace Settings {
@@ -22,6 +24,12 @@ enum class RendererBackend : u32 {
Vulkan = 1,
};
+enum class ShaderBackend : u32 {
+ GLSL = 0,
+ GLASM = 1,
+ SPIRV = 2,
+};
+
enum class GPUAccuracy : u32 {
Normal = 0,
High = 1,
@@ -29,73 +37,240 @@ enum class GPUAccuracy : u32 {
};
enum class CPUAccuracy : u32 {
- Accurate = 0,
- Unsafe = 1,
- DebugMode = 2,
+ Auto = 0,
+ Accurate = 1,
+ Unsafe = 2,
+};
+
+/** The BasicSetting class is a simple wrapper around a configuration value. It defines a label
+ * and default value alongside the actual value of the setting, for simpler and less error-prone
+ * use with frontend configurations. Setting a default value and label is required, though
+ * subclasses may deviate from this requirement.
+ */
+template <typename Type>
+class BasicSetting {
+protected:
+ BasicSetting() = default;
+
+ /**
+ * Only sets the setting to the given initializer, leaving the other members to their default
+ * initializers.
+ *
+ * @param global_val Initial value of the setting
+ */
+ explicit BasicSetting(const Type& global_val) : global{global_val} {}
+
+public:
+ /**
+ * Sets a default value, label, and setting value.
+ *
+     * @param default_val Initial value of the setting, which also becomes its default value
+ * @param name Label for the setting
+ */
+ explicit BasicSetting(const Type& default_val, const std::string& name)
+ : default_value{default_val}, global{default_val}, label{name} {}
+ ~BasicSetting() = default;
+
+ /**
+ * Returns a reference to the setting's value.
+ *
+ * @returns A reference to the setting
+ */
+ [[nodiscard]] const Type& GetValue() const {
+ return global;
+ }
+
+ /**
+ * Sets the setting to the given value.
+ *
+ * @param value The desired value
+ */
+ void SetValue(const Type& value) {
+ Type temp{value};
+ std::swap(global, temp);
+ }
+
+ /**
+ * Returns the value that this setting was created with.
+ *
+ * @returns A reference to the default value
+ */
+ [[nodiscard]] const Type& GetDefault() const {
+ return default_value;
+ }
+
+ /**
+ * Returns the label this setting was created with.
+ *
+ * @returns A reference to the label
+ */
+ [[nodiscard]] const std::string& GetLabel() const {
+ return label;
+ }
+
+ /**
+ * Assigns a value to the setting.
+ *
+ * @param value The desired setting value
+ *
+ * @returns A reference to the setting
+ */
+ const Type& operator=(const Type& value) {
+ Type temp{value};
+ std::swap(global, temp);
+ return global;
+ }
+
+ /**
+ * Returns a reference to the setting.
+ *
+ * @returns A reference to the setting
+ */
+ explicit operator const Type&() const {
+ return global;
+ }
+
+protected:
+ const Type default_value{}; ///< The default value
+ Type global{}; ///< The setting
+ const std::string label{}; ///< The setting's label
};
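// Editor-added usage sketch, not part of this patch: minimal use of the BasicSetting interface
// above, written as it might appear in frontend code. "example_flag" is a hypothetical setting.
Settings::BasicSetting<bool> example_flag{false, "example_flag"};

void BasicSettingDemo() {
    example_flag = true;  // operator= replaces the stored value
    [[maybe_unused]] const bool enabled = example_flag.GetValue();          // true
    [[maybe_unused]] const bool initial = example_flag.GetDefault();        // false
    [[maybe_unused]] const std::string& ini_key = example_flag.GetLabel();  // "example_flag"
}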
+/**
+ * The Setting class is a slightly more complex version of the BasicSetting class. It adds a
+ * custom (per-game) value to switch to when a guest application specifically requires it. The
+ * effect is that other components of the emulator can read the setting's intended value without
+ * having to ask whether the custom or the global value is currently in use.
+ *
+ * By default, the global setting is used.
+ *
+ * Like BasicSetting, a default value and label are required on construction.
+ */
template <typename Type>
-class Setting final {
+class Setting final : public BasicSetting<Type> {
public:
- Setting() = default;
- explicit Setting(Type val) : global{val} {}
+ /**
+ * Sets a default value, label, and setting value.
+ *
+     * @param default_val Initial value of the setting, which also becomes its default value
+ * @param name Label for the setting
+ */
+ explicit Setting(const Type& default_val, const std::string& name)
+ : BasicSetting<Type>(default_val, name) {}
~Setting() = default;
+
+ /**
+ * Tells this setting to represent either the global or custom setting when other member
+ * functions are used.
+ *
+ * @param to_global Whether to use the global or custom setting.
+ */
void SetGlobal(bool to_global) {
use_global = to_global;
}
- bool UsingGlobal() const {
+
+ /**
+ * Returns whether this setting is using the global setting or not.
+ *
+ * @returns The global state
+ */
+ [[nodiscard]] bool UsingGlobal() const {
return use_global;
}
- Type GetValue(bool need_global = false) const {
+
+ /**
+     * Returns either the global or custom value, depending on this setting's global state or on
+     * whether the global value was specifically requested.
+ *
+ * @param need_global Request global value regardless of setting's state; defaults to false
+ *
+ * @returns The required value of the setting
+ */
+ [[nodiscard]] const Type& GetValue(bool need_global = false) const {
if (use_global || need_global) {
- return global;
+ return this->global;
}
- return local;
+ return custom;
}
+
+ /**
+ * Sets the current setting value depending on the global state.
+ *
+ * @param value The new value
+ */
void SetValue(const Type& value) {
+ Type temp{value};
if (use_global) {
- global = value;
+ std::swap(this->global, temp);
} else {
- local = value;
+ std::swap(custom, temp);
+ }
+ }
+
+ /**
+ * Assigns the current setting value depending on the global state.
+ *
+ * @param value The new value
+ *
+ * @returns A reference to the current setting value
+ */
+ const Type& operator=(const Type& value) {
+ Type temp{value};
+ if (use_global) {
+ std::swap(this->global, temp);
+ return this->global;
+ }
+ std::swap(custom, temp);
+ return custom;
+ }
+
+ /**
+ * Returns the current setting value depending on the global state.
+ *
+ * @returns A reference to the current setting value
+ */
+ explicit operator const Type&() const {
+ if (use_global) {
+ return this->global;
}
+ return custom;
}
private:
- bool use_global = true;
- Type global{};
- Type local{};
+ bool use_global{true}; ///< The setting's global state
+ Type custom{}; ///< The custom value of the setting
};
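// Editor-added usage sketch, not part of this patch: how a per-game override interacts with the
// global value through Setting<Type>. The setting name "example_limit" is hypothetical.
Settings::Setting<u16> example_limit{100, "example_limit"};

void PerGameOverrideDemo() {
    example_limit.SetValue(200);     // use_global is true by default, so this writes the global value
    example_limit.SetGlobal(false);  // switch the setting over to its custom (per-game) value
    example_limit = 50;              // now writes only the custom value
    [[maybe_unused]] const u16 custom_value = example_limit.GetValue();      // 50
    [[maybe_unused]] const u16 global_value = example_limit.GetValue(true);  // 200, forced global read
}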
/**
- * The InputSetting class allows for getting a reference to either the global or local members.
+ * The InputSetting class allows for getting a reference to either the global or custom members.
* This is required as we cannot easily modify the values of user-defined types within containers
* using the SetValue() member function found in the Setting class. The primary purpose of this
- * class is to store an array of 10 PlayerInput structs for both the global and local (per-game)
- * setting and allows for easily accessing and modifying both settings.
+ * class is to store an array of 10 PlayerInput structs for both the global and custom settings
+ * and to allow easy access to and modification of both.
*/
template <typename Type>
class InputSetting final {
public:
InputSetting() = default;
- explicit InputSetting(Type val) : global{val} {}
+    explicit InputSetting(Type val) : global{std::move(val)} {}
~InputSetting() = default;
void SetGlobal(bool to_global) {
use_global = to_global;
}
- bool UsingGlobal() const {
+ [[nodiscard]] bool UsingGlobal() const {
return use_global;
}
- Type& GetValue(bool need_global = false) {
+ [[nodiscard]] Type& GetValue(bool need_global = false) {
if (use_global || need_global) {
return global;
}
- return local;
+ return custom;
}
private:
- bool use_global = true;
- Type global{};
- Type local{};
+ bool use_global{true}; ///< The setting's global state
+ Type global{}; ///< The setting
+ Type custom{}; ///< The custom setting value
};
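// Editor-added usage sketch, not part of this patch: InputSetting::GetValue() hands back a mutable
// reference, so elements of the contained array can be edited in place for either profile.
Settings::InputSetting<std::array<Settings::PlayerInput, 10>> demo_players;

void EditPlayerProfiles() {
    demo_players.SetGlobal(false);  // target the custom (per-game) profile
    std::array<Settings::PlayerInput, 10>& custom = demo_players.GetValue();
    std::swap(custom[0], custom[1]);  // mutate elements directly through the reference
    std::array<Settings::PlayerInput, 10>& global = demo_players.GetValue(true);
    std::swap(global[0], global[1]);  // the global profile stays reachable when needed
}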
struct TouchFromButtonMap {
@@ -105,141 +280,162 @@ struct TouchFromButtonMap {
struct Values {
// Audio
- std::string audio_device_id;
- std::string sink_id;
- bool audio_muted;
- Setting<bool> enable_audio_stretching;
- Setting<float> volume;
+ BasicSetting<std::string> audio_device_id{"auto", "output_device"};
+ BasicSetting<std::string> sink_id{"auto", "output_engine"};
+ BasicSetting<bool> audio_muted{false, "audio_muted"};
+ Setting<bool> enable_audio_stretching{true, "enable_audio_stretching"};
+ Setting<u8> volume{100, "volume"};
// Core
- Setting<bool> use_multi_core;
+ Setting<bool> use_multi_core{true, "use_multi_core"};
// Cpu
- Setting<CPUAccuracy> cpu_accuracy;
-
- bool cpuopt_page_tables;
- bool cpuopt_block_linking;
- bool cpuopt_return_stack_buffer;
- bool cpuopt_fast_dispatcher;
- bool cpuopt_context_elimination;
- bool cpuopt_const_prop;
- bool cpuopt_misc_ir;
- bool cpuopt_reduce_misalign_checks;
- bool cpuopt_fastmem;
-
- Setting<bool> cpuopt_unsafe_unfuse_fma;
- Setting<bool> cpuopt_unsafe_reduce_fp_error;
- Setting<bool> cpuopt_unsafe_inaccurate_nan;
- Setting<bool> cpuopt_unsafe_fastmem_check;
+ Setting<CPUAccuracy> cpu_accuracy{CPUAccuracy::Auto, "cpu_accuracy"};
+ // TODO: remove cpu_accuracy_first_time, migration setting added 8 July 2021
+ BasicSetting<bool> cpu_accuracy_first_time{true, "cpu_accuracy_first_time"};
+ BasicSetting<bool> cpu_debug_mode{false, "cpu_debug_mode"};
+
+ BasicSetting<bool> cpuopt_page_tables{true, "cpuopt_page_tables"};
+ BasicSetting<bool> cpuopt_block_linking{true, "cpuopt_block_linking"};
+ BasicSetting<bool> cpuopt_return_stack_buffer{true, "cpuopt_return_stack_buffer"};
+ BasicSetting<bool> cpuopt_fast_dispatcher{true, "cpuopt_fast_dispatcher"};
+ BasicSetting<bool> cpuopt_context_elimination{true, "cpuopt_context_elimination"};
+ BasicSetting<bool> cpuopt_const_prop{true, "cpuopt_const_prop"};
+ BasicSetting<bool> cpuopt_misc_ir{true, "cpuopt_misc_ir"};
+ BasicSetting<bool> cpuopt_reduce_misalign_checks{true, "cpuopt_reduce_misalign_checks"};
+ BasicSetting<bool> cpuopt_fastmem{true, "cpuopt_fastmem"};
+
+ Setting<bool> cpuopt_unsafe_unfuse_fma{true, "cpuopt_unsafe_unfuse_fma"};
+ Setting<bool> cpuopt_unsafe_reduce_fp_error{true, "cpuopt_unsafe_reduce_fp_error"};
+ Setting<bool> cpuopt_unsafe_ignore_standard_fpcr{true, "cpuopt_unsafe_ignore_standard_fpcr"};
+ Setting<bool> cpuopt_unsafe_inaccurate_nan{true, "cpuopt_unsafe_inaccurate_nan"};
+ Setting<bool> cpuopt_unsafe_fastmem_check{true, "cpuopt_unsafe_fastmem_check"};
// Renderer
- Setting<RendererBackend> renderer_backend;
- bool renderer_debug;
- Setting<int> vulkan_device;
-
- Setting<u16> resolution_factor{1};
- Setting<int> fullscreen_mode;
- Setting<int> aspect_ratio;
- Setting<int> max_anisotropy;
- Setting<bool> use_frame_limit;
- Setting<u16> frame_limit;
- Setting<bool> use_disk_shader_cache;
- Setting<GPUAccuracy> gpu_accuracy;
- Setting<bool> use_asynchronous_gpu_emulation;
- Setting<bool> use_nvdec_emulation;
- Setting<bool> accelerate_astc;
- Setting<bool> use_vsync;
- Setting<bool> use_assembly_shaders;
- Setting<bool> use_asynchronous_shaders;
- Setting<bool> use_fast_gpu_time;
-
- Setting<float> bg_red;
- Setting<float> bg_green;
- Setting<float> bg_blue;
+ Setting<RendererBackend> renderer_backend{RendererBackend::OpenGL, "backend"};
+ BasicSetting<bool> renderer_debug{false, "debug"};
+ BasicSetting<bool> enable_nsight_aftermath{false, "nsight_aftermath"};
+ BasicSetting<bool> disable_shader_loop_safety_checks{false,
+ "disable_shader_loop_safety_checks"};
+ Setting<int> vulkan_device{0, "vulkan_device"};
+
+ Setting<u16> resolution_factor{1, "resolution_factor"};
+ // *nix platforms may have issues with the borderless windowed fullscreen mode.
+ // Default to exclusive fullscreen on these platforms for now.
+ Setting<int> fullscreen_mode{
+#ifdef _WIN32
+ 0,
+#else
+ 1,
+#endif
+ "fullscreen_mode"};
+ Setting<int> aspect_ratio{0, "aspect_ratio"};
+ Setting<int> max_anisotropy{0, "max_anisotropy"};
+ Setting<bool> use_speed_limit{true, "use_speed_limit"};
+ Setting<u16> speed_limit{100, "speed_limit"};
+ Setting<bool> use_disk_shader_cache{true, "use_disk_shader_cache"};
+ Setting<GPUAccuracy> gpu_accuracy{GPUAccuracy::High, "gpu_accuracy"};
+ Setting<bool> use_asynchronous_gpu_emulation{true, "use_asynchronous_gpu_emulation"};
+ Setting<bool> use_nvdec_emulation{true, "use_nvdec_emulation"};
+ Setting<bool> accelerate_astc{true, "accelerate_astc"};
+ Setting<bool> use_vsync{true, "use_vsync"};
+ BasicSetting<u16> fps_cap{1000, "fps_cap"};
+ BasicSetting<bool> disable_fps_limit{false, "disable_fps_limit"};
+ Setting<ShaderBackend> shader_backend{ShaderBackend::GLASM, "shader_backend"};
+ Setting<bool> use_asynchronous_shaders{false, "use_asynchronous_shaders"};
+ Setting<bool> use_fast_gpu_time{true, "use_fast_gpu_time"};
+ Setting<bool> use_caches_gc{false, "use_caches_gc"};
+
+ Setting<u8> bg_red{0, "bg_red"};
+ Setting<u8> bg_green{0, "bg_green"};
+ Setting<u8> bg_blue{0, "bg_blue"};
// System
- Setting<std::optional<u32>> rng_seed;
+ Setting<std::optional<u32>> rng_seed{std::optional<u32>(), "rng_seed"};
// Measured in seconds since epoch
std::optional<std::chrono::seconds> custom_rtc;
// Set on game boot, reset on stop. Seconds difference between current time and `custom_rtc`
std::chrono::seconds custom_rtc_differential;
- s32 current_user;
- Setting<s32> language_index;
- Setting<s32> region_index;
- Setting<s32> time_zone_index;
- Setting<s32> sound_index;
+ BasicSetting<s32> current_user{0, "current_user"};
+ Setting<s32> language_index{1, "language_index"};
+ Setting<s32> region_index{1, "region_index"};
+ Setting<s32> time_zone_index{0, "time_zone_index"};
+ Setting<s32> sound_index{1, "sound_index"};
// Controls
InputSetting<std::array<PlayerInput, 10>> players;
- Setting<bool> use_docked_mode;
+ Setting<bool> use_docked_mode{true, "use_docked_mode"};
- Setting<bool> vibration_enabled;
- Setting<bool> enable_accurate_vibrations;
+ Setting<bool> vibration_enabled{true, "vibration_enabled"};
+ Setting<bool> enable_accurate_vibrations{false, "enable_accurate_vibrations"};
- Setting<bool> motion_enabled;
- std::string motion_device;
- std::string udp_input_servers;
+ Setting<bool> motion_enabled{true, "motion_enabled"};
+ BasicSetting<std::string> motion_device{"engine:motion_emu,update_period:100,sensitivity:0.01",
+ "motion_device"};
+ BasicSetting<std::string> udp_input_servers{InputCommon::CemuhookUDP::DEFAULT_SRV,
+ "udp_input_servers"};
- bool mouse_panning;
- float mouse_panning_sensitivity;
- bool mouse_enabled;
+ BasicSetting<bool> mouse_panning{false, "mouse_panning"};
+ BasicSetting<u8> mouse_panning_sensitivity{10, "mouse_panning_sensitivity"};
+ BasicSetting<bool> mouse_enabled{false, "mouse_enabled"};
std::string mouse_device;
MouseButtonsRaw mouse_buttons;
- bool emulate_analog_keyboard;
- bool keyboard_enabled;
+ BasicSetting<bool> emulate_analog_keyboard{false, "emulate_analog_keyboard"};
+ BasicSetting<bool> keyboard_enabled{false, "keyboard_enabled"};
KeyboardKeysRaw keyboard_keys;
KeyboardModsRaw keyboard_mods;
- bool debug_pad_enabled;
+ BasicSetting<bool> debug_pad_enabled{false, "debug_pad_enabled"};
ButtonsRaw debug_pad_buttons;
AnalogsRaw debug_pad_analogs;
TouchscreenInput touchscreen;
- bool use_touch_from_button;
- std::string touch_device;
- int touch_from_button_map_index;
+ BasicSetting<bool> use_touch_from_button{false, "use_touch_from_button"};
+ BasicSetting<std::string> touch_device{"min_x:100,min_y:50,max_x:1800,max_y:850",
+ "touch_device"};
+ BasicSetting<int> touch_from_button_map_index{0, "touch_from_button_map"};
std::vector<TouchFromButtonMap> touch_from_button_maps;
std::atomic_bool is_device_reload_pending{true};
// Data Storage
- bool use_virtual_sd;
- bool gamecard_inserted;
- bool gamecard_current_game;
- std::string gamecard_path;
+ BasicSetting<bool> use_virtual_sd{true, "use_virtual_sd"};
+ BasicSetting<bool> gamecard_inserted{false, "gamecard_inserted"};
+ BasicSetting<bool> gamecard_current_game{false, "gamecard_current_game"};
+ BasicSetting<std::string> gamecard_path{std::string(), "gamecard_path"};
// Debugging
bool record_frame_times;
- bool use_gdbstub;
- u16 gdbstub_port;
- std::string program_args;
- bool dump_exefs;
- bool dump_nso;
- bool enable_fs_access_log;
- bool reporting_services;
- bool quest_flag;
- bool disable_macro_jit;
- bool extended_logging;
- bool use_debug_asserts;
- bool use_auto_stub;
+ BasicSetting<bool> use_gdbstub{false, "use_gdbstub"};
+ BasicSetting<u16> gdbstub_port{0, "gdbstub_port"};
+ BasicSetting<std::string> program_args{std::string(), "program_args"};
+ BasicSetting<bool> dump_exefs{false, "dump_exefs"};
+ BasicSetting<bool> dump_nso{false, "dump_nso"};
+ BasicSetting<bool> enable_fs_access_log{false, "enable_fs_access_log"};
+ BasicSetting<bool> reporting_services{false, "reporting_services"};
+ BasicSetting<bool> quest_flag{false, "quest_flag"};
+ BasicSetting<bool> disable_macro_jit{false, "disable_macro_jit"};
+ BasicSetting<bool> extended_logging{false, "extended_logging"};
+ BasicSetting<bool> use_debug_asserts{false, "use_debug_asserts"};
+ BasicSetting<bool> use_auto_stub{false, "use_auto_stub"};
// Miscellaneous
- std::string log_filter;
- bool use_dev_keys;
+ BasicSetting<std::string> log_filter{"*:Info", "log_filter"};
+ BasicSetting<bool> use_dev_keys{false, "use_dev_keys"};
// Services
- std::string bcat_backend;
- bool bcat_boxcat_local;
+ BasicSetting<std::string> bcat_backend{"none", "bcat_backend"};
+ BasicSetting<bool> bcat_boxcat_local{false, "bcat_boxcat_local"};
// WebService
- bool enable_telemetry;
- std::string web_api_url;
- std::string yuzu_username;
- std::string yuzu_token;
+ BasicSetting<bool> enable_telemetry{true, "enable_telemetry"};
+ BasicSetting<std::string> web_api_url{"https://api.yuzu-emu.org", "web_api_url"};
+ BasicSetting<std::string> yuzu_username{std::string(), "yuzu_username"};
+ BasicSetting<std::string> yuzu_token{std::string(), "yuzu_token"};
// Add-Ons
std::map<u64, std::vector<std::string>> disabled_addons;
diff --git a/src/common/thread_worker.cpp b/src/common/thread_worker.cpp
deleted file mode 100644
index 8f9bf447a..000000000
--- a/src/common/thread_worker.cpp
+++ /dev/null
@@ -1,58 +0,0 @@
-// Copyright 2020 yuzu emulator team
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/thread.h"
-#include "common/thread_worker.h"
-
-namespace Common {
-
-ThreadWorker::ThreadWorker(std::size_t num_workers, const std::string& name) {
- for (std::size_t i = 0; i < num_workers; ++i)
- threads.emplace_back([this, thread_name{std::string{name}}] {
- Common::SetCurrentThreadName(thread_name.c_str());
-
- // Wait for first request
- {
- std::unique_lock lock{queue_mutex};
- condition.wait(lock, [this] { return stop || !requests.empty(); });
- }
-
- while (true) {
- std::function<void()> task;
-
- {
- std::unique_lock lock{queue_mutex};
- condition.wait(lock, [this] { return stop || !requests.empty(); });
- if (stop || requests.empty()) {
- return;
- }
- task = std::move(requests.front());
- requests.pop();
- }
-
- task();
- }
- });
-}
-
-ThreadWorker::~ThreadWorker() {
- {
- std::unique_lock lock{queue_mutex};
- stop = true;
- }
- condition.notify_all();
- for (std::thread& thread : threads) {
- thread.join();
- }
-}
-
-void ThreadWorker::QueueWork(std::function<void()>&& work) {
- {
- std::unique_lock lock{queue_mutex};
- requests.emplace(work);
- }
- condition.notify_one();
-}
-
-} // namespace Common
diff --git a/src/common/thread_worker.h b/src/common/thread_worker.h
index f1859971f..cd0017726 100644
--- a/src/common/thread_worker.h
+++ b/src/common/thread_worker.h
@@ -5,26 +5,113 @@
#pragma once
#include <atomic>
+#include <condition_variable>
#include <functional>
#include <mutex>
+#include <stop_token>
#include <string>
+#include <thread>
+#include <type_traits>
#include <vector>
#include <queue>
+#include "common/thread.h"
+#include "common/unique_function.h"
+
namespace Common {
-class ThreadWorker final {
+template <class StateType = void>
+class StatefulThreadWorker {
+ static constexpr bool with_state = !std::is_same_v<StateType, void>;
+
+ struct DummyCallable {
+ int operator()() const noexcept {
+ return 0;
+ }
+ };
+
+ using Task =
+ std::conditional_t<with_state, UniqueFunction<void, StateType*>, UniqueFunction<void>>;
+ using StateMaker = std::conditional_t<with_state, std::function<StateType()>, DummyCallable>;
+
public:
- explicit ThreadWorker(std::size_t num_workers, const std::string& name);
- ~ThreadWorker();
- void QueueWork(std::function<void()>&& work);
+ explicit StatefulThreadWorker(size_t num_workers, std::string name, StateMaker func = {})
+ : workers_queued{num_workers}, thread_name{std::move(name)} {
+ const auto lambda = [this, func](std::stop_token stop_token) {
+ Common::SetCurrentThreadName(thread_name.c_str());
+ {
+ [[maybe_unused]] std::conditional_t<with_state, StateType, int> state{func()};
+ while (!stop_token.stop_requested()) {
+ Task task;
+ {
+ std::unique_lock lock{queue_mutex};
+ if (requests.empty()) {
+ wait_condition.notify_all();
+ }
+ condition.wait(lock, stop_token, [this] { return !requests.empty(); });
+ if (stop_token.stop_requested()) {
+ break;
+ }
+ task = std::move(requests.front());
+ requests.pop();
+ }
+ if constexpr (with_state) {
+ task(&state);
+ } else {
+ task();
+ }
+ ++work_done;
+ }
+ }
+ ++workers_stopped;
+ wait_condition.notify_all();
+ };
+ threads.reserve(num_workers);
+ for (size_t i = 0; i < num_workers; ++i) {
+ threads.emplace_back(lambda);
+ }
+ }
+
+ StatefulThreadWorker& operator=(const StatefulThreadWorker&) = delete;
+ StatefulThreadWorker(const StatefulThreadWorker&) = delete;
+
+ StatefulThreadWorker& operator=(StatefulThreadWorker&&) = delete;
+ StatefulThreadWorker(StatefulThreadWorker&&) = delete;
+
+ void QueueWork(Task work) {
+ {
+ std::unique_lock lock{queue_mutex};
+ requests.emplace(std::move(work));
+ ++work_scheduled;
+ }
+ condition.notify_one();
+ }
+
+ void WaitForRequests(std::stop_token stop_token = {}) {
+ std::stop_callback callback(stop_token, [this] {
+ for (auto& thread : threads) {
+ thread.request_stop();
+ }
+ });
+ std::unique_lock lock{queue_mutex};
+ wait_condition.wait(lock, [this] {
+ return workers_stopped >= workers_queued || work_done >= work_scheduled;
+ });
+ }
private:
- std::vector<std::thread> threads;
- std::queue<std::function<void()>> requests;
+ std::queue<Task> requests;
std::mutex queue_mutex;
- std::condition_variable condition;
- std::atomic_bool stop{};
+ std::condition_variable_any condition;
+ std::condition_variable wait_condition;
+ std::atomic<size_t> work_scheduled{};
+ std::atomic<size_t> work_done{};
+ std::atomic<size_t> workers_stopped{};
+ std::atomic<size_t> workers_queued{};
+ std::string thread_name;
+ std::vector<std::jthread> threads;
};
+using ThreadWorker = StatefulThreadWorker<>;
+
} // namespace Common
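// Editor-added usage sketch, not part of this patch: queueing work on the stateless ThreadWorker
// alias and on a worker whose StateMaker builds one state object per worker thread. The thread
// names and the DemoState type are hypothetical.
void ThreadWorkerDemo() {
    Common::ThreadWorker pool{2, "DemoPool"};
    pool.QueueWork([] { /* this task runs on one of the two worker threads */ });
    pool.WaitForRequests();  // blocks until the queued work has been drained

    struct DemoState {
        int counter = 0;
    };
    Common::StatefulThreadWorker<DemoState> stateful{1, "DemoStateful", [] { return DemoState{}; }};
    stateful.QueueWork([](DemoState* state) { ++state->counter; });  // tasks receive the per-thread state
    stateful.WaitForRequests();
}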
diff --git a/src/common/unique_function.h b/src/common/unique_function.h
new file mode 100644
index 000000000..ca0559071
--- /dev/null
+++ b/src/common/unique_function.h
@@ -0,0 +1,62 @@
+// Copyright 2021 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <utility>
+
+namespace Common {
+
+/// General purpose function wrapper similar to std::function.
+/// Unlike std::function, the captured values don't have to be copyable.
+/// This class can be moved but not copied.
+template <typename ResultType, typename... Args>
+class UniqueFunction {
+ class CallableBase {
+ public:
+ virtual ~CallableBase() = default;
+ virtual ResultType operator()(Args&&...) = 0;
+ };
+
+ template <typename Functor>
+ class Callable final : public CallableBase {
+ public:
+ Callable(Functor&& functor_) : functor{std::move(functor_)} {}
+ ~Callable() override = default;
+
+ ResultType operator()(Args&&... args) override {
+ return functor(std::forward<Args>(args)...);
+ }
+
+ private:
+ Functor functor;
+ };
+
+public:
+ UniqueFunction() = default;
+
+ template <typename Functor>
+ UniqueFunction(Functor&& functor)
+ : callable{std::make_unique<Callable<Functor>>(std::move(functor))} {}
+
+ UniqueFunction& operator=(UniqueFunction&& rhs) noexcept = default;
+ UniqueFunction(UniqueFunction&& rhs) noexcept = default;
+
+ UniqueFunction& operator=(const UniqueFunction&) = delete;
+ UniqueFunction(const UniqueFunction&) = delete;
+
+ ResultType operator()(Args&&... args) const {
+ return (*callable)(std::forward<Args>(args)...);
+ }
+
+ explicit operator bool() const noexcept {
+ return static_cast<bool>(callable);
+ }
+
+private:
+ std::unique_ptr<CallableBase> callable;
+};
+
+} // namespace Common
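// Editor-added usage sketch, not part of this patch: unlike std::function, UniqueFunction can own
// a move-only capture such as a std::unique_ptr (assumes <memory> is included by the caller).
void UniqueFunctionDemo() {
    auto resource = std::make_unique<int>(42);
    Common::UniqueFunction<int> fn{[ptr = std::move(resource)] { return *ptr; }};
    if (fn) {  // operator bool reports whether a callable is stored
        [[maybe_unused]] const int value = fn();  // invokes the wrapped lambda, yields 42
    }
    Common::UniqueFunction<int> moved = std::move(fn);  // movable, but deliberately not copyable
    (void)moved;
}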
diff --git a/src/common/uuid.h b/src/common/uuid.h
index 2e7a18405..0ffa37e7c 100644
--- a/src/common/uuid.h
+++ b/src/common/uuid.h
@@ -20,12 +20,11 @@ struct UUID {
constexpr explicit UUID(const u64 lo, const u64 hi) : uuid{{lo, hi}} {}
[[nodiscard]] constexpr explicit operator bool() const {
- return uuid[0] != INVALID_UUID[0] && uuid[1] != INVALID_UUID[1];
+ return uuid != INVALID_UUID;
}
[[nodiscard]] constexpr bool operator==(const UUID& rhs) const {
- // TODO(DarkLordZach): Replace with uuid == rhs.uuid with C++20
- return uuid[0] == rhs.uuid[0] && uuid[1] == rhs.uuid[1];
+ return uuid == rhs.uuid;
}
[[nodiscard]] constexpr bool operator!=(const UUID& rhs) const {
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index efb851f5a..5c99c00f5 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -139,6 +139,7 @@ add_library(core STATIC
frontend/input.h
hardware_interrupt_manager.cpp
hardware_interrupt_manager.h
+ hle/api_version.h
hle/ipc.h
hle/ipc_helpers.h
hle/kernel/board/nintendo/nx/k_system_control.cpp
@@ -271,22 +272,22 @@ add_library(core STATIC
hle/service/am/applet_ae.h
hle/service/am/applet_oe.cpp
hle/service/am/applet_oe.h
+ hle/service/am/applets/applet_controller.cpp
+ hle/service/am/applets/applet_controller.h
+ hle/service/am/applets/applet_error.cpp
+ hle/service/am/applets/applet_error.h
+ hle/service/am/applets/applet_general_backend.cpp
+ hle/service/am/applets/applet_general_backend.h
+ hle/service/am/applets/applet_profile_select.cpp
+ hle/service/am/applets/applet_profile_select.h
+ hle/service/am/applets/applet_software_keyboard.cpp
+ hle/service/am/applets/applet_software_keyboard.h
+ hle/service/am/applets/applet_software_keyboard_types.h
+ hle/service/am/applets/applet_web_browser.cpp
+ hle/service/am/applets/applet_web_browser.h
+ hle/service/am/applets/applet_web_browser_types.h
hle/service/am/applets/applets.cpp
hle/service/am/applets/applets.h
- hle/service/am/applets/controller.cpp
- hle/service/am/applets/controller.h
- hle/service/am/applets/error.cpp
- hle/service/am/applets/error.h
- hle/service/am/applets/general_backend.cpp
- hle/service/am/applets/general_backend.h
- hle/service/am/applets/profile_select.cpp
- hle/service/am/applets/profile_select.h
- hle/service/am/applets/software_keyboard.cpp
- hle/service/am/applets/software_keyboard.h
- hle/service/am/applets/software_keyboard_types.h
- hle/service/am/applets/web_browser.cpp
- hle/service/am/applets/web_browser.h
- hle/service/am/applets/web_types.h
hle/service/am/idle.cpp
hle/service/am/idle.h
hle/service/am/omm.cpp
@@ -299,10 +300,10 @@ add_library(core STATIC
hle/service/aoc/aoc_u.h
hle/service/apm/apm.cpp
hle/service/apm/apm.h
- hle/service/apm/controller.cpp
- hle/service/apm/controller.h
- hle/service/apm/interface.cpp
- hle/service/apm/interface.h
+ hle/service/apm/apm_controller.cpp
+ hle/service/apm/apm_controller.h
+ hle/service/apm/apm_interface.cpp
+ hle/service/apm/apm_interface.h
hle/service/audio/audctl.cpp
hle/service/audio/audctl.h
hle/service/audio/auddbg.cpp
@@ -334,8 +335,8 @@ add_library(core STATIC
hle/service/bcat/backend/backend.h
hle/service/bcat/bcat.cpp
hle/service/bcat/bcat.h
- hle/service/bcat/module.cpp
- hle/service/bcat/module.h
+ hle/service/bcat/bcat_module.cpp
+ hle/service/bcat/bcat_module.h
hle/service/bpc/bpc.cpp
hle/service/bpc/bpc.h
hle/service/btdrv/btdrv.cpp
@@ -381,8 +382,8 @@ add_library(core STATIC
hle/service/friend/errors.h
hle/service/friend/friend.cpp
hle/service/friend/friend.h
- hle/service/friend/interface.cpp
- hle/service/friend/interface.h
+ hle/service/friend/friend_interface.cpp
+ hle/service/friend/friend_interface.h
hle/service/glue/arp.cpp
hle/service/glue/arp.h
hle/service/glue/bgtc.cpp
@@ -392,8 +393,8 @@ add_library(core STATIC
hle/service/glue/errors.h
hle/service/glue/glue.cpp
hle/service/glue/glue.h
- hle/service/glue/manager.cpp
- hle/service/glue/manager.h
+ hle/service/glue/glue_manager.cpp
+ hle/service/glue/glue_manager.h
hle/service/grc/grc.cpp
hle/service/grc/grc.h
hle/service/hid/hid.cpp
@@ -434,10 +435,10 @@ add_library(core STATIC
hle/service/lm/lm.h
hle/service/mig/mig.cpp
hle/service/mig/mig.h
- hle/service/mii/manager.cpp
- hle/service/mii/manager.h
hle/service/mii/mii.cpp
hle/service/mii/mii.h
+ hle/service/mii/mii_manager.cpp
+ hle/service/mii/mii_manager.h
hle/service/mii/raw_data.cpp
hle/service/mii/raw_data.h
hle/service/mii/types.h
@@ -485,11 +486,11 @@ add_library(core STATIC
hle/service/nvdrv/devices/nvhost_vic.h
hle/service/nvdrv/devices/nvmap.cpp
hle/service/nvdrv/devices/nvmap.h
- hle/service/nvdrv/interface.cpp
- hle/service/nvdrv/interface.h
hle/service/nvdrv/nvdata.h
hle/service/nvdrv/nvdrv.cpp
hle/service/nvdrv/nvdrv.h
+ hle/service/nvdrv/nvdrv_interface.cpp
+ hle/service/nvdrv/nvdrv_interface.h
hle/service/nvdrv/nvmemp.cpp
hle/service/nvdrv/nvmemp.h
hle/service/nvdrv/syncpoint_manager.cpp
@@ -502,10 +503,10 @@ add_library(core STATIC
hle/service/olsc/olsc.h
hle/service/pcie/pcie.cpp
hle/service/pcie/pcie.h
- hle/service/pctl/module.cpp
- hle/service/pctl/module.h
hle/service/pctl/pctl.cpp
hle/service/pctl/pctl.h
+ hle/service/pctl/pctl_module.cpp
+ hle/service/pctl/pctl_module.h
hle/service/pcv/pcv.cpp
hle/service/pcv/pcv.h
hle/service/pm/pm.cpp
@@ -516,6 +517,8 @@ add_library(core STATIC
hle/service/psc/psc.h
hle/service/ptm/psm.cpp
hle/service/ptm/psm.h
+ hle/service/kernel_helpers.cpp
+ hle/service/kernel_helpers.h
hle/service/service.cpp
hle/service/service.h
hle/service/set/set.cpp
@@ -528,10 +531,10 @@ add_library(core STATIC
hle/service/set/set_sys.h
hle/service/set/settings.cpp
hle/service/set/settings.h
- hle/service/sm/controller.cpp
- hle/service/sm/controller.h
hle/service/sm/sm.cpp
hle/service/sm/sm.h
+ hle/service/sm/sm_controller.cpp
+ hle/service/sm/sm_controller.h
hle/service/sockets/bsd.cpp
hle/service/sockets/bsd.h
hle/service/sockets/ethc.cpp
@@ -546,18 +549,18 @@ add_library(core STATIC
hle/service/sockets/sockets_translate.h
hle/service/spl/csrng.cpp
hle/service/spl/csrng.h
- hle/service/spl/module.cpp
- hle/service/spl/module.h
hle/service/spl/spl.cpp
hle/service/spl/spl.h
+ hle/service/spl/spl_module.cpp
+ hle/service/spl/spl_module.h
+ hle/service/spl/spl_results.h
+ hle/service/spl/spl_types.h
hle/service/ssl/ssl.cpp
hle/service/ssl/ssl.h
hle/service/time/clock_types.h
hle/service/time/ephemeral_network_system_clock_context_writer.h
hle/service/time/ephemeral_network_system_clock_core.h
hle/service/time/errors.h
- hle/service/time/interface.cpp
- hle/service/time/interface.h
hle/service/time/local_system_clock_context_writer.h
hle/service/time/network_system_clock_context_writer.h
hle/service/time/standard_local_system_clock_core.h
@@ -575,6 +578,8 @@ add_library(core STATIC
hle/service/time/tick_based_steady_clock_core.h
hle/service/time/time.cpp
hle/service/time/time.h
+ hle/service/time/time_interface.cpp
+ hle/service/time/time_interface.h
hle/service/time/time_manager.cpp
hle/service/time/time_manager.h
hle/service/time/time_sharedmemory.cpp
@@ -651,24 +656,19 @@ endif()
if (MSVC)
target_compile_options(core PRIVATE
- /we4018 # 'expression' : signed/unsigned mismatch
- /we4244 # 'argument' : conversion from 'type1' to 'type2', possible loss of data (floating-point)
- /we4245 # 'conversion' : conversion from 'type1' to 'type2', signed/unsigned mismatch
+ /we4242 # 'identifier': conversion from 'type1' to 'type2', possible loss of data
+ /we4244 # 'conversion': conversion from 'type1' to 'type2', possible loss of data
+ /we4245 # 'conversion': conversion from 'type1' to 'type2', signed/unsigned mismatch
/we4254 # 'operator': conversion from 'type1:field_bits' to 'type2:field_bits', possible loss of data
- /we4267 # 'var' : conversion from 'size_t' to 'type', possible loss of data
- /we4305 # 'context' : truncation from 'type1' to 'type2'
/we4456 # Declaration of 'identifier' hides previous local declaration
/we4457 # Declaration of 'identifier' hides function parameter
/we4458 # Declaration of 'identifier' hides class member
/we4459 # Declaration of 'identifier' hides global declaration
- /we4715 # 'function' : not all control paths return a value
)
else()
target_compile_options(core PRIVATE
-Werror=conversion
-Werror=ignored-qualifiers
- -Werror=implicit-fallthrough
- -Werror=sign-compare
-Werror=shadow
$<$<CXX_COMPILER_ID:GNU>:-Werror=class-memaccess>
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
index c8f6dc765..b0d89c539 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
@@ -8,6 +8,7 @@
#include <dynarmic/interface/A32/config.h>
#include <dynarmic/interface/A32/context.h>
#include "common/assert.h"
+#include "common/literals.h"
#include "common/logging/log.h"
#include "common/page_table.h"
#include "common/settings.h"
@@ -22,6 +23,8 @@
namespace Core {
+using namespace Common::Literals;
+
class DynarmicCallbacks32 : public Dynarmic::A32::UserCallbacks {
public:
explicit DynarmicCallbacks32(ARM_Dynarmic_32& parent_)
@@ -143,11 +146,11 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
config.wall_clock_cntpct = uses_wall_clock;
// Code cache size
- config.code_cache_size = 512 * 1024 * 1024;
- config.far_code_offset = 400 * 1024 * 1024;
+ config.code_cache_size = 512_MiB;
+ config.far_code_offset = 400_MiB;
// Safe optimizations
- if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::DebugMode) {
+ if (Settings::values.cpu_debug_mode) {
if (!Settings::values.cpuopt_page_tables) {
config.page_table = nullptr;
}
@@ -180,17 +183,28 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
// Unsafe optimizations
if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::Unsafe) {
config.unsafe_optimizations = true;
- if (Settings::values.cpuopt_unsafe_unfuse_fma.GetValue()) {
+ if (Settings::values.cpuopt_unsafe_unfuse_fma) {
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA;
}
- if (Settings::values.cpuopt_unsafe_reduce_fp_error.GetValue()) {
+ if (Settings::values.cpuopt_unsafe_reduce_fp_error) {
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_ReducedErrorFP;
}
- if (Settings::values.cpuopt_unsafe_inaccurate_nan.GetValue()) {
+ if (Settings::values.cpuopt_unsafe_ignore_standard_fpcr) {
+ config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreStandardFPCRValue;
+ }
+ if (Settings::values.cpuopt_unsafe_inaccurate_nan) {
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
}
}
+ // Curated optimizations
+ if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::Auto) {
+ config.unsafe_optimizations = true;
+ config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA;
+ config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreStandardFPCRValue;
+ config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
+ }
+
return std::make_unique<Dynarmic::A32::Jit>(config);
}
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
index ba524cd05..bf27ffe71 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -7,6 +7,7 @@
#include <dynarmic/interface/A64/a64.h>
#include <dynarmic/interface/A64/config.h>
#include "common/assert.h"
+#include "common/literals.h"
#include "common/logging/log.h"
#include "common/page_table.h"
#include "common/settings.h"
@@ -24,6 +25,7 @@
namespace Core {
using Vector = Dynarmic::A64::Vector;
+using namespace Common::Literals;
class DynarmicCallbacks64 : public Dynarmic::A64::UserCallbacks {
public:
@@ -184,11 +186,11 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
config.wall_clock_cntpct = uses_wall_clock;
// Code cache size
- config.code_cache_size = 512 * 1024 * 1024;
- config.far_code_offset = 400 * 1024 * 1024;
+ config.code_cache_size = 512_MiB;
+ config.far_code_offset = 400_MiB;
// Safe optimizations
- if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::DebugMode) {
+ if (Settings::values.cpu_debug_mode) {
if (!Settings::values.cpuopt_page_tables) {
config.page_table = nullptr;
}
@@ -221,20 +223,28 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
// Unsafe optimizations
if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::Unsafe) {
config.unsafe_optimizations = true;
- if (Settings::values.cpuopt_unsafe_unfuse_fma.GetValue()) {
+ if (Settings::values.cpuopt_unsafe_unfuse_fma) {
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA;
}
- if (Settings::values.cpuopt_unsafe_reduce_fp_error.GetValue()) {
+ if (Settings::values.cpuopt_unsafe_reduce_fp_error) {
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_ReducedErrorFP;
}
- if (Settings::values.cpuopt_unsafe_inaccurate_nan.GetValue()) {
+ if (Settings::values.cpuopt_unsafe_inaccurate_nan) {
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
}
- if (Settings::values.cpuopt_unsafe_fastmem_check.GetValue()) {
+ if (Settings::values.cpuopt_unsafe_fastmem_check) {
config.fastmem_address_space_bits = 64;
}
}
+ // Curated optimizations
+ if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::Auto) {
+ config.unsafe_optimizations = true;
+ config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA;
+ config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
+ config.fastmem_address_space_bits = 64;
+ }
+
return std::make_shared<Dynarmic::A64::Jit>(config);
}
diff --git a/src/core/core.cpp b/src/core/core.cpp
index c5004b7b4..d3e84c4ef 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include <array>
+#include <atomic>
#include <memory>
#include <utility>
@@ -34,9 +35,9 @@
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/physical_core.h"
#include "core/hle/service/am/applets/applets.h"
-#include "core/hle/service/apm/controller.h"
+#include "core/hle/service/apm/apm_controller.h"
#include "core/hle/service/filesystem/filesystem.h"
-#include "core/hle/service/glue/manager.h"
+#include "core/hle/service/glue/glue_manager.h"
#include "core/hle/service/hid/hid.h"
#include "core/hle/service/service.h"
#include "core/hle/service/sm/sm.h"
@@ -215,9 +216,9 @@ struct System::Impl {
}
ResultStatus Load(System& system, Frontend::EmuWindow& emu_window, const std::string& filepath,
- std::size_t program_index) {
+ u64 program_id, std::size_t program_index) {
app_loader = Loader::GetLoader(system, GetGameFileFromPath(virtual_filesystem, filepath),
- program_index);
+ program_id, program_index);
if (!app_loader) {
LOG_CRITICAL(Core, "Failed to obtain loader for {}!", filepath);
@@ -262,17 +263,16 @@ struct System::Impl {
if (Settings::values.gamecard_inserted) {
if (Settings::values.gamecard_current_game) {
fs_controller.SetGameCard(GetGameFileFromPath(virtual_filesystem, filepath));
- } else if (!Settings::values.gamecard_path.empty()) {
- fs_controller.SetGameCard(
- GetGameFileFromPath(virtual_filesystem, Settings::values.gamecard_path));
+ } else if (!Settings::values.gamecard_path.GetValue().empty()) {
+ const auto gamecard_path = Settings::values.gamecard_path.GetValue();
+ fs_controller.SetGameCard(GetGameFileFromPath(virtual_filesystem, gamecard_path));
}
}
- u64 title_id{0};
- if (app_loader->ReadProgramId(title_id) != Loader::ResultStatus::Success) {
+ if (app_loader->ReadProgramId(program_id) != Loader::ResultStatus::Success) {
LOG_ERROR(Core, "Failed to find title id for ROM (Error {})", load_result);
}
- perf_stats = std::make_unique<PerfStats>(title_id);
+ perf_stats = std::make_unique<PerfStats>(program_id);
// Reset counters and set time origin to current frame
GetAndResetPerfStats();
perf_stats->BeginSystemFrame();
@@ -377,7 +377,7 @@ struct System::Impl {
std::unique_ptr<Core::DeviceMemory> device_memory;
Core::Memory::Memory memory;
CpuManager cpu_manager;
- bool is_powered_on = false;
+ std::atomic_bool is_powered_on{};
bool exit_lock = false;
Reporter reporter;
@@ -411,7 +411,7 @@ struct System::Impl {
std::string status_details = "";
std::unique_ptr<Core::PerfStats> perf_stats;
- Core::FrameLimiter frame_limiter;
+ Core::SpeedLimiter speed_limiter;
bool is_multicore{};
bool is_async_gpu{};
@@ -458,12 +458,12 @@ void System::Shutdown() {
}
System::ResultStatus System::Load(Frontend::EmuWindow& emu_window, const std::string& filepath,
- std::size_t program_index) {
- return impl->Load(*this, emu_window, filepath, program_index);
+ u64 program_id, std::size_t program_index) {
+ return impl->Load(*this, emu_window, filepath, program_id, program_index);
}
bool System::IsPoweredOn() const {
- return impl->is_powered_on;
+ return impl->is_powered_on.load(std::memory_order::relaxed);
}
void System::PrepareReschedule() {
@@ -606,12 +606,12 @@ const Core::PerfStats& System::GetPerfStats() const {
return *impl->perf_stats;
}
-Core::FrameLimiter& System::FrameLimiter() {
- return impl->frame_limiter;
+Core::SpeedLimiter& System::SpeedLimiter() {
+ return impl->speed_limiter;
}
-const Core::FrameLimiter& System::FrameLimiter() const {
- return impl->frame_limiter;
+const Core::SpeedLimiter& System::SpeedLimiter() const {
+ return impl->speed_limiter;
}
Loader::ResultStatus System::GetGameName(std::string& out) const {
diff --git a/src/core/core.h b/src/core/core.h
index 8b93ba998..ea143043c 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -94,7 +94,7 @@ class ARM_Interface;
class CpuManager;
class DeviceMemory;
class ExclusiveMonitor;
-class FrameLimiter;
+class SpeedLimiter;
class PerfStats;
class Reporter;
class TelemetrySession;
@@ -175,7 +175,7 @@ public:
* @returns ResultStatus code, indicating if the operation succeeded.
*/
[[nodiscard]] ResultStatus Load(Frontend::EmuWindow& emu_window, const std::string& filepath,
- std::size_t program_index = 0);
+ u64 program_id = 0, std::size_t program_index = 0);
/**
* Indicates if the emulated system is powered on (all subsystems initialized and able to run an
@@ -292,11 +292,11 @@ public:
/// Provides a constant reference to the internal PerfStats instance.
[[nodiscard]] const Core::PerfStats& GetPerfStats() const;
- /// Provides a reference to the frame limiter;
- [[nodiscard]] Core::FrameLimiter& FrameLimiter();
+    /// Provides a reference to the speed limiter.
+ [[nodiscard]] Core::SpeedLimiter& SpeedLimiter();
- /// Provides a constant referent to the frame limiter
- [[nodiscard]] const Core::FrameLimiter& FrameLimiter() const;
+ /// Provides a constant reference to the speed limiter
+ [[nodiscard]] const Core::SpeedLimiter& SpeedLimiter() const;
/// Gets the name of the current game
[[nodiscard]] Loader::ResultStatus GetGameName(std::string& out) const;
diff --git a/src/core/crypto/key_manager.cpp b/src/core/crypto/key_manager.cpp
index fb451a423..a98daed89 100644
--- a/src/core/crypto/key_manager.cpp
+++ b/src/core/crypto/key_manager.cpp
@@ -835,7 +835,7 @@ void KeyManager::SetKey(S128KeyType id, Key128 key, u64 field1, u64 field2) {
"key_area_key_ocean_{:02X}",
"key_area_key_system_{:02X}",
};
- WriteKeyToFile(category, fmt::format(kak_names.at(field2), field1), key);
+ WriteKeyToFile(category, fmt::format(fmt::runtime(kak_names.at(field2)), field1), key);
} else if (id == S128KeyType::Master) {
WriteKeyToFile(category, fmt::format("master_key_{:02X}", field1), key);
} else if (id == S128KeyType::Package1) {
diff --git a/src/core/file_sys/card_image.cpp b/src/core/file_sys/card_image.cpp
index db2f6a955..755d3303e 100644
--- a/src/core/file_sys/card_image.cpp
+++ b/src/core/file_sys/card_image.cpp
@@ -29,7 +29,7 @@ constexpr std::array partition_names{
"logo",
};
-XCI::XCI(VirtualFile file_, std::size_t program_index)
+XCI::XCI(VirtualFile file_, u64 program_id, size_t program_index)
: file(std::move(file_)), program_nca_status{Loader::ResultStatus::ErrorXCIMissingProgramNCA},
partitions(partition_names.size()),
partitions_raw(partition_names.size()), keys{Core::Crypto::KeyManager::Instance()} {
@@ -63,12 +63,12 @@ XCI::XCI(VirtualFile file_, std::size_t program_index)
secure_partition = std::make_shared<NSP>(
main_hfs.GetFile(partition_names[static_cast<std::size_t>(XCIPartition::Secure)]),
- program_index);
+ program_id, program_index);
ncas = secure_partition->GetNCAsCollapsed();
program =
secure_partition->GetNCA(secure_partition->GetProgramTitleID(), ContentRecordType::Program);
- program_nca_status = secure_partition->GetProgramStatus(secure_partition->GetProgramTitleID());
+ program_nca_status = secure_partition->GetProgramStatus();
if (program_nca_status == Loader::ResultStatus::ErrorNSPMissingProgramNCA) {
program_nca_status = Loader::ResultStatus::ErrorXCIMissingProgramNCA;
}
@@ -174,6 +174,10 @@ u64 XCI::GetProgramTitleID() const {
return secure_partition->GetProgramTitleID();
}
+std::vector<u64> XCI::GetProgramTitleIDs() const {
+ return secure_partition->GetProgramTitleIDs();
+}
+
u32 XCI::GetSystemUpdateVersion() {
const auto update = GetPartition(XCIPartition::Update);
if (update == nullptr) {
@@ -229,9 +233,11 @@ const std::vector<std::shared_ptr<NCA>>& XCI::GetNCAs() const {
}
std::shared_ptr<NCA> XCI::GetNCAByType(NCAContentType type) const {
- const auto iter =
- std::find_if(ncas.begin(), ncas.end(),
- [type](const std::shared_ptr<NCA>& nca) { return nca->GetType() == type; });
+ const auto program_id = secure_partition->GetProgramTitleID();
+ const auto iter = std::find_if(
+ ncas.begin(), ncas.end(), [this, type, program_id](const std::shared_ptr<NCA>& nca) {
+ return nca->GetType() == type && nca->GetTitleId() == program_id;
+ });
return iter == ncas.end() ? nullptr : *iter;
}
diff --git a/src/core/file_sys/card_image.h b/src/core/file_sys/card_image.h
index 4960e90fe..0fd9fa87c 100644
--- a/src/core/file_sys/card_image.h
+++ b/src/core/file_sys/card_image.h
@@ -78,7 +78,7 @@ enum class XCIPartition : u8 { Update, Normal, Secure, Logo };
class XCI : public ReadOnlyVfsDirectory {
public:
- explicit XCI(VirtualFile file, std::size_t program_index = 0);
+ explicit XCI(VirtualFile file, u64 program_id = 0, size_t program_index = 0);
~XCI() override;
Loader::ResultStatus GetStatus() const;
@@ -104,6 +104,7 @@ public:
VirtualFile GetLogoPartitionRaw() const;
u64 GetProgramTitleID() const;
+ std::vector<u64> GetProgramTitleIDs() const;
u32 GetSystemUpdateVersion();
u64 GetSystemUpdateTitleID() const;
diff --git a/src/core/file_sys/content_archive.cpp b/src/core/file_sys/content_archive.cpp
index 24eff210f..7019a7a68 100644
--- a/src/core/file_sys/content_archive.cpp
+++ b/src/core/file_sys/content_archive.cpp
@@ -5,7 +5,6 @@
#include <algorithm>
#include <cstring>
#include <optional>
-#include <ranges>
#include <utility>
#include "common/logging/log.h"
diff --git a/src/core/file_sys/patch_manager.cpp b/src/core/file_sys/patch_manager.cpp
index 53b8b7ca0..7c0950bb0 100644
--- a/src/core/file_sys/patch_manager.cpp
+++ b/src/core/file_sys/patch_manager.cpp
@@ -345,8 +345,10 @@ std::vector<Core::Memory::CheatEntry> PatchManager::CreateCheatList(
static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType type,
const Service::FileSystem::FileSystemController& fs_controller) {
const auto load_dir = fs_controller.GetModificationLoadRoot(title_id);
+ const auto sdmc_load_dir = fs_controller.GetSDMCModificationLoadRoot(title_id);
if ((type != ContentRecordType::Program && type != ContentRecordType::Data) ||
- load_dir == nullptr || load_dir->GetSize() <= 0) {
+ ((load_dir == nullptr || load_dir->GetSize() <= 0) &&
+ (sdmc_load_dir == nullptr || sdmc_load_dir->GetSize() <= 0))) {
return;
}
@@ -356,7 +358,10 @@ static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType t
}
const auto& disabled = Settings::values.disabled_addons[title_id];
- auto patch_dirs = load_dir->GetSubdirectories();
+ std::vector<VirtualDir> patch_dirs = load_dir->GetSubdirectories();
+ if (std::find(disabled.cbegin(), disabled.cend(), "SDMC") == disabled.cend()) {
+ patch_dirs.push_back(sdmc_load_dir);
+ }
std::sort(patch_dirs.begin(), patch_dirs.end(),
[](const VirtualDir& l, const VirtualDir& r) { return l->GetName() < r->GetName(); });
@@ -402,7 +407,7 @@ static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType t
}
VirtualFile PatchManager::PatchRomFS(VirtualFile romfs, u64 ivfc_offset, ContentRecordType type,
- VirtualFile update_raw) const {
+ VirtualFile update_raw, bool apply_layeredfs) const {
const auto log_string = fmt::format("Patching RomFS for title_id={:016X}, type={:02X}",
title_id, static_cast<u8>(type));
@@ -442,7 +447,9 @@ VirtualFile PatchManager::PatchRomFS(VirtualFile romfs, u64 ivfc_offset, Content
}
// LayeredFS
- ApplyLayeredFS(romfs, title_id, type, fs_controller);
+ if (apply_layeredfs) {
+ ApplyLayeredFS(romfs, title_id, type, fs_controller);
+ }
return romfs;
}
@@ -524,6 +531,15 @@ PatchManager::PatchVersionNames PatchManager::GetPatchVersionNames(VirtualFile u
}
}
+ // SDMC mod directory (RomFS LayeredFS)
+ const auto sdmc_mod_dir = fs_controller.GetSDMCModificationLoadRoot(title_id);
+ if (sdmc_mod_dir != nullptr && sdmc_mod_dir->GetSize() > 0 &&
+ IsDirValidAndNonEmpty(FindSubdirectoryCaseless(sdmc_mod_dir, "romfs"))) {
+ const auto mod_disabled =
+ std::find(disabled.begin(), disabled.end(), "SDMC") != disabled.end();
+ out.insert_or_assign(mod_disabled ? "[D] SDMC" : "SDMC", "LayeredFS");
+ }
+
// DLC
const auto dlc_entries =
content_provider.ListEntriesFilter(TitleType::AOC, ContentRecordType::Data);
diff --git a/src/core/file_sys/patch_manager.h b/src/core/file_sys/patch_manager.h
index fb1853035..3be871f35 100644
--- a/src/core/file_sys/patch_manager.h
+++ b/src/core/file_sys/patch_manager.h
@@ -64,7 +64,8 @@ public:
// - LayeredFS
[[nodiscard]] VirtualFile PatchRomFS(VirtualFile base, u64 ivfc_offset,
ContentRecordType type = ContentRecordType::Program,
- VirtualFile update_raw = nullptr) const;
+ VirtualFile update_raw = nullptr,
+ bool apply_layeredfs = true) const;
// Returns a vector of pairs between patch names and patch versions.
// i.e. Update 3.2.2 will return {"Update", "3.2.2"}
diff --git a/src/core/file_sys/registered_cache.cpp b/src/core/file_sys/registered_cache.cpp
index 066c6789a..7a646b5f1 100644
--- a/src/core/file_sys/registered_cache.cpp
+++ b/src/core/file_sys/registered_cache.cpp
@@ -58,14 +58,17 @@ static bool FollowsNcaIdFormat(std::string_view name) {
static std::string GetRelativePathFromNcaID(const std::array<u8, 16>& nca_id, bool second_hex_upper,
bool within_two_digit, bool cnmt_suffix) {
- if (!within_two_digit)
- return fmt::format(cnmt_suffix ? "{}.cnmt.nca" : "/{}.nca",
- Common::HexToString(nca_id, second_hex_upper));
+ if (!within_two_digit) {
+ const auto format_str = fmt::runtime(cnmt_suffix ? "{}.cnmt.nca" : "/{}.nca");
+ return fmt::format(format_str, Common::HexToString(nca_id, second_hex_upper));
+ }
Core::Crypto::SHA256Hash hash{};
mbedtls_sha256_ret(nca_id.data(), nca_id.size(), hash.data(), 0);
- return fmt::format(cnmt_suffix ? "/000000{:02X}/{}.cnmt.nca" : "/000000{:02X}/{}.nca", hash[0],
- Common::HexToString(nca_id, second_hex_upper));
+
+ const auto format_str =
+ fmt::runtime(cnmt_suffix ? "/000000{:02X}/{}.cnmt.nca" : "/000000{:02X}/{}.nca");
+ return fmt::format(format_str, hash[0], Common::HexToString(nca_id, second_hex_upper));
}
static std::string GetCNMTName(TitleType type, u64 title_id) {
diff --git a/src/core/file_sys/sdmc_factory.cpp b/src/core/file_sys/sdmc_factory.cpp
index cb56d8f2d..e5c72cd4d 100644
--- a/src/core/file_sys/sdmc_factory.cpp
+++ b/src/core/file_sys/sdmc_factory.cpp
@@ -12,23 +12,32 @@ namespace FileSys {
constexpr u64 SDMC_TOTAL_SIZE = 0x10000000000; // 1 TiB
-SDMCFactory::SDMCFactory(VirtualDir dir_)
- : dir(std::move(dir_)), contents(std::make_unique<RegisteredCache>(
- GetOrCreateDirectoryRelative(dir, "/Nintendo/Contents/registered"),
- [](const VirtualFile& file, const NcaID& id) {
- return NAX{file, id}.GetDecrypted();
- })),
+SDMCFactory::SDMCFactory(VirtualDir sd_dir_, VirtualDir sd_mod_dir_)
+ : sd_dir(std::move(sd_dir_)), sd_mod_dir(std::move(sd_mod_dir_)),
+ contents(std::make_unique<RegisteredCache>(
+ GetOrCreateDirectoryRelative(sd_dir, "/Nintendo/Contents/registered"),
+ [](const VirtualFile& file, const NcaID& id) {
+ return NAX{file, id}.GetDecrypted();
+ })),
placeholder(std::make_unique<PlaceholderCache>(
- GetOrCreateDirectoryRelative(dir, "/Nintendo/Contents/placehld"))) {}
+ GetOrCreateDirectoryRelative(sd_dir, "/Nintendo/Contents/placehld"))) {}
SDMCFactory::~SDMCFactory() = default;
ResultVal<VirtualDir> SDMCFactory::Open() const {
- return MakeResult<VirtualDir>(dir);
+ return MakeResult<VirtualDir>(sd_dir);
+}
+
+VirtualDir SDMCFactory::GetSDMCModificationLoadRoot(u64 title_id) const {
+ // LayeredFS doesn't work on updates and title id-less homebrew
+ if (title_id == 0 || (title_id & 0xFFF) == 0x800) {
+ return nullptr;
+ }
+ return GetOrCreateDirectoryRelative(sd_mod_dir, fmt::format("/{:016X}", title_id));
}
VirtualDir SDMCFactory::GetSDMCContentDirectory() const {
- return GetOrCreateDirectoryRelative(dir, "/Nintendo/Contents");
+ return GetOrCreateDirectoryRelative(sd_dir, "/Nintendo/Contents");
}
RegisteredCache* SDMCFactory::GetSDMCContents() const {
@@ -40,11 +49,11 @@ PlaceholderCache* SDMCFactory::GetSDMCPlaceholder() const {
}
VirtualDir SDMCFactory::GetImageDirectory() const {
- return GetOrCreateDirectoryRelative(dir, "/Nintendo/Album");
+ return GetOrCreateDirectoryRelative(sd_dir, "/Nintendo/Album");
}
u64 SDMCFactory::GetSDMCFreeSpace() const {
- return GetSDMCTotalSpace() - dir->GetSize();
+ return GetSDMCTotalSpace() - sd_dir->GetSize();
}
u64 SDMCFactory::GetSDMCTotalSpace() const {
diff --git a/src/core/file_sys/sdmc_factory.h b/src/core/file_sys/sdmc_factory.h
index 2bb92ba93..3a3d11f3a 100644
--- a/src/core/file_sys/sdmc_factory.h
+++ b/src/core/file_sys/sdmc_factory.h
@@ -16,11 +16,12 @@ class PlaceholderCache;
/// File system interface to the SDCard archive
class SDMCFactory {
public:
- explicit SDMCFactory(VirtualDir dir);
+ explicit SDMCFactory(VirtualDir sd_dir_, VirtualDir sd_mod_dir_);
~SDMCFactory();
ResultVal<VirtualDir> Open() const;
+ VirtualDir GetSDMCModificationLoadRoot(u64 title_id) const;
VirtualDir GetSDMCContentDirectory() const;
RegisteredCache* GetSDMCContents() const;
@@ -32,7 +33,8 @@ public:
u64 GetSDMCTotalSpace() const;
private:
- VirtualDir dir;
+ VirtualDir sd_dir;
+ VirtualDir sd_mod_dir;
std::unique_ptr<RegisteredCache> contents;
std::unique_ptr<PlaceholderCache> placeholder;
diff --git a/src/core/file_sys/submission_package.cpp b/src/core/file_sys/submission_package.cpp
index d51d469e3..f192dffa5 100644
--- a/src/core/file_sys/submission_package.cpp
+++ b/src/core/file_sys/submission_package.cpp
@@ -20,8 +20,9 @@
namespace FileSys {
-NSP::NSP(VirtualFile file_, std::size_t program_index_)
- : file(std::move(file_)), program_index(program_index_), status{Loader::ResultStatus::Success},
+NSP::NSP(VirtualFile file_, u64 title_id_, std::size_t program_index_)
+ : file(std::move(file_)), expected_program_id(title_id_),
+ program_index(program_index_), status{Loader::ResultStatus::Success},
pfs(std::make_shared<PartitionFilesystem>(file)), keys{Core::Crypto::KeyManager::Instance()} {
if (pfs->GetStatus() != Loader::ResultStatus::Success) {
status = pfs->GetStatus();
@@ -46,60 +47,59 @@ Loader::ResultStatus NSP::GetStatus() const {
return status;
}
-Loader::ResultStatus NSP::GetProgramStatus(u64 title_id) const {
+Loader::ResultStatus NSP::GetProgramStatus() const {
if (IsExtractedType() && GetExeFS() != nullptr && FileSys::IsDirectoryExeFS(GetExeFS())) {
return Loader::ResultStatus::Success;
}
- const auto iter = program_status.find(title_id);
+ const auto iter = program_status.find(GetProgramTitleID());
if (iter == program_status.end())
return Loader::ResultStatus::ErrorNSPMissingProgramNCA;
return iter->second;
}
-u64 NSP::GetFirstTitleID() const {
- if (IsExtractedType()) {
- return GetProgramTitleID();
- }
-
- if (program_status.empty())
- return 0;
- return program_status.begin()->first;
-}
-
u64 NSP::GetProgramTitleID() const {
if (IsExtractedType()) {
- if (GetExeFS() == nullptr || !IsDirectoryExeFS(GetExeFS())) {
- return 0;
- }
+ return GetExtractedTitleID() + program_index;
+ }
- ProgramMetadata meta;
- if (meta.Load(GetExeFS()->GetFile("main.npdm")) == Loader::ResultStatus::Success) {
- return meta.GetTitleID();
- } else {
- return 0;
+ auto program_id = expected_program_id;
+ if (program_id == 0) {
+ if (!program_status.empty()) {
+ program_id = program_status.begin()->first;
}
}
- const auto out = GetFirstTitleID();
- if ((out & 0x800) == 0)
- return out;
+ program_id = program_id + program_index;
+ if (program_status.find(program_id) != program_status.end()) {
+ return program_id;
+ }
- const auto ids = GetTitleIDs();
+ const auto ids = GetProgramTitleIDs();
const auto iter =
std::find_if(ids.begin(), ids.end(), [](u64 tid) { return (tid & 0x800) == 0; });
- return iter == ids.end() ? out : *iter;
+ return iter == ids.end() ? 0 : *iter;
+}
+
+u64 NSP::GetExtractedTitleID() const {
+ if (GetExeFS() == nullptr || !IsDirectoryExeFS(GetExeFS())) {
+ return 0;
+ }
+
+ ProgramMetadata meta;
+ if (meta.Load(GetExeFS()->GetFile("main.npdm")) == Loader::ResultStatus::Success) {
+ return meta.GetTitleID();
+ } else {
+ return 0;
+ }
}
-std::vector<u64> NSP::GetTitleIDs() const {
+std::vector<u64> NSP::GetProgramTitleIDs() const {
if (IsExtractedType()) {
- return {GetProgramTitleID()};
+ return {GetExtractedTitleID()};
}
- std::vector<u64> out;
- out.reserve(ncas.size());
- for (const auto& kv : ncas)
- out.push_back(kv.first);
+ std::vector<u64> out{program_ids.cbegin(), program_ids.cend()};
return out;
}
@@ -146,7 +146,7 @@ std::shared_ptr<NCA> NSP::GetNCA(u64 title_id, ContentRecordType type, TitleType
if (extracted)
LOG_WARNING(Service_FS, "called on an NSP that is of type extracted.");
- const auto title_id_iter = ncas.find(title_id + program_index);
+ const auto title_id_iter = ncas.find(title_id);
if (title_id_iter == ncas.end())
return nullptr;
@@ -160,7 +160,7 @@ std::shared_ptr<NCA> NSP::GetNCA(u64 title_id, ContentRecordType type, TitleType
VirtualFile NSP::GetNCAFile(u64 title_id, ContentRecordType type, TitleType title_type) const {
if (extracted)
LOG_WARNING(Service_FS, "called on an NSP that is of type extracted.");
- const auto nca = GetNCA(title_id, type);
+ const auto nca = GetNCA(title_id, type, title_type);
if (nca != nullptr)
return nca->GetBaseFile();
return nullptr;
@@ -286,6 +286,7 @@ void NSP::ReadNCAs(const std::vector<VirtualFile>& files) {
if (next_nca->GetType() == NCAContentType::Program) {
program_status[next_nca->GetTitleId()] = next_nca->GetStatus();
+ program_ids.insert(next_nca->GetTitleId() & 0xFFFFFFFFFFFFF000);
}
if (next_nca->GetStatus() != Loader::ResultStatus::Success &&
diff --git a/src/core/file_sys/submission_package.h b/src/core/file_sys/submission_package.h
index ecb3b6f15..030f36c09 100644
--- a/src/core/file_sys/submission_package.h
+++ b/src/core/file_sys/submission_package.h
@@ -6,6 +6,7 @@
#include <map>
#include <memory>
+#include <set>
#include <vector>
#include "common/common_types.h"
#include "core/file_sys/vfs.h"
@@ -27,15 +28,15 @@ enum class ContentRecordType : u8;
class NSP : public ReadOnlyVfsDirectory {
public:
- explicit NSP(VirtualFile file_, std::size_t program_index_ = 0);
+ explicit NSP(VirtualFile file_, u64 title_id = 0, std::size_t program_index_ = 0);
~NSP() override;
Loader::ResultStatus GetStatus() const;
- Loader::ResultStatus GetProgramStatus(u64 title_id) const;
+ Loader::ResultStatus GetProgramStatus() const;
// Should only be used when one title id can be assured.
- u64 GetFirstTitleID() const;
u64 GetProgramTitleID() const;
- std::vector<u64> GetTitleIDs() const;
+ u64 GetExtractedTitleID() const;
+ std::vector<u64> GetProgramTitleIDs() const;
bool IsExtractedType() const;
@@ -69,6 +70,7 @@ private:
VirtualFile file;
+ const u64 expected_program_id;
const std::size_t program_index;
bool extracted = false;
@@ -78,6 +80,7 @@ private:
std::shared_ptr<PartitionFilesystem> pfs;
// Map title id -> {map type -> NCA}
std::map<u64, std::map<std::pair<TitleType, ContentRecordType>, std::shared_ptr<NCA>>> ncas;
+ std::set<u64> program_ids;
std::vector<VirtualFile> ticket_files;
Core::Crypto::KeyManager& keys;
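
As a reading aid for the new expected_program_id and program_ids logic, a self-contained sketch of how a base program id and a program index combine, matching the 0xFFFFFFFFFFFFF000 mask applied in ReadNCAs. The function names and example title ids are illustrative, not yuzu code.

#include <cstddef>
#include <cstdint>

constexpr std::uint64_t ProgramBaseId(std::uint64_t title_id) {
    // Clear the low 12 bits; programs in a multi-program NSP share this base.
    return title_id & 0xFFFFFFFFFFFFF000ULL;
}

constexpr std::uint64_t ProgramIdAtIndex(std::uint64_t base_id, std::size_t program_index) {
    // The program selected by program_index lives at base + index.
    return base_id + program_index;
}

static_assert(ProgramBaseId(0x0100000000001003ULL) == 0x0100000000001000ULL);
static_assert(ProgramIdAtIndex(0x0100000000001000ULL, 3) == 0x0100000000001003ULL);
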
diff --git a/src/core/file_sys/system_archive/system_version.cpp b/src/core/file_sys/system_archive/system_version.cpp
index 54704105b..9b76d007e 100644
--- a/src/core/file_sys/system_archive/system_version.cpp
+++ b/src/core/file_sys/system_archive/system_version.cpp
@@ -4,47 +4,29 @@
#include "core/file_sys/system_archive/system_version.h"
#include "core/file_sys/vfs_vector.h"
+#include "core/hle/api_version.h"
namespace FileSys::SystemArchive {
-namespace SystemVersionData {
-
-// This section should reflect the best system version to describe yuzu's HLE api.
-// TODO(DarkLordZach): Update when HLE gets better.
-
-constexpr u8 VERSION_MAJOR = 11;
-constexpr u8 VERSION_MINOR = 0;
-constexpr u8 VERSION_MICRO = 1;
-
-constexpr u8 REVISION_MAJOR = 1;
-constexpr u8 REVISION_MINOR = 0;
-
-constexpr char PLATFORM_STRING[] = "NX";
-constexpr char VERSION_HASH[] = "69103fcb2004dace877094c2f8c29e6113be5dbf";
-constexpr char DISPLAY_VERSION[] = "11.0.1";
-constexpr char DISPLAY_TITLE[] = "NintendoSDK Firmware for NX 11.0.1-1.0";
-
-} // namespace SystemVersionData
-
std::string GetLongDisplayVersion() {
- return SystemVersionData::DISPLAY_TITLE;
+ return HLE::ApiVersion::DISPLAY_TITLE;
}
VirtualDir SystemVersion() {
VirtualFile file = std::make_shared<VectorVfsFile>(std::vector<u8>(0x100), "file");
- file->WriteObject(SystemVersionData::VERSION_MAJOR, 0);
- file->WriteObject(SystemVersionData::VERSION_MINOR, 1);
- file->WriteObject(SystemVersionData::VERSION_MICRO, 2);
- file->WriteObject(SystemVersionData::REVISION_MAJOR, 4);
- file->WriteObject(SystemVersionData::REVISION_MINOR, 5);
- file->WriteArray(SystemVersionData::PLATFORM_STRING,
- std::min<u64>(sizeof(SystemVersionData::PLATFORM_STRING), 0x20ULL), 0x8);
- file->WriteArray(SystemVersionData::VERSION_HASH,
- std::min<u64>(sizeof(SystemVersionData::VERSION_HASH), 0x40ULL), 0x28);
- file->WriteArray(SystemVersionData::DISPLAY_VERSION,
- std::min<u64>(sizeof(SystemVersionData::DISPLAY_VERSION), 0x18ULL), 0x68);
- file->WriteArray(SystemVersionData::DISPLAY_TITLE,
- std::min<u64>(sizeof(SystemVersionData::DISPLAY_TITLE), 0x80ULL), 0x80);
+ file->WriteObject(HLE::ApiVersion::HOS_VERSION_MAJOR, 0);
+ file->WriteObject(HLE::ApiVersion::HOS_VERSION_MINOR, 1);
+ file->WriteObject(HLE::ApiVersion::HOS_VERSION_MICRO, 2);
+ file->WriteObject(HLE::ApiVersion::SDK_REVISION_MAJOR, 4);
+ file->WriteObject(HLE::ApiVersion::SDK_REVISION_MINOR, 5);
+ file->WriteArray(HLE::ApiVersion::PLATFORM_STRING,
+ std::min<u64>(sizeof(HLE::ApiVersion::PLATFORM_STRING), 0x20ULL), 0x8);
+ file->WriteArray(HLE::ApiVersion::VERSION_HASH,
+ std::min<u64>(sizeof(HLE::ApiVersion::VERSION_HASH), 0x40ULL), 0x28);
+ file->WriteArray(HLE::ApiVersion::DISPLAY_VERSION,
+ std::min<u64>(sizeof(HLE::ApiVersion::DISPLAY_VERSION), 0x18ULL), 0x68);
+ file->WriteArray(HLE::ApiVersion::DISPLAY_TITLE,
+ std::min<u64>(sizeof(HLE::ApiVersion::DISPLAY_TITLE), 0x80ULL), 0x80);
return std::make_shared<VectorVfsDirectory>(std::vector<VirtualFile>{file},
std::vector<VirtualDir>{}, "data");
}
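
The offsets in the WriteObject/WriteArray calls above describe a fixed 0x100-byte layout. Spelled out as a packed struct it looks like the following; the struct is illustrative only, derived from those offsets, and is not a type that exists in yuzu.

#include <cstdint>

struct SystemVersionFile {
    std::uint8_t version_major;   // 0x00
    std::uint8_t version_minor;   // 0x01
    std::uint8_t version_micro;   // 0x02
    std::uint8_t pad0;            // 0x03
    std::uint8_t revision_major;  // 0x04
    std::uint8_t revision_minor;  // 0x05
    std::uint8_t pad1[2];         // 0x06
    char platform_string[0x20];   // 0x08
    char version_hash[0x40];      // 0x28
    char display_version[0x18];   // 0x68
    char display_title[0x80];     // 0x80
};
static_assert(sizeof(SystemVersionFile) == 0x100, "SystemVersionFile has incorrect size.");
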
diff --git a/src/core/file_sys/vfs_real.cpp b/src/core/file_sys/vfs_real.cpp
index d0b8fd046..3dad54f49 100644
--- a/src/core/file_sys/vfs_real.cpp
+++ b/src/core/file_sys/vfs_real.cpp
@@ -24,17 +24,12 @@ constexpr FS::FileAccessMode ModeFlagsToFileAccessMode(Mode mode) {
case Mode::Read:
return FS::FileAccessMode::Read;
case Mode::Write:
- return FS::FileAccessMode::Write;
case Mode::ReadWrite:
- return FS::FileAccessMode::ReadWrite;
case Mode::Append:
- return FS::FileAccessMode::Append;
case Mode::ReadAppend:
- return FS::FileAccessMode::ReadAppend;
case Mode::WriteAppend:
- return FS::FileAccessMode::Append;
case Mode::All:
- return FS::FileAccessMode::ReadAppend;
+ return FS::FileAccessMode::ReadWrite;
default:
return {};
}
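
The collapsed switch above maps every writable VFS mode to a single host access mode. A standalone sketch of the same mapping, with enumerators that are illustrative stand-ins for the yuzu types:

#include <cstdint>

enum class VfsMode : std::uint8_t { Read, Write, ReadWrite, Append, ReadAppend, WriteAppend, All };
enum class HostAccess : std::uint8_t { None, Read, ReadWrite };

constexpr HostAccess ToHostAccess(VfsMode mode) {
    switch (mode) {
    case VfsMode::Read:
        return HostAccess::Read;
    // Every mode that can write is opened read-write on the host side.
    case VfsMode::Write:
    case VfsMode::ReadWrite:
    case VfsMode::Append:
    case VfsMode::ReadAppend:
    case VfsMode::WriteAppend:
    case VfsMode::All:
        return HostAccess::ReadWrite;
    default:
        return HostAccess::None;
    }
}

static_assert(ToHostAccess(VfsMode::Append) == HostAccess::ReadWrite);
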
diff --git a/src/core/frontend/applets/profile_select.cpp b/src/core/frontend/applets/profile_select.cpp
index 8d960d1ca..4c58c310f 100644
--- a/src/core/frontend/applets/profile_select.cpp
+++ b/src/core/frontend/applets/profile_select.cpp
@@ -13,7 +13,7 @@ ProfileSelectApplet::~ProfileSelectApplet() = default;
void DefaultProfileSelectApplet::SelectProfile(
std::function<void(std::optional<Common::UUID>)> callback) const {
Service::Account::ProfileManager manager;
- callback(manager.GetUser(Settings::values.current_user).value_or(Common::UUID{}));
+ callback(manager.GetUser(Settings::values.current_user.GetValue()).value_or(Common::UUID{}));
LOG_INFO(Service_ACC, "called, selecting current user instead of prompting...");
}
diff --git a/src/core/frontend/applets/software_keyboard.h b/src/core/frontend/applets/software_keyboard.h
index 506eb35bb..228a548d4 100644
--- a/src/core/frontend/applets/software_keyboard.h
+++ b/src/core/frontend/applets/software_keyboard.h
@@ -9,7 +9,7 @@
#include "common/common_types.h"
-#include "core/hle/service/am/applets/software_keyboard_types.h"
+#include "core/hle/service/am/applets/applet_software_keyboard_types.h"
namespace Core::Frontend {
diff --git a/src/core/frontend/applets/web_browser.h b/src/core/frontend/applets/web_browser.h
index d7bd44c27..915dde677 100644
--- a/src/core/frontend/applets/web_browser.h
+++ b/src/core/frontend/applets/web_browser.h
@@ -7,7 +7,7 @@
#include <functional>
#include <string_view>
-#include "core/hle/service/am/applets/web_types.h"
+#include "core/hle/service/am/applets/applet_web_browser_types.h"
namespace Core::Frontend {
diff --git a/src/core/frontend/input.h b/src/core/frontend/input.h
index 7a047803e..f1747c5b2 100644
--- a/src/core/frontend/input.h
+++ b/src/core/frontend/input.h
@@ -4,6 +4,7 @@
#pragma once
+#include <functional>
#include <memory>
#include <string>
#include <tuple>
diff --git a/src/core/hardware_interrupt_manager.cpp b/src/core/hardware_interrupt_manager.cpp
index 645f26e91..290db505e 100644
--- a/src/core/hardware_interrupt_manager.cpp
+++ b/src/core/hardware_interrupt_manager.cpp
@@ -5,7 +5,7 @@
#include "core/core.h"
#include "core/core_timing.h"
#include "core/hardware_interrupt_manager.h"
-#include "core/hle/service/nvdrv/interface.h"
+#include "core/hle/service/nvdrv/nvdrv_interface.h"
#include "core/hle/service/sm/sm.h"
namespace Core::Hardware {
diff --git a/src/core/hle/api_version.h b/src/core/hle/api_version.h
new file mode 100644
index 000000000..5e10a7ad9
--- /dev/null
+++ b/src/core/hle/api_version.h
@@ -0,0 +1,40 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+
+// This file contains yuzu's HLE API version constants.
+
+namespace HLE::ApiVersion {
+
+// Horizon OS version constants.
+
+constexpr u8 HOS_VERSION_MAJOR = 11;
+constexpr u8 HOS_VERSION_MINOR = 0;
+constexpr u8 HOS_VERSION_MICRO = 1;
+
+// NintendoSDK version constants.
+
+constexpr u8 SDK_REVISION_MAJOR = 1;
+constexpr u8 SDK_REVISION_MINOR = 0;
+
+constexpr char PLATFORM_STRING[] = "NX";
+constexpr char VERSION_HASH[] = "69103fcb2004dace877094c2f8c29e6113be5dbf";
+constexpr char DISPLAY_VERSION[] = "11.0.1";
+constexpr char DISPLAY_TITLE[] = "NintendoSDK Firmware for NX 11.0.1-1.0";
+
+// Atmosphere version constants.
+
+constexpr u8 ATMOSPHERE_RELEASE_VERSION_MAJOR = 0;
+constexpr u8 ATMOSPHERE_RELEASE_VERSION_MINOR = 19;
+constexpr u8 ATMOSPHERE_RELEASE_VERSION_MICRO = 4;
+
+constexpr u32 GetTargetFirmware() {
+ return u32{HOS_VERSION_MAJOR} << 24 | u32{HOS_VERSION_MINOR} << 16 |
+ u32{HOS_VERSION_MICRO} << 8 | 0U;
+}
+
+} // namespace HLE::ApiVersion
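
A quick sanity check of the packed encoding returned by GetTargetFirmware(): major in the top byte, then minor, then micro, with the low byte zero. The expected constant below is computed by hand for HOS 11.0.1 and is purely illustrative.

#include <cstdint>

constexpr std::uint32_t PackFirmware(std::uint8_t major, std::uint8_t minor, std::uint8_t micro) {
    return std::uint32_t{major} << 24 | std::uint32_t{minor} << 16 | std::uint32_t{micro} << 8 | 0U;
}

// 11.0.1 -> 0x0B000100
static_assert(PackFirmware(11, 0, 1) == 0x0B000100U);
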
diff --git a/src/core/hle/ipc_helpers.h b/src/core/hle/ipc_helpers.h
index 61bda3786..ceff2532d 100644
--- a/src/core/hle/ipc_helpers.h
+++ b/src/core/hle/ipc_helpers.h
@@ -345,8 +345,12 @@ public:
explicit RequestParser(u32* command_buffer) : RequestHelperBase(command_buffer) {}
explicit RequestParser(Kernel::HLERequestContext& ctx) : RequestHelperBase(ctx) {
- ASSERT_MSG(ctx.GetDataPayloadOffset(), "context is incomplete");
- Skip(ctx.GetDataPayloadOffset(), false);
+ // TIPC does not have a data payload offset
+ if (!ctx.IsTipc()) {
+ ASSERT_MSG(ctx.GetDataPayloadOffset(), "context is incomplete");
+ Skip(ctx.GetDataPayloadOffset(), false);
+ }
+
// Skip the u64 command id, it's already stored in the context
static constexpr u32 CommandIdSize = 2;
Skip(CommandIdSize, false);
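
For readers unfamiliar with the HIPC/TIPC distinction, a stripped-down sketch of what the constructor change does; the types and word counts are illustrative and mirror the calls above rather than the real RequestParser.

#include <cstddef>

struct ParserState {
    std::size_t index{0};
    void Skip(std::size_t words) { index += words; }
};

void BeginParse(ParserState& state, bool is_tipc, std::size_t data_payload_offset) {
    // TIPC requests carry no data payload offset, so only HIPC skips it.
    if (!is_tipc) {
        state.Skip(data_payload_offset);
    }
    // Both protocols then skip the two-word command id already stored in the context.
    constexpr std::size_t command_id_words = 2;
    state.Skip(command_id_words);
}
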
diff --git a/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp b/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp
index 86472b5ce..6f335c251 100644
--- a/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp
+++ b/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp
@@ -4,7 +4,8 @@
#include <random>
-#include "common/common_sizes.h"
+#include "common/literals.h"
+
#include "core/hle/kernel/board/nintendo/nx/k_system_control.h"
#include "core/hle/kernel/board/nintendo/nx/secure_monitor.h"
#include "core/hle/kernel/k_trace.h"
@@ -25,6 +26,8 @@ constexpr const std::size_t RequiredNonSecureSystemMemorySize =
namespace {
+using namespace Common::Literals;
+
u32 GetMemoryModeForInit() {
return 0x01;
}
@@ -57,11 +60,11 @@ size_t KSystemControl::Init::GetIntendedMemorySize() {
switch (GetMemorySizeForInit()) {
case Smc::MemorySize_4GB:
default: // All invalid modes should go to 4GB.
- return Common::Size_4_GB;
+ return 4_GiB;
case Smc::MemorySize_6GB:
- return Common::Size_6_GB;
+ return 6_GiB;
case Smc::MemorySize_8GB:
- return Common::Size_8_GB;
+ return 8_GiB;
}
}
@@ -79,17 +82,17 @@ std::size_t KSystemControl::Init::GetApplicationPoolSize() {
switch (GetMemoryArrangeForInit()) {
case Smc::MemoryArrangement_4GB:
default:
- return Common::Size_3285_MB;
+ return 3285_MiB;
case Smc::MemoryArrangement_4GBForAppletDev:
- return Common::Size_2048_MB;
+ return 2048_MiB;
case Smc::MemoryArrangement_4GBForSystemDev:
- return Common::Size_3285_MB;
+ return 3285_MiB;
case Smc::MemoryArrangement_6GB:
- return Common::Size_4916_MB;
+ return 4916_MiB;
case Smc::MemoryArrangement_6GBForAppletDev:
- return Common::Size_3285_MB;
+ return 3285_MiB;
case Smc::MemoryArrangement_8GB:
- return Common::Size_4916_MB;
+ return 4916_MiB;
}
}();
@@ -103,22 +106,22 @@ size_t KSystemControl::Init::GetAppletPoolSize() {
switch (GetMemoryArrangeForInit()) {
case Smc::MemoryArrangement_4GB:
default:
- return Common::Size_507_MB;
+ return 507_MiB;
case Smc::MemoryArrangement_4GBForAppletDev:
- return Common::Size_1554_MB;
+ return 1554_MiB;
case Smc::MemoryArrangement_4GBForSystemDev:
- return Common::Size_448_MB;
+ return 448_MiB;
case Smc::MemoryArrangement_6GB:
- return Common::Size_562_MB;
+ return 562_MiB;
case Smc::MemoryArrangement_6GBForAppletDev:
- return Common::Size_2193_MB;
+ return 2193_MiB;
case Smc::MemoryArrangement_8GB:
- return Common::Size_2193_MB;
+ return 2193_MiB;
}
}();
// Return (possibly) adjusted size.
- constexpr size_t ExtraSystemMemoryForAtmosphere = Common::Size_33_MB;
+ constexpr size_t ExtraSystemMemoryForAtmosphere = 33_MiB;
return base_pool_size - ExtraSystemMemoryForAtmosphere - KTraceBufferSize;
}
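
The _KiB/_MiB/_GiB suffixes come from common/literals.h, which is not shown in this diff. A plausible sketch of such binary-size user-defined literals, not the actual yuzu header, looks like this:

#include <cstddef>

namespace Literals {

constexpr std::size_t operator""_KiB(unsigned long long bytes) {
    return static_cast<std::size_t>(bytes * 1024);
}
constexpr std::size_t operator""_MiB(unsigned long long bytes) {
    return static_cast<std::size_t>(bytes * 1024 * 1024);
}
constexpr std::size_t operator""_GiB(unsigned long long bytes) {
    return static_cast<std::size_t>(bytes * 1024 * 1024 * 1024);
}

} // namespace Literals

// Checks assume a 64-bit std::size_t, as on the platforms yuzu targets.
using namespace Literals;
static_assert(33_MiB == 33ULL * 1024 * 1024);
static_assert(4_GiB == 4ULL * 1024 * 1024 * 1024);
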
diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp
index 28ed6265a..ca68fc325 100644
--- a/src/core/hle/kernel/hle_ipc.cpp
+++ b/src/core/hle/kernel/hle_ipc.cpp
@@ -58,6 +58,9 @@ bool SessionRequestManager::HasSessionRequestHandler(const HLERequestContext& co
void SessionRequestHandler::ClientConnected(KServerSession* session) {
session->ClientConnected(shared_from_this());
+
+ // Ensure our server session is tracked globally.
+ kernel.RegisterServerSession(session);
}
void SessionRequestHandler::ClientDisconnected(KServerSession* session) {
diff --git a/src/core/hle/kernel/k_address_space_info.cpp b/src/core/hle/kernel/k_address_space_info.cpp
index c7549f7a2..ca29edc88 100644
--- a/src/core/hle/kernel/k_address_space_info.cpp
+++ b/src/core/hle/kernel/k_address_space_info.cpp
@@ -5,34 +5,37 @@
#include <array>
#include "common/assert.h"
-#include "common/common_sizes.h"
+#include "common/literals.h"
#include "core/hle/kernel/k_address_space_info.h"
namespace Kernel {
namespace {
+using namespace Common::Literals;
+
+constexpr u64 Size_Invalid = UINT64_MAX;
+
// clang-format off
constexpr std::array<KAddressSpaceInfo, 13> AddressSpaceInfos{{
- { .bit_width = 32, .address = Common::Size_2_MB , .size = Common::Size_1_GB - Common::Size_2_MB , .type = KAddressSpaceInfo::Type::MapSmall, },
- { .bit_width = 32, .address = Common::Size_1_GB , .size = Common::Size_4_GB - Common::Size_1_GB , .type = KAddressSpaceInfo::Type::MapLarge, },
- { .bit_width = 32, .address = Common::Size_Invalid, .size = Common::Size_1_GB , .type = KAddressSpaceInfo::Type::Alias, },
- { .bit_width = 32, .address = Common::Size_Invalid, .size = Common::Size_1_GB , .type = KAddressSpaceInfo::Type::Heap, },
- { .bit_width = 36, .address = Common::Size_128_MB , .size = Common::Size_2_GB - Common::Size_128_MB, .type = KAddressSpaceInfo::Type::MapSmall, },
- { .bit_width = 36, .address = Common::Size_2_GB , .size = Common::Size_64_GB - Common::Size_2_GB , .type = KAddressSpaceInfo::Type::MapLarge, },
- { .bit_width = 36, .address = Common::Size_Invalid, .size = Common::Size_6_GB , .type = KAddressSpaceInfo::Type::Heap, },
- { .bit_width = 36, .address = Common::Size_Invalid, .size = Common::Size_6_GB , .type = KAddressSpaceInfo::Type::Alias, },
- { .bit_width = 39, .address = Common::Size_128_MB , .size = Common::Size_512_GB - Common::Size_128_MB, .type = KAddressSpaceInfo::Type::Map39Bit, },
- { .bit_width = 39, .address = Common::Size_Invalid, .size = Common::Size_64_GB , .type = KAddressSpaceInfo::Type::MapSmall },
- { .bit_width = 39, .address = Common::Size_Invalid, .size = Common::Size_6_GB , .type = KAddressSpaceInfo::Type::Heap, },
- { .bit_width = 39, .address = Common::Size_Invalid, .size = Common::Size_64_GB , .type = KAddressSpaceInfo::Type::Alias, },
- { .bit_width = 39, .address = Common::Size_Invalid, .size = Common::Size_2_GB , .type = KAddressSpaceInfo::Type::Stack, },
+ { .bit_width = 32, .address = 2_MiB , .size = 1_GiB - 2_MiB , .type = KAddressSpaceInfo::Type::MapSmall, },
+ { .bit_width = 32, .address = 1_GiB , .size = 4_GiB - 1_GiB , .type = KAddressSpaceInfo::Type::MapLarge, },
+ { .bit_width = 32, .address = Size_Invalid, .size = 1_GiB , .type = KAddressSpaceInfo::Type::Alias, },
+ { .bit_width = 32, .address = Size_Invalid, .size = 1_GiB , .type = KAddressSpaceInfo::Type::Heap, },
+ { .bit_width = 36, .address = 128_MiB , .size = 2_GiB - 128_MiB, .type = KAddressSpaceInfo::Type::MapSmall, },
+ { .bit_width = 36, .address = 2_GiB , .size = 64_GiB - 2_GiB , .type = KAddressSpaceInfo::Type::MapLarge, },
+ { .bit_width = 36, .address = Size_Invalid, .size = 6_GiB , .type = KAddressSpaceInfo::Type::Heap, },
+ { .bit_width = 36, .address = Size_Invalid, .size = 6_GiB , .type = KAddressSpaceInfo::Type::Alias, },
+ { .bit_width = 39, .address = 128_MiB , .size = 512_GiB - 128_MiB, .type = KAddressSpaceInfo::Type::Map39Bit, },
+ { .bit_width = 39, .address = Size_Invalid, .size = 64_GiB , .type = KAddressSpaceInfo::Type::MapSmall },
+ { .bit_width = 39, .address = Size_Invalid, .size = 6_GiB , .type = KAddressSpaceInfo::Type::Heap, },
+ { .bit_width = 39, .address = Size_Invalid, .size = 64_GiB , .type = KAddressSpaceInfo::Type::Alias, },
+ { .bit_width = 39, .address = Size_Invalid, .size = 2_GiB , .type = KAddressSpaceInfo::Type::Stack, },
}};
// clang-format on
constexpr bool IsAllowedIndexForAddress(std::size_t index) {
- return index < AddressSpaceInfos.size() &&
- AddressSpaceInfos[index].address != Common::Size_Invalid;
+ return index < AddressSpaceInfos.size() && AddressSpaceInfos[index].address != Size_Invalid;
}
using IndexArray =
diff --git a/src/core/hle/kernel/k_auto_object.cpp b/src/core/hle/kernel/k_auto_object.cpp
index dbe237f09..c99a9ebb7 100644
--- a/src/core/hle/kernel/k_auto_object.cpp
+++ b/src/core/hle/kernel/k_auto_object.cpp
@@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include "core/hle/kernel/k_auto_object.h"
+#include "core/hle/kernel/kernel.h"
namespace Kernel {
@@ -11,4 +12,12 @@ KAutoObject* KAutoObject::Create(KAutoObject* obj) {
return obj;
}
+void KAutoObject::RegisterWithKernel() {
+ kernel.RegisterKernelObject(this);
+}
+
+void KAutoObject::UnregisterWithKernel() {
+ kernel.UnregisterKernelObject(this);
+}
+
} // namespace Kernel
diff --git a/src/core/hle/kernel/k_auto_object.h b/src/core/hle/kernel/k_auto_object.h
index 88a052f65..e4fcdbc67 100644
--- a/src/core/hle/kernel/k_auto_object.h
+++ b/src/core/hle/kernel/k_auto_object.h
@@ -85,8 +85,12 @@ private:
KERNEL_AUTOOBJECT_TRAITS(KAutoObject, KAutoObject);
public:
- explicit KAutoObject(KernelCore& kernel_) : kernel(kernel_) {}
- virtual ~KAutoObject() = default;
+ explicit KAutoObject(KernelCore& kernel_) : kernel(kernel_) {
+ RegisterWithKernel();
+ }
+ virtual ~KAutoObject() {
+ UnregisterWithKernel();
+ }
static KAutoObject* Create(KAutoObject* ptr);
@@ -166,6 +170,10 @@ public:
}
}
+private:
+ void RegisterWithKernel();
+ void UnregisterWithKernel();
+
protected:
KernelCore& kernel;
std::string name;
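
The constructor/destructor hooks above turn every KAutoObject into a self-registering entry in a global leak tracker. A generic, self-contained sketch of the same pattern; the names are illustrative, not the yuzu API.

#include <cstddef>
#include <mutex>
#include <unordered_set>

class TrackedObject {
public:
    TrackedObject() {
        // Register on construction; anything still registered at shutdown is a leak.
        std::lock_guard lk{registry_mutex};
        registry.insert(this);
    }
    virtual ~TrackedObject() {
        // Unregister on destruction.
        std::lock_guard lk{registry_mutex};
        registry.erase(this);
    }

    static std::size_t LiveCount() {
        std::lock_guard lk{registry_mutex};
        return registry.size();
    }

private:
    static inline std::mutex registry_mutex;
    static inline std::unordered_set<TrackedObject*> registry;
};
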
diff --git a/src/core/hle/kernel/k_memory_layout.board.nintendo_nx.cpp b/src/core/hle/kernel/k_memory_layout.board.nintendo_nx.cpp
index a78551291..af652af58 100644
--- a/src/core/hle/kernel/k_memory_layout.board.nintendo_nx.cpp
+++ b/src/core/hle/kernel/k_memory_layout.board.nintendo_nx.cpp
@@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include "common/alignment.h"
+#include "common/literals.h"
#include "core/hle/kernel/k_memory_layout.h"
#include "core/hle/kernel/k_memory_manager.h"
#include "core/hle/kernel/k_system_control.h"
@@ -12,8 +13,10 @@ namespace Kernel {
namespace {
+using namespace Common::Literals;
+
constexpr size_t CarveoutAlignment = 0x20000;
-constexpr size_t CarveoutSizeMax = (512ULL * 1024 * 1024) - CarveoutAlignment;
+constexpr size_t CarveoutSizeMax = (512_MiB) - CarveoutAlignment;
bool SetupPowerManagementControllerMemoryRegion(KMemoryLayout& memory_layout) {
// Above firmware 2.0.0, the PMC is not mappable.
diff --git a/src/core/hle/kernel/k_memory_layout.h b/src/core/hle/kernel/k_memory_layout.h
index 288642d9a..57ff538cc 100644
--- a/src/core/hle/kernel/k_memory_layout.h
+++ b/src/core/hle/kernel/k_memory_layout.h
@@ -7,8 +7,7 @@
#include <utility>
#include "common/alignment.h"
-#include "common/common_sizes.h"
-#include "common/common_types.h"
+#include "common/literals.h"
#include "core/device_memory.h"
#include "core/hle/kernel/k_memory_region.h"
#include "core/hle/kernel/k_memory_region_type.h"
@@ -16,20 +15,22 @@
namespace Kernel {
-constexpr std::size_t L1BlockSize = Common::Size_1_GB;
-constexpr std::size_t L2BlockSize = Common::Size_2_MB;
+using namespace Common::Literals;
+
+constexpr std::size_t L1BlockSize = 1_GiB;
+constexpr std::size_t L2BlockSize = 2_MiB;
constexpr std::size_t GetMaximumOverheadSize(std::size_t size) {
return (Common::DivideUp(size, L1BlockSize) + Common::DivideUp(size, L2BlockSize)) * PageSize;
}
-constexpr std::size_t MainMemorySize = Common::Size_4_GB;
-constexpr std::size_t MainMemorySizeMax = Common::Size_8_GB;
+constexpr std::size_t MainMemorySize = 4_GiB;
+constexpr std::size_t MainMemorySizeMax = 8_GiB;
-constexpr std::size_t ReservedEarlyDramSize = 0x60000;
+constexpr std::size_t ReservedEarlyDramSize = 384_KiB;
constexpr std::size_t DramPhysicalAddress = 0x80000000;
-constexpr std::size_t KernelAslrAlignment = Common::Size_2_MB;
+constexpr std::size_t KernelAslrAlignment = 2_MiB;
constexpr std::size_t KernelVirtualAddressSpaceWidth = 1ULL << 39;
constexpr std::size_t KernelPhysicalAddressSpaceWidth = 1ULL << 48;
@@ -40,7 +41,7 @@ constexpr std::size_t KernelVirtualAddressSpaceLast = KernelVirtualAddressSpaceE
constexpr std::size_t KernelVirtualAddressSpaceSize =
KernelVirtualAddressSpaceEnd - KernelVirtualAddressSpaceBase;
constexpr std::size_t KernelVirtualAddressCodeBase = KernelVirtualAddressSpaceBase;
-constexpr std::size_t KernelVirtualAddressCodeSize = 0x62000;
+constexpr std::size_t KernelVirtualAddressCodeSize = 392_KiB;
constexpr std::size_t KernelVirtualAddressCodeEnd =
KernelVirtualAddressCodeBase + KernelVirtualAddressCodeSize;
@@ -53,14 +54,14 @@ constexpr std::size_t KernelPhysicalAddressSpaceSize =
constexpr std::size_t KernelPhysicalAddressCodeBase = DramPhysicalAddress + ReservedEarlyDramSize;
constexpr std::size_t KernelPageTableHeapSize = GetMaximumOverheadSize(MainMemorySizeMax);
-constexpr std::size_t KernelInitialPageHeapSize = Common::Size_128_KB;
+constexpr std::size_t KernelInitialPageHeapSize = 128_KiB;
-constexpr std::size_t KernelSlabHeapDataSize = Common::Size_5_MB;
-constexpr std::size_t KernelSlabHeapGapsSize = Common::Size_2_MB - Common::Size_64_KB;
+constexpr std::size_t KernelSlabHeapDataSize = 5_MiB;
+constexpr std::size_t KernelSlabHeapGapsSize = 2_MiB - 64_KiB;
constexpr std::size_t KernelSlabHeapSize = KernelSlabHeapDataSize + KernelSlabHeapGapsSize;
// NOTE: This is calculated from KThread slab counts, assuming KThread size <= 0x860.
-constexpr std::size_t KernelSlabHeapAdditionalSize = 0x68000ULL;
+constexpr std::size_t KernelSlabHeapAdditionalSize = 416_KiB;
constexpr std::size_t KernelResourceSize =
KernelPageTableHeapSize + KernelInitialPageHeapSize + KernelSlabHeapSize;
diff --git a/src/core/hle/kernel/k_page_table.cpp b/src/core/hle/kernel/k_page_table.cpp
index 66d260635..701268545 100644
--- a/src/core/hle/kernel/k_page_table.cpp
+++ b/src/core/hle/kernel/k_page_table.cpp
@@ -4,6 +4,7 @@
#include "common/alignment.h"
#include "common/assert.h"
+#include "common/literals.h"
#include "common/scope_exit.h"
#include "core/core.h"
#include "core/hle/kernel/k_address_space_info.h"
@@ -23,6 +24,8 @@ namespace Kernel {
namespace {
+using namespace Common::Literals;
+
constexpr std::size_t GetAddressSpaceWidthFromType(FileSys::ProgramAddressSpaceType as_type) {
switch (as_type) {
case FileSys::ProgramAddressSpaceType::Is32Bit:
@@ -89,7 +92,7 @@ ResultCode KPageTable::InitializeForProcess(FileSys::ProgramAddressSpaceType as_
}
// Set code regions and determine remaining
- constexpr std::size_t RegionAlignment{2 * 1024 * 1024};
+ constexpr std::size_t RegionAlignment{2_MiB};
VAddr process_code_start{};
VAddr process_code_end{};
std::size_t stack_region_size{};
diff --git a/src/core/hle/kernel/k_process.cpp b/src/core/hle/kernel/k_process.cpp
index d1bd98051..8ead1a769 100644
--- a/src/core/hle/kernel/k_process.cpp
+++ b/src/core/hle/kernel/k_process.cpp
@@ -10,6 +10,7 @@
#include "common/alignment.h"
#include "common/assert.h"
#include "common/logging/log.h"
+#include "common/scope_exit.h"
#include "common/settings.h"
#include "core/core.h"
#include "core/device_memory.h"
@@ -43,6 +44,8 @@ void SetupMainThread(Core::System& system, KProcess& owner_process, u32 priority
ASSERT(owner_process.GetResourceLimit()->Reserve(LimitableResource::Threads, 1));
KThread* thread = KThread::Create(system.Kernel());
+ SCOPE_EXIT({ thread->Close(); });
+
ASSERT(KThread::InitializeUserThread(system, thread, entry_point, 0, stack_top, priority,
owner_process.GetIdealCoreId(), &owner_process)
.IsSuccess());
@@ -162,7 +165,7 @@ void KProcess::DecrementThreadCount() {
ASSERT(num_threads > 0);
if (const auto count = --num_threads; count == 0) {
- UNIMPLEMENTED_MSG("Process termination is not implemented!");
+ LOG_WARNING(Kernel, "Process termination is not fully implemented.");
}
}
@@ -406,6 +409,9 @@ void KProcess::Finalize() {
resource_limit->Close();
}
+ // Finalize the handle table and close any open handles.
+ handle_table.Finalize();
+
// Perform inherited finalization.
KAutoObjectWithSlabHeapAndContainer<KProcess, KSynchronizationObject>::Finalize();
}
diff --git a/src/core/hle/kernel/k_resource_limit.cpp b/src/core/hle/kernel/k_resource_limit.cpp
index da88f35bc..0c4bba66b 100644
--- a/src/core/hle/kernel/k_resource_limit.cpp
+++ b/src/core/hle/kernel/k_resource_limit.cpp
@@ -79,6 +79,7 @@ ResultCode KResourceLimit::SetLimitValue(LimitableResource which, s64 value) {
R_UNLESS(current_values[index] <= value, ResultInvalidState);
limit_values[index] = value;
+ peak_values[index] = current_values[index];
return ResultSuccess;
}
diff --git a/src/core/hle/kernel/k_server_session.cpp b/src/core/hle/kernel/k_server_session.cpp
index 5c3c13ce6..b9f24475c 100644
--- a/src/core/hle/kernel/k_server_session.cpp
+++ b/src/core/hle/kernel/k_server_session.cpp
@@ -28,7 +28,10 @@ namespace Kernel {
KServerSession::KServerSession(KernelCore& kernel_) : KSynchronizationObject{kernel_} {}
-KServerSession::~KServerSession() {}
+KServerSession::~KServerSession() {
+ // Ensure that the global list tracking server sessions does not hold on to a reference.
+ kernel.UnregisterServerSession(this);
+}
void KServerSession::Initialize(KSession* parent_session_, std::string&& name_,
std::shared_ptr<SessionRequestManager> manager_) {
diff --git a/src/core/hle/kernel/k_trace.h b/src/core/hle/kernel/k_trace.h
index 91ebf9ab2..79391bccb 100644
--- a/src/core/hle/kernel/k_trace.h
+++ b/src/core/hle/kernel/k_trace.h
@@ -4,9 +4,13 @@
#pragma once
+#include "common/common_funcs.h"
+
namespace Kernel {
+using namespace Common::Literals;
+
constexpr bool IsKTraceEnabled = false;
-constexpr std::size_t KTraceBufferSize = IsKTraceEnabled ? 16 * 1024 * 1024 : 0;
+constexpr std::size_t KTraceBufferSize = IsKTraceEnabled ? 16_MiB : 0;
} // namespace Kernel
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index 2ceeaeb5f..92fbc5532 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -12,7 +12,6 @@
#include <utility>
#include "common/assert.h"
-#include "common/common_sizes.h"
#include "common/logging/log.h"
#include "common/microprofile.h"
#include "common/thread.h"
@@ -62,6 +61,7 @@ struct KernelCore::Impl {
void Initialize(KernelCore& kernel) {
global_scheduler_context = std::make_unique<Kernel::GlobalSchedulerContext>(kernel);
global_handle_table = std::make_unique<Kernel::KHandleTable>(kernel);
+ global_handle_table->Initialize(KHandleTable::MaxTableSize);
is_phantom_mode_for_singlecore = false;
@@ -91,9 +91,39 @@ struct KernelCore::Impl {
}
void Shutdown() {
+ // Shutdown all processes.
+ if (current_process) {
+ current_process->Finalize();
+ current_process->Close();
+ current_process = nullptr;
+ }
process_list.clear();
- // Ensures all service threads gracefully shutdown
+ // Close all open server ports.
+ std::unordered_set<KServerPort*> server_ports_;
+ {
+ std::lock_guard lk(server_ports_lock);
+ server_ports_ = server_ports;
+ server_ports.clear();
+ }
+ for (auto* server_port : server_ports_) {
+ server_port->Close();
+ }
+ // Close all open server sessions.
+ std::unordered_set<KServerSession*> server_sessions_;
+ {
+ std::lock_guard lk(server_sessions_lock);
+ server_sessions_ = server_sessions;
+ server_sessions.clear();
+ }
+ for (auto* server_session : server_sessions_) {
+ server_session->Close();
+ }
+
+ // Ensure that the object list container is finalized and properly shut down.
+ object_list_container.Finalize();
+
+ // Ensures all service threads gracefully shut down.
service_threads.clear();
next_object_id = 0;
@@ -112,11 +142,7 @@ struct KernelCore::Impl {
cores.clear();
- if (current_process) {
- current_process->Close();
- current_process = nullptr;
- }
-
+ global_handle_table->Finalize();
global_handle_table.reset();
preemption_event = nullptr;
@@ -143,6 +169,16 @@ struct KernelCore::Impl {
// Next host thread ID to use, 0-3 IDs represent core threads, >3 represent others
next_host_thread_id = Core::Hardware::NUM_CPU_CORES;
+
+ // Track kernel objects that were not freed on shutdown
+ {
+ std::lock_guard lk(registered_objects_lock);
+ if (registered_objects.size()) {
+ LOG_WARNING(Kernel, "{} kernel objects were dangling on shutdown!",
+ registered_objects.size());
+ registered_objects.clear();
+ }
+ }
}
void InitializePhysicalCores() {
@@ -180,7 +216,7 @@ struct KernelCore::Impl {
system_resource_limit->Reserve(LimitableResource::PhysicalMemory, kernel_size);
// Reserve secure applet memory, introduced in firmware 5.0.0
- constexpr u64 secure_applet_memory_size{Common::Size_4_MB};
+ constexpr u64 secure_applet_memory_size{4_MiB};
ASSERT(system_resource_limit->Reserve(LimitableResource::PhysicalMemory,
secure_applet_memory_size));
@@ -320,8 +356,8 @@ struct KernelCore::Impl {
const VAddr code_end_virt_addr = KernelVirtualAddressCodeEnd;
// Setup the containing kernel region.
- constexpr size_t KernelRegionSize = Common::Size_1_GB;
- constexpr size_t KernelRegionAlign = Common::Size_1_GB;
+ constexpr size_t KernelRegionSize = 1_GiB;
+ constexpr size_t KernelRegionAlign = 1_GiB;
constexpr VAddr kernel_region_start =
Common::AlignDown(code_start_virt_addr, KernelRegionAlign);
size_t kernel_region_size = KernelRegionSize;
@@ -368,7 +404,7 @@ struct KernelCore::Impl {
// Decide on the actual size for the misc region.
constexpr size_t MiscRegionAlign = KernelAslrAlignment;
- constexpr size_t MiscRegionMinimumSize = Common::Size_32_MB;
+ constexpr size_t MiscRegionMinimumSize = 32_MiB;
const size_t misc_region_size = Common::AlignUp(
std::max(misc_region_needed_size, MiscRegionMinimumSize), MiscRegionAlign);
ASSERT(misc_region_size > 0);
@@ -381,7 +417,7 @@ struct KernelCore::Impl {
misc_region_start, misc_region_size, KMemoryRegionType_KernelMisc));
// Setup the stack region.
- constexpr size_t StackRegionSize = Common::Size_14_MB;
+ constexpr size_t StackRegionSize = 14_MiB;
constexpr size_t StackRegionAlign = KernelAslrAlignment;
const VAddr stack_region_start =
memory_layout.GetVirtualMemoryRegionTree().GetRandomAlignedRegion(
@@ -414,7 +450,7 @@ struct KernelCore::Impl {
slab_region_start, slab_region_size, KMemoryRegionType_KernelSlab));
// Setup the temp region.
- constexpr size_t TempRegionSize = Common::Size_128_MB;
+ constexpr size_t TempRegionSize = 128_MiB;
constexpr size_t TempRegionAlign = KernelAslrAlignment;
const VAddr temp_region_start =
memory_layout.GetVirtualMemoryRegionTree().GetRandomAlignedRegion(
@@ -470,7 +506,7 @@ struct KernelCore::Impl {
// Determine size available for kernel page table heaps, requiring > 8 MB.
const PAddr resource_end_phys_addr = slab_start_phys_addr + resource_region_size;
const size_t page_table_heap_size = resource_end_phys_addr - slab_end_phys_addr;
- ASSERT(page_table_heap_size / Common::Size_4_MB > 2);
+ ASSERT(page_table_heap_size / 4_MiB > 2);
// Insert a physical region for the kernel page table heap region
ASSERT(memory_layout.GetPhysicalMemoryRegionTree().Insert(
@@ -495,7 +531,7 @@ struct KernelCore::Impl {
ASSERT(linear_extents.GetEndAddress() != 0);
// Setup the linear mapping region.
- constexpr size_t LinearRegionAlign = Common::Size_1_GB;
+ constexpr size_t LinearRegionAlign = 1_GiB;
const PAddr aligned_linear_phys_start =
Common::AlignDown(linear_extents.GetAddress(), LinearRegionAlign);
const size_t linear_region_size =
@@ -631,6 +667,21 @@ struct KernelCore::Impl {
user_slab_heap_size);
}
+ KClientPort* CreateNamedServicePort(std::string name) {
+ auto search = service_interface_factory.find(name);
+ if (search == service_interface_factory.end()) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+ KClientPort* port = &search->second(system.ServiceManager(), system);
+ {
+ std::lock_guard lk(server_ports_lock);
+ server_ports.insert(&port->GetParent()->GetServerPort());
+ }
+ return port;
+ }
+
std::atomic<u32> next_object_id{0};
std::atomic<u64> next_kernel_process_id{KProcess::InitialKIPIDMin};
std::atomic<u64> next_user_process_id{KProcess::ProcessIDMin};
@@ -657,6 +708,12 @@ struct KernelCore::Impl {
/// the ConnectToPort SVC.
std::unordered_map<std::string, ServiceInterfaceFactory> service_interface_factory;
NamedPortTable named_ports;
+ std::unordered_set<KServerPort*> server_ports;
+ std::unordered_set<KServerSession*> server_sessions;
+ std::unordered_set<KAutoObject*> registered_objects;
+ std::mutex server_ports_lock;
+ std::mutex server_sessions_lock;
+ std::mutex registered_objects_lock;
std::unique_ptr<Core::ExclusiveMonitor> exclusive_monitor;
std::vector<Kernel::PhysicalCore> cores;
@@ -845,12 +902,27 @@ void KernelCore::RegisterNamedService(std::string name, ServiceInterfaceFactory&
}
KClientPort* KernelCore::CreateNamedServicePort(std::string name) {
- auto search = impl->service_interface_factory.find(name);
- if (search == impl->service_interface_factory.end()) {
- UNIMPLEMENTED();
- return {};
- }
- return &search->second(impl->system.ServiceManager(), impl->system);
+ return impl->CreateNamedServicePort(std::move(name));
+}
+
+void KernelCore::RegisterServerSession(KServerSession* server_session) {
+ std::lock_guard lk(impl->server_sessions_lock);
+ impl->server_sessions.insert(server_session);
+}
+
+void KernelCore::UnregisterServerSession(KServerSession* server_session) {
+ std::lock_guard lk(impl->server_sessions_lock);
+ impl->server_sessions.erase(server_session);
+}
+
+void KernelCore::RegisterKernelObject(KAutoObject* object) {
+ std::lock_guard lk(impl->registered_objects_lock);
+ impl->registered_objects.insert(object);
+}
+
+void KernelCore::UnregisterKernelObject(KAutoObject* object) {
+ std::lock_guard lk(impl->registered_objects_lock);
+ impl->registered_objects.erase(object);
}
bool KernelCore::IsValidNamedPort(NamedPortTable::const_iterator port) const {
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h
index 2d01e1ae0..3a6db0b1c 100644
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -45,6 +45,7 @@ class KPort;
class KProcess;
class KResourceLimit;
class KScheduler;
+class KServerSession;
class KSession;
class KSharedMemory;
class KThread;
@@ -185,6 +186,22 @@ public:
/// Opens a port to a service previously registered with RegisterNamedService.
KClientPort* CreateNamedServicePort(std::string name);
+ /// Registers a server session with the global emulation state, to be freed on shutdown. This is
+ /// necessary because we do not emulate processes for HLE sessions.
+ void RegisterServerSession(KServerSession* server_session);
+
+ /// Unregisters a server session previously registered with RegisterServerSession when it was
+ /// destroyed during the current emulation session.
+ void UnregisterServerSession(KServerSession* server_session);
+
+ /// Registers a kernel object with the global emulation state. This is purely for tracking
+ /// leaks after emulation has been shut down.
+ void RegisterKernelObject(KAutoObject* object);
+
+ /// Unregisters a kernel object previously registered with RegisterKernelObject when it was
+ /// destroyed during the current emulation session.
+ void UnregisterKernelObject(KAutoObject* object);
+
/// Determines whether or not the given port is a valid named port.
bool IsValidNamedPort(NamedPortTable::const_iterator port) const;
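
The Shutdown() changes above rely on a snapshot-and-drain pattern: copy the tracked set under its lock, clear the original, and only then call Close() so destructors can re-enter the Unregister* paths without deadlocking. A generic sketch with placeholder types:

#include <mutex>
#include <unordered_set>

struct Session {
    void Close() { /* release resources; may call back into the registry */ }
};

std::mutex sessions_lock;
std::unordered_set<Session*> sessions;

void DrainSessions() {
    std::unordered_set<Session*> snapshot;
    {
        std::lock_guard lk{sessions_lock};
        snapshot.swap(sessions); // equivalent to the copy-then-clear in the diff
    }
    for (auto* session : snapshot) {
        session->Close();
    }
}
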
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 8339e11a0..2eb532472 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -298,6 +298,7 @@ static ResultCode ConnectToNamedPort(Core::System& system, Handle* out, VAddr po
// Create a session.
KClientSession* session{};
R_TRY(port->CreateSession(std::addressof(session)));
+ port->Close();
// Register the session in the table, close the extra reference.
handle_table.Register(*out, session);
@@ -1439,11 +1440,6 @@ static void ExitProcess(Core::System& system) {
LOG_INFO(Kernel_SVC, "Process {} exiting", current_process->GetProcessID());
ASSERT_MSG(current_process->GetStatus() == ProcessStatus::Running,
"Process has already exited");
-
- current_process->PrepareForTermination();
-
- // Kill the current thread
- system.Kernel().CurrentScheduler()->GetCurrentThread()->Exit();
}
static void ExitProcess32(Core::System& system) {
diff --git a/src/core/hle/service/acc/acc.cpp b/src/core/hle/service/acc/acc.cpp
index d1c1fb71d..2e969f2a8 100644
--- a/src/core/hle/service/acc/acc.cpp
+++ b/src/core/hle/service/acc/acc.cpp
@@ -26,7 +26,7 @@
#include "core/hle/service/acc/errors.h"
#include "core/hle/service/acc/profile_manager.h"
#include "core/hle/service/glue/arp.h"
-#include "core/hle/service/glue/manager.h"
+#include "core/hle/service/glue/glue_manager.h"
#include "core/hle/service/sm/sm.h"
#include "core/loader/loader.h"
diff --git a/src/core/hle/service/acc/acc.h b/src/core/hle/service/acc/acc.h
index 0e3ad8ec6..a83a480cd 100644
--- a/src/core/hle/service/acc/acc.h
+++ b/src/core/hle/service/acc/acc.h
@@ -5,7 +5,7 @@
#pragma once
#include "common/uuid.h"
-#include "core/hle/service/glue/manager.h"
+#include "core/hle/service/glue/glue_manager.h"
#include "core/hle/service/service.h"
namespace Service::Account {
diff --git a/src/core/hle/service/acc/profile_manager.cpp b/src/core/hle/service/acc/profile_manager.cpp
index f72d5d561..24a1c9157 100644
--- a/src/core/hle/service/acc/profile_manager.cpp
+++ b/src/core/hle/service/acc/profile_manager.cpp
@@ -48,7 +48,8 @@ ProfileManager::ProfileManager() {
CreateNewUser(UUID::Generate(), "yuzu");
}
- auto current = std::clamp<int>(Settings::values.current_user, 0, MAX_USERS - 1);
+ auto current =
+ std::clamp<int>(static_cast<s32>(Settings::values.current_user), 0, MAX_USERS - 1);
// If the user index doesn't exist, load the first user and change the active user
if (!UserExistsIndex(current)) {
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp
index b578153d3..a538f82e3 100644
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -24,16 +24,16 @@
#include "core/hle/service/am/am.h"
#include "core/hle/service/am/applet_ae.h"
#include "core/hle/service/am/applet_oe.h"
+#include "core/hle/service/am/applets/applet_profile_select.h"
+#include "core/hle/service/am/applets/applet_software_keyboard.h"
+#include "core/hle/service/am/applets/applet_web_browser.h"
#include "core/hle/service/am/applets/applets.h"
-#include "core/hle/service/am/applets/profile_select.h"
-#include "core/hle/service/am/applets/software_keyboard.h"
-#include "core/hle/service/am/applets/web_browser.h"
#include "core/hle/service/am/idle.h"
#include "core/hle/service/am/omm.h"
#include "core/hle/service/am/spsm.h"
#include "core/hle/service/am/tcap.h"
-#include "core/hle/service/apm/controller.h"
-#include "core/hle/service/apm/interface.h"
+#include "core/hle/service/apm/apm_controller.h"
+#include "core/hle/service/apm/apm_interface.h"
#include "core/hle/service/bcat/backend/backend.h"
#include "core/hle/service/filesystem/filesystem.h"
#include "core/hle/service/ns/ns.h"
@@ -1443,7 +1443,7 @@ void IApplicationFunctions::PopLaunchParameter(Kernel::HLERequestContext& ctx) {
params.is_account_selected = 1;
Account::ProfileManager profile_manager{};
- const auto uuid = profile_manager.GetUser(Settings::values.current_user);
+ const auto uuid = profile_manager.GetUser(static_cast<s32>(Settings::values.current_user));
ASSERT(uuid);
params.current_user = uuid->uuid;
diff --git a/src/core/hle/service/am/applets/applet_controller.cpp b/src/core/hle/service/am/applets/applet_controller.cpp
new file mode 100644
index 000000000..2721679c1
--- /dev/null
+++ b/src/core/hle/service/am/applets/applet_controller.cpp
@@ -0,0 +1,272 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <cstring>
+
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "common/string_util.h"
+#include "core/core.h"
+#include "core/frontend/applets/controller.h"
+#include "core/hle/result.h"
+#include "core/hle/service/am/am.h"
+#include "core/hle/service/am/applets/applet_controller.h"
+#include "core/hle/service/hid/controllers/npad.h"
+
+namespace Service::AM::Applets {
+
+// This error code (0x183ACA) is thrown when the applet fails to initialize.
+[[maybe_unused]] constexpr ResultCode ERR_CONTROLLER_APPLET_3101{ErrorModule::HID, 3101};
+// This error code (0x183CCA) is thrown when the u32 result in ControllerSupportResultInfo is 2.
+[[maybe_unused]] constexpr ResultCode ERR_CONTROLLER_APPLET_3102{ErrorModule::HID, 3102};
+
+static Core::Frontend::ControllerParameters ConvertToFrontendParameters(
+ ControllerSupportArgPrivate private_arg, ControllerSupportArgHeader header, bool enable_text,
+ std::vector<IdentificationColor> identification_colors, std::vector<ExplainText> text) {
+ HID::Controller_NPad::NpadStyleSet npad_style_set;
+ npad_style_set.raw = private_arg.style_set;
+
+ return {
+ .min_players = std::max(s8{1}, header.player_count_min),
+ .max_players = header.player_count_max,
+ .keep_controllers_connected = header.enable_take_over_connection,
+ .enable_single_mode = header.enable_single_mode,
+ .enable_border_color = header.enable_identification_color,
+ .border_colors = std::move(identification_colors),
+ .enable_explain_text = enable_text,
+ .explain_text = std::move(text),
+ .allow_pro_controller = npad_style_set.fullkey == 1,
+ .allow_handheld = npad_style_set.handheld == 1,
+ .allow_dual_joycons = npad_style_set.joycon_dual == 1,
+ .allow_left_joycon = npad_style_set.joycon_left == 1,
+ .allow_right_joycon = npad_style_set.joycon_right == 1,
+ };
+}
+
+Controller::Controller(Core::System& system_, LibraryAppletMode applet_mode_,
+ const Core::Frontend::ControllerApplet& frontend_)
+ : Applet{system_, applet_mode_}, frontend{frontend_}, system{system_} {}
+
+Controller::~Controller() = default;
+
+void Controller::Initialize() {
+ Applet::Initialize();
+
+ LOG_INFO(Service_HID, "Initializing Controller Applet.");
+
+ LOG_DEBUG(Service_HID,
+ "Initializing Applet with common_args: arg_version={}, lib_version={}, "
+ "play_startup_sound={}, size={}, system_tick={}, theme_color={}",
+ common_args.arguments_version, common_args.library_version,
+ common_args.play_startup_sound, common_args.size, common_args.system_tick,
+ common_args.theme_color);
+
+ controller_applet_version = ControllerAppletVersion{common_args.library_version};
+
+ const auto private_arg_storage = broker.PopNormalDataToApplet();
+ ASSERT(private_arg_storage != nullptr);
+
+ const auto& private_arg = private_arg_storage->GetData();
+ ASSERT(private_arg.size() == sizeof(ControllerSupportArgPrivate));
+
+ std::memcpy(&controller_private_arg, private_arg.data(), private_arg.size());
+ ASSERT_MSG(controller_private_arg.arg_private_size == sizeof(ControllerSupportArgPrivate),
+ "Unknown ControllerSupportArgPrivate revision={} with size={}",
+ controller_applet_version, controller_private_arg.arg_private_size);
+
+ // Some games such as Cave Story+ set invalid values for the ControllerSupportMode.
+ // Defer to arg_size to set the ControllerSupportMode.
+ if (controller_private_arg.mode >= ControllerSupportMode::MaxControllerSupportMode) {
+ switch (controller_private_arg.arg_size) {
+ case sizeof(ControllerSupportArgOld):
+ case sizeof(ControllerSupportArgNew):
+ controller_private_arg.mode = ControllerSupportMode::ShowControllerSupport;
+ break;
+ case sizeof(ControllerUpdateFirmwareArg):
+ controller_private_arg.mode = ControllerSupportMode::ShowControllerFirmwareUpdate;
+ break;
+ case sizeof(ControllerKeyRemappingArg):
+ controller_private_arg.mode =
+ ControllerSupportMode::ShowControllerKeyRemappingForSystem;
+ break;
+ default:
+ UNIMPLEMENTED_MSG("Unknown ControllerPrivateArg mode={} with arg_size={}",
+ controller_private_arg.mode, controller_private_arg.arg_size);
+ controller_private_arg.mode = ControllerSupportMode::ShowControllerSupport;
+ break;
+ }
+ }
+
+ // Some games such as Cave Story+ set invalid values for the ControllerSupportCaller.
+ // This is always 0 (Application) except with ShowControllerFirmwareUpdateForSystem.
+ if (controller_private_arg.caller >= ControllerSupportCaller::MaxControllerSupportCaller) {
+ if (controller_private_arg.flag_1 &&
+ (controller_private_arg.mode == ControllerSupportMode::ShowControllerFirmwareUpdate ||
+ controller_private_arg.mode ==
+ ControllerSupportMode::ShowControllerKeyRemappingForSystem)) {
+ controller_private_arg.caller = ControllerSupportCaller::System;
+ } else {
+ controller_private_arg.caller = ControllerSupportCaller::Application;
+ }
+ }
+
+ switch (controller_private_arg.mode) {
+ case ControllerSupportMode::ShowControllerSupport:
+ case ControllerSupportMode::ShowControllerStrapGuide: {
+ const auto user_arg_storage = broker.PopNormalDataToApplet();
+ ASSERT(user_arg_storage != nullptr);
+
+ const auto& user_arg = user_arg_storage->GetData();
+ switch (controller_applet_version) {
+ case ControllerAppletVersion::Version3:
+ case ControllerAppletVersion::Version4:
+ case ControllerAppletVersion::Version5:
+ ASSERT(user_arg.size() == sizeof(ControllerSupportArgOld));
+ std::memcpy(&controller_user_arg_old, user_arg.data(), user_arg.size());
+ break;
+ case ControllerAppletVersion::Version7:
+ case ControllerAppletVersion::Version8:
+ ASSERT(user_arg.size() == sizeof(ControllerSupportArgNew));
+ std::memcpy(&controller_user_arg_new, user_arg.data(), user_arg.size());
+ break;
+ default:
+ UNIMPLEMENTED_MSG("Unknown ControllerSupportArg revision={} with size={}",
+ controller_applet_version, controller_private_arg.arg_size);
+ ASSERT(user_arg.size() >= sizeof(ControllerSupportArgNew));
+ std::memcpy(&controller_user_arg_new, user_arg.data(), sizeof(ControllerSupportArgNew));
+ break;
+ }
+ break;
+ }
+ case ControllerSupportMode::ShowControllerFirmwareUpdate: {
+ const auto update_arg_storage = broker.PopNormalDataToApplet();
+ ASSERT(update_arg_storage != nullptr);
+
+ const auto& update_arg = update_arg_storage->GetData();
+ ASSERT(update_arg.size() == sizeof(ControllerUpdateFirmwareArg));
+
+ std::memcpy(&controller_update_arg, update_arg.data(), update_arg.size());
+ break;
+ }
+ case ControllerSupportMode::ShowControllerKeyRemappingForSystem: {
+ const auto remapping_arg_storage = broker.PopNormalDataToApplet();
+ ASSERT(remapping_arg_storage != nullptr);
+
+ const auto& remapping_arg = remapping_arg_storage->GetData();
+ ASSERT(remapping_arg.size() == sizeof(ControllerKeyRemappingArg));
+
+ std::memcpy(&controller_key_remapping_arg, remapping_arg.data(), remapping_arg.size());
+ break;
+ }
+ default: {
+ UNIMPLEMENTED_MSG("Unimplemented ControllerSupportMode={}", controller_private_arg.mode);
+ break;
+ }
+ }
+}
+
+bool Controller::TransactionComplete() const {
+ return complete;
+}
+
+ResultCode Controller::GetStatus() const {
+ return status;
+}
+
+void Controller::ExecuteInteractive() {
+ UNREACHABLE_MSG("Attempted to call interactive execution on non-interactive applet.");
+}
+
+void Controller::Execute() {
+ switch (controller_private_arg.mode) {
+ case ControllerSupportMode::ShowControllerSupport: {
+ const auto parameters = [this] {
+ switch (controller_applet_version) {
+ case ControllerAppletVersion::Version3:
+ case ControllerAppletVersion::Version4:
+ case ControllerAppletVersion::Version5:
+ return ConvertToFrontendParameters(
+ controller_private_arg, controller_user_arg_old.header,
+ controller_user_arg_old.enable_explain_text,
+ std::vector<IdentificationColor>(
+ controller_user_arg_old.identification_colors.begin(),
+ controller_user_arg_old.identification_colors.end()),
+ std::vector<ExplainText>(controller_user_arg_old.explain_text.begin(),
+ controller_user_arg_old.explain_text.end()));
+ case ControllerAppletVersion::Version7:
+ case ControllerAppletVersion::Version8:
+ default:
+ return ConvertToFrontendParameters(
+ controller_private_arg, controller_user_arg_new.header,
+ controller_user_arg_new.enable_explain_text,
+ std::vector<IdentificationColor>(
+ controller_user_arg_new.identification_colors.begin(),
+ controller_user_arg_new.identification_colors.end()),
+ std::vector<ExplainText>(controller_user_arg_new.explain_text.begin(),
+ controller_user_arg_new.explain_text.end()));
+ }
+ }();
+
+ is_single_mode = parameters.enable_single_mode;
+
+ LOG_DEBUG(Service_HID,
+ "Controller Parameters: min_players={}, max_players={}, "
+ "keep_controllers_connected={}, enable_single_mode={}, enable_border_color={}, "
+ "enable_explain_text={}, allow_pro_controller={}, allow_handheld={}, "
+ "allow_dual_joycons={}, allow_left_joycon={}, allow_right_joycon={}",
+ parameters.min_players, parameters.max_players,
+ parameters.keep_controllers_connected, parameters.enable_single_mode,
+ parameters.enable_border_color, parameters.enable_explain_text,
+ parameters.allow_pro_controller, parameters.allow_handheld,
+ parameters.allow_dual_joycons, parameters.allow_left_joycon,
+ parameters.allow_right_joycon);
+
+ frontend.ReconfigureControllers([this] { ConfigurationComplete(); }, parameters);
+ break;
+ }
+ case ControllerSupportMode::ShowControllerStrapGuide:
+ case ControllerSupportMode::ShowControllerFirmwareUpdate:
+ case ControllerSupportMode::ShowControllerKeyRemappingForSystem:
+ UNIMPLEMENTED_MSG("ControllerSupportMode={} is not implemented",
+ controller_private_arg.mode);
+ ConfigurationComplete();
+ break;
+ default: {
+ ConfigurationComplete();
+ break;
+ }
+ }
+}
+
+void Controller::ConfigurationComplete() {
+ ControllerSupportResultInfo result_info{};
+
+ const auto& players = Settings::values.players.GetValue();
+
+ // If enable_single_mode is enabled, player_count is 1 regardless of any other parameters.
+ // Otherwise, only count connected players from P1-P8.
+ result_info.player_count =
+ is_single_mode
+ ? 1
+ : static_cast<s8>(std::count_if(players.begin(), players.end() - 2,
+ [](const auto& player) { return player.connected; }));
+
+ result_info.selected_id = HID::Controller_NPad::IndexToNPad(std::distance(
+ players.begin(), std::find_if(players.begin(), players.end(),
+ [](const auto& player) { return player.connected; })));
+
+ result_info.result = 0;
+
+ LOG_DEBUG(Service_HID, "Result Info: player_count={}, selected_id={}, result={}",
+ result_info.player_count, result_info.selected_id, result_info.result);
+
+ complete = true;
+ out_data = std::vector<u8>(sizeof(ControllerSupportResultInfo));
+ std::memcpy(out_data.data(), &result_info, out_data.size());
+ broker.PushNormalDataFromApplet(std::make_shared<IStorage>(system, std::move(out_data)));
+ broker.SignalStateChanged();
+}
+
+} // namespace Service::AM::Applets
diff --git a/src/core/hle/service/am/applets/applet_controller.h b/src/core/hle/service/am/applets/applet_controller.h
new file mode 100644
index 000000000..0a34c4fc0
--- /dev/null
+++ b/src/core/hle/service/am/applets/applet_controller.h
@@ -0,0 +1,148 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <vector>
+
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+#include "core/hle/result.h"
+#include "core/hle/service/am/applets/applets.h"
+
+namespace Core {
+class System;
+}
+
+namespace Service::AM::Applets {
+
+using IdentificationColor = std::array<u8, 4>;
+using ExplainText = std::array<char, 0x81>;
+
+enum class ControllerAppletVersion : u32_le {
+ Version3 = 0x3, // 1.0.0 - 2.3.0
+ Version4 = 0x4, // 3.0.0 - 5.1.0
+ Version5 = 0x5, // 6.0.0 - 7.0.1
+ Version7 = 0x7, // 8.0.0 - 10.2.0
+ Version8 = 0x8, // 11.0.0+
+};
+
+enum class ControllerSupportMode : u8 {
+ ShowControllerSupport,
+ ShowControllerStrapGuide,
+ ShowControllerFirmwareUpdate,
+ ShowControllerKeyRemappingForSystem,
+
+ MaxControllerSupportMode,
+};
+
+enum class ControllerSupportCaller : u8 {
+ Application,
+ System,
+
+ MaxControllerSupportCaller,
+};
+
+struct ControllerSupportArgPrivate {
+ u32 arg_private_size{};
+ u32 arg_size{};
+ bool flag_0{};
+ bool flag_1{};
+ ControllerSupportMode mode{};
+ ControllerSupportCaller caller{};
+ u32 style_set{};
+ u32 joy_hold_type{};
+};
+static_assert(sizeof(ControllerSupportArgPrivate) == 0x14,
+ "ControllerSupportArgPrivate has incorrect size.");
+
+struct ControllerSupportArgHeader {
+ s8 player_count_min{};
+ s8 player_count_max{};
+ bool enable_take_over_connection{};
+ bool enable_left_justify{};
+ bool enable_permit_joy_dual{};
+ bool enable_single_mode{};
+ bool enable_identification_color{};
+};
+static_assert(sizeof(ControllerSupportArgHeader) == 0x7,
+ "ControllerSupportArgHeader has incorrect size.");
+
+// LibraryAppletVersion 0x3, 0x4, 0x5
+struct ControllerSupportArgOld {
+ ControllerSupportArgHeader header{};
+ std::array<IdentificationColor, 4> identification_colors{};
+ bool enable_explain_text{};
+ std::array<ExplainText, 4> explain_text{};
+};
+static_assert(sizeof(ControllerSupportArgOld) == 0x21C,
+ "ControllerSupportArgOld has incorrect size.");
+
+// LibraryAppletVersion 0x7, 0x8
+struct ControllerSupportArgNew {
+ ControllerSupportArgHeader header{};
+ std::array<IdentificationColor, 8> identification_colors{};
+ bool enable_explain_text{};
+ std::array<ExplainText, 8> explain_text{};
+};
+static_assert(sizeof(ControllerSupportArgNew) == 0x430,
+ "ControllerSupportArgNew has incorrect size.");
+
+struct ControllerUpdateFirmwareArg {
+ bool enable_force_update{};
+ INSERT_PADDING_BYTES(3);
+};
+static_assert(sizeof(ControllerUpdateFirmwareArg) == 0x4,
+ "ControllerUpdateFirmwareArg has incorrect size.");
+
+struct ControllerKeyRemappingArg {
+ u64 unknown{};
+ u32 unknown_2{};
+ INSERT_PADDING_WORDS(1);
+};
+static_assert(sizeof(ControllerKeyRemappingArg) == 0x10,
+ "ControllerKeyRemappingArg has incorrect size.");
+
+struct ControllerSupportResultInfo {
+ s8 player_count{};
+ INSERT_PADDING_BYTES(3);
+ u32 selected_id{};
+ u32 result{};
+};
+static_assert(sizeof(ControllerSupportResultInfo) == 0xC,
+ "ControllerSupportResultInfo has incorrect size.");
+
+class Controller final : public Applet {
+public:
+ explicit Controller(Core::System& system_, LibraryAppletMode applet_mode_,
+ const Core::Frontend::ControllerApplet& frontend_);
+ ~Controller() override;
+
+ void Initialize() override;
+
+ bool TransactionComplete() const override;
+ ResultCode GetStatus() const override;
+ void ExecuteInteractive() override;
+ void Execute() override;
+
+ void ConfigurationComplete();
+
+private:
+ const Core::Frontend::ControllerApplet& frontend;
+ Core::System& system;
+
+ ControllerAppletVersion controller_applet_version;
+ ControllerSupportArgPrivate controller_private_arg;
+ ControllerSupportArgOld controller_user_arg_old;
+ ControllerSupportArgNew controller_user_arg_new;
+ ControllerUpdateFirmwareArg controller_update_arg;
+ ControllerKeyRemappingArg controller_key_remapping_arg;
+ bool complete{false};
+ ResultCode status{ResultSuccess};
+ bool is_single_mode{false};
+ std::vector<u8> out_data;
+};
+
+} // namespace Service::AM::Applets
diff --git a/src/core/hle/service/am/applets/applet_error.cpp b/src/core/hle/service/am/applets/applet_error.cpp
new file mode 100644
index 000000000..ef6854d62
--- /dev/null
+++ b/src/core/hle/service/am/applets/applet_error.cpp
@@ -0,0 +1,194 @@
+// Copyright 2019 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <array>
+#include <cstring>
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "common/string_util.h"
+#include "core/core.h"
+#include "core/frontend/applets/error.h"
+#include "core/hle/kernel/k_process.h"
+#include "core/hle/service/am/am.h"
+#include "core/hle/service/am/applets/applet_error.h"
+#include "core/reporter.h"
+
+namespace Service::AM::Applets {
+
+#pragma pack(push, 4)
+struct ShowError {
+ u8 mode;
+ bool jump;
+ INSERT_PADDING_BYTES_NOINIT(4);
+ bool use_64bit_error_code;
+ INSERT_PADDING_BYTES_NOINIT(1);
+ u64 error_code_64;
+ u32 error_code_32;
+};
+static_assert(sizeof(ShowError) == 0x14, "ShowError has incorrect size.");
+#pragma pack(pop)
+
+struct ShowErrorRecord {
+ u8 mode;
+ bool jump;
+ INSERT_PADDING_BYTES_NOINIT(6);
+ u64 error_code_64;
+ u64 posix_time;
+};
+static_assert(sizeof(ShowErrorRecord) == 0x18, "ShowErrorRecord has incorrect size.");
+
+struct SystemErrorArg {
+ u8 mode;
+ bool jump;
+ INSERT_PADDING_BYTES_NOINIT(6);
+ u64 error_code_64;
+ std::array<char, 8> language_code;
+ std::array<char, 0x800> main_text;
+ std::array<char, 0x800> detail_text;
+};
+static_assert(sizeof(SystemErrorArg) == 0x1018, "SystemErrorArg has incorrect size.");
+
+struct ApplicationErrorArg {
+ u8 mode;
+ bool jump;
+ INSERT_PADDING_BYTES_NOINIT(6);
+ u32 error_code;
+ std::array<char, 8> language_code;
+ std::array<char, 0x800> main_text;
+ std::array<char, 0x800> detail_text;
+};
+static_assert(sizeof(ApplicationErrorArg) == 0x1014, "ApplicationErrorArg has incorrect size.");
+
+union Error::ErrorArguments {
+ ShowError error;
+ ShowErrorRecord error_record;
+ SystemErrorArg system_error;
+ ApplicationErrorArg application_error;
+ std::array<u8, 0x1018> raw{};
+};
+
+namespace {
+template <typename T>
+void CopyArgumentData(const std::vector<u8>& data, T& variable) {
+ ASSERT(data.size() >= sizeof(T));
+ std::memcpy(&variable, data.data(), sizeof(T));
+}
+
+ResultCode Decode64BitError(u64 error) {
+ const auto description = (error >> 32) & 0x1FFF;
+    // The low word carries the displayed category, i.e. 2000 + module.
+    auto module = error & 0xFFFFFFFF;
+    if (module >= 2000) {
+        module -= 2000;
+    }
+ module &= 0x1FF;
+ return {static_cast<ErrorModule>(module), static_cast<u32>(description)};
+}
+
+} // Anonymous namespace
+
+Error::Error(Core::System& system_, LibraryAppletMode applet_mode_,
+ const Core::Frontend::ErrorApplet& frontend_)
+ : Applet{system_, applet_mode_}, frontend{frontend_}, system{system_} {}
+
+Error::~Error() = default;
+
+void Error::Initialize() {
+ Applet::Initialize();
+ args = std::make_unique<ErrorArguments>();
+ complete = false;
+
+ const auto storage = broker.PopNormalDataToApplet();
+ ASSERT(storage != nullptr);
+ const auto data = storage->GetData();
+
+ ASSERT(!data.empty());
+ std::memcpy(&mode, data.data(), sizeof(ErrorAppletMode));
+
+ switch (mode) {
+ case ErrorAppletMode::ShowError:
+ CopyArgumentData(data, args->error);
+ if (args->error.use_64bit_error_code) {
+ error_code = Decode64BitError(args->error.error_code_64);
+ } else {
+ error_code = ResultCode(args->error.error_code_32);
+ }
+ break;
+ case ErrorAppletMode::ShowSystemError:
+ CopyArgumentData(data, args->system_error);
+ error_code = ResultCode(Decode64BitError(args->system_error.error_code_64));
+ break;
+ case ErrorAppletMode::ShowApplicationError:
+ CopyArgumentData(data, args->application_error);
+ error_code = ResultCode(args->application_error.error_code);
+ break;
+ case ErrorAppletMode::ShowErrorRecord:
+ CopyArgumentData(data, args->error_record);
+ error_code = Decode64BitError(args->error_record.error_code_64);
+ break;
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented LibAppletError mode={:02X}!", mode);
+ }
+}
+
+bool Error::TransactionComplete() const {
+ return complete;
+}
+
+ResultCode Error::GetStatus() const {
+ return ResultSuccess;
+}
+
+void Error::ExecuteInteractive() {
+ UNREACHABLE_MSG("Unexpected interactive applet data!");
+}
+
+void Error::Execute() {
+ if (complete) {
+ return;
+ }
+
+ const auto callback = [this] { DisplayCompleted(); };
+ const auto title_id = system.CurrentProcess()->GetTitleID();
+ const auto& reporter{system.GetReporter()};
+
+ switch (mode) {
+ case ErrorAppletMode::ShowError:
+ reporter.SaveErrorReport(title_id, error_code);
+ frontend.ShowError(error_code, callback);
+ break;
+ case ErrorAppletMode::ShowSystemError:
+ case ErrorAppletMode::ShowApplicationError: {
+ const auto is_system = mode == ErrorAppletMode::ShowSystemError;
+ const auto& main_text =
+ is_system ? args->system_error.main_text : args->application_error.main_text;
+ const auto& detail_text =
+ is_system ? args->system_error.detail_text : args->application_error.detail_text;
+
+ const auto main_text_string =
+ Common::StringFromFixedZeroTerminatedBuffer(main_text.data(), main_text.size());
+ const auto detail_text_string =
+ Common::StringFromFixedZeroTerminatedBuffer(detail_text.data(), detail_text.size());
+
+ reporter.SaveErrorReport(title_id, error_code, main_text_string, detail_text_string);
+ frontend.ShowCustomErrorText(error_code, main_text_string, detail_text_string, callback);
+ break;
+ }
+ case ErrorAppletMode::ShowErrorRecord:
+ reporter.SaveErrorReport(title_id, error_code,
+ fmt::format("{:016X}", args->error_record.posix_time));
+ frontend.ShowErrorWithTimestamp(
+ error_code, std::chrono::seconds{args->error_record.posix_time}, callback);
+ break;
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented LibAppletError mode={:02X}!", mode);
+ DisplayCompleted();
+ }
+}
+
+void Error::DisplayCompleted() {
+ complete = true;
+ broker.PushNormalDataFromApplet(std::make_shared<IStorage>(system, std::vector<u8>{}));
+ broker.SignalStateChanged();
+}
+
+} // namespace Service::AM::Applets
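The 64-bit codes handled by Decode64BitError follow the console's display convention, where the category shown on screen is 2000 plus the module number and the high word carries the description. A standalone decode sketch under that assumption, with hypothetical names:

#include <cstdint>
#include <utility>

// Decode a packed applet error code into {module, description}, assuming the
// low 32 bits hold the displayed category (2000 + module) and the upper bits
// hold the description number.
std::pair<std::uint32_t, std::uint32_t> DecodeAppletError(std::uint64_t error) {
    const auto category = static_cast<std::uint32_t>(error & 0xFFFFFFFF);
    const auto description = static_cast<std::uint32_t>((error >> 32) & 0x1FFF);
    const std::uint32_t module = category >= 2000 ? category - 2000 : category;
    return {module & 0x1FF, description};
}

// Example: a code shown as 2002-4535 packs as (4535ULL << 32) | 2002 and
// decodes to module 2 (FS) with description 4535.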
diff --git a/src/core/hle/service/am/applets/error.h b/src/core/hle/service/am/applets/applet_error.h
index 8aa9046a5..8aa9046a5 100644
--- a/src/core/hle/service/am/applets/error.h
+++ b/src/core/hle/service/am/applets/applet_error.h
diff --git a/src/core/hle/service/am/applets/applet_general_backend.cpp b/src/core/hle/service/am/applets/applet_general_backend.cpp
new file mode 100644
index 000000000..0f413f9a0
--- /dev/null
+++ b/src/core/hle/service/am/applets/applet_general_backend.cpp
@@ -0,0 +1,255 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "common/assert.h"
+#include "common/hex_util.h"
+#include "common/logging/log.h"
+#include "core/core.h"
+#include "core/frontend/applets/general_frontend.h"
+#include "core/hle/kernel/k_process.h"
+#include "core/hle/result.h"
+#include "core/hle/service/am/am.h"
+#include "core/hle/service/am/applets/applet_general_backend.h"
+#include "core/reporter.h"
+
+namespace Service::AM::Applets {
+
+constexpr ResultCode ERROR_INVALID_PIN{ErrorModule::PCTL, 221};
+
+static void LogCurrentStorage(AppletDataBroker& broker, std::string_view prefix) {
+ std::shared_ptr<IStorage> storage = broker.PopNormalDataToApplet();
+ for (; storage != nullptr; storage = broker.PopNormalDataToApplet()) {
+ const auto data = storage->GetData();
+ LOG_INFO(Service_AM,
+ "called (STUBBED), during {} received normal data with size={:08X}, data={}",
+ prefix, data.size(), Common::HexToString(data));
+ }
+
+ storage = broker.PopInteractiveDataToApplet();
+ for (; storage != nullptr; storage = broker.PopInteractiveDataToApplet()) {
+ const auto data = storage->GetData();
+ LOG_INFO(Service_AM,
+ "called (STUBBED), during {} received interactive data with size={:08X}, data={}",
+ prefix, data.size(), Common::HexToString(data));
+ }
+}
+
+Auth::Auth(Core::System& system_, LibraryAppletMode applet_mode_,
+ Core::Frontend::ParentalControlsApplet& frontend_)
+ : Applet{system_, applet_mode_}, frontend{frontend_}, system{system_} {}
+
+Auth::~Auth() = default;
+
+void Auth::Initialize() {
+ Applet::Initialize();
+ complete = false;
+
+ const auto storage = broker.PopNormalDataToApplet();
+ ASSERT(storage != nullptr);
+ const auto data = storage->GetData();
+ ASSERT(data.size() >= 0xC);
+
+ struct Arg {
+ INSERT_PADDING_BYTES(4);
+ AuthAppletType type;
+ u8 arg0;
+ u8 arg1;
+ u8 arg2;
+ INSERT_PADDING_BYTES(1);
+ };
+ static_assert(sizeof(Arg) == 0xC, "Arg (AuthApplet) has incorrect size.");
+
+ Arg arg{};
+ std::memcpy(&arg, data.data(), sizeof(Arg));
+
+ type = arg.type;
+ arg0 = arg.arg0;
+ arg1 = arg.arg1;
+ arg2 = arg.arg2;
+}
+
+bool Auth::TransactionComplete() const {
+ return complete;
+}
+
+ResultCode Auth::GetStatus() const {
+ return successful ? ResultSuccess : ERROR_INVALID_PIN;
+}
+
+void Auth::ExecuteInteractive() {
+ UNREACHABLE_MSG("Unexpected interactive applet data.");
+}
+
+void Auth::Execute() {
+ if (complete) {
+ return;
+ }
+
+ const auto unimplemented_log = [this] {
+ UNIMPLEMENTED_MSG("Unimplemented Auth applet type for type={:08X}, arg0={:02X}, "
+ "arg1={:02X}, arg2={:02X}",
+ type, arg0, arg1, arg2);
+ };
+
+ switch (type) {
+ case AuthAppletType::ShowParentalAuthentication: {
+ const auto callback = [this](bool is_successful) { AuthFinished(is_successful); };
+
+ if (arg0 == 1 && arg1 == 0 && arg2 == 1) {
+ // ShowAuthenticatorForConfiguration
+ frontend.VerifyPINForSettings(callback);
+ } else if (arg1 == 0 && arg2 == 0) {
+ // ShowParentalAuthentication(bool)
+ frontend.VerifyPIN(callback, static_cast<bool>(arg0));
+ } else {
+ unimplemented_log();
+ }
+ break;
+ }
+ case AuthAppletType::RegisterParentalPasscode: {
+ const auto callback = [this] { AuthFinished(true); };
+
+ if (arg0 == 0 && arg1 == 0 && arg2 == 0) {
+ // RegisterParentalPasscode
+ frontend.RegisterPIN(callback);
+ } else {
+ unimplemented_log();
+ }
+ break;
+ }
+ case AuthAppletType::ChangeParentalPasscode: {
+ const auto callback = [this] { AuthFinished(true); };
+
+ if (arg0 == 0 && arg1 == 0 && arg2 == 0) {
+ // ChangeParentalPasscode
+ frontend.ChangePIN(callback);
+ } else {
+ unimplemented_log();
+ }
+ break;
+ }
+ default:
+ unimplemented_log();
+ }
+}
+
+void Auth::AuthFinished(bool is_successful) {
+ successful = is_successful;
+
+ struct Return {
+ ResultCode result_code;
+ };
+ static_assert(sizeof(Return) == 0x4, "Return (AuthApplet) has incorrect size.");
+
+ Return return_{GetStatus()};
+
+ std::vector<u8> out(sizeof(Return));
+ std::memcpy(out.data(), &return_, sizeof(Return));
+
+ broker.PushNormalDataFromApplet(std::make_shared<IStorage>(system, std::move(out)));
+ broker.SignalStateChanged();
+}
+
+PhotoViewer::PhotoViewer(Core::System& system_, LibraryAppletMode applet_mode_,
+ const Core::Frontend::PhotoViewerApplet& frontend_)
+ : Applet{system_, applet_mode_}, frontend{frontend_}, system{system_} {}
+
+PhotoViewer::~PhotoViewer() = default;
+
+void PhotoViewer::Initialize() {
+ Applet::Initialize();
+ complete = false;
+
+ const auto storage = broker.PopNormalDataToApplet();
+ ASSERT(storage != nullptr);
+ const auto data = storage->GetData();
+ ASSERT(!data.empty());
+ mode = static_cast<PhotoViewerAppletMode>(data[0]);
+}
+
+bool PhotoViewer::TransactionComplete() const {
+ return complete;
+}
+
+ResultCode PhotoViewer::GetStatus() const {
+ return ResultSuccess;
+}
+
+void PhotoViewer::ExecuteInteractive() {
+ UNREACHABLE_MSG("Unexpected interactive applet data.");
+}
+
+void PhotoViewer::Execute() {
+    if (complete) {
+        return;
+    }
+
+ const auto callback = [this] { ViewFinished(); };
+ switch (mode) {
+ case PhotoViewerAppletMode::CurrentApp:
+ frontend.ShowPhotosForApplication(system.CurrentProcess()->GetTitleID(), callback);
+ break;
+ case PhotoViewerAppletMode::AllApps:
+ frontend.ShowAllPhotos(callback);
+ break;
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented PhotoViewer applet mode={:02X}!", mode);
+ }
+}
+
+void PhotoViewer::ViewFinished() {
+ broker.PushNormalDataFromApplet(std::make_shared<IStorage>(system, std::vector<u8>{}));
+ broker.SignalStateChanged();
+}
+
+StubApplet::StubApplet(Core::System& system_, AppletId id_, LibraryAppletMode applet_mode_)
+ : Applet{system_, applet_mode_}, id{id_}, system{system_} {}
+
+StubApplet::~StubApplet() = default;
+
+void StubApplet::Initialize() {
+ LOG_WARNING(Service_AM, "called (STUBBED)");
+ Applet::Initialize();
+
+ const auto data = broker.PeekDataToAppletForDebug();
+ system.GetReporter().SaveUnimplementedAppletReport(
+ static_cast<u32>(id), common_args.arguments_version, common_args.library_version,
+ common_args.theme_color, common_args.play_startup_sound, common_args.system_tick,
+ data.normal, data.interactive);
+
+ LogCurrentStorage(broker, "Initialize");
+}
+
+bool StubApplet::TransactionComplete() const {
+ LOG_WARNING(Service_AM, "called (STUBBED)");
+ return true;
+}
+
+ResultCode StubApplet::GetStatus() const {
+ LOG_WARNING(Service_AM, "called (STUBBED)");
+ return ResultSuccess;
+}
+
+void StubApplet::ExecuteInteractive() {
+ LOG_WARNING(Service_AM, "called (STUBBED)");
+ LogCurrentStorage(broker, "ExecuteInteractive");
+
+ broker.PushNormalDataFromApplet(std::make_shared<IStorage>(system, std::vector<u8>(0x1000)));
+ broker.PushInteractiveDataFromApplet(
+ std::make_shared<IStorage>(system, std::vector<u8>(0x1000)));
+ broker.SignalStateChanged();
+}
+
+void StubApplet::Execute() {
+ LOG_WARNING(Service_AM, "called (STUBBED)");
+ LogCurrentStorage(broker, "Execute");
+
+ broker.PushNormalDataFromApplet(std::make_shared<IStorage>(system, std::vector<u8>(0x1000)));
+ broker.PushInteractiveDataFromApplet(
+ std::make_shared<IStorage>(system, std::vector<u8>(0x1000)));
+ broker.SignalStateChanged();
+}
+
+} // namespace Service::AM::Applets
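LogCurrentStorage above drains both broker channels until they return null and hex-dumps whatever the game pushed, which is how StubApplet surfaces data for unimplemented applets. A stripped-down version of that drain-and-dump loop, written against a hypothetical queue type rather than the real AppletDataBroker:

#include <cstdint>
#include <cstdio>
#include <deque>
#include <memory>
#include <string>
#include <vector>

// Hypothetical stand-in for one broker channel: Pop() yields nullptr when empty.
struct ByteQueue {
    std::deque<std::shared_ptr<std::vector<std::uint8_t>>> entries;

    std::shared_ptr<std::vector<std::uint8_t>> Pop() {
        if (entries.empty()) {
            return nullptr;
        }
        auto front = entries.front();
        entries.pop_front();
        return front;
    }
};

std::string HexDump(const std::vector<std::uint8_t>& data) {
    std::string out;
    char byte_str[3];
    for (const std::uint8_t byte : data) {
        std::snprintf(byte_str, sizeof(byte_str), "%02X", static_cast<unsigned>(byte));
        out += byte_str;
    }
    return out;
}

void DrainAndLog(ByteQueue& queue, const char* prefix) {
    for (auto storage = queue.Pop(); storage != nullptr; storage = queue.Pop()) {
        std::printf("%s: size=%08zX data=%s\n", prefix, storage->size(),
                    HexDump(*storage).c_str());
    }
}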
diff --git a/src/core/hle/service/am/applets/general_backend.h b/src/core/hle/service/am/applets/applet_general_backend.h
index 7496ded88..7496ded88 100644
--- a/src/core/hle/service/am/applets/general_backend.h
+++ b/src/core/hle/service/am/applets/applet_general_backend.h
diff --git a/src/core/hle/service/am/applets/applet_profile_select.cpp b/src/core/hle/service/am/applets/applet_profile_select.cpp
new file mode 100644
index 000000000..bdc21778e
--- /dev/null
+++ b/src/core/hle/service/am/applets/applet_profile_select.cpp
@@ -0,0 +1,78 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cstring>
+
+#include "common/assert.h"
+#include "common/string_util.h"
+#include "core/core.h"
+#include "core/frontend/applets/profile_select.h"
+#include "core/hle/service/am/am.h"
+#include "core/hle/service/am/applets/applet_profile_select.h"
+
+namespace Service::AM::Applets {
+
+constexpr ResultCode ERR_USER_CANCELLED_SELECTION{ErrorModule::Account, 1};
+
+ProfileSelect::ProfileSelect(Core::System& system_, LibraryAppletMode applet_mode_,
+ const Core::Frontend::ProfileSelectApplet& frontend_)
+ : Applet{system_, applet_mode_}, frontend{frontend_}, system{system_} {}
+
+ProfileSelect::~ProfileSelect() = default;
+
+void ProfileSelect::Initialize() {
+ complete = false;
+ status = ResultSuccess;
+ final_data.clear();
+
+ Applet::Initialize();
+
+ const auto user_config_storage = broker.PopNormalDataToApplet();
+ ASSERT(user_config_storage != nullptr);
+ const auto& user_config = user_config_storage->GetData();
+
+ ASSERT(user_config.size() >= sizeof(UserSelectionConfig));
+ std::memcpy(&config, user_config.data(), sizeof(UserSelectionConfig));
+}
+
+bool ProfileSelect::TransactionComplete() const {
+ return complete;
+}
+
+ResultCode ProfileSelect::GetStatus() const {
+ return status;
+}
+
+void ProfileSelect::ExecuteInteractive() {
+ UNREACHABLE_MSG("Attempted to call interactive execution on non-interactive applet.");
+}
+
+void ProfileSelect::Execute() {
+ if (complete) {
+ broker.PushNormalDataFromApplet(std::make_shared<IStorage>(system, std::move(final_data)));
+ return;
+ }
+
+ frontend.SelectProfile([this](std::optional<Common::UUID> uuid) { SelectionComplete(uuid); });
+}
+
+void ProfileSelect::SelectionComplete(std::optional<Common::UUID> uuid) {
+ UserSelectionOutput output{};
+
+ if (uuid.has_value() && uuid->uuid != Common::INVALID_UUID) {
+ output.result = 0;
+ output.uuid_selected = uuid->uuid;
+ } else {
+ status = ERR_USER_CANCELLED_SELECTION;
+ output.result = ERR_USER_CANCELLED_SELECTION.raw;
+ output.uuid_selected = Common::INVALID_UUID;
+ }
+
+ final_data = std::vector<u8>(sizeof(UserSelectionOutput));
+ std::memcpy(final_data.data(), &output, final_data.size());
+ broker.PushNormalDataFromApplet(std::make_shared<IStorage>(system, std::move(final_data)));
+ broker.SignalStateChanged();
+}
+
+} // namespace Service::AM::Applets
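Like the other backends here, ProfileSelect never blocks on the UI: Execute hands the frontend a completion callback that captures this, and the transaction only finishes once the frontend invokes it. A compact sketch of that callback shape, with invented frontend and applet names:

#include <cstdint>
#include <functional>
#include <optional>

// Hypothetical frontend: the UI invokes the callback once the user picks an id
// or cancels (empty optional).
struct FakeProfileFrontend {
    void SelectProfile(std::function<void(std::optional<std::uint64_t>)> on_done) const {
        // A real frontend would open a dialog; here we immediately "pick" id 1.
        on_done(std::uint64_t{1});
    }
};

class FakeProfileApplet {
public:
    explicit FakeProfileApplet(const FakeProfileFrontend& frontend_) : frontend{frontend_} {}

    void Execute() {
        // Hand the frontend a callback capturing this; the applet stays "running"
        // until the UI answers.
        frontend.SelectProfile(
            [this](std::optional<std::uint64_t> id) { SelectionComplete(id); });
    }

    bool TransactionComplete() const {
        return complete;
    }

private:
    void SelectionComplete(std::optional<std::uint64_t> id) {
        selected = id;
        complete = true; // the real applet also pushes output data and signals the broker
    }

    const FakeProfileFrontend& frontend;
    std::optional<std::uint64_t> selected;
    bool complete{false};
};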
diff --git a/src/core/hle/service/am/applets/profile_select.h b/src/core/hle/service/am/applets/applet_profile_select.h
index 8fb76e6c4..8fb76e6c4 100644
--- a/src/core/hle/service/am/applets/profile_select.h
+++ b/src/core/hle/service/am/applets/applet_profile_select.h
diff --git a/src/core/hle/service/am/applets/applet_software_keyboard.cpp b/src/core/hle/service/am/applets/applet_software_keyboard.cpp
new file mode 100644
index 000000000..7cae90609
--- /dev/null
+++ b/src/core/hle/service/am/applets/applet_software_keyboard.cpp
@@ -0,0 +1,1082 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/string_util.h"
+#include "core/core.h"
+#include "core/frontend/applets/software_keyboard.h"
+#include "core/hle/service/am/am.h"
+#include "core/hle/service/am/applets/applet_software_keyboard.h"
+
+namespace Service::AM::Applets {
+
+namespace {
+
+// The maximum number of UTF-16 characters that can be input into the swkbd text field.
+constexpr u32 DEFAULT_MAX_TEXT_LENGTH = 500;
+
+constexpr std::size_t REPLY_BASE_SIZE = sizeof(SwkbdState) + sizeof(SwkbdReplyType);
+constexpr std::size_t REPLY_UTF8_SIZE = 0x7D4;
+constexpr std::size_t REPLY_UTF16_SIZE = 0x3EC;
+
+constexpr const char* GetTextCheckResultName(SwkbdTextCheckResult text_check_result) {
+ switch (text_check_result) {
+ case SwkbdTextCheckResult::Success:
+ return "Success";
+ case SwkbdTextCheckResult::Failure:
+ return "Failure";
+ case SwkbdTextCheckResult::Confirm:
+ return "Confirm";
+ case SwkbdTextCheckResult::Silent:
+ return "Silent";
+ default:
+ UNIMPLEMENTED_MSG("Unknown TextCheckResult={}", text_check_result);
+ return "Unknown";
+ }
+}
+
+void SetReplyBase(std::vector<u8>& reply, SwkbdState state, SwkbdReplyType reply_type) {
+ std::memcpy(reply.data(), &state, sizeof(SwkbdState));
+ std::memcpy(reply.data() + sizeof(SwkbdState), &reply_type, sizeof(SwkbdReplyType));
+}
+
+} // Anonymous namespace
+
+SoftwareKeyboard::SoftwareKeyboard(Core::System& system_, LibraryAppletMode applet_mode_,
+ Core::Frontend::SoftwareKeyboardApplet& frontend_)
+ : Applet{system_, applet_mode_}, frontend{frontend_}, system{system_} {}
+
+SoftwareKeyboard::~SoftwareKeyboard() = default;
+
+void SoftwareKeyboard::Initialize() {
+ Applet::Initialize();
+
+ LOG_INFO(Service_AM, "Initializing Software Keyboard Applet with LibraryAppletMode={}",
+ applet_mode);
+
+ LOG_DEBUG(Service_AM,
+ "Initializing Applet with common_args: arg_version={}, lib_version={}, "
+ "play_startup_sound={}, size={}, system_tick={}, theme_color={}",
+ common_args.arguments_version, common_args.library_version,
+ common_args.play_startup_sound, common_args.size, common_args.system_tick,
+ common_args.theme_color);
+
+ swkbd_applet_version = SwkbdAppletVersion{common_args.library_version};
+
+ switch (applet_mode) {
+ case LibraryAppletMode::AllForeground:
+ InitializeForeground();
+ break;
+ case LibraryAppletMode::Background:
+ case LibraryAppletMode::BackgroundIndirectDisplay:
+ InitializeBackground(applet_mode);
+ break;
+ default:
+ UNREACHABLE_MSG("Invalid LibraryAppletMode={}", applet_mode);
+ break;
+ }
+}
+
+bool SoftwareKeyboard::TransactionComplete() const {
+ return complete;
+}
+
+ResultCode SoftwareKeyboard::GetStatus() const {
+ return status;
+}
+
+void SoftwareKeyboard::ExecuteInteractive() {
+ if (complete) {
+ return;
+ }
+
+ if (is_background) {
+ ProcessInlineKeyboardRequest();
+ } else {
+ ProcessTextCheck();
+ }
+}
+
+void SoftwareKeyboard::Execute() {
+ if (complete) {
+ return;
+ }
+
+ if (is_background) {
+ return;
+ }
+
+ ShowNormalKeyboard();
+}
+
+void SoftwareKeyboard::SubmitTextNormal(SwkbdResult result, std::u16string submitted_text) {
+ if (complete) {
+ return;
+ }
+
+ if (swkbd_config_common.use_text_check && result == SwkbdResult::Ok) {
+ SubmitForTextCheck(submitted_text);
+ } else {
+ SubmitNormalOutputAndExit(result, submitted_text);
+ }
+}
+
+void SoftwareKeyboard::SubmitTextInline(SwkbdReplyType reply_type, std::u16string submitted_text,
+ s32 cursor_position) {
+ if (complete) {
+ return;
+ }
+
+ current_text = std::move(submitted_text);
+ current_cursor_position = cursor_position;
+
+ if (inline_use_utf8) {
+ switch (reply_type) {
+ case SwkbdReplyType::ChangedString:
+ reply_type = SwkbdReplyType::ChangedStringUtf8;
+ break;
+ case SwkbdReplyType::MovedCursor:
+ reply_type = SwkbdReplyType::MovedCursorUtf8;
+ break;
+ case SwkbdReplyType::DecidedEnter:
+ reply_type = SwkbdReplyType::DecidedEnterUtf8;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (use_changed_string_v2) {
+ switch (reply_type) {
+ case SwkbdReplyType::ChangedString:
+ reply_type = SwkbdReplyType::ChangedStringV2;
+ break;
+ case SwkbdReplyType::ChangedStringUtf8:
+ reply_type = SwkbdReplyType::ChangedStringUtf8V2;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (use_moved_cursor_v2) {
+ switch (reply_type) {
+ case SwkbdReplyType::MovedCursor:
+ reply_type = SwkbdReplyType::MovedCursorV2;
+ break;
+ case SwkbdReplyType::MovedCursorUtf8:
+ reply_type = SwkbdReplyType::MovedCursorUtf8V2;
+ break;
+ default:
+ break;
+ }
+ }
+
+ SendReply(reply_type);
+}
+
+void SoftwareKeyboard::InitializeForeground() {
+ LOG_INFO(Service_AM, "Initializing Normal Software Keyboard Applet.");
+
+ is_background = false;
+
+ const auto swkbd_config_storage = broker.PopNormalDataToApplet();
+ ASSERT(swkbd_config_storage != nullptr);
+
+ const auto& swkbd_config_data = swkbd_config_storage->GetData();
+ ASSERT(swkbd_config_data.size() >= sizeof(SwkbdConfigCommon));
+
+ std::memcpy(&swkbd_config_common, swkbd_config_data.data(), sizeof(SwkbdConfigCommon));
+
+ switch (swkbd_applet_version) {
+ case SwkbdAppletVersion::Version5:
+ case SwkbdAppletVersion::Version65542:
+ ASSERT(swkbd_config_data.size() == sizeof(SwkbdConfigCommon) + sizeof(SwkbdConfigOld));
+ std::memcpy(&swkbd_config_old, swkbd_config_data.data() + sizeof(SwkbdConfigCommon),
+ sizeof(SwkbdConfigOld));
+ break;
+ case SwkbdAppletVersion::Version196615:
+ case SwkbdAppletVersion::Version262152:
+ case SwkbdAppletVersion::Version327689:
+ ASSERT(swkbd_config_data.size() == sizeof(SwkbdConfigCommon) + sizeof(SwkbdConfigOld2));
+ std::memcpy(&swkbd_config_old2, swkbd_config_data.data() + sizeof(SwkbdConfigCommon),
+ sizeof(SwkbdConfigOld2));
+ break;
+ case SwkbdAppletVersion::Version393227:
+ case SwkbdAppletVersion::Version524301:
+ ASSERT(swkbd_config_data.size() == sizeof(SwkbdConfigCommon) + sizeof(SwkbdConfigNew));
+ std::memcpy(&swkbd_config_new, swkbd_config_data.data() + sizeof(SwkbdConfigCommon),
+ sizeof(SwkbdConfigNew));
+ break;
+ default:
+ UNIMPLEMENTED_MSG("Unknown SwkbdConfig revision={} with size={}", swkbd_applet_version,
+ swkbd_config_data.size());
+ ASSERT(swkbd_config_data.size() >= sizeof(SwkbdConfigCommon) + sizeof(SwkbdConfigNew));
+ std::memcpy(&swkbd_config_new, swkbd_config_data.data() + sizeof(SwkbdConfigCommon),
+ sizeof(SwkbdConfigNew));
+ break;
+ }
+
+ const auto work_buffer_storage = broker.PopNormalDataToApplet();
+ ASSERT(work_buffer_storage != nullptr);
+
+ if (swkbd_config_common.initial_string_length == 0) {
+ InitializeFrontendKeyboard();
+ return;
+ }
+
+ const auto& work_buffer = work_buffer_storage->GetData();
+
+ std::vector<char16_t> initial_string(swkbd_config_common.initial_string_length);
+
+ std::memcpy(initial_string.data(),
+ work_buffer.data() + swkbd_config_common.initial_string_offset,
+ swkbd_config_common.initial_string_length * sizeof(char16_t));
+
+ initial_text = Common::UTF16StringFromFixedZeroTerminatedBuffer(initial_string.data(),
+ initial_string.size());
+
+ LOG_DEBUG(Service_AM, "\nInitial Text: {}", Common::UTF16ToUTF8(initial_text));
+
+ InitializeFrontendKeyboard();
+}
+
+void SoftwareKeyboard::InitializeBackground(LibraryAppletMode library_applet_mode) {
+ LOG_INFO(Service_AM, "Initializing Inline Software Keyboard Applet.");
+
+ is_background = true;
+
+ const auto swkbd_inline_initialize_arg_storage = broker.PopNormalDataToApplet();
+ ASSERT(swkbd_inline_initialize_arg_storage != nullptr);
+
+ const auto& swkbd_inline_initialize_arg = swkbd_inline_initialize_arg_storage->GetData();
+ ASSERT(swkbd_inline_initialize_arg.size() == sizeof(SwkbdInitializeArg));
+
+ std::memcpy(&swkbd_initialize_arg, swkbd_inline_initialize_arg.data(),
+ swkbd_inline_initialize_arg.size());
+
+ if (swkbd_initialize_arg.library_applet_mode_flag) {
+ ASSERT(library_applet_mode == LibraryAppletMode::Background);
+ } else {
+ ASSERT(library_applet_mode == LibraryAppletMode::BackgroundIndirectDisplay);
+ }
+}
+
+void SoftwareKeyboard::ProcessTextCheck() {
+ const auto text_check_storage = broker.PopInteractiveDataToApplet();
+ ASSERT(text_check_storage != nullptr);
+
+ const auto& text_check_data = text_check_storage->GetData();
+ ASSERT(text_check_data.size() == sizeof(SwkbdTextCheck));
+
+ SwkbdTextCheck swkbd_text_check;
+
+ std::memcpy(&swkbd_text_check, text_check_data.data(), sizeof(SwkbdTextCheck));
+
+ std::u16string text_check_message =
+ swkbd_text_check.text_check_result == SwkbdTextCheckResult::Failure ||
+ swkbd_text_check.text_check_result == SwkbdTextCheckResult::Confirm
+ ? Common::UTF16StringFromFixedZeroTerminatedBuffer(
+ swkbd_text_check.text_check_message.data(),
+ swkbd_text_check.text_check_message.size())
+ : u"";
+
+ LOG_INFO(Service_AM, "\nTextCheckResult: {}\nTextCheckMessage: {}",
+ GetTextCheckResultName(swkbd_text_check.text_check_result),
+ Common::UTF16ToUTF8(text_check_message));
+
+ switch (swkbd_text_check.text_check_result) {
+ case SwkbdTextCheckResult::Success:
+ SubmitNormalOutputAndExit(SwkbdResult::Ok, current_text);
+ break;
+ case SwkbdTextCheckResult::Failure:
+ ShowTextCheckDialog(SwkbdTextCheckResult::Failure, std::move(text_check_message));
+ break;
+ case SwkbdTextCheckResult::Confirm:
+ ShowTextCheckDialog(SwkbdTextCheckResult::Confirm, std::move(text_check_message));
+ break;
+ case SwkbdTextCheckResult::Silent:
+ default:
+ break;
+ }
+}
+
+void SoftwareKeyboard::ProcessInlineKeyboardRequest() {
+ const auto request_data_storage = broker.PopInteractiveDataToApplet();
+ ASSERT(request_data_storage != nullptr);
+
+ const auto& request_data = request_data_storage->GetData();
+ ASSERT(request_data.size() >= sizeof(SwkbdRequestCommand));
+
+ SwkbdRequestCommand request_command;
+
+ std::memcpy(&request_command, request_data.data(), sizeof(SwkbdRequestCommand));
+
+ switch (request_command) {
+ case SwkbdRequestCommand::Finalize:
+ RequestFinalize(request_data);
+ break;
+ case SwkbdRequestCommand::SetUserWordInfo:
+ RequestSetUserWordInfo(request_data);
+ break;
+ case SwkbdRequestCommand::SetCustomizeDic:
+ RequestSetCustomizeDic(request_data);
+ break;
+ case SwkbdRequestCommand::Calc:
+ RequestCalc(request_data);
+ break;
+ case SwkbdRequestCommand::SetCustomizedDictionaries:
+ RequestSetCustomizedDictionaries(request_data);
+ break;
+ case SwkbdRequestCommand::UnsetCustomizedDictionaries:
+ RequestUnsetCustomizedDictionaries(request_data);
+ break;
+ case SwkbdRequestCommand::SetChangedStringV2Flag:
+ RequestSetChangedStringV2Flag(request_data);
+ break;
+ case SwkbdRequestCommand::SetMovedCursorV2Flag:
+ RequestSetMovedCursorV2Flag(request_data);
+ break;
+ default:
+ UNIMPLEMENTED_MSG("Unknown SwkbdRequestCommand={}", request_command);
+ break;
+ }
+}
+
+void SoftwareKeyboard::SubmitNormalOutputAndExit(SwkbdResult result,
+ std::u16string submitted_text) {
+ std::vector<u8> out_data(sizeof(SwkbdResult) + STRING_BUFFER_SIZE);
+
+ if (swkbd_config_common.use_utf8) {
+ std::string utf8_submitted_text = Common::UTF16ToUTF8(submitted_text);
+
+ LOG_DEBUG(Service_AM, "\nSwkbdResult: {}\nUTF-8 Submitted Text: {}", result,
+ utf8_submitted_text);
+
+ std::memcpy(out_data.data(), &result, sizeof(SwkbdResult));
+ std::memcpy(out_data.data() + sizeof(SwkbdResult), utf8_submitted_text.data(),
+ utf8_submitted_text.size());
+ } else {
+ LOG_DEBUG(Service_AM, "\nSwkbdResult: {}\nUTF-16 Submitted Text: {}", result,
+ Common::UTF16ToUTF8(submitted_text));
+
+ std::memcpy(out_data.data(), &result, sizeof(SwkbdResult));
+ std::memcpy(out_data.data() + sizeof(SwkbdResult), submitted_text.data(),
+ submitted_text.size() * sizeof(char16_t));
+ }
+
+ broker.PushNormalDataFromApplet(std::make_shared<IStorage>(system, std::move(out_data)));
+
+ ExitKeyboard();
+}
+
+void SoftwareKeyboard::SubmitForTextCheck(std::u16string submitted_text) {
+ current_text = std::move(submitted_text);
+
+ std::vector<u8> out_data(sizeof(u64) + STRING_BUFFER_SIZE);
+
+ if (swkbd_config_common.use_utf8) {
+ std::string utf8_submitted_text = Common::UTF16ToUTF8(current_text);
+ const u64 buffer_size = sizeof(u64) + utf8_submitted_text.size();
+
+ LOG_DEBUG(Service_AM, "\nBuffer Size: {}\nUTF-8 Submitted Text: {}", buffer_size,
+ utf8_submitted_text);
+
+ std::memcpy(out_data.data(), &buffer_size, sizeof(u64));
+ std::memcpy(out_data.data() + sizeof(u64), utf8_submitted_text.data(),
+ utf8_submitted_text.size());
+ } else {
+ const u64 buffer_size = sizeof(u64) + current_text.size() * sizeof(char16_t);
+
+ LOG_DEBUG(Service_AM, "\nBuffer Size: {}\nUTF-16 Submitted Text: {}", buffer_size,
+ Common::UTF16ToUTF8(current_text));
+
+ std::memcpy(out_data.data(), &buffer_size, sizeof(u64));
+ std::memcpy(out_data.data() + sizeof(u64), current_text.data(),
+ current_text.size() * sizeof(char16_t));
+ }
+
+ broker.PushInteractiveDataFromApplet(std::make_shared<IStorage>(system, std::move(out_data)));
+}
+
+void SoftwareKeyboard::SendReply(SwkbdReplyType reply_type) {
+ switch (reply_type) {
+ case SwkbdReplyType::FinishedInitialize:
+ ReplyFinishedInitialize();
+ break;
+ case SwkbdReplyType::Default:
+ ReplyDefault();
+ break;
+ case SwkbdReplyType::ChangedString:
+ ReplyChangedString();
+ break;
+ case SwkbdReplyType::MovedCursor:
+ ReplyMovedCursor();
+ break;
+ case SwkbdReplyType::MovedTab:
+ ReplyMovedTab();
+ break;
+ case SwkbdReplyType::DecidedEnter:
+ ReplyDecidedEnter();
+ break;
+ case SwkbdReplyType::DecidedCancel:
+ ReplyDecidedCancel();
+ break;
+ case SwkbdReplyType::ChangedStringUtf8:
+ ReplyChangedStringUtf8();
+ break;
+ case SwkbdReplyType::MovedCursorUtf8:
+ ReplyMovedCursorUtf8();
+ break;
+ case SwkbdReplyType::DecidedEnterUtf8:
+ ReplyDecidedEnterUtf8();
+ break;
+ case SwkbdReplyType::UnsetCustomizeDic:
+ ReplyUnsetCustomizeDic();
+ break;
+ case SwkbdReplyType::ReleasedUserWordInfo:
+ ReplyReleasedUserWordInfo();
+ break;
+ case SwkbdReplyType::UnsetCustomizedDictionaries:
+ ReplyUnsetCustomizedDictionaries();
+ break;
+ case SwkbdReplyType::ChangedStringV2:
+ ReplyChangedStringV2();
+ break;
+ case SwkbdReplyType::MovedCursorV2:
+ ReplyMovedCursorV2();
+ break;
+ case SwkbdReplyType::ChangedStringUtf8V2:
+ ReplyChangedStringUtf8V2();
+ break;
+ case SwkbdReplyType::MovedCursorUtf8V2:
+ ReplyMovedCursorUtf8V2();
+ break;
+ default:
+ UNIMPLEMENTED_MSG("Unknown SwkbdReplyType={}", reply_type);
+ ReplyDefault();
+ break;
+ }
+}
+
+void SoftwareKeyboard::ChangeState(SwkbdState state) {
+ swkbd_state = state;
+
+ ReplyDefault();
+}
+
+void SoftwareKeyboard::InitializeFrontendKeyboard() {
+ if (is_background) {
+ const auto& appear_arg = swkbd_calc_arg.appear_arg;
+
+ std::u16string ok_text = Common::UTF16StringFromFixedZeroTerminatedBuffer(
+ appear_arg.ok_text.data(), appear_arg.ok_text.size());
+
+ const u32 max_text_length =
+ appear_arg.max_text_length > 0 && appear_arg.max_text_length <= DEFAULT_MAX_TEXT_LENGTH
+ ? appear_arg.max_text_length
+ : DEFAULT_MAX_TEXT_LENGTH;
+
+ const u32 min_text_length =
+ appear_arg.min_text_length <= max_text_length ? appear_arg.min_text_length : 0;
+
+ const s32 initial_cursor_position =
+ current_cursor_position > 0 ? current_cursor_position : 0;
+
+ const auto text_draw_type =
+ max_text_length <= 32 ? SwkbdTextDrawType::Line : SwkbdTextDrawType::Box;
+
+ Core::Frontend::KeyboardInitializeParameters initialize_parameters{
+ .ok_text{std::move(ok_text)},
+ .header_text{},
+ .sub_text{},
+ .guide_text{},
+ .initial_text{current_text},
+ .max_text_length{max_text_length},
+ .min_text_length{min_text_length},
+ .initial_cursor_position{initial_cursor_position},
+ .type{appear_arg.type},
+ .password_mode{SwkbdPasswordMode::Disabled},
+ .text_draw_type{text_draw_type},
+ .key_disable_flags{appear_arg.key_disable_flags},
+ .use_blur_background{false},
+ .enable_backspace_button{swkbd_calc_arg.enable_backspace_button},
+ .enable_return_button{appear_arg.enable_return_button},
+ .disable_cancel_button{appear_arg.disable_cancel_button},
+ };
+
+ frontend.InitializeKeyboard(
+ true, std::move(initialize_parameters), {},
+ [this](SwkbdReplyType reply_type, std::u16string submitted_text, s32 cursor_position) {
+ SubmitTextInline(reply_type, submitted_text, cursor_position);
+ });
+ } else {
+ std::u16string ok_text = Common::UTF16StringFromFixedZeroTerminatedBuffer(
+ swkbd_config_common.ok_text.data(), swkbd_config_common.ok_text.size());
+
+ std::u16string header_text = Common::UTF16StringFromFixedZeroTerminatedBuffer(
+ swkbd_config_common.header_text.data(), swkbd_config_common.header_text.size());
+
+ std::u16string sub_text = Common::UTF16StringFromFixedZeroTerminatedBuffer(
+ swkbd_config_common.sub_text.data(), swkbd_config_common.sub_text.size());
+
+ std::u16string guide_text = Common::UTF16StringFromFixedZeroTerminatedBuffer(
+ swkbd_config_common.guide_text.data(), swkbd_config_common.guide_text.size());
+
+ const u32 max_text_length =
+ swkbd_config_common.max_text_length > 0 &&
+ swkbd_config_common.max_text_length <= DEFAULT_MAX_TEXT_LENGTH
+ ? swkbd_config_common.max_text_length
+ : DEFAULT_MAX_TEXT_LENGTH;
+
+ const u32 min_text_length = swkbd_config_common.min_text_length <= max_text_length
+ ? swkbd_config_common.min_text_length
+ : 0;
+
+ const s32 initial_cursor_position = [this] {
+ switch (swkbd_config_common.initial_cursor_position) {
+ case SwkbdInitialCursorPosition::Start:
+ default:
+ return 0;
+ case SwkbdInitialCursorPosition::End:
+ return static_cast<s32>(initial_text.size());
+ }
+ }();
+
+ const auto text_draw_type = [this, max_text_length] {
+ switch (swkbd_config_common.text_draw_type) {
+ case SwkbdTextDrawType::Line:
+ default:
+ return max_text_length <= 32 ? SwkbdTextDrawType::Line : SwkbdTextDrawType::Box;
+ case SwkbdTextDrawType::Box:
+ case SwkbdTextDrawType::DownloadCode:
+ return swkbd_config_common.text_draw_type;
+ }
+ }();
+
+ const auto enable_return_button = text_draw_type == SwkbdTextDrawType::Box
+ ? swkbd_config_common.enable_return_button
+ : false;
+
+ const auto disable_cancel_button = swkbd_applet_version >= SwkbdAppletVersion::Version393227
+ ? swkbd_config_new.disable_cancel_button
+ : false;
+
+ Core::Frontend::KeyboardInitializeParameters initialize_parameters{
+ .ok_text{std::move(ok_text)},
+ .header_text{std::move(header_text)},
+ .sub_text{std::move(sub_text)},
+ .guide_text{std::move(guide_text)},
+ .initial_text{initial_text},
+ .max_text_length{max_text_length},
+ .min_text_length{min_text_length},
+ .initial_cursor_position{initial_cursor_position},
+ .type{swkbd_config_common.type},
+ .password_mode{swkbd_config_common.password_mode},
+ .text_draw_type{text_draw_type},
+ .key_disable_flags{swkbd_config_common.key_disable_flags},
+ .use_blur_background{swkbd_config_common.use_blur_background},
+ .enable_backspace_button{true},
+ .enable_return_button{enable_return_button},
+ .disable_cancel_button{disable_cancel_button},
+ };
+
+ frontend.InitializeKeyboard(false, std::move(initialize_parameters),
+ [this](SwkbdResult result, std::u16string submitted_text) {
+ SubmitTextNormal(result, submitted_text);
+ },
+ {});
+ }
+}
+
+void SoftwareKeyboard::ShowNormalKeyboard() {
+ frontend.ShowNormalKeyboard();
+}
+
+void SoftwareKeyboard::ShowTextCheckDialog(SwkbdTextCheckResult text_check_result,
+ std::u16string text_check_message) {
+ frontend.ShowTextCheckDialog(text_check_result, std::move(text_check_message));
+}
+
+void SoftwareKeyboard::ShowInlineKeyboard() {
+ if (swkbd_state != SwkbdState::InitializedIsHidden) {
+ return;
+ }
+
+ ChangeState(SwkbdState::InitializedIsAppearing);
+
+ const auto& appear_arg = swkbd_calc_arg.appear_arg;
+
+ const u32 max_text_length =
+ appear_arg.max_text_length > 0 && appear_arg.max_text_length <= DEFAULT_MAX_TEXT_LENGTH
+ ? appear_arg.max_text_length
+ : DEFAULT_MAX_TEXT_LENGTH;
+
+ const u32 min_text_length =
+ appear_arg.min_text_length <= max_text_length ? appear_arg.min_text_length : 0;
+
+ Core::Frontend::InlineAppearParameters appear_parameters{
+ .max_text_length{max_text_length},
+ .min_text_length{min_text_length},
+ .key_top_scale_x{swkbd_calc_arg.key_top_scale_x},
+ .key_top_scale_y{swkbd_calc_arg.key_top_scale_y},
+ .key_top_translate_x{swkbd_calc_arg.key_top_translate_x},
+ .key_top_translate_y{swkbd_calc_arg.key_top_translate_y},
+ .type{appear_arg.type},
+ .key_disable_flags{appear_arg.key_disable_flags},
+ .key_top_as_floating{swkbd_calc_arg.key_top_as_floating},
+ .enable_backspace_button{swkbd_calc_arg.enable_backspace_button},
+ .enable_return_button{appear_arg.enable_return_button},
+ .disable_cancel_button{appear_arg.disable_cancel_button},
+ };
+
+ frontend.ShowInlineKeyboard(std::move(appear_parameters));
+
+ ChangeState(SwkbdState::InitializedIsShown);
+}
+
+void SoftwareKeyboard::HideInlineKeyboard() {
+ if (swkbd_state != SwkbdState::InitializedIsShown) {
+ return;
+ }
+
+ ChangeState(SwkbdState::InitializedIsDisappearing);
+
+ frontend.HideInlineKeyboard();
+
+ ChangeState(SwkbdState::InitializedIsHidden);
+}
+
+void SoftwareKeyboard::InlineTextChanged() {
+ Core::Frontend::InlineTextParameters text_parameters{
+ .input_text{current_text},
+ .cursor_position{current_cursor_position},
+ };
+
+ frontend.InlineTextChanged(std::move(text_parameters));
+}
+
+void SoftwareKeyboard::ExitKeyboard() {
+ complete = true;
+ status = ResultSuccess;
+
+ frontend.ExitKeyboard();
+
+ broker.SignalStateChanged();
+}
+
+// Inline Software Keyboard Requests
+
+void SoftwareKeyboard::RequestFinalize(const std::vector<u8>& request_data) {
+ LOG_DEBUG(Service_AM, "Processing Request: Finalize");
+
+ ChangeState(SwkbdState::NotInitialized);
+
+ ExitKeyboard();
+}
+
+void SoftwareKeyboard::RequestSetUserWordInfo(const std::vector<u8>& request_data) {
+ LOG_WARNING(Service_AM, "SetUserWordInfo is not implemented.");
+}
+
+void SoftwareKeyboard::RequestSetCustomizeDic(const std::vector<u8>& request_data) {
+ LOG_WARNING(Service_AM, "SetCustomizeDic is not implemented.");
+}
+
+void SoftwareKeyboard::RequestCalc(const std::vector<u8>& request_data) {
+ LOG_DEBUG(Service_AM, "Processing Request: Calc");
+
+ ASSERT(request_data.size() == sizeof(SwkbdRequestCommand) + sizeof(SwkbdCalcArg));
+
+ std::memcpy(&swkbd_calc_arg, request_data.data() + sizeof(SwkbdRequestCommand),
+ sizeof(SwkbdCalcArg));
+
+ if (swkbd_calc_arg.flags.set_input_text) {
+ current_text = Common::UTF16StringFromFixedZeroTerminatedBuffer(
+ swkbd_calc_arg.input_text.data(), swkbd_calc_arg.input_text.size());
+ }
+
+ if (swkbd_calc_arg.flags.set_cursor_position) {
+ current_cursor_position = swkbd_calc_arg.cursor_position;
+ }
+
+ if (swkbd_calc_arg.flags.set_utf8_mode) {
+ inline_use_utf8 = swkbd_calc_arg.utf8_mode;
+ }
+
+ if (swkbd_state <= SwkbdState::InitializedIsHidden &&
+ swkbd_calc_arg.flags.unset_customize_dic) {
+ ReplyUnsetCustomizeDic();
+ }
+
+ if (swkbd_state <= SwkbdState::InitializedIsHidden &&
+ swkbd_calc_arg.flags.unset_user_word_info) {
+ ReplyReleasedUserWordInfo();
+ }
+
+ if (swkbd_state == SwkbdState::NotInitialized && swkbd_calc_arg.flags.set_initialize_arg) {
+ InitializeFrontendKeyboard();
+
+ ChangeState(SwkbdState::InitializedIsHidden);
+
+ ReplyFinishedInitialize();
+ }
+
+ if (!swkbd_calc_arg.flags.set_initialize_arg &&
+ (swkbd_calc_arg.flags.set_input_text || swkbd_calc_arg.flags.set_cursor_position)) {
+ InlineTextChanged();
+ }
+
+ if (swkbd_state == SwkbdState::InitializedIsHidden && swkbd_calc_arg.flags.appear) {
+ ShowInlineKeyboard();
+ return;
+ }
+
+ if (swkbd_state == SwkbdState::InitializedIsShown && swkbd_calc_arg.flags.disappear) {
+ HideInlineKeyboard();
+ return;
+ }
+}
+
+void SoftwareKeyboard::RequestSetCustomizedDictionaries(const std::vector<u8>& request_data) {
+ LOG_WARNING(Service_AM, "SetCustomizedDictionaries is not implemented.");
+}
+
+void SoftwareKeyboard::RequestUnsetCustomizedDictionaries(const std::vector<u8>& request_data) {
+ LOG_WARNING(Service_AM, "(STUBBED) Processing Request: UnsetCustomizedDictionaries");
+
+ ReplyUnsetCustomizedDictionaries();
+}
+
+void SoftwareKeyboard::RequestSetChangedStringV2Flag(const std::vector<u8>& request_data) {
+ LOG_DEBUG(Service_AM, "Processing Request: SetChangedStringV2Flag");
+
+ ASSERT(request_data.size() == sizeof(SwkbdRequestCommand) + 1);
+
+ std::memcpy(&use_changed_string_v2, request_data.data() + sizeof(SwkbdRequestCommand), 1);
+}
+
+void SoftwareKeyboard::RequestSetMovedCursorV2Flag(const std::vector<u8>& request_data) {
+ LOG_DEBUG(Service_AM, "Processing Request: SetMovedCursorV2Flag");
+
+ ASSERT(request_data.size() == sizeof(SwkbdRequestCommand) + 1);
+
+ std::memcpy(&use_moved_cursor_v2, request_data.data() + sizeof(SwkbdRequestCommand), 1);
+}
+
+// Inline Software Keyboard Replies
+
+void SoftwareKeyboard::ReplyFinishedInitialize() {
+ LOG_DEBUG(Service_AM, "Sending Reply: FinishedInitialize");
+
+ std::vector<u8> reply(REPLY_BASE_SIZE + 1);
+
+ SetReplyBase(reply, swkbd_state, SwkbdReplyType::FinishedInitialize);
+
+ broker.PushInteractiveDataFromApplet(std::make_shared<IStorage>(system, std::move(reply)));
+}
+
+void SoftwareKeyboard::ReplyDefault() {
+ LOG_DEBUG(Service_AM, "Sending Reply: Default");
+
+ std::vector<u8> reply(REPLY_BASE_SIZE);
+
+ SetReplyBase(reply, swkbd_state, SwkbdReplyType::Default);
+
+ broker.PushInteractiveDataFromApplet(std::make_shared<IStorage>(system, std::move(reply)));
+}
+
+void SoftwareKeyboard::ReplyChangedString() {
+ LOG_DEBUG(Service_AM, "Sending Reply: ChangedString");
+
+ std::vector<u8> reply(REPLY_BASE_SIZE + REPLY_UTF16_SIZE + sizeof(SwkbdChangedStringArg));
+
+ SetReplyBase(reply, swkbd_state, SwkbdReplyType::ChangedString);
+
+ const SwkbdChangedStringArg changed_string_arg{
+ .text_length{static_cast<u32>(current_text.size())},
+ .dictionary_start_cursor_position{-1},
+ .dictionary_end_cursor_position{-1},
+ .cursor_position{current_cursor_position},
+ };
+
+ std::memcpy(reply.data() + REPLY_BASE_SIZE, current_text.data(),
+ current_text.size() * sizeof(char16_t));
+ std::memcpy(reply.data() + REPLY_BASE_SIZE + REPLY_UTF16_SIZE, &changed_string_arg,
+ sizeof(SwkbdChangedStringArg));
+
+ broker.PushInteractiveDataFromApplet(std::make_shared<IStorage>(system, std::move(reply)));
+}
+
+void SoftwareKeyboard::ReplyMovedCursor() {
+ LOG_DEBUG(Service_AM, "Sending Reply: MovedCursor");
+
+ std::vector<u8> reply(REPLY_BASE_SIZE + REPLY_UTF16_SIZE + sizeof(SwkbdMovedCursorArg));
+
+ SetReplyBase(reply, swkbd_state, SwkbdReplyType::MovedCursor);
+
+ const SwkbdMovedCursorArg moved_cursor_arg{
+ .text_length{static_cast<u32>(current_text.size())},
+ .cursor_position{current_cursor_position},
+ };
+
+ std::memcpy(reply.data() + REPLY_BASE_SIZE, current_text.data(),
+ current_text.size() * sizeof(char16_t));
+ std::memcpy(reply.data() + REPLY_BASE_SIZE + REPLY_UTF16_SIZE, &moved_cursor_arg,
+ sizeof(SwkbdMovedCursorArg));
+
+ broker.PushInteractiveDataFromApplet(std::make_shared<IStorage>(system, std::move(reply)));
+}
+
+void SoftwareKeyboard::ReplyMovedTab() {
+ LOG_DEBUG(Service_AM, "Sending Reply: MovedTab");
+
+ std::vector<u8> reply(REPLY_BASE_SIZE + REPLY_UTF16_SIZE + sizeof(SwkbdMovedTabArg));
+
+ SetReplyBase(reply, swkbd_state, SwkbdReplyType::MovedTab);
+
+ const SwkbdMovedTabArg moved_tab_arg{
+ .text_length{static_cast<u32>(current_text.size())},
+ .cursor_position{current_cursor_position},
+ };
+
+ std::memcpy(reply.data() + REPLY_BASE_SIZE, current_text.data(),
+ current_text.size() * sizeof(char16_t));
+ std::memcpy(reply.data() + REPLY_BASE_SIZE + REPLY_UTF16_SIZE, &moved_tab_arg,
+ sizeof(SwkbdMovedTabArg));
+
+ broker.PushInteractiveDataFromApplet(std::make_shared<IStorage>(system, std::move(reply)));
+}
+
+void SoftwareKeyboard::ReplyDecidedEnter() {
+ LOG_DEBUG(Service_AM, "Sending Reply: DecidedEnter");
+
+ std::vector<u8> reply(REPLY_BASE_SIZE + REPLY_UTF16_SIZE + sizeof(SwkbdDecidedEnterArg));
+
+ SetReplyBase(reply, swkbd_state, SwkbdReplyType::DecidedEnter);
+
+ const SwkbdDecidedEnterArg decided_enter_arg{
+ .text_length{static_cast<u32>(current_text.size())},
+ };
+
+ std::memcpy(reply.data() + REPLY_BASE_SIZE, current_text.data(),
+ current_text.size() * sizeof(char16_t));
+ std::memcpy(reply.data() + REPLY_BASE_SIZE + REPLY_UTF16_SIZE, &decided_enter_arg,
+ sizeof(SwkbdDecidedEnterArg));
+
+ broker.PushInteractiveDataFromApplet(std::make_shared<IStorage>(system, std::move(reply)));
+
+ HideInlineKeyboard();
+}
+
+void SoftwareKeyboard::ReplyDecidedCancel() {
+ LOG_DEBUG(Service_AM, "Sending Reply: DecidedCancel");
+
+ std::vector<u8> reply(REPLY_BASE_SIZE);
+
+ SetReplyBase(reply, swkbd_state, SwkbdReplyType::DecidedCancel);
+
+ broker.PushInteractiveDataFromApplet(std::make_shared<IStorage>(system, std::move(reply)));
+
+ HideInlineKeyboard();
+}
+
+void SoftwareKeyboard::ReplyChangedStringUtf8() {
+ LOG_DEBUG(Service_AM, "Sending Reply: ChangedStringUtf8");
+
+ std::vector<u8> reply(REPLY_BASE_SIZE + REPLY_UTF8_SIZE + sizeof(SwkbdChangedStringArg));
+
+ SetReplyBase(reply, swkbd_state, SwkbdReplyType::ChangedStringUtf8);
+
+ std::string utf8_current_text = Common::UTF16ToUTF8(current_text);
+
+ const SwkbdChangedStringArg changed_string_arg{
+ .text_length{static_cast<u32>(current_text.size())},
+ .dictionary_start_cursor_position{-1},
+ .dictionary_end_cursor_position{-1},
+ .cursor_position{current_cursor_position},
+ };
+
+ std::memcpy(reply.data() + REPLY_BASE_SIZE, utf8_current_text.data(), utf8_current_text.size());
+ std::memcpy(reply.data() + REPLY_BASE_SIZE + REPLY_UTF8_SIZE, &changed_string_arg,
+ sizeof(SwkbdChangedStringArg));
+
+ broker.PushInteractiveDataFromApplet(std::make_shared<IStorage>(system, std::move(reply)));
+}
+
+void SoftwareKeyboard::ReplyMovedCursorUtf8() {
+ LOG_DEBUG(Service_AM, "Sending Reply: MovedCursorUtf8");
+
+ std::vector<u8> reply(REPLY_BASE_SIZE + REPLY_UTF8_SIZE + sizeof(SwkbdMovedCursorArg));
+
+ SetReplyBase(reply, swkbd_state, SwkbdReplyType::MovedCursorUtf8);
+
+ std::string utf8_current_text = Common::UTF16ToUTF8(current_text);
+
+ const SwkbdMovedCursorArg moved_cursor_arg{
+ .text_length{static_cast<u32>(current_text.size())},
+ .cursor_position{current_cursor_position},
+ };
+
+ std::memcpy(reply.data() + REPLY_BASE_SIZE, utf8_current_text.data(), utf8_current_text.size());
+ std::memcpy(reply.data() + REPLY_BASE_SIZE + REPLY_UTF8_SIZE, &moved_cursor_arg,
+ sizeof(SwkbdMovedCursorArg));
+
+ broker.PushInteractiveDataFromApplet(std::make_shared<IStorage>(system, std::move(reply)));
+}
+
+void SoftwareKeyboard::ReplyDecidedEnterUtf8() {
+ LOG_DEBUG(Service_AM, "Sending Reply: DecidedEnterUtf8");
+
+ std::vector<u8> reply(REPLY_BASE_SIZE + REPLY_UTF8_SIZE + sizeof(SwkbdDecidedEnterArg));
+
+ SetReplyBase(reply, swkbd_state, SwkbdReplyType::DecidedEnterUtf8);
+
+ std::string utf8_current_text = Common::UTF16ToUTF8(current_text);
+
+ const SwkbdDecidedEnterArg decided_enter_arg{
+ .text_length{static_cast<u32>(current_text.size())},
+ };
+
+ std::memcpy(reply.data() + REPLY_BASE_SIZE, utf8_current_text.data(), utf8_current_text.size());
+ std::memcpy(reply.data() + REPLY_BASE_SIZE + REPLY_UTF8_SIZE, &decided_enter_arg,
+ sizeof(SwkbdDecidedEnterArg));
+
+ broker.PushInteractiveDataFromApplet(std::make_shared<IStorage>(system, std::move(reply)));
+
+ HideInlineKeyboard();
+}
+
+void SoftwareKeyboard::ReplyUnsetCustomizeDic() {
+ LOG_DEBUG(Service_AM, "Sending Reply: UnsetCustomizeDic");
+
+ std::vector<u8> reply(REPLY_BASE_SIZE);
+
+ SetReplyBase(reply, swkbd_state, SwkbdReplyType::UnsetCustomizeDic);
+
+ broker.PushInteractiveDataFromApplet(std::make_shared<IStorage>(system, std::move(reply)));
+}
+
+void SoftwareKeyboard::ReplyReleasedUserWordInfo() {
+ LOG_DEBUG(Service_AM, "Sending Reply: ReleasedUserWordInfo");
+
+ std::vector<u8> reply(REPLY_BASE_SIZE);
+
+ SetReplyBase(reply, swkbd_state, SwkbdReplyType::ReleasedUserWordInfo);
+
+ broker.PushInteractiveDataFromApplet(std::make_shared<IStorage>(system, std::move(reply)));
+}
+
+void SoftwareKeyboard::ReplyUnsetCustomizedDictionaries() {
+ LOG_DEBUG(Service_AM, "Sending Reply: UnsetCustomizedDictionaries");
+
+ std::vector<u8> reply(REPLY_BASE_SIZE);
+
+ SetReplyBase(reply, swkbd_state, SwkbdReplyType::UnsetCustomizedDictionaries);
+
+ broker.PushInteractiveDataFromApplet(std::make_shared<IStorage>(system, std::move(reply)));
+}
+
+void SoftwareKeyboard::ReplyChangedStringV2() {
+ LOG_DEBUG(Service_AM, "Sending Reply: ChangedStringV2");
+
+ std::vector<u8> reply(REPLY_BASE_SIZE + REPLY_UTF16_SIZE + sizeof(SwkbdChangedStringArg) + 1);
+
+ SetReplyBase(reply, swkbd_state, SwkbdReplyType::ChangedStringV2);
+
+ const SwkbdChangedStringArg changed_string_arg{
+ .text_length{static_cast<u32>(current_text.size())},
+ .dictionary_start_cursor_position{-1},
+ .dictionary_end_cursor_position{-1},
+ .cursor_position{current_cursor_position},
+ };
+
+ constexpr u8 flag = 0;
+
+ std::memcpy(reply.data() + REPLY_BASE_SIZE, current_text.data(),
+ current_text.size() * sizeof(char16_t));
+ std::memcpy(reply.data() + REPLY_BASE_SIZE + REPLY_UTF16_SIZE, &changed_string_arg,
+ sizeof(SwkbdChangedStringArg));
+ std::memcpy(reply.data() + REPLY_BASE_SIZE + REPLY_UTF16_SIZE + sizeof(SwkbdChangedStringArg),
+ &flag, 1);
+
+ broker.PushInteractiveDataFromApplet(std::make_shared<IStorage>(system, std::move(reply)));
+}
+
+void SoftwareKeyboard::ReplyMovedCursorV2() {
+ LOG_DEBUG(Service_AM, "Sending Reply: MovedCursorV2");
+
+ std::vector<u8> reply(REPLY_BASE_SIZE + REPLY_UTF16_SIZE + sizeof(SwkbdMovedCursorArg) + 1);
+
+ SetReplyBase(reply, swkbd_state, SwkbdReplyType::MovedCursorV2);
+
+ const SwkbdMovedCursorArg moved_cursor_arg{
+ .text_length{static_cast<u32>(current_text.size())},
+ .cursor_position{current_cursor_position},
+ };
+
+ constexpr u8 flag = 0;
+
+ std::memcpy(reply.data() + REPLY_BASE_SIZE, current_text.data(),
+ current_text.size() * sizeof(char16_t));
+ std::memcpy(reply.data() + REPLY_BASE_SIZE + REPLY_UTF16_SIZE, &moved_cursor_arg,
+ sizeof(SwkbdMovedCursorArg));
+ std::memcpy(reply.data() + REPLY_BASE_SIZE + REPLY_UTF16_SIZE + sizeof(SwkbdMovedCursorArg),
+ &flag, 1);
+
+ broker.PushInteractiveDataFromApplet(std::make_shared<IStorage>(system, std::move(reply)));
+}
+
+void SoftwareKeyboard::ReplyChangedStringUtf8V2() {
+ LOG_DEBUG(Service_AM, "Sending Reply: ChangedStringUtf8V2");
+
+ std::vector<u8> reply(REPLY_BASE_SIZE + REPLY_UTF8_SIZE + sizeof(SwkbdChangedStringArg) + 1);
+
+ SetReplyBase(reply, swkbd_state, SwkbdReplyType::ChangedStringUtf8V2);
+
+ std::string utf8_current_text = Common::UTF16ToUTF8(current_text);
+
+ const SwkbdChangedStringArg changed_string_arg{
+ .text_length{static_cast<u32>(current_text.size())},
+ .dictionary_start_cursor_position{-1},
+ .dictionary_end_cursor_position{-1},
+ .cursor_position{current_cursor_position},
+ };
+
+ constexpr u8 flag = 0;
+
+ std::memcpy(reply.data() + REPLY_BASE_SIZE, utf8_current_text.data(), utf8_current_text.size());
+ std::memcpy(reply.data() + REPLY_BASE_SIZE + REPLY_UTF8_SIZE, &changed_string_arg,
+ sizeof(SwkbdChangedStringArg));
+ std::memcpy(reply.data() + REPLY_BASE_SIZE + REPLY_UTF8_SIZE + sizeof(SwkbdChangedStringArg),
+ &flag, 1);
+
+ broker.PushInteractiveDataFromApplet(std::make_shared<IStorage>(system, std::move(reply)));
+}
+
+void SoftwareKeyboard::ReplyMovedCursorUtf8V2() {
+ LOG_DEBUG(Service_AM, "Sending Reply: MovedCursorUtf8V2");
+
+ std::vector<u8> reply(REPLY_BASE_SIZE + REPLY_UTF8_SIZE + sizeof(SwkbdMovedCursorArg) + 1);
+
+ SetReplyBase(reply, swkbd_state, SwkbdReplyType::MovedCursorUtf8V2);
+
+ std::string utf8_current_text = Common::UTF16ToUTF8(current_text);
+
+ const SwkbdMovedCursorArg moved_cursor_arg{
+ .text_length{static_cast<u32>(current_text.size())},
+ .cursor_position{current_cursor_position},
+ };
+
+ constexpr u8 flag = 0;
+
+ std::memcpy(reply.data() + REPLY_BASE_SIZE, utf8_current_text.data(), utf8_current_text.size());
+ std::memcpy(reply.data() + REPLY_BASE_SIZE + REPLY_UTF8_SIZE, &moved_cursor_arg,
+ sizeof(SwkbdMovedCursorArg));
+ std::memcpy(reply.data() + REPLY_BASE_SIZE + REPLY_UTF8_SIZE + sizeof(SwkbdMovedCursorArg),
+ &flag, 1);
+
+ broker.PushInteractiveDataFromApplet(std::make_shared<IStorage>(system, std::move(reply)));
+}
+
+} // namespace Service::AM::Applets
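All of the inline-keyboard replies above share one buffer layout: a fixed base holding the SwkbdState and SwkbdReplyType, a fixed-size UTF-16 (0x3EC) or UTF-8 (0x7D4) text region, the per-reply argument struct, and, for the V2 variants, a trailing flag byte. A sketch of building one such reply with stand-in enums (the real ones live in applet_software_keyboard_types.h):

#include <algorithm>
#include <cstdint>
#include <cstring>
#include <string>
#include <vector>

// Stand-ins for SwkbdState / SwkbdReplyType; enumerator values are placeholders.
enum class KbdState : std::uint32_t { Hidden, Shown };
enum class KbdReply : std::uint32_t { Default, ChangedStringV2 };

struct ChangedStringArg {
    std::uint32_t text_length;
    std::int32_t dictionary_start_cursor_position;
    std::int32_t dictionary_end_cursor_position;
    std::int32_t cursor_position;
};

constexpr std::size_t kBaseSize = sizeof(KbdState) + sizeof(KbdReply);
constexpr std::size_t kUtf16Region = 0x3EC;

std::vector<std::uint8_t> BuildChangedStringV2Reply(const std::u16string& text,
                                                    std::int32_t cursor) {
    // Base + text region + argument struct + trailing V2 flag byte (left zeroed).
    std::vector<std::uint8_t> reply(kBaseSize + kUtf16Region + sizeof(ChangedStringArg) + 1);

    const auto state = KbdState::Shown;
    const auto type = KbdReply::ChangedStringV2;
    std::memcpy(reply.data(), &state, sizeof(state));
    std::memcpy(reply.data() + sizeof(state), &type, sizeof(type));

    // The text lives in a fixed-size region; anything past it is truncated.
    const std::size_t text_bytes = std::min(text.size() * sizeof(char16_t), kUtf16Region);
    std::memcpy(reply.data() + kBaseSize, text.data(), text_bytes);

    const ChangedStringArg arg{static_cast<std::uint32_t>(text.size()), -1, -1, cursor};
    std::memcpy(reply.data() + kBaseSize + kUtf16Region, &arg, sizeof(arg));

    return reply;
}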
diff --git a/src/core/hle/service/am/applets/applet_software_keyboard.h b/src/core/hle/service/am/applets/applet_software_keyboard.h
new file mode 100644
index 000000000..9aef1bf11
--- /dev/null
+++ b/src/core/hle/service/am/applets/applet_software_keyboard.h
@@ -0,0 +1,166 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+#include "core/hle/result.h"
+#include "core/hle/service/am/applets/applet_software_keyboard_types.h"
+#include "core/hle/service/am/applets/applets.h"
+
+namespace Core {
+class System;
+}
+
+namespace Service::AM::Applets {
+
+class SoftwareKeyboard final : public Applet {
+public:
+ explicit SoftwareKeyboard(Core::System& system_, LibraryAppletMode applet_mode_,
+ Core::Frontend::SoftwareKeyboardApplet& frontend_);
+ ~SoftwareKeyboard() override;
+
+ void Initialize() override;
+
+ bool TransactionComplete() const override;
+ ResultCode GetStatus() const override;
+ void ExecuteInteractive() override;
+ void Execute() override;
+
+ /**
+ * Submits the input text to the application.
+ * If text checking is enabled, the application will verify the input text.
+ * If use_utf8 is enabled, the input text will be converted to UTF-8 prior to being submitted.
+ * This should only be used by the normal software keyboard.
+ *
+ * @param result SwkbdResult enum
+ * @param submitted_text UTF-16 encoded string
+ */
+ void SubmitTextNormal(SwkbdResult result, std::u16string submitted_text);
+
+ /**
+ * Submits the input text to the application.
+ * If utf8_mode is enabled, the input text will be converted to UTF-8 prior to being submitted.
+ * This should only be used by the inline software keyboard.
+ *
+ * @param reply_type SwkbdReplyType enum
+ * @param submitted_text UTF-16 encoded string
+ * @param cursor_position The current position of the text cursor
+ */
+ void SubmitTextInline(SwkbdReplyType reply_type, std::u16string submitted_text,
+ s32 cursor_position);
+
+private:
+ /// Initializes the normal software keyboard.
+ void InitializeForeground();
+
+ /// Initializes the inline software keyboard.
+ void InitializeBackground(LibraryAppletMode library_applet_mode);
+
+ /// Processes the text check sent by the application.
+ void ProcessTextCheck();
+
+ /// Processes the inline software keyboard request command sent by the application.
+ void ProcessInlineKeyboardRequest();
+
+ /// Submits the input text and exits the applet.
+ void SubmitNormalOutputAndExit(SwkbdResult result, std::u16string submitted_text);
+
+ /// Submits the input text for text checking.
+ void SubmitForTextCheck(std::u16string submitted_text);
+
+ /// Sends a reply to the application after processing a request command.
+ void SendReply(SwkbdReplyType reply_type);
+
+ /// Changes the inline keyboard state.
+ void ChangeState(SwkbdState state);
+
+ /**
+ * Signals the frontend to initialize the software keyboard with common parameters.
+ * This initializes either the normal software keyboard or the inline software keyboard
+ * depending on the state of is_background.
+ * Note that this does not cause the keyboard to appear.
+ * Use the respective Show*Keyboard() functions to cause the respective keyboards to appear.
+ */
+ void InitializeFrontendKeyboard();
+
+ /// Signals the frontend to show the normal software keyboard.
+ void ShowNormalKeyboard();
+
+ /// Signals the frontend to show the text check dialog.
+ void ShowTextCheckDialog(SwkbdTextCheckResult text_check_result,
+ std::u16string text_check_message);
+
+ /// Signals the frontend to show the inline software keyboard.
+ void ShowInlineKeyboard();
+
+ /// Signals the frontend to hide the inline software keyboard.
+ void HideInlineKeyboard();
+
+ /// Signals the frontend that the current inline keyboard text has changed.
+ void InlineTextChanged();
+
+ /// Signals both the frontend and application that the software keyboard is exiting.
+ void ExitKeyboard();
+
+ // Inline Software Keyboard Requests
+
+ void RequestFinalize(const std::vector<u8>& request_data);
+ void RequestSetUserWordInfo(const std::vector<u8>& request_data);
+ void RequestSetCustomizeDic(const std::vector<u8>& request_data);
+ void RequestCalc(const std::vector<u8>& request_data);
+ void RequestSetCustomizedDictionaries(const std::vector<u8>& request_data);
+ void RequestUnsetCustomizedDictionaries(const std::vector<u8>& request_data);
+ void RequestSetChangedStringV2Flag(const std::vector<u8>& request_data);
+ void RequestSetMovedCursorV2Flag(const std::vector<u8>& request_data);
+
+ // Inline Software Keyboard Replies
+
+ void ReplyFinishedInitialize();
+ void ReplyDefault();
+ void ReplyChangedString();
+ void ReplyMovedCursor();
+ void ReplyMovedTab();
+ void ReplyDecidedEnter();
+ void ReplyDecidedCancel();
+ void ReplyChangedStringUtf8();
+ void ReplyMovedCursorUtf8();
+ void ReplyDecidedEnterUtf8();
+ void ReplyUnsetCustomizeDic();
+ void ReplyReleasedUserWordInfo();
+ void ReplyUnsetCustomizedDictionaries();
+ void ReplyChangedStringV2();
+ void ReplyMovedCursorV2();
+ void ReplyChangedStringUtf8V2();
+ void ReplyMovedCursorUtf8V2();
+
+ Core::Frontend::SoftwareKeyboardApplet& frontend;
+ Core::System& system;
+
+ SwkbdAppletVersion swkbd_applet_version;
+
+ SwkbdConfigCommon swkbd_config_common;
+ SwkbdConfigOld swkbd_config_old;
+ SwkbdConfigOld2 swkbd_config_old2;
+ SwkbdConfigNew swkbd_config_new;
+ std::u16string initial_text;
+
+ SwkbdState swkbd_state{SwkbdState::NotInitialized};
+ SwkbdInitializeArg swkbd_initialize_arg;
+ SwkbdCalcArg swkbd_calc_arg;
+ bool use_changed_string_v2{false};
+ bool use_moved_cursor_v2{false};
+ bool inline_use_utf8{false};
+ s32 current_cursor_position{};
+
+ std::u16string current_text;
+
+ bool is_background{false};
+
+ bool complete{false};
+ ResultCode status{ResultSuccess};
+};
+
+} // namespace Service::AM::Applets
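
The header above only declares the applet side of the exchange; a frontend is expected to call SubmitTextNormal once when a foreground keyboard closes, and SubmitTextInline for every edit or cursor move while the inline keyboard is active. The stub below sketches that call pattern; KeyboardBackend and its printed output are invented for illustration, and only the call shapes mirror the declarations above.

// Hypothetical frontend-side wiring; all types here are stand-ins.
#include <cstdint>
#include <iostream>
#include <string>

enum class ReplyType { ChangedString, MovedCursor, DecidedEnter };
enum class Result { Ok, Cancel };

struct KeyboardBackend {
    // Mirrors SubmitTextInline(reply_type, submitted_text, cursor_position).
    void SubmitTextInline(ReplyType type, std::u16string text, std::int32_t cursor) {
        std::cout << "inline reply " << static_cast<int>(type) << ", length " << text.size()
                  << ", cursor " << cursor << '\n';
    }
    // Mirrors SubmitTextNormal(result, submitted_text).
    void SubmitTextNormal(Result result, std::u16string text) {
        std::cout << "normal submit, ok=" << (result == Result::Ok) << ", length " << text.size()
                  << '\n';
    }
};

int main() {
    KeyboardBackend backend;

    // Inline (background) mode: every edit or cursor move is reported immediately.
    backend.SubmitTextInline(ReplyType::ChangedString, u"yuz", 3);
    backend.SubmitTextInline(ReplyType::MovedCursor, u"yuz", 1);
    backend.SubmitTextInline(ReplyType::DecidedEnter, u"yuzu", 4);

    // Normal (foreground) mode: a single submission when the dialog closes.
    backend.SubmitTextNormal(Result::Ok, u"yuzu");
    return 0;
}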
diff --git a/src/core/hle/service/am/applets/software_keyboard_types.h b/src/core/hle/service/am/applets/applet_software_keyboard_types.h
index 21aa8e800..21aa8e800 100644
--- a/src/core/hle/service/am/applets/software_keyboard_types.h
+++ b/src/core/hle/service/am/applets/applet_software_keyboard_types.h
diff --git a/src/core/hle/service/am/applets/applet_web_browser.cpp b/src/core/hle/service/am/applets/applet_web_browser.cpp
new file mode 100644
index 000000000..35f194961
--- /dev/null
+++ b/src/core/hle/service/am/applets/applet_web_browser.cpp
@@ -0,0 +1,487 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/fs/file.h"
+#include "common/fs/fs.h"
+#include "common/fs/path_util.h"
+#include "common/logging/log.h"
+#include "common/string_util.h"
+#include "core/core.h"
+#include "core/file_sys/content_archive.h"
+#include "core/file_sys/mode.h"
+#include "core/file_sys/nca_metadata.h"
+#include "core/file_sys/patch_manager.h"
+#include "core/file_sys/registered_cache.h"
+#include "core/file_sys/romfs.h"
+#include "core/file_sys/system_archive/system_archive.h"
+#include "core/file_sys/vfs_vector.h"
+#include "core/frontend/applets/web_browser.h"
+#include "core/hle/kernel/k_process.h"
+#include "core/hle/result.h"
+#include "core/hle/service/am/am.h"
+#include "core/hle/service/am/applets/applet_web_browser.h"
+#include "core/hle/service/filesystem/filesystem.h"
+#include "core/hle/service/ns/pl_u.h"
+
+namespace Service::AM::Applets {
+
+namespace {
+
+template <typename T>
+void ParseRawValue(T& value, const std::vector<u8>& data) {
+ static_assert(std::is_trivially_copyable_v<T>,
+ "It's undefined behavior to use memcpy with non-trivially copyable objects");
+ std::memcpy(&value, data.data(), data.size());
+}
+
+template <typename T>
+T ParseRawValue(const std::vector<u8>& data) {
+ T value;
+ ParseRawValue(value, data);
+ return value;
+}
+
+std::string ParseStringValue(const std::vector<u8>& data) {
+ return Common::StringFromFixedZeroTerminatedBuffer(reinterpret_cast<const char*>(data.data()),
+ data.size());
+}
+
+std::string GetMainURL(const std::string& url) {
+ const auto index = url.find('?');
+
+ if (index == std::string::npos) {
+ return url;
+ }
+
+ return url.substr(0, index);
+}
+
+std::string ResolveURL(const std::string& url) {
+ const auto index = url.find_first_of('%');
+
+ if (index == std::string::npos) {
+ return url;
+ }
+
+ return url.substr(0, index) + "lp1" + url.substr(index + 1);
+}
+
+WebArgInputTLVMap ReadWebArgs(const std::vector<u8>& web_arg, WebArgHeader& web_arg_header) {
+ std::memcpy(&web_arg_header, web_arg.data(), sizeof(WebArgHeader));
+
+ if (web_arg.size() == sizeof(WebArgHeader)) {
+ return {};
+ }
+
+ WebArgInputTLVMap input_tlv_map;
+
+ u64 current_offset = sizeof(WebArgHeader);
+
+ for (std::size_t i = 0; i < web_arg_header.total_tlv_entries; ++i) {
+ if (web_arg.size() < current_offset + sizeof(WebArgInputTLV)) {
+ return input_tlv_map;
+ }
+
+ WebArgInputTLV input_tlv;
+ std::memcpy(&input_tlv, web_arg.data() + current_offset, sizeof(WebArgInputTLV));
+
+ current_offset += sizeof(WebArgInputTLV);
+
+ if (web_arg.size() < current_offset + input_tlv.arg_data_size) {
+ return input_tlv_map;
+ }
+
+ std::vector<u8> data(input_tlv.arg_data_size);
+ std::memcpy(data.data(), web_arg.data() + current_offset, input_tlv.arg_data_size);
+
+ current_offset += input_tlv.arg_data_size;
+
+ input_tlv_map.insert_or_assign(input_tlv.input_tlv_type, std::move(data));
+ }
+
+ return input_tlv_map;
+}
+
+FileSys::VirtualFile GetOfflineRomFS(Core::System& system, u64 title_id,
+ FileSys::ContentRecordType nca_type) {
+ if (nca_type == FileSys::ContentRecordType::Data) {
+ const auto nca =
+ system.GetFileSystemController().GetSystemNANDContents()->GetEntry(title_id, nca_type);
+
+ if (nca == nullptr) {
+ LOG_ERROR(Service_AM,
+ "NCA of type={} with title_id={:016X} is not found in the System NAND!",
+ nca_type, title_id);
+ return FileSys::SystemArchive::SynthesizeSystemArchive(title_id);
+ }
+
+ return nca->GetRomFS();
+ } else {
+ const auto nca = system.GetContentProvider().GetEntry(title_id, nca_type);
+
+ if (nca == nullptr) {
+ LOG_ERROR(Service_AM,
+ "NCA of type={} with title_id={:016X} is not found in the ContentProvider!",
+ nca_type, title_id);
+ return nullptr;
+ }
+
+ const FileSys::PatchManager pm{title_id, system.GetFileSystemController(),
+ system.GetContentProvider()};
+
+ return pm.PatchRomFS(nca->GetRomFS(), nca->GetBaseIVFCOffset(), nca_type);
+ }
+}
+
+void ExtractSharedFonts(Core::System& system) {
+ static constexpr std::array<const char*, 7> DECRYPTED_SHARED_FONTS{
+ "FontStandard.ttf",
+ "FontChineseSimplified.ttf",
+ "FontExtendedChineseSimplified.ttf",
+ "FontChineseTraditional.ttf",
+ "FontKorean.ttf",
+ "FontNintendoExtended.ttf",
+ "FontNintendoExtended2.ttf",
+ };
+
+ const auto fonts_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::CacheDir) / "fonts";
+
+ for (std::size_t i = 0; i < NS::SHARED_FONTS.size(); ++i) {
+ const auto font_file_path = fonts_dir / DECRYPTED_SHARED_FONTS[i];
+
+ if (Common::FS::Exists(font_file_path)) {
+ continue;
+ }
+
+ const auto font = NS::SHARED_FONTS[i];
+ const auto font_title_id = static_cast<u64>(font.first);
+
+ const auto nca = system.GetFileSystemController().GetSystemNANDContents()->GetEntry(
+ font_title_id, FileSys::ContentRecordType::Data);
+
+ FileSys::VirtualFile romfs;
+
+ if (!nca) {
+ romfs = FileSys::SystemArchive::SynthesizeSystemArchive(font_title_id);
+ } else {
+ romfs = nca->GetRomFS();
+ }
+
+ if (!romfs) {
+ LOG_ERROR(Service_AM, "SharedFont RomFS with title_id={:016X} cannot be extracted!",
+ font_title_id);
+ continue;
+ }
+
+ const auto extracted_romfs = FileSys::ExtractRomFS(romfs);
+
+ if (!extracted_romfs) {
+ LOG_ERROR(Service_AM, "SharedFont RomFS with title_id={:016X} failed to extract!",
+ font_title_id);
+ continue;
+ }
+
+ const auto font_file = extracted_romfs->GetFile(font.second);
+
+ if (!font_file) {
+ LOG_ERROR(Service_AM, "SharedFont RomFS with title_id={:016X} has no font file \"{}\"!",
+ font_title_id, font.second);
+ continue;
+ }
+
+ std::vector<u32> font_data_u32(font_file->GetSize() / sizeof(u32));
+ font_file->ReadBytes<u32>(font_data_u32.data(), font_file->GetSize());
+
+ std::transform(font_data_u32.begin(), font_data_u32.end(), font_data_u32.begin(),
+ Common::swap32);
+
+ std::vector<u8> decrypted_data(font_file->GetSize() - 8);
+
+ NS::DecryptSharedFontToTTF(font_data_u32, decrypted_data);
+
+ FileSys::VirtualFile decrypted_font = std::make_shared<FileSys::VectorVfsFile>(
+ std::move(decrypted_data), DECRYPTED_SHARED_FONTS[i]);
+
+ const auto temp_dir = system.GetFilesystem()->CreateDirectory(
+ Common::FS::PathToUTF8String(fonts_dir), FileSys::Mode::ReadWrite);
+
+ const auto out_file = temp_dir->CreateFile(DECRYPTED_SHARED_FONTS[i]);
+
+ FileSys::VfsRawCopy(decrypted_font, out_file);
+ }
+}
+
+} // namespace
+
+WebBrowser::WebBrowser(Core::System& system_, LibraryAppletMode applet_mode_,
+ const Core::Frontend::WebBrowserApplet& frontend_)
+ : Applet{system_, applet_mode_}, frontend(frontend_), system{system_} {}
+
+WebBrowser::~WebBrowser() = default;
+
+void WebBrowser::Initialize() {
+ Applet::Initialize();
+
+ LOG_INFO(Service_AM, "Initializing Web Browser Applet.");
+
+ LOG_DEBUG(Service_AM,
+ "Initializing Applet with common_args: arg_version={}, lib_version={}, "
+ "play_startup_sound={}, size={}, system_tick={}, theme_color={}",
+ common_args.arguments_version, common_args.library_version,
+ common_args.play_startup_sound, common_args.size, common_args.system_tick,
+ common_args.theme_color);
+
+ web_applet_version = WebAppletVersion{common_args.library_version};
+
+ const auto web_arg_storage = broker.PopNormalDataToApplet();
+ ASSERT(web_arg_storage != nullptr);
+
+ const auto& web_arg = web_arg_storage->GetData();
+ ASSERT_OR_EXECUTE(web_arg.size() >= sizeof(WebArgHeader), { return; });
+
+ web_arg_input_tlv_map = ReadWebArgs(web_arg, web_arg_header);
+
+ LOG_DEBUG(Service_AM, "WebArgHeader: total_tlv_entries={}, shim_kind={}",
+ web_arg_header.total_tlv_entries, web_arg_header.shim_kind);
+
+ ExtractSharedFonts(system);
+
+ switch (web_arg_header.shim_kind) {
+ case ShimKind::Shop:
+ InitializeShop();
+ break;
+ case ShimKind::Login:
+ InitializeLogin();
+ break;
+ case ShimKind::Offline:
+ InitializeOffline();
+ break;
+ case ShimKind::Share:
+ InitializeShare();
+ break;
+ case ShimKind::Web:
+ InitializeWeb();
+ break;
+ case ShimKind::Wifi:
+ InitializeWifi();
+ break;
+ case ShimKind::Lobby:
+ InitializeLobby();
+ break;
+ default:
+ UNREACHABLE_MSG("Invalid ShimKind={}", web_arg_header.shim_kind);
+ break;
+ }
+}
+
+bool WebBrowser::TransactionComplete() const {
+ return complete;
+}
+
+ResultCode WebBrowser::GetStatus() const {
+ return status;
+}
+
+void WebBrowser::ExecuteInteractive() {
+ UNIMPLEMENTED_MSG("WebSession is not implemented");
+}
+
+void WebBrowser::Execute() {
+ switch (web_arg_header.shim_kind) {
+ case ShimKind::Shop:
+ ExecuteShop();
+ break;
+ case ShimKind::Login:
+ ExecuteLogin();
+ break;
+ case ShimKind::Offline:
+ ExecuteOffline();
+ break;
+ case ShimKind::Share:
+ ExecuteShare();
+ break;
+ case ShimKind::Web:
+ ExecuteWeb();
+ break;
+ case ShimKind::Wifi:
+ ExecuteWifi();
+ break;
+ case ShimKind::Lobby:
+ ExecuteLobby();
+ break;
+ default:
+ UNREACHABLE_MSG("Invalid ShimKind={}", web_arg_header.shim_kind);
+ WebBrowserExit(WebExitReason::EndButtonPressed);
+ break;
+ }
+}
+
+void WebBrowser::ExtractOfflineRomFS() {
+ LOG_DEBUG(Service_AM, "Extracting RomFS to {}",
+ Common::FS::PathToUTF8String(offline_cache_dir));
+
+ const auto extracted_romfs_dir =
+ FileSys::ExtractRomFS(offline_romfs, FileSys::RomFSExtractionType::SingleDiscard);
+
+ const auto temp_dir = system.GetFilesystem()->CreateDirectory(
+ Common::FS::PathToUTF8String(offline_cache_dir), FileSys::Mode::ReadWrite);
+
+ FileSys::VfsRawCopyD(extracted_romfs_dir, temp_dir);
+}
+
+void WebBrowser::WebBrowserExit(WebExitReason exit_reason, std::string last_url) {
+ if ((web_arg_header.shim_kind == ShimKind::Share &&
+ web_applet_version >= WebAppletVersion::Version196608) ||
+ (web_arg_header.shim_kind == ShimKind::Web &&
+ web_applet_version >= WebAppletVersion::Version524288)) {
+ // TODO: Push Output TLVs instead of a WebCommonReturnValue
+ }
+
+ WebCommonReturnValue web_common_return_value;
+
+ web_common_return_value.exit_reason = exit_reason;
+ std::memcpy(&web_common_return_value.last_url, last_url.data(), last_url.size());
+ web_common_return_value.last_url_size = last_url.size();
+
+ LOG_DEBUG(Service_AM, "WebCommonReturnValue: exit_reason={}, last_url={}, last_url_size={}",
+ exit_reason, last_url, last_url.size());
+
+ complete = true;
+ std::vector<u8> out_data(sizeof(WebCommonReturnValue));
+ std::memcpy(out_data.data(), &web_common_return_value, out_data.size());
+ broker.PushNormalDataFromApplet(std::make_shared<IStorage>(system, std::move(out_data)));
+ broker.SignalStateChanged();
+}
+
+bool WebBrowser::InputTLVExistsInMap(WebArgInputTLVType input_tlv_type) const {
+ return web_arg_input_tlv_map.find(input_tlv_type) != web_arg_input_tlv_map.end();
+}
+
+std::optional<std::vector<u8>> WebBrowser::GetInputTLVData(WebArgInputTLVType input_tlv_type) {
+ const auto map_it = web_arg_input_tlv_map.find(input_tlv_type);
+
+ if (map_it == web_arg_input_tlv_map.end()) {
+ return std::nullopt;
+ }
+
+ return map_it->second;
+}
+
+void WebBrowser::InitializeShop() {}
+
+void WebBrowser::InitializeLogin() {}
+
+void WebBrowser::InitializeOffline() {
+ const auto document_path =
+ ParseStringValue(GetInputTLVData(WebArgInputTLVType::DocumentPath).value());
+
+ const auto document_kind =
+ ParseRawValue<DocumentKind>(GetInputTLVData(WebArgInputTLVType::DocumentKind).value());
+
+ std::string additional_paths;
+
+ switch (document_kind) {
+ case DocumentKind::OfflineHtmlPage:
+ default:
+ title_id = system.CurrentProcess()->GetTitleID();
+ nca_type = FileSys::ContentRecordType::HtmlDocument;
+ additional_paths = "html-document";
+ break;
+ case DocumentKind::ApplicationLegalInformation:
+ title_id = ParseRawValue<u64>(GetInputTLVData(WebArgInputTLVType::ApplicationID).value());
+ nca_type = FileSys::ContentRecordType::LegalInformation;
+ break;
+ case DocumentKind::SystemDataPage:
+ title_id = ParseRawValue<u64>(GetInputTLVData(WebArgInputTLVType::SystemDataID).value());
+ nca_type = FileSys::ContentRecordType::Data;
+ break;
+ }
+
+ static constexpr std::array<const char*, 3> RESOURCE_TYPES{
+ "manual",
+ "legal_information",
+ "system_data",
+ };
+
+ offline_cache_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::CacheDir) /
+ fmt::format("offline_web_applet_{}/{:016X}",
+ RESOURCE_TYPES[static_cast<u32>(document_kind) - 1], title_id);
+
+ offline_document = Common::FS::ConcatPathSafe(
+ offline_cache_dir, fmt::format("{}/{}", additional_paths, document_path));
+}
+
+void WebBrowser::InitializeShare() {}
+
+void WebBrowser::InitializeWeb() {
+ external_url = ParseStringValue(GetInputTLVData(WebArgInputTLVType::InitialURL).value());
+
+ // Resolve Nintendo CDN URLs.
+ external_url = ResolveURL(external_url);
+}
+
+void WebBrowser::InitializeWifi() {}
+
+void WebBrowser::InitializeLobby() {}
+
+void WebBrowser::ExecuteShop() {
+ LOG_WARNING(Service_AM, "(STUBBED) called, Shop Applet is not implemented");
+ WebBrowserExit(WebExitReason::EndButtonPressed);
+}
+
+void WebBrowser::ExecuteLogin() {
+ LOG_WARNING(Service_AM, "(STUBBED) called, Login Applet is not implemented");
+ WebBrowserExit(WebExitReason::EndButtonPressed);
+}
+
+void WebBrowser::ExecuteOffline() {
+ const auto main_url = GetMainURL(Common::FS::PathToUTF8String(offline_document));
+
+ if (!Common::FS::Exists(main_url)) {
+ offline_romfs = GetOfflineRomFS(system, title_id, nca_type);
+
+ if (offline_romfs == nullptr) {
+ LOG_ERROR(Service_AM,
+ "RomFS with title_id={:016X} and nca_type={} cannot be extracted!", title_id,
+ nca_type);
+ WebBrowserExit(WebExitReason::WindowClosed);
+ return;
+ }
+ }
+
+ LOG_INFO(Service_AM, "Opening offline document at {}",
+ Common::FS::PathToUTF8String(offline_document));
+
+ frontend.OpenLocalWebPage(
+ Common::FS::PathToUTF8String(offline_document), [this] { ExtractOfflineRomFS(); },
+ [this](WebExitReason exit_reason, std::string last_url) {
+ WebBrowserExit(exit_reason, last_url);
+ });
+}
+
+void WebBrowser::ExecuteShare() {
+ LOG_WARNING(Service_AM, "(STUBBED) called, Share Applet is not implemented");
+ WebBrowserExit(WebExitReason::EndButtonPressed);
+}
+
+void WebBrowser::ExecuteWeb() {
+ LOG_INFO(Service_AM, "Opening external URL at {}", external_url);
+
+ frontend.OpenExternalWebPage(external_url,
+ [this](WebExitReason exit_reason, std::string last_url) {
+ WebBrowserExit(exit_reason, last_url);
+ });
+}
+
+void WebBrowser::ExecuteWifi() {
+ LOG_WARNING(Service_AM, "(STUBBED) called, Wifi Applet is not implemented");
+ WebBrowserExit(WebExitReason::EndButtonPressed);
+}
+
+void WebBrowser::ExecuteLobby() {
+ LOG_WARNING(Service_AM, "(STUBBED) called, Lobby Applet is not implemented");
+ WebBrowserExit(WebExitReason::EndButtonPressed);
+}
+} // namespace Service::AM::Applets
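
ReadWebArgs above walks a WebArgHeader followed by a sequence of type-length-value entries, giving up as soon as an entry header or its claimed payload would run past the end of the buffer. The sketch below reproduces that walk in isolation; the Header and EntryHeader layouts are simplified stand-ins rather than the actual wire structs.

// Standalone sketch of the bounds-checked TLV walk performed by ReadWebArgs.
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <map>
#include <vector>

struct Header {
    std::uint16_t total_entries;
    std::uint16_t padding;
};

struct EntryHeader {
    std::uint16_t type;
    std::uint16_t size;
};

using TlvMap = std::map<std::uint16_t, std::vector<std::uint8_t>>;

TlvMap ParseTlvs(const std::vector<std::uint8_t>& buffer) {
    TlvMap out;
    if (buffer.size() < sizeof(Header)) {
        return out;
    }

    Header header{};
    std::memcpy(&header, buffer.data(), sizeof(header));

    std::size_t offset = sizeof(Header);
    for (std::uint16_t i = 0; i < header.total_entries; ++i) {
        // Stop early if the next entry header would overrun the buffer.
        if (buffer.size() < offset + sizeof(EntryHeader)) {
            return out;
        }
        EntryHeader entry{};
        std::memcpy(&entry, buffer.data() + offset, sizeof(entry));
        offset += sizeof(EntryHeader);

        // Likewise for the payload length claimed by the entry.
        if (buffer.size() < offset + entry.size) {
            return out;
        }
        std::vector<std::uint8_t> data(entry.size);
        std::memcpy(data.data(), buffer.data() + offset, entry.size);
        offset += entry.size;

        // Later duplicates replace earlier ones, as with insert_or_assign above.
        out[entry.type] = std::move(data);
    }
    return out;
}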
diff --git a/src/core/hle/service/am/applets/applet_web_browser.h b/src/core/hle/service/am/applets/applet_web_browser.h
new file mode 100644
index 000000000..4f9e81b79
--- /dev/null
+++ b/src/core/hle/service/am/applets/applet_web_browser.h
@@ -0,0 +1,88 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <filesystem>
+#include <optional>
+
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+#include "core/file_sys/vfs_types.h"
+#include "core/hle/result.h"
+#include "core/hle/service/am/applets/applet_web_browser_types.h"
+#include "core/hle/service/am/applets/applets.h"
+
+namespace Core {
+class System;
+}
+
+namespace FileSys {
+enum class ContentRecordType : u8;
+}
+
+namespace Service::AM::Applets {
+
+class WebBrowser final : public Applet {
+public:
+ WebBrowser(Core::System& system_, LibraryAppletMode applet_mode_,
+ const Core::Frontend::WebBrowserApplet& frontend_);
+
+ ~WebBrowser() override;
+
+ void Initialize() override;
+
+ bool TransactionComplete() const override;
+ ResultCode GetStatus() const override;
+ void ExecuteInteractive() override;
+ void Execute() override;
+
+ void ExtractOfflineRomFS();
+
+ void WebBrowserExit(WebExitReason exit_reason, std::string last_url = "");
+
+private:
+ bool InputTLVExistsInMap(WebArgInputTLVType input_tlv_type) const;
+
+ std::optional<std::vector<u8>> GetInputTLVData(WebArgInputTLVType input_tlv_type);
+
+ // Initializers for the various types of browser applets
+ void InitializeShop();
+ void InitializeLogin();
+ void InitializeOffline();
+ void InitializeShare();
+ void InitializeWeb();
+ void InitializeWifi();
+ void InitializeLobby();
+
+ // Executors for the various types of browser applets
+ void ExecuteShop();
+ void ExecuteLogin();
+ void ExecuteOffline();
+ void ExecuteShare();
+ void ExecuteWeb();
+ void ExecuteWifi();
+ void ExecuteLobby();
+
+ const Core::Frontend::WebBrowserApplet& frontend;
+
+ bool complete{false};
+ ResultCode status{ResultSuccess};
+
+ WebAppletVersion web_applet_version{};
+ WebArgHeader web_arg_header{};
+ WebArgInputTLVMap web_arg_input_tlv_map;
+
+ u64 title_id{};
+ FileSys::ContentRecordType nca_type{};
+ std::filesystem::path offline_cache_dir;
+ std::filesystem::path offline_document;
+ FileSys::VirtualFile offline_romfs;
+
+ std::string external_url;
+
+ Core::System& system;
+};
+
+} // namespace Service::AM::Applets
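
ExecuteOffline and ExecuteWeb in the matching .cpp hand the frontend two callbacks: one that extracts the offline RomFS into the cache directory on demand, and one that reports the exit reason and last URL back through WebBrowserExit. The stub frontend below is hypothetical and only illustrates that contract; none of its types, names, or output come from the real frontend interface.

// Hypothetical stub frontend showing the callback contract; all types are stand-ins.
#include <functional>
#include <iostream>
#include <string>

enum class ExitReason { EndButtonPressed, CallbackURL, WindowClosed };

struct StubWebFrontend {
    void OpenLocalWebPage(const std::string& local_path, std::function<void()> extract_romfs,
                          std::function<void(ExitReason, std::string)> on_exit) const {
        // Extract the document first, display it, then report how it was closed.
        extract_romfs();
        std::cout << "showing " << local_path << '\n';
        on_exit(ExitReason::WindowClosed, "file:///" + local_path);
    }

    void OpenExternalWebPage(const std::string& url,
                             std::function<void(ExitReason, std::string)> on_exit) const {
        std::cout << "opening " << url << '\n';
        on_exit(ExitReason::EndButtonPressed, url);
    }
};

int main() {
    const StubWebFrontend frontend;
    frontend.OpenLocalWebPage(
        "offline_web_applet_manual/index.html", [] { std::cout << "extracting RomFS\n"; },
        [](ExitReason, const std::string& last) { std::cout << "exited at " << last << '\n'; });
    frontend.OpenExternalWebPage("https://example.com", [](ExitReason, const std::string& last) {
        std::cout << "exited at " << last << '\n';
    });
    return 0;
}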
diff --git a/src/core/hle/service/am/applets/web_types.h b/src/core/hle/service/am/applets/applet_web_browser_types.h
index 419c2bf79..419c2bf79 100644
--- a/src/core/hle/service/am/applets/web_types.h
+++ b/src/core/hle/service/am/applets/applet_web_browser_types.h
diff --git a/src/core/hle/service/am/applets/applets.cpp b/src/core/hle/service/am/applets/applets.cpp
index ae995df6b..2b7685d42 100644
--- a/src/core/hle/service/am/applets/applets.cpp
+++ b/src/core/hle/service/am/applets/applets.cpp
@@ -17,13 +17,13 @@
#include "core/hle/service/am/am.h"
#include "core/hle/service/am/applet_ae.h"
#include "core/hle/service/am/applet_oe.h"
+#include "core/hle/service/am/applets/applet_controller.h"
+#include "core/hle/service/am/applets/applet_error.h"
+#include "core/hle/service/am/applets/applet_general_backend.h"
+#include "core/hle/service/am/applets/applet_profile_select.h"
+#include "core/hle/service/am/applets/applet_software_keyboard.h"
+#include "core/hle/service/am/applets/applet_web_browser.h"
#include "core/hle/service/am/applets/applets.h"
-#include "core/hle/service/am/applets/controller.h"
-#include "core/hle/service/am/applets/error.h"
-#include "core/hle/service/am/applets/general_backend.h"
-#include "core/hle/service/am/applets/profile_select.h"
-#include "core/hle/service/am/applets/software_keyboard.h"
-#include "core/hle/service/am/applets/web_browser.h"
#include "core/hle/service/sm/sm.h"
namespace Service::AM::Applets {

diff --git a/src/core/hle/service/am/applets/controller.cpp b/src/core/hle/service/am/applets/controller.cpp
deleted file mode 100644
index 218c8d1e4..000000000
--- a/src/core/hle/service/am/applets/controller.cpp
+++ /dev/null
@@ -1,253 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <cstring>
-
-#include "common/assert.h"
-#include "common/logging/log.h"
-#include "common/string_util.h"
-#include "core/core.h"
-#include "core/frontend/applets/controller.h"
-#include "core/hle/result.h"
-#include "core/hle/service/am/am.h"
-#include "core/hle/service/am/applets/controller.h"
-#include "core/hle/service/hid/controllers/npad.h"
-
-namespace Service::AM::Applets {
-
-// This error code (0x183ACA) is thrown when the applet fails to initialize.
-[[maybe_unused]] constexpr ResultCode ERR_CONTROLLER_APPLET_3101{ErrorModule::HID, 3101};
-// This error code (0x183CCA) is thrown when the u32 result in ControllerSupportResultInfo is 2.
-[[maybe_unused]] constexpr ResultCode ERR_CONTROLLER_APPLET_3102{ErrorModule::HID, 3102};
-
-static Core::Frontend::ControllerParameters ConvertToFrontendParameters(
- ControllerSupportArgPrivate private_arg, ControllerSupportArgHeader header, bool enable_text,
- std::vector<IdentificationColor> identification_colors, std::vector<ExplainText> text) {
- HID::Controller_NPad::NpadStyleSet npad_style_set;
- npad_style_set.raw = private_arg.style_set;
-
- return {
- .min_players = std::max(s8{1}, header.player_count_min),
- .max_players = header.player_count_max,
- .keep_controllers_connected = header.enable_take_over_connection,
- .enable_single_mode = header.enable_single_mode,
- .enable_border_color = header.enable_identification_color,
- .border_colors = std::move(identification_colors),
- .enable_explain_text = enable_text,
- .explain_text = std::move(text),
- .allow_pro_controller = npad_style_set.fullkey == 1,
- .allow_handheld = npad_style_set.handheld == 1,
- .allow_dual_joycons = npad_style_set.joycon_dual == 1,
- .allow_left_joycon = npad_style_set.joycon_left == 1,
- .allow_right_joycon = npad_style_set.joycon_right == 1,
- };
-}
-
-Controller::Controller(Core::System& system_, LibraryAppletMode applet_mode_,
- const Core::Frontend::ControllerApplet& frontend_)
- : Applet{system_, applet_mode_}, frontend{frontend_}, system{system_} {}
-
-Controller::~Controller() = default;
-
-void Controller::Initialize() {
- Applet::Initialize();
-
- LOG_INFO(Service_HID, "Initializing Controller Applet.");
-
- LOG_DEBUG(Service_HID,
- "Initializing Applet with common_args: arg_version={}, lib_version={}, "
- "play_startup_sound={}, size={}, system_tick={}, theme_color={}",
- common_args.arguments_version, common_args.library_version,
- common_args.play_startup_sound, common_args.size, common_args.system_tick,
- common_args.theme_color);
-
- controller_applet_version = ControllerAppletVersion{common_args.library_version};
-
- const auto private_arg_storage = broker.PopNormalDataToApplet();
- ASSERT(private_arg_storage != nullptr);
-
- const auto& private_arg = private_arg_storage->GetData();
- ASSERT(private_arg.size() == sizeof(ControllerSupportArgPrivate));
-
- std::memcpy(&controller_private_arg, private_arg.data(), private_arg.size());
- ASSERT_MSG(controller_private_arg.arg_private_size == sizeof(ControllerSupportArgPrivate),
- "Unknown ControllerSupportArgPrivate revision={} with size={}",
- controller_applet_version, controller_private_arg.arg_private_size);
-
- // Some games such as Cave Story+ set invalid values for the ControllerSupportMode.
- // Defer to arg_size to set the ControllerSupportMode.
- if (controller_private_arg.mode >= ControllerSupportMode::MaxControllerSupportMode) {
- switch (controller_private_arg.arg_size) {
- case sizeof(ControllerSupportArgOld):
- case sizeof(ControllerSupportArgNew):
- controller_private_arg.mode = ControllerSupportMode::ShowControllerSupport;
- break;
- case sizeof(ControllerUpdateFirmwareArg):
- controller_private_arg.mode = ControllerSupportMode::ShowControllerFirmwareUpdate;
- break;
- default:
- UNIMPLEMENTED_MSG("Unknown ControllerPrivateArg mode={} with arg_size={}",
- controller_private_arg.mode, controller_private_arg.arg_size);
- controller_private_arg.mode = ControllerSupportMode::ShowControllerSupport;
- break;
- }
- }
-
- // Some games such as Cave Story+ set invalid values for the ControllerSupportCaller.
- // This is always 0 (Application) except with ShowControllerFirmwareUpdateForSystem.
- if (controller_private_arg.caller >= ControllerSupportCaller::MaxControllerSupportCaller) {
- if (controller_private_arg.flag_1 &&
- controller_private_arg.mode == ControllerSupportMode::ShowControllerFirmwareUpdate) {
- controller_private_arg.caller = ControllerSupportCaller::System;
- } else {
- controller_private_arg.caller = ControllerSupportCaller::Application;
- }
- }
-
- switch (controller_private_arg.mode) {
- case ControllerSupportMode::ShowControllerSupport:
- case ControllerSupportMode::ShowControllerStrapGuide: {
- const auto user_arg_storage = broker.PopNormalDataToApplet();
- ASSERT(user_arg_storage != nullptr);
-
- const auto& user_arg = user_arg_storage->GetData();
- switch (controller_applet_version) {
- case ControllerAppletVersion::Version3:
- case ControllerAppletVersion::Version4:
- case ControllerAppletVersion::Version5:
- ASSERT(user_arg.size() == sizeof(ControllerSupportArgOld));
- std::memcpy(&controller_user_arg_old, user_arg.data(), user_arg.size());
- break;
- case ControllerAppletVersion::Version7:
- ASSERT(user_arg.size() == sizeof(ControllerSupportArgNew));
- std::memcpy(&controller_user_arg_new, user_arg.data(), user_arg.size());
- break;
- default:
- UNIMPLEMENTED_MSG("Unknown ControllerSupportArg revision={} with size={}",
- controller_applet_version, controller_private_arg.arg_size);
- ASSERT(user_arg.size() >= sizeof(ControllerSupportArgNew));
- std::memcpy(&controller_user_arg_new, user_arg.data(), sizeof(ControllerSupportArgNew));
- break;
- }
- break;
- }
- case ControllerSupportMode::ShowControllerFirmwareUpdate: {
- const auto update_arg_storage = broker.PopNormalDataToApplet();
- ASSERT(update_arg_storage != nullptr);
-
- const auto& update_arg = update_arg_storage->GetData();
- ASSERT(update_arg.size() == sizeof(ControllerUpdateFirmwareArg));
-
- std::memcpy(&controller_update_arg, update_arg.data(), update_arg.size());
- break;
- }
- default: {
- UNIMPLEMENTED_MSG("Unimplemented ControllerSupportMode={}", controller_private_arg.mode);
- break;
- }
- }
-}
-
-bool Controller::TransactionComplete() const {
- return complete;
-}
-
-ResultCode Controller::GetStatus() const {
- return status;
-}
-
-void Controller::ExecuteInteractive() {
- UNREACHABLE_MSG("Attempted to call interactive execution on non-interactive applet.");
-}
-
-void Controller::Execute() {
- switch (controller_private_arg.mode) {
- case ControllerSupportMode::ShowControllerSupport: {
- const auto parameters = [this] {
- switch (controller_applet_version) {
- case ControllerAppletVersion::Version3:
- case ControllerAppletVersion::Version4:
- case ControllerAppletVersion::Version5:
- return ConvertToFrontendParameters(
- controller_private_arg, controller_user_arg_old.header,
- controller_user_arg_old.enable_explain_text,
- std::vector<IdentificationColor>(
- controller_user_arg_old.identification_colors.begin(),
- controller_user_arg_old.identification_colors.end()),
- std::vector<ExplainText>(controller_user_arg_old.explain_text.begin(),
- controller_user_arg_old.explain_text.end()));
- case ControllerAppletVersion::Version7:
- default:
- return ConvertToFrontendParameters(
- controller_private_arg, controller_user_arg_new.header,
- controller_user_arg_new.enable_explain_text,
- std::vector<IdentificationColor>(
- controller_user_arg_new.identification_colors.begin(),
- controller_user_arg_new.identification_colors.end()),
- std::vector<ExplainText>(controller_user_arg_new.explain_text.begin(),
- controller_user_arg_new.explain_text.end()));
- }
- }();
-
- is_single_mode = parameters.enable_single_mode;
-
- LOG_DEBUG(Service_HID,
- "Controller Parameters: min_players={}, max_players={}, "
- "keep_controllers_connected={}, enable_single_mode={}, enable_border_color={}, "
- "enable_explain_text={}, allow_pro_controller={}, allow_handheld={}, "
- "allow_dual_joycons={}, allow_left_joycon={}, allow_right_joycon={}",
- parameters.min_players, parameters.max_players,
- parameters.keep_controllers_connected, parameters.enable_single_mode,
- parameters.enable_border_color, parameters.enable_explain_text,
- parameters.allow_pro_controller, parameters.allow_handheld,
- parameters.allow_dual_joycons, parameters.allow_left_joycon,
- parameters.allow_right_joycon);
-
- frontend.ReconfigureControllers([this] { ConfigurationComplete(); }, parameters);
- break;
- }
- case ControllerSupportMode::ShowControllerStrapGuide:
- case ControllerSupportMode::ShowControllerFirmwareUpdate:
- UNIMPLEMENTED_MSG("ControllerSupportMode={} is not implemented",
- controller_private_arg.mode);
- ConfigurationComplete();
- break;
- default: {
- ConfigurationComplete();
- break;
- }
- }
-}
-
-void Controller::ConfigurationComplete() {
- ControllerSupportResultInfo result_info{};
-
- const auto& players = Settings::values.players.GetValue();
-
- // If enable_single_mode is enabled, player_count is 1 regardless of any other parameters.
- // Otherwise, only count connected players from P1-P8.
- result_info.player_count =
- is_single_mode
- ? 1
- : static_cast<s8>(std::count_if(players.begin(), players.end() - 2,
- [](const auto& player) { return player.connected; }));
-
- result_info.selected_id = HID::Controller_NPad::IndexToNPad(std::distance(
- players.begin(), std::find_if(players.begin(), players.end(),
- [](const auto& player) { return player.connected; })));
-
- result_info.result = 0;
-
- LOG_DEBUG(Service_HID, "Result Info: player_count={}, selected_id={}, result={}",
- result_info.player_count, result_info.selected_id, result_info.result);
-
- complete = true;
- out_data = std::vector<u8>(sizeof(ControllerSupportResultInfo));
- std::memcpy(out_data.data(), &result_info, out_data.size());
- broker.PushNormalDataFromApplet(std::make_shared<IStorage>(system, std::move(out_data)));
- broker.SignalStateChanged();
-}
-
-} // namespace Service::AM::Applets
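
The sanitization step in Controller::Initialize above falls back to the size of the user argument blob when a game reports an out-of-range ControllerSupportMode. The sketch below isolates that fallback, reusing the struct sizes asserted in the accompanying header (0x21C, 0x430 and 0x4); everything else is a stand-in.

// Standalone sketch of recovering the intended mode from the argument size.
#include <cstddef>
#include <cstdint>

enum class SupportMode : std::uint8_t {
    ShowControllerSupport,
    ShowControllerStrapGuide,
    ShowControllerFirmwareUpdate,
    MaxControllerSupportMode,
};

constexpr std::size_t kSupportArgOldSize = 0x21C;   // sizeof(ControllerSupportArgOld)
constexpr std::size_t kSupportArgNewSize = 0x430;   // sizeof(ControllerSupportArgNew)
constexpr std::size_t kFirmwareUpdateArgSize = 0x4; // sizeof(ControllerUpdateFirmwareArg)

SupportMode SanitizeMode(SupportMode mode, std::size_t arg_size) {
    if (mode < SupportMode::MaxControllerSupportMode) {
        return mode; // reported mode is already valid
    }
    switch (arg_size) {
    case kSupportArgOldSize:
    case kSupportArgNewSize:
        return SupportMode::ShowControllerSupport;
    case kFirmwareUpdateArgSize:
        return SupportMode::ShowControllerFirmwareUpdate;
    default:
        // Unknown blob size: fall back to plain controller support, as above.
        return SupportMode::ShowControllerSupport;
    }
}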
diff --git a/src/core/hle/service/am/applets/controller.h b/src/core/hle/service/am/applets/controller.h
deleted file mode 100644
index 20617e91f..000000000
--- a/src/core/hle/service/am/applets/controller.h
+++ /dev/null
@@ -1,137 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <array>
-#include <vector>
-
-#include "common/common_funcs.h"
-#include "common/common_types.h"
-#include "core/hle/result.h"
-#include "core/hle/service/am/applets/applets.h"
-
-namespace Core {
-class System;
-}
-
-namespace Service::AM::Applets {
-
-using IdentificationColor = std::array<u8, 4>;
-using ExplainText = std::array<char, 0x81>;
-
-enum class ControllerAppletVersion : u32_le {
- Version3 = 0x3, // 1.0.0 - 2.3.0
- Version4 = 0x4, // 3.0.0 - 5.1.0
- Version5 = 0x5, // 6.0.0 - 7.0.1
- Version7 = 0x7, // 8.0.0+
-};
-
-enum class ControllerSupportMode : u8 {
- ShowControllerSupport,
- ShowControllerStrapGuide,
- ShowControllerFirmwareUpdate,
-
- MaxControllerSupportMode,
-};
-
-enum class ControllerSupportCaller : u8 {
- Application,
- System,
-
- MaxControllerSupportCaller,
-};
-
-struct ControllerSupportArgPrivate {
- u32 arg_private_size{};
- u32 arg_size{};
- bool flag_0{};
- bool flag_1{};
- ControllerSupportMode mode{};
- ControllerSupportCaller caller{};
- u32 style_set{};
- u32 joy_hold_type{};
-};
-static_assert(sizeof(ControllerSupportArgPrivate) == 0x14,
- "ControllerSupportArgPrivate has incorrect size.");
-
-struct ControllerSupportArgHeader {
- s8 player_count_min{};
- s8 player_count_max{};
- bool enable_take_over_connection{};
- bool enable_left_justify{};
- bool enable_permit_joy_dual{};
- bool enable_single_mode{};
- bool enable_identification_color{};
-};
-static_assert(sizeof(ControllerSupportArgHeader) == 0x7,
- "ControllerSupportArgHeader has incorrect size.");
-
-// LibraryAppletVersion 0x3, 0x4, 0x5
-struct ControllerSupportArgOld {
- ControllerSupportArgHeader header{};
- std::array<IdentificationColor, 4> identification_colors{};
- bool enable_explain_text{};
- std::array<ExplainText, 4> explain_text{};
-};
-static_assert(sizeof(ControllerSupportArgOld) == 0x21C,
- "ControllerSupportArgOld has incorrect size.");
-
-// LibraryAppletVersion 0x7
-struct ControllerSupportArgNew {
- ControllerSupportArgHeader header{};
- std::array<IdentificationColor, 8> identification_colors{};
- bool enable_explain_text{};
- std::array<ExplainText, 8> explain_text{};
-};
-static_assert(sizeof(ControllerSupportArgNew) == 0x430,
- "ControllerSupportArgNew has incorrect size.");
-
-struct ControllerUpdateFirmwareArg {
- bool enable_force_update{};
- INSERT_PADDING_BYTES(3);
-};
-static_assert(sizeof(ControllerUpdateFirmwareArg) == 0x4,
- "ControllerUpdateFirmwareArg has incorrect size.");
-
-struct ControllerSupportResultInfo {
- s8 player_count{};
- INSERT_PADDING_BYTES(3);
- u32 selected_id{};
- u32 result{};
-};
-static_assert(sizeof(ControllerSupportResultInfo) == 0xC,
- "ControllerSupportResultInfo has incorrect size.");
-
-class Controller final : public Applet {
-public:
- explicit Controller(Core::System& system_, LibraryAppletMode applet_mode_,
- const Core::Frontend::ControllerApplet& frontend_);
- ~Controller() override;
-
- void Initialize() override;
-
- bool TransactionComplete() const override;
- ResultCode GetStatus() const override;
- void ExecuteInteractive() override;
- void Execute() override;
-
- void ConfigurationComplete();
-
-private:
- const Core::Frontend::ControllerApplet& frontend;
- Core::System& system;
-
- ControllerAppletVersion controller_applet_version;
- ControllerSupportArgPrivate controller_private_arg;
- ControllerSupportArgOld controller_user_arg_old;
- ControllerSupportArgNew controller_user_arg_new;
- ControllerUpdateFirmwareArg controller_update_arg;
- bool complete{false};
- ResultCode status{ResultSuccess};
- bool is_single_mode{false};
- std::vector<u8> out_data;
-};
-
-} // namespace Service::AM::Applets
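
ConvertToFrontendParameters in the deleted controller.cpp reinterprets the raw style_set word as individual per-controller-type bits before handing them to the frontend. The sketch below does the same with plain masks; the bit positions are an assumption based on the usual HID NpadStyleTag ordering and are not stated anywhere in this diff.

// Sketch of decoding a raw npad style_set word into allowed controller types.
// NOTE: bit positions (fullkey=0, handheld=1, joycon_dual=2, joycon_left=3,
// joycon_right=4) are assumed for illustration, not taken from the diff.
#include <cstdint>

struct AllowedControllers {
    bool pro_controller;
    bool handheld;
    bool dual_joycons;
    bool left_joycon;
    bool right_joycon;
};

constexpr AllowedControllers DecodeStyleSet(std::uint32_t style_set) {
    return {
        (style_set & (1u << 0)) != 0, // fullkey / Pro Controller
        (style_set & (1u << 1)) != 0, // handheld
        (style_set & (1u << 2)) != 0, // dual Joy-Cons
        (style_set & (1u << 3)) != 0, // left Joy-Con
        (style_set & (1u << 4)) != 0, // right Joy-Con
    };
}

static_assert(DecodeStyleSet(0b00111).pro_controller && DecodeStyleSet(0b00111).dual_joycons &&
              !DecodeStyleSet(0b00111).left_joycon);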
diff --git a/src/core/hle/service/am/applets/error.cpp b/src/core/hle/service/am/applets/error.cpp
deleted file mode 100644
index c724e5d5b..000000000
--- a/src/core/hle/service/am/applets/error.cpp
+++ /dev/null
@@ -1,194 +0,0 @@
-// Copyright 2019 yuzu emulator team
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <array>
-#include <cstring>
-#include "common/assert.h"
-#include "common/logging/log.h"
-#include "common/string_util.h"
-#include "core/core.h"
-#include "core/frontend/applets/error.h"
-#include "core/hle/kernel/k_process.h"
-#include "core/hle/service/am/am.h"
-#include "core/hle/service/am/applets/error.h"
-#include "core/reporter.h"
-
-namespace Service::AM::Applets {
-
-#pragma pack(push, 4)
-struct ShowError {
- u8 mode;
- bool jump;
- INSERT_PADDING_BYTES_NOINIT(4);
- bool use_64bit_error_code;
- INSERT_PADDING_BYTES_NOINIT(1);
- u64 error_code_64;
- u32 error_code_32;
-};
-static_assert(sizeof(ShowError) == 0x14, "ShowError has incorrect size.");
-#pragma pack(pop)
-
-struct ShowErrorRecord {
- u8 mode;
- bool jump;
- INSERT_PADDING_BYTES_NOINIT(6);
- u64 error_code_64;
- u64 posix_time;
-};
-static_assert(sizeof(ShowErrorRecord) == 0x18, "ShowErrorRecord has incorrect size.");
-
-struct SystemErrorArg {
- u8 mode;
- bool jump;
- INSERT_PADDING_BYTES_NOINIT(6);
- u64 error_code_64;
- std::array<char, 8> language_code;
- std::array<char, 0x800> main_text;
- std::array<char, 0x800> detail_text;
-};
-static_assert(sizeof(SystemErrorArg) == 0x1018, "SystemErrorArg has incorrect size.");
-
-struct ApplicationErrorArg {
- u8 mode;
- bool jump;
- INSERT_PADDING_BYTES_NOINIT(6);
- u32 error_code;
- std::array<char, 8> language_code;
- std::array<char, 0x800> main_text;
- std::array<char, 0x800> detail_text;
-};
-static_assert(sizeof(ApplicationErrorArg) == 0x1014, "ApplicationErrorArg has incorrect size.");
-
-union Error::ErrorArguments {
- ShowError error;
- ShowErrorRecord error_record;
- SystemErrorArg system_error;
- ApplicationErrorArg application_error;
- std::array<u8, 0x1018> raw{};
-};
-
-namespace {
-template <typename T>
-void CopyArgumentData(const std::vector<u8>& data, T& variable) {
- ASSERT(data.size() >= sizeof(T));
- std::memcpy(&variable, data.data(), sizeof(T));
-}
-
-ResultCode Decode64BitError(u64 error) {
- const auto description = (error >> 32) & 0x1FFF;
- auto module = error & 0x3FF;
- if (module >= 2000)
- module -= 2000;
- module &= 0x1FF;
- return {static_cast<ErrorModule>(module), static_cast<u32>(description)};
-}
-
-} // Anonymous namespace
-
-Error::Error(Core::System& system_, LibraryAppletMode applet_mode_,
- const Core::Frontend::ErrorApplet& frontend_)
- : Applet{system_, applet_mode_}, frontend{frontend_}, system{system_} {}
-
-Error::~Error() = default;
-
-void Error::Initialize() {
- Applet::Initialize();
- args = std::make_unique<ErrorArguments>();
- complete = false;
-
- const auto storage = broker.PopNormalDataToApplet();
- ASSERT(storage != nullptr);
- const auto data = storage->GetData();
-
- ASSERT(!data.empty());
- std::memcpy(&mode, data.data(), sizeof(ErrorAppletMode));
-
- switch (mode) {
- case ErrorAppletMode::ShowError:
- CopyArgumentData(data, args->error);
- if (args->error.use_64bit_error_code) {
- error_code = Decode64BitError(args->error.error_code_64);
- } else {
- error_code = ResultCode(args->error.error_code_32);
- }
- break;
- case ErrorAppletMode::ShowSystemError:
- CopyArgumentData(data, args->system_error);
- error_code = ResultCode(Decode64BitError(args->system_error.error_code_64));
- break;
- case ErrorAppletMode::ShowApplicationError:
- CopyArgumentData(data, args->application_error);
- error_code = ResultCode(args->application_error.error_code);
- break;
- case ErrorAppletMode::ShowErrorRecord:
- CopyArgumentData(data, args->error_record);
- error_code = Decode64BitError(args->error_record.error_code_64);
- break;
- default:
- UNIMPLEMENTED_MSG("Unimplemented LibAppletError mode={:02X}!", mode);
- }
-}
-
-bool Error::TransactionComplete() const {
- return complete;
-}
-
-ResultCode Error::GetStatus() const {
- return ResultSuccess;
-}
-
-void Error::ExecuteInteractive() {
- UNREACHABLE_MSG("Unexpected interactive applet data!");
-}
-
-void Error::Execute() {
- if (complete) {
- return;
- }
-
- const auto callback = [this] { DisplayCompleted(); };
- const auto title_id = system.CurrentProcess()->GetTitleID();
- const auto& reporter{system.GetReporter()};
-
- switch (mode) {
- case ErrorAppletMode::ShowError:
- reporter.SaveErrorReport(title_id, error_code);
- frontend.ShowError(error_code, callback);
- break;
- case ErrorAppletMode::ShowSystemError:
- case ErrorAppletMode::ShowApplicationError: {
- const auto is_system = mode == ErrorAppletMode::ShowSystemError;
- const auto& main_text =
- is_system ? args->system_error.main_text : args->application_error.main_text;
- const auto& detail_text =
- is_system ? args->system_error.detail_text : args->application_error.detail_text;
-
- const auto main_text_string =
- Common::StringFromFixedZeroTerminatedBuffer(main_text.data(), main_text.size());
- const auto detail_text_string =
- Common::StringFromFixedZeroTerminatedBuffer(detail_text.data(), detail_text.size());
-
- reporter.SaveErrorReport(title_id, error_code, main_text_string, detail_text_string);
- frontend.ShowCustomErrorText(error_code, main_text_string, detail_text_string, callback);
- break;
- }
- case ErrorAppletMode::ShowErrorRecord:
- reporter.SaveErrorReport(title_id, error_code,
- fmt::format("{:016X}", args->error_record.posix_time));
- frontend.ShowErrorWithTimestamp(
- error_code, std::chrono::seconds{args->error_record.posix_time}, callback);
- break;
- default:
- UNIMPLEMENTED_MSG("Unimplemented LibAppletError mode={:02X}!", mode);
- DisplayCompleted();
- }
-}
-
-void Error::DisplayCompleted() {
- complete = true;
- broker.PushNormalDataFromApplet(std::make_shared<IStorage>(system, std::vector<u8>{}));
- broker.SignalStateChanged();
-}
-
-} // namespace Service::AM::Applets
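
Decode64BitError above splits a 64-bit applet error code into a module (low bits) and a description (bits 32 and up). The sketch below is a simplified version of that split plus the familiar 2XXX-YYYY display form; the display formatting is an illustration rather than something this applet produces. The example values (module 202, description 3101) are taken from the HID 0x183ACA code referenced in the controller applet elsewhere in this diff.

// Simplified sketch of splitting a 64-bit error code as Decode64BitError does.
#include <cstdint>
#include <cstdio>

struct DecodedError {
    std::uint32_t module;      // low bits of the value
    std::uint32_t description; // bits 32 and up
};

DecodedError SplitErrorCode(std::uint64_t error) {
    const auto description = static_cast<std::uint32_t>((error >> 32) & 0x1FFF);
    const auto module = static_cast<std::uint32_t>(error & 0x1FF);
    return {module, description};
}

int main() {
    // Example: module 202 (HID) with description 3101.
    const std::uint64_t raw = (static_cast<std::uint64_t>(3101) << 32) | 202;
    const auto decoded = SplitErrorCode(raw);
    std::printf("%04u-%04u\n", 2000 + decoded.module, decoded.description); // prints 2202-3101
    return 0;
}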
diff --git a/src/core/hle/service/am/applets/general_backend.cpp b/src/core/hle/service/am/applets/general_backend.cpp
deleted file mode 100644
index 9fcb9f95b..000000000
--- a/src/core/hle/service/am/applets/general_backend.cpp
+++ /dev/null
@@ -1,255 +0,0 @@
-// Copyright 2018 yuzu emulator team
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <string_view>
-
-#include "common/assert.h"
-#include "common/hex_util.h"
-#include "common/logging/log.h"
-#include "core/core.h"
-#include "core/frontend/applets/general_frontend.h"
-#include "core/hle/kernel/k_process.h"
-#include "core/hle/result.h"
-#include "core/hle/service/am/am.h"
-#include "core/hle/service/am/applets/general_backend.h"
-#include "core/reporter.h"
-
-namespace Service::AM::Applets {
-
-constexpr ResultCode ERROR_INVALID_PIN{ErrorModule::PCTL, 221};
-
-static void LogCurrentStorage(AppletDataBroker& broker, std::string_view prefix) {
- std::shared_ptr<IStorage> storage = broker.PopNormalDataToApplet();
- for (; storage != nullptr; storage = broker.PopNormalDataToApplet()) {
- const auto data = storage->GetData();
- LOG_INFO(Service_AM,
- "called (STUBBED), during {} received normal data with size={:08X}, data={}",
- prefix, data.size(), Common::HexToString(data));
- }
-
- storage = broker.PopInteractiveDataToApplet();
- for (; storage != nullptr; storage = broker.PopInteractiveDataToApplet()) {
- const auto data = storage->GetData();
- LOG_INFO(Service_AM,
- "called (STUBBED), during {} received interactive data with size={:08X}, data={}",
- prefix, data.size(), Common::HexToString(data));
- }
-}
-
-Auth::Auth(Core::System& system_, LibraryAppletMode applet_mode_,
- Core::Frontend::ParentalControlsApplet& frontend_)
- : Applet{system_, applet_mode_}, frontend{frontend_}, system{system_} {}
-
-Auth::~Auth() = default;
-
-void Auth::Initialize() {
- Applet::Initialize();
- complete = false;
-
- const auto storage = broker.PopNormalDataToApplet();
- ASSERT(storage != nullptr);
- const auto data = storage->GetData();
- ASSERT(data.size() >= 0xC);
-
- struct Arg {
- INSERT_PADDING_BYTES(4);
- AuthAppletType type;
- u8 arg0;
- u8 arg1;
- u8 arg2;
- INSERT_PADDING_BYTES(1);
- };
- static_assert(sizeof(Arg) == 0xC, "Arg (AuthApplet) has incorrect size.");
-
- Arg arg{};
- std::memcpy(&arg, data.data(), sizeof(Arg));
-
- type = arg.type;
- arg0 = arg.arg0;
- arg1 = arg.arg1;
- arg2 = arg.arg2;
-}
-
-bool Auth::TransactionComplete() const {
- return complete;
-}
-
-ResultCode Auth::GetStatus() const {
- return successful ? ResultSuccess : ERROR_INVALID_PIN;
-}
-
-void Auth::ExecuteInteractive() {
- UNREACHABLE_MSG("Unexpected interactive applet data.");
-}
-
-void Auth::Execute() {
- if (complete) {
- return;
- }
-
- const auto unimplemented_log = [this] {
- UNIMPLEMENTED_MSG("Unimplemented Auth applet type for type={:08X}, arg0={:02X}, "
- "arg1={:02X}, arg2={:02X}",
- type, arg0, arg1, arg2);
- };
-
- switch (type) {
- case AuthAppletType::ShowParentalAuthentication: {
- const auto callback = [this](bool is_successful) { AuthFinished(is_successful); };
-
- if (arg0 == 1 && arg1 == 0 && arg2 == 1) {
- // ShowAuthenticatorForConfiguration
- frontend.VerifyPINForSettings(callback);
- } else if (arg1 == 0 && arg2 == 0) {
- // ShowParentalAuthentication(bool)
- frontend.VerifyPIN(callback, static_cast<bool>(arg0));
- } else {
- unimplemented_log();
- }
- break;
- }
- case AuthAppletType::RegisterParentalPasscode: {
- const auto callback = [this] { AuthFinished(true); };
-
- if (arg0 == 0 && arg1 == 0 && arg2 == 0) {
- // RegisterParentalPasscode
- frontend.RegisterPIN(callback);
- } else {
- unimplemented_log();
- }
- break;
- }
- case AuthAppletType::ChangeParentalPasscode: {
- const auto callback = [this] { AuthFinished(true); };
-
- if (arg0 == 0 && arg1 == 0 && arg2 == 0) {
- // ChangeParentalPasscode
- frontend.ChangePIN(callback);
- } else {
- unimplemented_log();
- }
- break;
- }
- default:
- unimplemented_log();
- }
-}
-
-void Auth::AuthFinished(bool is_successful) {
- successful = is_successful;
-
- struct Return {
- ResultCode result_code;
- };
- static_assert(sizeof(Return) == 0x4, "Return (AuthApplet) has incorrect size.");
-
- Return return_{GetStatus()};
-
- std::vector<u8> out(sizeof(Return));
- std::memcpy(out.data(), &return_, sizeof(Return));
-
- broker.PushNormalDataFromApplet(std::make_shared<IStorage>(system, std::move(out)));
- broker.SignalStateChanged();
-}
-
-PhotoViewer::PhotoViewer(Core::System& system_, LibraryAppletMode applet_mode_,
- const Core::Frontend::PhotoViewerApplet& frontend_)
- : Applet{system_, applet_mode_}, frontend{frontend_}, system{system_} {}
-
-PhotoViewer::~PhotoViewer() = default;
-
-void PhotoViewer::Initialize() {
- Applet::Initialize();
- complete = false;
-
- const auto storage = broker.PopNormalDataToApplet();
- ASSERT(storage != nullptr);
- const auto data = storage->GetData();
- ASSERT(!data.empty());
- mode = static_cast<PhotoViewerAppletMode>(data[0]);
-}
-
-bool PhotoViewer::TransactionComplete() const {
- return complete;
-}
-
-ResultCode PhotoViewer::GetStatus() const {
- return ResultSuccess;
-}
-
-void PhotoViewer::ExecuteInteractive() {
- UNREACHABLE_MSG("Unexpected interactive applet data.");
-}
-
-void PhotoViewer::Execute() {
- if (complete)
- return;
-
- const auto callback = [this] { ViewFinished(); };
- switch (mode) {
- case PhotoViewerAppletMode::CurrentApp:
- frontend.ShowPhotosForApplication(system.CurrentProcess()->GetTitleID(), callback);
- break;
- case PhotoViewerAppletMode::AllApps:
- frontend.ShowAllPhotos(callback);
- break;
- default:
- UNIMPLEMENTED_MSG("Unimplemented PhotoViewer applet mode={:02X}!", mode);
- }
-}
-
-void PhotoViewer::ViewFinished() {
- broker.PushNormalDataFromApplet(std::make_shared<IStorage>(system, std::vector<u8>{}));
- broker.SignalStateChanged();
-}
-
-StubApplet::StubApplet(Core::System& system_, AppletId id_, LibraryAppletMode applet_mode_)
- : Applet{system_, applet_mode_}, id{id_}, system{system_} {}
-
-StubApplet::~StubApplet() = default;
-
-void StubApplet::Initialize() {
- LOG_WARNING(Service_AM, "called (STUBBED)");
- Applet::Initialize();
-
- const auto data = broker.PeekDataToAppletForDebug();
- system.GetReporter().SaveUnimplementedAppletReport(
- static_cast<u32>(id), common_args.arguments_version, common_args.library_version,
- common_args.theme_color, common_args.play_startup_sound, common_args.system_tick,
- data.normal, data.interactive);
-
- LogCurrentStorage(broker, "Initialize");
-}
-
-bool StubApplet::TransactionComplete() const {
- LOG_WARNING(Service_AM, "called (STUBBED)");
- return true;
-}
-
-ResultCode StubApplet::GetStatus() const {
- LOG_WARNING(Service_AM, "called (STUBBED)");
- return ResultSuccess;
-}
-
-void StubApplet::ExecuteInteractive() {
- LOG_WARNING(Service_AM, "called (STUBBED)");
- LogCurrentStorage(broker, "ExecuteInteractive");
-
- broker.PushNormalDataFromApplet(std::make_shared<IStorage>(system, std::vector<u8>(0x1000)));
- broker.PushInteractiveDataFromApplet(
- std::make_shared<IStorage>(system, std::vector<u8>(0x1000)));
- broker.SignalStateChanged();
-}
-
-void StubApplet::Execute() {
- LOG_WARNING(Service_AM, "called (STUBBED)");
- LogCurrentStorage(broker, "Execute");
-
- broker.PushNormalDataFromApplet(std::make_shared<IStorage>(system, std::vector<u8>(0x1000)));
- broker.PushInteractiveDataFromApplet(
- std::make_shared<IStorage>(system, std::vector<u8>(0x1000)));
- broker.SignalStateChanged();
-}
-
-} // namespace Service::AM::Applets
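
Auth::Initialize above copies the first storage blob into a fixed 0xC-byte argument struct and then dispatches on the (type, arg0, arg1, arg2) combination. The sketch below shows that parse step in isolation with stand-in names; the field order comes from the deleted code, while the u32 width of the type field is inferred from the 0xC size assertion.

// Standalone sketch of the fixed-layout argument parse done in Auth::Initialize.
#include <cassert>
#include <cstdint>
#include <cstring>
#include <vector>

enum class AuthType : std::uint32_t {
    ShowParentalAuthentication,
    RegisterParentalPasscode,
    ChangeParentalPasscode,
};

struct AuthArg {
    std::uint8_t padding0[4];
    AuthType type;
    std::uint8_t arg0;
    std::uint8_t arg1;
    std::uint8_t arg2;
    std::uint8_t padding1;
};
static_assert(sizeof(AuthArg) == 0xC, "AuthArg has incorrect size.");

AuthArg ParseAuthArg(const std::vector<std::uint8_t>& storage) {
    // The real code asserts the storage is at least 0xC bytes before copying.
    assert(storage.size() >= sizeof(AuthArg));
    AuthArg arg{};
    std::memcpy(&arg, storage.data(), sizeof(arg));
    return arg;
}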
diff --git a/src/core/hle/service/am/applets/profile_select.cpp b/src/core/hle/service/am/applets/profile_select.cpp
deleted file mode 100644
index 37048be26..000000000
--- a/src/core/hle/service/am/applets/profile_select.cpp
+++ /dev/null
@@ -1,78 +0,0 @@
-// Copyright 2018 yuzu emulator team
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <cstring>
-
-#include "common/assert.h"
-#include "common/string_util.h"
-#include "core/core.h"
-#include "core/frontend/applets/profile_select.h"
-#include "core/hle/service/am/am.h"
-#include "core/hle/service/am/applets/profile_select.h"
-
-namespace Service::AM::Applets {
-
-constexpr ResultCode ERR_USER_CANCELLED_SELECTION{ErrorModule::Account, 1};
-
-ProfileSelect::ProfileSelect(Core::System& system_, LibraryAppletMode applet_mode_,
- const Core::Frontend::ProfileSelectApplet& frontend_)
- : Applet{system_, applet_mode_}, frontend{frontend_}, system{system_} {}
-
-ProfileSelect::~ProfileSelect() = default;
-
-void ProfileSelect::Initialize() {
- complete = false;
- status = ResultSuccess;
- final_data.clear();
-
- Applet::Initialize();
-
- const auto user_config_storage = broker.PopNormalDataToApplet();
- ASSERT(user_config_storage != nullptr);
- const auto& user_config = user_config_storage->GetData();
-
- ASSERT(user_config.size() >= sizeof(UserSelectionConfig));
- std::memcpy(&config, user_config.data(), sizeof(UserSelectionConfig));
-}
-
-bool ProfileSelect::TransactionComplete() const {
- return complete;
-}
-
-ResultCode ProfileSelect::GetStatus() const {
- return status;
-}
-
-void ProfileSelect::ExecuteInteractive() {
- UNREACHABLE_MSG("Attempted to call interactive execution on non-interactive applet.");
-}
-
-void ProfileSelect::Execute() {
- if (complete) {
- broker.PushNormalDataFromApplet(std::make_shared<IStorage>(system, std::move(final_data)));
- return;
- }
-
- frontend.SelectProfile([this](std::optional<Common::UUID> uuid) { SelectionComplete(uuid); });
-}
-
-void ProfileSelect::SelectionComplete(std::optional<Common::UUID> uuid) {
- UserSelectionOutput output{};
-
- if (uuid.has_value() && uuid->uuid != Common::INVALID_UUID) {
- output.result = 0;
- output.uuid_selected = uuid->uuid;
- } else {
- status = ERR_USER_CANCELLED_SELECTION;
- output.result = ERR_USER_CANCELLED_SELECTION.raw;
- output.uuid_selected = Common::INVALID_UUID;
- }
-
- final_data = std::vector<u8>(sizeof(UserSelectionOutput));
- std::memcpy(final_data.data(), &output, final_data.size());
- broker.PushNormalDataFromApplet(std::make_shared<IStorage>(system, std::move(final_data)));
- broker.SignalStateChanged();
-}
-
-} // namespace Service::AM::Applets
diff --git a/src/core/hle/service/am/applets/software_keyboard.cpp b/src/core/hle/service/am/applets/software_keyboard.cpp
deleted file mode 100644
index 00dfe1675..000000000
--- a/src/core/hle/service/am/applets/software_keyboard.cpp
+++ /dev/null
@@ -1,1082 +0,0 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/string_util.h"
-#include "core/core.h"
-#include "core/frontend/applets/software_keyboard.h"
-#include "core/hle/service/am/am.h"
-#include "core/hle/service/am/applets/software_keyboard.h"
-
-namespace Service::AM::Applets {
-
-namespace {
-
-// The maximum number of UTF-16 characters that can be input into the swkbd text field.
-constexpr u32 DEFAULT_MAX_TEXT_LENGTH = 500;
-
-constexpr std::size_t REPLY_BASE_SIZE = sizeof(SwkbdState) + sizeof(SwkbdReplyType);
-constexpr std::size_t REPLY_UTF8_SIZE = 0x7D4;
-constexpr std::size_t REPLY_UTF16_SIZE = 0x3EC;
-
-constexpr const char* GetTextCheckResultName(SwkbdTextCheckResult text_check_result) {
- switch (text_check_result) {
- case SwkbdTextCheckResult::Success:
- return "Success";
- case SwkbdTextCheckResult::Failure:
- return "Failure";
- case SwkbdTextCheckResult::Confirm:
- return "Confirm";
- case SwkbdTextCheckResult::Silent:
- return "Silent";
- default:
- UNIMPLEMENTED_MSG("Unknown TextCheckResult={}", text_check_result);
- return "Unknown";
- }
-}
-
-void SetReplyBase(std::vector<u8>& reply, SwkbdState state, SwkbdReplyType reply_type) {
- std::memcpy(reply.data(), &state, sizeof(SwkbdState));
- std::memcpy(reply.data() + sizeof(SwkbdState), &reply_type, sizeof(SwkbdReplyType));
-}
-
-} // Anonymous namespace
-
-SoftwareKeyboard::SoftwareKeyboard(Core::System& system_, LibraryAppletMode applet_mode_,
- Core::Frontend::SoftwareKeyboardApplet& frontend_)
- : Applet{system_, applet_mode_}, frontend{frontend_}, system{system_} {}
-
-SoftwareKeyboard::~SoftwareKeyboard() = default;
-
-void SoftwareKeyboard::Initialize() {
- Applet::Initialize();
-
- LOG_INFO(Service_AM, "Initializing Software Keyboard Applet with LibraryAppletMode={}",
- applet_mode);
-
- LOG_DEBUG(Service_AM,
- "Initializing Applet with common_args: arg_version={}, lib_version={}, "
- "play_startup_sound={}, size={}, system_tick={}, theme_color={}",
- common_args.arguments_version, common_args.library_version,
- common_args.play_startup_sound, common_args.size, common_args.system_tick,
- common_args.theme_color);
-
- swkbd_applet_version = SwkbdAppletVersion{common_args.library_version};
-
- switch (applet_mode) {
- case LibraryAppletMode::AllForeground:
- InitializeForeground();
- break;
- case LibraryAppletMode::Background:
- case LibraryAppletMode::BackgroundIndirectDisplay:
- InitializeBackground(applet_mode);
- break;
- default:
- UNREACHABLE_MSG("Invalid LibraryAppletMode={}", applet_mode);
- break;
- }
-}
-
-bool SoftwareKeyboard::TransactionComplete() const {
- return complete;
-}
-
-ResultCode SoftwareKeyboard::GetStatus() const {
- return status;
-}
-
-void SoftwareKeyboard::ExecuteInteractive() {
- if (complete) {
- return;
- }
-
- if (is_background) {
- ProcessInlineKeyboardRequest();
- } else {
- ProcessTextCheck();
- }
-}
-
-void SoftwareKeyboard::Execute() {
- if (complete) {
- return;
- }
-
- if (is_background) {
- return;
- }
-
- ShowNormalKeyboard();
-}
-
-void SoftwareKeyboard::SubmitTextNormal(SwkbdResult result, std::u16string submitted_text) {
- if (complete) {
- return;
- }
-
- if (swkbd_config_common.use_text_check && result == SwkbdResult::Ok) {
- SubmitForTextCheck(submitted_text);
- } else {
- SubmitNormalOutputAndExit(result, submitted_text);
- }
-}
-
-void SoftwareKeyboard::SubmitTextInline(SwkbdReplyType reply_type, std::u16string submitted_text,
- s32 cursor_position) {
- if (complete) {
- return;
- }
-
- current_text = std::move(submitted_text);
- current_cursor_position = cursor_position;
-
- if (inline_use_utf8) {
- switch (reply_type) {
- case SwkbdReplyType::ChangedString:
- reply_type = SwkbdReplyType::ChangedStringUtf8;
- break;
- case SwkbdReplyType::MovedCursor:
- reply_type = SwkbdReplyType::MovedCursorUtf8;
- break;
- case SwkbdReplyType::DecidedEnter:
- reply_type = SwkbdReplyType::DecidedEnterUtf8;
- break;
- default:
- break;
- }
- }
-
- if (use_changed_string_v2) {
- switch (reply_type) {
- case SwkbdReplyType::ChangedString:
- reply_type = SwkbdReplyType::ChangedStringV2;
- break;
- case SwkbdReplyType::ChangedStringUtf8:
- reply_type = SwkbdReplyType::ChangedStringUtf8V2;
- break;
- default:
- break;
- }
- }
-
- if (use_moved_cursor_v2) {
- switch (reply_type) {
- case SwkbdReplyType::MovedCursor:
- reply_type = SwkbdReplyType::MovedCursorV2;
- break;
- case SwkbdReplyType::MovedCursorUtf8:
- reply_type = SwkbdReplyType::MovedCursorUtf8V2;
- break;
- default:
- break;
- }
- }
-
- SendReply(reply_type);
-}
-
-void SoftwareKeyboard::InitializeForeground() {
- LOG_INFO(Service_AM, "Initializing Normal Software Keyboard Applet.");
-
- is_background = false;
-
- const auto swkbd_config_storage = broker.PopNormalDataToApplet();
- ASSERT(swkbd_config_storage != nullptr);
-
- const auto& swkbd_config_data = swkbd_config_storage->GetData();
- ASSERT(swkbd_config_data.size() >= sizeof(SwkbdConfigCommon));
-
- std::memcpy(&swkbd_config_common, swkbd_config_data.data(), sizeof(SwkbdConfigCommon));
-
- switch (swkbd_applet_version) {
- case SwkbdAppletVersion::Version5:
- case SwkbdAppletVersion::Version65542:
- ASSERT(swkbd_config_data.size() == sizeof(SwkbdConfigCommon) + sizeof(SwkbdConfigOld));
- std::memcpy(&swkbd_config_old, swkbd_config_data.data() + sizeof(SwkbdConfigCommon),
- sizeof(SwkbdConfigOld));
- break;
- case SwkbdAppletVersion::Version196615:
- case SwkbdAppletVersion::Version262152:
- case SwkbdAppletVersion::Version327689:
- ASSERT(swkbd_config_data.size() == sizeof(SwkbdConfigCommon) + sizeof(SwkbdConfigOld2));
- std::memcpy(&swkbd_config_old2, swkbd_config_data.data() + sizeof(SwkbdConfigCommon),
- sizeof(SwkbdConfigOld2));
- break;
- case SwkbdAppletVersion::Version393227:
- case SwkbdAppletVersion::Version524301:
- ASSERT(swkbd_config_data.size() == sizeof(SwkbdConfigCommon) + sizeof(SwkbdConfigNew));
- std::memcpy(&swkbd_config_new, swkbd_config_data.data() + sizeof(SwkbdConfigCommon),
- sizeof(SwkbdConfigNew));
- break;
- default:
- UNIMPLEMENTED_MSG("Unknown SwkbdConfig revision={} with size={}", swkbd_applet_version,
- swkbd_config_data.size());
- ASSERT(swkbd_config_data.size() >= sizeof(SwkbdConfigCommon) + sizeof(SwkbdConfigNew));
- std::memcpy(&swkbd_config_new, swkbd_config_data.data() + sizeof(SwkbdConfigCommon),
- sizeof(SwkbdConfigNew));
- break;
- }
-
- const auto work_buffer_storage = broker.PopNormalDataToApplet();
- ASSERT(work_buffer_storage != nullptr);
-
- if (swkbd_config_common.initial_string_length == 0) {
- InitializeFrontendKeyboard();
- return;
- }
-
- const auto& work_buffer = work_buffer_storage->GetData();
-
- std::vector<char16_t> initial_string(swkbd_config_common.initial_string_length);
-
- std::memcpy(initial_string.data(),
- work_buffer.data() + swkbd_config_common.initial_string_offset,
- swkbd_config_common.initial_string_length * sizeof(char16_t));
-
- initial_text = Common::UTF16StringFromFixedZeroTerminatedBuffer(initial_string.data(),
- initial_string.size());
-
- LOG_DEBUG(Service_AM, "\nInitial Text: {}", Common::UTF16ToUTF8(initial_text));
-
- InitializeFrontendKeyboard();
-}
-
-void SoftwareKeyboard::InitializeBackground(LibraryAppletMode library_applet_mode) {
- LOG_INFO(Service_AM, "Initializing Inline Software Keyboard Applet.");
-
- is_background = true;
-
- const auto swkbd_inline_initialize_arg_storage = broker.PopNormalDataToApplet();
- ASSERT(swkbd_inline_initialize_arg_storage != nullptr);
-
- const auto& swkbd_inline_initialize_arg = swkbd_inline_initialize_arg_storage->GetData();
- ASSERT(swkbd_inline_initialize_arg.size() == sizeof(SwkbdInitializeArg));
-
- std::memcpy(&swkbd_initialize_arg, swkbd_inline_initialize_arg.data(),
- swkbd_inline_initialize_arg.size());
-
- if (swkbd_initialize_arg.library_applet_mode_flag) {
- ASSERT(library_applet_mode == LibraryAppletMode::Background);
- } else {
- ASSERT(library_applet_mode == LibraryAppletMode::BackgroundIndirectDisplay);
- }
-}
-
-void SoftwareKeyboard::ProcessTextCheck() {
- const auto text_check_storage = broker.PopInteractiveDataToApplet();
- ASSERT(text_check_storage != nullptr);
-
- const auto& text_check_data = text_check_storage->GetData();
- ASSERT(text_check_data.size() == sizeof(SwkbdTextCheck));
-
- SwkbdTextCheck swkbd_text_check;
-
- std::memcpy(&swkbd_text_check, text_check_data.data(), sizeof(SwkbdTextCheck));
-
- std::u16string text_check_message =
- swkbd_text_check.text_check_result == SwkbdTextCheckResult::Failure ||
- swkbd_text_check.text_check_result == SwkbdTextCheckResult::Confirm
- ? Common::UTF16StringFromFixedZeroTerminatedBuffer(
- swkbd_text_check.text_check_message.data(),
- swkbd_text_check.text_check_message.size())
- : u"";
-
- LOG_INFO(Service_AM, "\nTextCheckResult: {}\nTextCheckMessage: {}",
- GetTextCheckResultName(swkbd_text_check.text_check_result),
- Common::UTF16ToUTF8(text_check_message));
-
- switch (swkbd_text_check.text_check_result) {
- case SwkbdTextCheckResult::Success:
- SubmitNormalOutputAndExit(SwkbdResult::Ok, current_text);
- break;
- case SwkbdTextCheckResult::Failure:
- ShowTextCheckDialog(SwkbdTextCheckResult::Failure, std::move(text_check_message));
- break;
- case SwkbdTextCheckResult::Confirm:
- ShowTextCheckDialog(SwkbdTextCheckResult::Confirm, std::move(text_check_message));
- break;
- case SwkbdTextCheckResult::Silent:
- default:
- break;
- }
-}
-
-void SoftwareKeyboard::ProcessInlineKeyboardRequest() {
- const auto request_data_storage = broker.PopInteractiveDataToApplet();
- ASSERT(request_data_storage != nullptr);
-
- const auto& request_data = request_data_storage->GetData();
- ASSERT(request_data.size() >= sizeof(SwkbdRequestCommand));
-
- SwkbdRequestCommand request_command;
-
- std::memcpy(&request_command, request_data.data(), sizeof(SwkbdRequestCommand));
-
- switch (request_command) {
- case SwkbdRequestCommand::Finalize:
- RequestFinalize(request_data);
- break;
- case SwkbdRequestCommand::SetUserWordInfo:
- RequestSetUserWordInfo(request_data);
- break;
- case SwkbdRequestCommand::SetCustomizeDic:
- RequestSetCustomizeDic(request_data);
- break;
- case SwkbdRequestCommand::Calc:
- RequestCalc(request_data);
- break;
- case SwkbdRequestCommand::SetCustomizedDictionaries:
- RequestSetCustomizedDictionaries(request_data);
- break;
- case SwkbdRequestCommand::UnsetCustomizedDictionaries:
- RequestUnsetCustomizedDictionaries(request_data);
- break;
- case SwkbdRequestCommand::SetChangedStringV2Flag:
- RequestSetChangedStringV2Flag(request_data);
- break;
- case SwkbdRequestCommand::SetMovedCursorV2Flag:
- RequestSetMovedCursorV2Flag(request_data);
- break;
- default:
- UNIMPLEMENTED_MSG("Unknown SwkbdRequestCommand={}", request_command);
- break;
- }
-}
-
-void SoftwareKeyboard::SubmitNormalOutputAndExit(SwkbdResult result,
- std::u16string submitted_text) {
- std::vector<u8> out_data(sizeof(SwkbdResult) + STRING_BUFFER_SIZE);
-
- if (swkbd_config_common.use_utf8) {
- std::string utf8_submitted_text = Common::UTF16ToUTF8(submitted_text);
-
- LOG_DEBUG(Service_AM, "\nSwkbdResult: {}\nUTF-8 Submitted Text: {}", result,
- utf8_submitted_text);
-
- std::memcpy(out_data.data(), &result, sizeof(SwkbdResult));
- std::memcpy(out_data.data() + sizeof(SwkbdResult), utf8_submitted_text.data(),
- utf8_submitted_text.size());
- } else {
- LOG_DEBUG(Service_AM, "\nSwkbdResult: {}\nUTF-16 Submitted Text: {}", result,
- Common::UTF16ToUTF8(submitted_text));
-
- std::memcpy(out_data.data(), &result, sizeof(SwkbdResult));
- std::memcpy(out_data.data() + sizeof(SwkbdResult), submitted_text.data(),
- submitted_text.size() * sizeof(char16_t));
- }
-
- broker.PushNormalDataFromApplet(std::make_shared<IStorage>(system, std::move(out_data)));
-
- ExitKeyboard();
-}
-
-void SoftwareKeyboard::SubmitForTextCheck(std::u16string submitted_text) {
- current_text = std::move(submitted_text);
-
- std::vector<u8> out_data(sizeof(u64) + STRING_BUFFER_SIZE);
-
- if (swkbd_config_common.use_utf8) {
- std::string utf8_submitted_text = Common::UTF16ToUTF8(current_text);
- const u64 buffer_size = sizeof(u64) + utf8_submitted_text.size();
-
- LOG_DEBUG(Service_AM, "\nBuffer Size: {}\nUTF-8 Submitted Text: {}", buffer_size,
- utf8_submitted_text);
-
- std::memcpy(out_data.data(), &buffer_size, sizeof(u64));
- std::memcpy(out_data.data() + sizeof(u64), utf8_submitted_text.data(),
- utf8_submitted_text.size());
- } else {
- const u64 buffer_size = sizeof(u64) + current_text.size() * sizeof(char16_t);
-
- LOG_DEBUG(Service_AM, "\nBuffer Size: {}\nUTF-16 Submitted Text: {}", buffer_size,
- Common::UTF16ToUTF8(current_text));
-
- std::memcpy(out_data.data(), &buffer_size, sizeof(u64));
- std::memcpy(out_data.data() + sizeof(u64), current_text.data(),
- current_text.size() * sizeof(char16_t));
- }
-
- broker.PushInteractiveDataFromApplet(std::make_shared<IStorage>(system, std::move(out_data)));
-}
-
-void SoftwareKeyboard::SendReply(SwkbdReplyType reply_type) {
- switch (reply_type) {
- case SwkbdReplyType::FinishedInitialize:
- ReplyFinishedInitialize();
- break;
- case SwkbdReplyType::Default:
- ReplyDefault();
- break;
- case SwkbdReplyType::ChangedString:
- ReplyChangedString();
- break;
- case SwkbdReplyType::MovedCursor:
- ReplyMovedCursor();
- break;
- case SwkbdReplyType::MovedTab:
- ReplyMovedTab();
- break;
- case SwkbdReplyType::DecidedEnter:
- ReplyDecidedEnter();
- break;
- case SwkbdReplyType::DecidedCancel:
- ReplyDecidedCancel();
- break;
- case SwkbdReplyType::ChangedStringUtf8:
- ReplyChangedStringUtf8();
- break;
- case SwkbdReplyType::MovedCursorUtf8:
- ReplyMovedCursorUtf8();
- break;
- case SwkbdReplyType::DecidedEnterUtf8:
- ReplyDecidedEnterUtf8();
- break;
- case SwkbdReplyType::UnsetCustomizeDic:
- ReplyUnsetCustomizeDic();
- break;
- case SwkbdReplyType::ReleasedUserWordInfo:
- ReplyReleasedUserWordInfo();
- break;
- case SwkbdReplyType::UnsetCustomizedDictionaries:
- ReplyUnsetCustomizedDictionaries();
- break;
- case SwkbdReplyType::ChangedStringV2:
- ReplyChangedStringV2();
- break;
- case SwkbdReplyType::MovedCursorV2:
- ReplyMovedCursorV2();
- break;
- case SwkbdReplyType::ChangedStringUtf8V2:
- ReplyChangedStringUtf8V2();
- break;
- case SwkbdReplyType::MovedCursorUtf8V2:
- ReplyMovedCursorUtf8V2();
- break;
- default:
- UNIMPLEMENTED_MSG("Unknown SwkbdReplyType={}", reply_type);
- ReplyDefault();
- break;
- }
-}
-
-void SoftwareKeyboard::ChangeState(SwkbdState state) {
- swkbd_state = state;
-
- ReplyDefault();
-}
-
-void SoftwareKeyboard::InitializeFrontendKeyboard() {
- if (is_background) {
- const auto& appear_arg = swkbd_calc_arg.appear_arg;
-
- std::u16string ok_text = Common::UTF16StringFromFixedZeroTerminatedBuffer(
- appear_arg.ok_text.data(), appear_arg.ok_text.size());
-
- const u32 max_text_length =
- appear_arg.max_text_length > 0 && appear_arg.max_text_length <= DEFAULT_MAX_TEXT_LENGTH
- ? appear_arg.max_text_length
- : DEFAULT_MAX_TEXT_LENGTH;
-
- const u32 min_text_length =
- appear_arg.min_text_length <= max_text_length ? appear_arg.min_text_length : 0;
-
- const s32 initial_cursor_position =
- current_cursor_position > 0 ? current_cursor_position : 0;
-
- const auto text_draw_type =
- max_text_length <= 32 ? SwkbdTextDrawType::Line : SwkbdTextDrawType::Box;
-
- Core::Frontend::KeyboardInitializeParameters initialize_parameters{
- .ok_text{std::move(ok_text)},
- .header_text{},
- .sub_text{},
- .guide_text{},
- .initial_text{current_text},
- .max_text_length{max_text_length},
- .min_text_length{min_text_length},
- .initial_cursor_position{initial_cursor_position},
- .type{appear_arg.type},
- .password_mode{SwkbdPasswordMode::Disabled},
- .text_draw_type{text_draw_type},
- .key_disable_flags{appear_arg.key_disable_flags},
- .use_blur_background{false},
- .enable_backspace_button{swkbd_calc_arg.enable_backspace_button},
- .enable_return_button{appear_arg.enable_return_button},
- .disable_cancel_button{appear_arg.disable_cancel_button},
- };
-
- frontend.InitializeKeyboard(
- true, std::move(initialize_parameters), {},
- [this](SwkbdReplyType reply_type, std::u16string submitted_text, s32 cursor_position) {
- SubmitTextInline(reply_type, submitted_text, cursor_position);
- });
- } else {
- std::u16string ok_text = Common::UTF16StringFromFixedZeroTerminatedBuffer(
- swkbd_config_common.ok_text.data(), swkbd_config_common.ok_text.size());
-
- std::u16string header_text = Common::UTF16StringFromFixedZeroTerminatedBuffer(
- swkbd_config_common.header_text.data(), swkbd_config_common.header_text.size());
-
- std::u16string sub_text = Common::UTF16StringFromFixedZeroTerminatedBuffer(
- swkbd_config_common.sub_text.data(), swkbd_config_common.sub_text.size());
-
- std::u16string guide_text = Common::UTF16StringFromFixedZeroTerminatedBuffer(
- swkbd_config_common.guide_text.data(), swkbd_config_common.guide_text.size());
-
- const u32 max_text_length =
- swkbd_config_common.max_text_length > 0 &&
- swkbd_config_common.max_text_length <= DEFAULT_MAX_TEXT_LENGTH
- ? swkbd_config_common.max_text_length
- : DEFAULT_MAX_TEXT_LENGTH;
-
- const u32 min_text_length = swkbd_config_common.min_text_length <= max_text_length
- ? swkbd_config_common.min_text_length
- : 0;
-
- const s32 initial_cursor_position = [this] {
- switch (swkbd_config_common.initial_cursor_position) {
- case SwkbdInitialCursorPosition::Start:
- default:
- return 0;
- case SwkbdInitialCursorPosition::End:
- return static_cast<s32>(initial_text.size());
- }
- }();
-
- const auto text_draw_type = [this, max_text_length] {
- switch (swkbd_config_common.text_draw_type) {
- case SwkbdTextDrawType::Line:
- default:
- return max_text_length <= 32 ? SwkbdTextDrawType::Line : SwkbdTextDrawType::Box;
- case SwkbdTextDrawType::Box:
- case SwkbdTextDrawType::DownloadCode:
- return swkbd_config_common.text_draw_type;
- }
- }();
-
- const auto enable_return_button = text_draw_type == SwkbdTextDrawType::Box
- ? swkbd_config_common.enable_return_button
- : false;
-
- const auto disable_cancel_button = swkbd_applet_version >= SwkbdAppletVersion::Version393227
- ? swkbd_config_new.disable_cancel_button
- : false;
-
- Core::Frontend::KeyboardInitializeParameters initialize_parameters{
- .ok_text{std::move(ok_text)},
- .header_text{std::move(header_text)},
- .sub_text{std::move(sub_text)},
- .guide_text{std::move(guide_text)},
- .initial_text{initial_text},
- .max_text_length{max_text_length},
- .min_text_length{min_text_length},
- .initial_cursor_position{initial_cursor_position},
- .type{swkbd_config_common.type},
- .password_mode{swkbd_config_common.password_mode},
- .text_draw_type{text_draw_type},
- .key_disable_flags{swkbd_config_common.key_disable_flags},
- .use_blur_background{swkbd_config_common.use_blur_background},
- .enable_backspace_button{true},
- .enable_return_button{enable_return_button},
- .disable_cancel_button{disable_cancel_button},
- };
-
- frontend.InitializeKeyboard(false, std::move(initialize_parameters),
- [this](SwkbdResult result, std::u16string submitted_text) {
- SubmitTextNormal(result, submitted_text);
- },
- {});
- }
-}
-
-void SoftwareKeyboard::ShowNormalKeyboard() {
- frontend.ShowNormalKeyboard();
-}
-
-void SoftwareKeyboard::ShowTextCheckDialog(SwkbdTextCheckResult text_check_result,
- std::u16string text_check_message) {
- frontend.ShowTextCheckDialog(text_check_result, std::move(text_check_message));
-}
-
-void SoftwareKeyboard::ShowInlineKeyboard() {
- if (swkbd_state != SwkbdState::InitializedIsHidden) {
- return;
- }
-
- ChangeState(SwkbdState::InitializedIsAppearing);
-
- const auto& appear_arg = swkbd_calc_arg.appear_arg;
-
- const u32 max_text_length =
- appear_arg.max_text_length > 0 && appear_arg.max_text_length <= DEFAULT_MAX_TEXT_LENGTH
- ? appear_arg.max_text_length
- : DEFAULT_MAX_TEXT_LENGTH;
-
- const u32 min_text_length =
- appear_arg.min_text_length <= max_text_length ? appear_arg.min_text_length : 0;
-
- Core::Frontend::InlineAppearParameters appear_parameters{
- .max_text_length{max_text_length},
- .min_text_length{min_text_length},
- .key_top_scale_x{swkbd_calc_arg.key_top_scale_x},
- .key_top_scale_y{swkbd_calc_arg.key_top_scale_y},
- .key_top_translate_x{swkbd_calc_arg.key_top_translate_x},
- .key_top_translate_y{swkbd_calc_arg.key_top_translate_y},
- .type{appear_arg.type},
- .key_disable_flags{appear_arg.key_disable_flags},
- .key_top_as_floating{swkbd_calc_arg.key_top_as_floating},
- .enable_backspace_button{swkbd_calc_arg.enable_backspace_button},
- .enable_return_button{appear_arg.enable_return_button},
- .disable_cancel_button{appear_arg.disable_cancel_button},
- };
-
- frontend.ShowInlineKeyboard(std::move(appear_parameters));
-
- ChangeState(SwkbdState::InitializedIsShown);
-}
-
-void SoftwareKeyboard::HideInlineKeyboard() {
- if (swkbd_state != SwkbdState::InitializedIsShown) {
- return;
- }
-
- ChangeState(SwkbdState::InitializedIsDisappearing);
-
- frontend.HideInlineKeyboard();
-
- ChangeState(SwkbdState::InitializedIsHidden);
-}
-
-void SoftwareKeyboard::InlineTextChanged() {
- Core::Frontend::InlineTextParameters text_parameters{
- .input_text{current_text},
- .cursor_position{current_cursor_position},
- };
-
- frontend.InlineTextChanged(std::move(text_parameters));
-}
-
-void SoftwareKeyboard::ExitKeyboard() {
- complete = true;
- status = ResultSuccess;
-
- frontend.ExitKeyboard();
-
- broker.SignalStateChanged();
-}
-
-// Inline Software Keyboard Requests
-
-void SoftwareKeyboard::RequestFinalize(const std::vector<u8>& request_data) {
- LOG_DEBUG(Service_AM, "Processing Request: Finalize");
-
- ChangeState(SwkbdState::NotInitialized);
-
- ExitKeyboard();
-}
-
-void SoftwareKeyboard::RequestSetUserWordInfo(const std::vector<u8>& request_data) {
- LOG_WARNING(Service_AM, "SetUserWordInfo is not implemented.");
-}
-
-void SoftwareKeyboard::RequestSetCustomizeDic(const std::vector<u8>& request_data) {
- LOG_WARNING(Service_AM, "SetCustomizeDic is not implemented.");
-}
-
-void SoftwareKeyboard::RequestCalc(const std::vector<u8>& request_data) {
- LOG_DEBUG(Service_AM, "Processing Request: Calc");
-
- ASSERT(request_data.size() == sizeof(SwkbdRequestCommand) + sizeof(SwkbdCalcArg));
-
- std::memcpy(&swkbd_calc_arg, request_data.data() + sizeof(SwkbdRequestCommand),
- sizeof(SwkbdCalcArg));
-
- if (swkbd_calc_arg.flags.set_input_text) {
- current_text = Common::UTF16StringFromFixedZeroTerminatedBuffer(
- swkbd_calc_arg.input_text.data(), swkbd_calc_arg.input_text.size());
- }
-
- if (swkbd_calc_arg.flags.set_cursor_position) {
- current_cursor_position = swkbd_calc_arg.cursor_position;
- }
-
- if (swkbd_calc_arg.flags.set_utf8_mode) {
- inline_use_utf8 = swkbd_calc_arg.utf8_mode;
- }
-
- if (swkbd_state <= SwkbdState::InitializedIsHidden &&
- swkbd_calc_arg.flags.unset_customize_dic) {
- ReplyUnsetCustomizeDic();
- }
-
- if (swkbd_state <= SwkbdState::InitializedIsHidden &&
- swkbd_calc_arg.flags.unset_user_word_info) {
- ReplyReleasedUserWordInfo();
- }
-
- if (swkbd_state == SwkbdState::NotInitialized && swkbd_calc_arg.flags.set_initialize_arg) {
- InitializeFrontendKeyboard();
-
- ChangeState(SwkbdState::InitializedIsHidden);
-
- ReplyFinishedInitialize();
- }
-
- if (!swkbd_calc_arg.flags.set_initialize_arg &&
- (swkbd_calc_arg.flags.set_input_text || swkbd_calc_arg.flags.set_cursor_position)) {
- InlineTextChanged();
- }
-
- if (swkbd_state == SwkbdState::InitializedIsHidden && swkbd_calc_arg.flags.appear) {
- ShowInlineKeyboard();
- return;
- }
-
- if (swkbd_state == SwkbdState::InitializedIsShown && swkbd_calc_arg.flags.disappear) {
- HideInlineKeyboard();
- return;
- }
-}
-
-void SoftwareKeyboard::RequestSetCustomizedDictionaries(const std::vector<u8>& request_data) {
- LOG_WARNING(Service_AM, "SetCustomizedDictionaries is not implemented.");
-}
-
-void SoftwareKeyboard::RequestUnsetCustomizedDictionaries(const std::vector<u8>& request_data) {
- LOG_WARNING(Service_AM, "(STUBBED) Processing Request: UnsetCustomizedDictionaries");
-
- ReplyUnsetCustomizedDictionaries();
-}
-
-void SoftwareKeyboard::RequestSetChangedStringV2Flag(const std::vector<u8>& request_data) {
- LOG_DEBUG(Service_AM, "Processing Request: SetChangedStringV2Flag");
-
- ASSERT(request_data.size() == sizeof(SwkbdRequestCommand) + 1);
-
- std::memcpy(&use_changed_string_v2, request_data.data() + sizeof(SwkbdRequestCommand), 1);
-}
-
-void SoftwareKeyboard::RequestSetMovedCursorV2Flag(const std::vector<u8>& request_data) {
- LOG_DEBUG(Service_AM, "Processing Request: SetMovedCursorV2Flag");
-
- ASSERT(request_data.size() == sizeof(SwkbdRequestCommand) + 1);
-
- std::memcpy(&use_moved_cursor_v2, request_data.data() + sizeof(SwkbdRequestCommand), 1);
-}
-
-// Inline Software Keyboard Replies
-
-void SoftwareKeyboard::ReplyFinishedInitialize() {
- LOG_DEBUG(Service_AM, "Sending Reply: FinishedInitialize");
-
- std::vector<u8> reply(REPLY_BASE_SIZE + 1);
-
- SetReplyBase(reply, swkbd_state, SwkbdReplyType::FinishedInitialize);
-
- broker.PushInteractiveDataFromApplet(std::make_shared<IStorage>(system, std::move(reply)));
-}
-
-void SoftwareKeyboard::ReplyDefault() {
- LOG_DEBUG(Service_AM, "Sending Reply: Default");
-
- std::vector<u8> reply(REPLY_BASE_SIZE);
-
- SetReplyBase(reply, swkbd_state, SwkbdReplyType::Default);
-
- broker.PushInteractiveDataFromApplet(std::make_shared<IStorage>(system, std::move(reply)));
-}
-
-void SoftwareKeyboard::ReplyChangedString() {
- LOG_DEBUG(Service_AM, "Sending Reply: ChangedString");
-
- std::vector<u8> reply(REPLY_BASE_SIZE + REPLY_UTF16_SIZE + sizeof(SwkbdChangedStringArg));
-
- SetReplyBase(reply, swkbd_state, SwkbdReplyType::ChangedString);
-
- const SwkbdChangedStringArg changed_string_arg{
- .text_length{static_cast<u32>(current_text.size())},
- .dictionary_start_cursor_position{-1},
- .dictionary_end_cursor_position{-1},
- .cursor_position{current_cursor_position},
- };
-
- std::memcpy(reply.data() + REPLY_BASE_SIZE, current_text.data(),
- current_text.size() * sizeof(char16_t));
- std::memcpy(reply.data() + REPLY_BASE_SIZE + REPLY_UTF16_SIZE, &changed_string_arg,
- sizeof(SwkbdChangedStringArg));
-
- broker.PushInteractiveDataFromApplet(std::make_shared<IStorage>(system, std::move(reply)));
-}
-
-void SoftwareKeyboard::ReplyMovedCursor() {
- LOG_DEBUG(Service_AM, "Sending Reply: MovedCursor");
-
- std::vector<u8> reply(REPLY_BASE_SIZE + REPLY_UTF16_SIZE + sizeof(SwkbdMovedCursorArg));
-
- SetReplyBase(reply, swkbd_state, SwkbdReplyType::MovedCursor);
-
- const SwkbdMovedCursorArg moved_cursor_arg{
- .text_length{static_cast<u32>(current_text.size())},
- .cursor_position{current_cursor_position},
- };
-
- std::memcpy(reply.data() + REPLY_BASE_SIZE, current_text.data(),
- current_text.size() * sizeof(char16_t));
- std::memcpy(reply.data() + REPLY_BASE_SIZE + REPLY_UTF16_SIZE, &moved_cursor_arg,
- sizeof(SwkbdMovedCursorArg));
-
- broker.PushInteractiveDataFromApplet(std::make_shared<IStorage>(system, std::move(reply)));
-}
-
-void SoftwareKeyboard::ReplyMovedTab() {
- LOG_DEBUG(Service_AM, "Sending Reply: MovedTab");
-
- std::vector<u8> reply(REPLY_BASE_SIZE + REPLY_UTF16_SIZE + sizeof(SwkbdMovedTabArg));
-
- SetReplyBase(reply, swkbd_state, SwkbdReplyType::MovedTab);
-
- const SwkbdMovedTabArg moved_tab_arg{
- .text_length{static_cast<u32>(current_text.size())},
- .cursor_position{current_cursor_position},
- };
-
- std::memcpy(reply.data() + REPLY_BASE_SIZE, current_text.data(),
- current_text.size() * sizeof(char16_t));
- std::memcpy(reply.data() + REPLY_BASE_SIZE + REPLY_UTF16_SIZE, &moved_tab_arg,
- sizeof(SwkbdMovedTabArg));
-
- broker.PushInteractiveDataFromApplet(std::make_shared<IStorage>(system, std::move(reply)));
-}
-
-void SoftwareKeyboard::ReplyDecidedEnter() {
- LOG_DEBUG(Service_AM, "Sending Reply: DecidedEnter");
-
- std::vector<u8> reply(REPLY_BASE_SIZE + REPLY_UTF16_SIZE + sizeof(SwkbdDecidedEnterArg));
-
- SetReplyBase(reply, swkbd_state, SwkbdReplyType::DecidedEnter);
-
- const SwkbdDecidedEnterArg decided_enter_arg{
- .text_length{static_cast<u32>(current_text.size())},
- };
-
- std::memcpy(reply.data() + REPLY_BASE_SIZE, current_text.data(),
- current_text.size() * sizeof(char16_t));
- std::memcpy(reply.data() + REPLY_BASE_SIZE + REPLY_UTF16_SIZE, &decided_enter_arg,
- sizeof(SwkbdDecidedEnterArg));
-
- broker.PushInteractiveDataFromApplet(std::make_shared<IStorage>(system, std::move(reply)));
-
- HideInlineKeyboard();
-}
-
-void SoftwareKeyboard::ReplyDecidedCancel() {
- LOG_DEBUG(Service_AM, "Sending Reply: DecidedCancel");
-
- std::vector<u8> reply(REPLY_BASE_SIZE);
-
- SetReplyBase(reply, swkbd_state, SwkbdReplyType::DecidedCancel);
-
- broker.PushInteractiveDataFromApplet(std::make_shared<IStorage>(system, std::move(reply)));
-
- HideInlineKeyboard();
-}
-
-void SoftwareKeyboard::ReplyChangedStringUtf8() {
- LOG_DEBUG(Service_AM, "Sending Reply: ChangedStringUtf8");
-
- std::vector<u8> reply(REPLY_BASE_SIZE + REPLY_UTF8_SIZE + sizeof(SwkbdChangedStringArg));
-
- SetReplyBase(reply, swkbd_state, SwkbdReplyType::ChangedStringUtf8);
-
- std::string utf8_current_text = Common::UTF16ToUTF8(current_text);
-
- const SwkbdChangedStringArg changed_string_arg{
- .text_length{static_cast<u32>(current_text.size())},
- .dictionary_start_cursor_position{-1},
- .dictionary_end_cursor_position{-1},
- .cursor_position{current_cursor_position},
- };
-
- std::memcpy(reply.data() + REPLY_BASE_SIZE, utf8_current_text.data(), utf8_current_text.size());
- std::memcpy(reply.data() + REPLY_BASE_SIZE + REPLY_UTF8_SIZE, &changed_string_arg,
- sizeof(SwkbdChangedStringArg));
-
- broker.PushInteractiveDataFromApplet(std::make_shared<IStorage>(system, std::move(reply)));
-}
-
-void SoftwareKeyboard::ReplyMovedCursorUtf8() {
- LOG_DEBUG(Service_AM, "Sending Reply: MovedCursorUtf8");
-
- std::vector<u8> reply(REPLY_BASE_SIZE + REPLY_UTF8_SIZE + sizeof(SwkbdMovedCursorArg));
-
- SetReplyBase(reply, swkbd_state, SwkbdReplyType::MovedCursorUtf8);
-
- std::string utf8_current_text = Common::UTF16ToUTF8(current_text);
-
- const SwkbdMovedCursorArg moved_cursor_arg{
- .text_length{static_cast<u32>(current_text.size())},
- .cursor_position{current_cursor_position},
- };
-
- std::memcpy(reply.data() + REPLY_BASE_SIZE, utf8_current_text.data(), utf8_current_text.size());
- std::memcpy(reply.data() + REPLY_BASE_SIZE + REPLY_UTF8_SIZE, &moved_cursor_arg,
- sizeof(SwkbdMovedCursorArg));
-
- broker.PushInteractiveDataFromApplet(std::make_shared<IStorage>(system, std::move(reply)));
-}
-
-void SoftwareKeyboard::ReplyDecidedEnterUtf8() {
- LOG_DEBUG(Service_AM, "Sending Reply: DecidedEnterUtf8");
-
- std::vector<u8> reply(REPLY_BASE_SIZE + REPLY_UTF8_SIZE + sizeof(SwkbdDecidedEnterArg));
-
- SetReplyBase(reply, swkbd_state, SwkbdReplyType::DecidedEnterUtf8);
-
- std::string utf8_current_text = Common::UTF16ToUTF8(current_text);
-
- const SwkbdDecidedEnterArg decided_enter_arg{
- .text_length{static_cast<u32>(current_text.size())},
- };
-
- std::memcpy(reply.data() + REPLY_BASE_SIZE, utf8_current_text.data(), utf8_current_text.size());
- std::memcpy(reply.data() + REPLY_BASE_SIZE + REPLY_UTF8_SIZE, &decided_enter_arg,
- sizeof(SwkbdDecidedEnterArg));
-
- broker.PushInteractiveDataFromApplet(std::make_shared<IStorage>(system, std::move(reply)));
-
- HideInlineKeyboard();
-}
-
-void SoftwareKeyboard::ReplyUnsetCustomizeDic() {
- LOG_DEBUG(Service_AM, "Sending Reply: UnsetCustomizeDic");
-
- std::vector<u8> reply(REPLY_BASE_SIZE);
-
- SetReplyBase(reply, swkbd_state, SwkbdReplyType::UnsetCustomizeDic);
-
- broker.PushInteractiveDataFromApplet(std::make_shared<IStorage>(system, std::move(reply)));
-}
-
-void SoftwareKeyboard::ReplyReleasedUserWordInfo() {
- LOG_DEBUG(Service_AM, "Sending Reply: ReleasedUserWordInfo");
-
- std::vector<u8> reply(REPLY_BASE_SIZE);
-
- SetReplyBase(reply, swkbd_state, SwkbdReplyType::ReleasedUserWordInfo);
-
- broker.PushInteractiveDataFromApplet(std::make_shared<IStorage>(system, std::move(reply)));
-}
-
-void SoftwareKeyboard::ReplyUnsetCustomizedDictionaries() {
- LOG_DEBUG(Service_AM, "Sending Reply: UnsetCustomizedDictionaries");
-
- std::vector<u8> reply(REPLY_BASE_SIZE);
-
- SetReplyBase(reply, swkbd_state, SwkbdReplyType::UnsetCustomizedDictionaries);
-
- broker.PushInteractiveDataFromApplet(std::make_shared<IStorage>(system, std::move(reply)));
-}
-
-void SoftwareKeyboard::ReplyChangedStringV2() {
- LOG_DEBUG(Service_AM, "Sending Reply: ChangedStringV2");
-
- std::vector<u8> reply(REPLY_BASE_SIZE + REPLY_UTF16_SIZE + sizeof(SwkbdChangedStringArg) + 1);
-
- SetReplyBase(reply, swkbd_state, SwkbdReplyType::ChangedStringV2);
-
- const SwkbdChangedStringArg changed_string_arg{
- .text_length{static_cast<u32>(current_text.size())},
- .dictionary_start_cursor_position{-1},
- .dictionary_end_cursor_position{-1},
- .cursor_position{current_cursor_position},
- };
-
- constexpr u8 flag = 0;
-
- std::memcpy(reply.data() + REPLY_BASE_SIZE, current_text.data(),
- current_text.size() * sizeof(char16_t));
- std::memcpy(reply.data() + REPLY_BASE_SIZE + REPLY_UTF16_SIZE, &changed_string_arg,
- sizeof(SwkbdChangedStringArg));
- std::memcpy(reply.data() + REPLY_BASE_SIZE + REPLY_UTF16_SIZE + sizeof(SwkbdChangedStringArg),
- &flag, 1);
-
- broker.PushInteractiveDataFromApplet(std::make_shared<IStorage>(system, std::move(reply)));
-}
-
-void SoftwareKeyboard::ReplyMovedCursorV2() {
- LOG_DEBUG(Service_AM, "Sending Reply: MovedCursorV2");
-
- std::vector<u8> reply(REPLY_BASE_SIZE + REPLY_UTF16_SIZE + sizeof(SwkbdMovedCursorArg) + 1);
-
- SetReplyBase(reply, swkbd_state, SwkbdReplyType::MovedCursorV2);
-
- const SwkbdMovedCursorArg moved_cursor_arg{
- .text_length{static_cast<u32>(current_text.size())},
- .cursor_position{current_cursor_position},
- };
-
- constexpr u8 flag = 0;
-
- std::memcpy(reply.data() + REPLY_BASE_SIZE, current_text.data(),
- current_text.size() * sizeof(char16_t));
- std::memcpy(reply.data() + REPLY_BASE_SIZE + REPLY_UTF16_SIZE, &moved_cursor_arg,
- sizeof(SwkbdMovedCursorArg));
- std::memcpy(reply.data() + REPLY_BASE_SIZE + REPLY_UTF16_SIZE + sizeof(SwkbdMovedCursorArg),
- &flag, 1);
-
- broker.PushInteractiveDataFromApplet(std::make_shared<IStorage>(system, std::move(reply)));
-}
-
-void SoftwareKeyboard::ReplyChangedStringUtf8V2() {
- LOG_DEBUG(Service_AM, "Sending Reply: ChangedStringUtf8V2");
-
- std::vector<u8> reply(REPLY_BASE_SIZE + REPLY_UTF8_SIZE + sizeof(SwkbdChangedStringArg) + 1);
-
- SetReplyBase(reply, swkbd_state, SwkbdReplyType::ChangedStringUtf8V2);
-
- std::string utf8_current_text = Common::UTF16ToUTF8(current_text);
-
- const SwkbdChangedStringArg changed_string_arg{
- .text_length{static_cast<u32>(current_text.size())},
- .dictionary_start_cursor_position{-1},
- .dictionary_end_cursor_position{-1},
- .cursor_position{current_cursor_position},
- };
-
- constexpr u8 flag = 0;
-
- std::memcpy(reply.data() + REPLY_BASE_SIZE, utf8_current_text.data(), utf8_current_text.size());
- std::memcpy(reply.data() + REPLY_BASE_SIZE + REPLY_UTF8_SIZE, &changed_string_arg,
- sizeof(SwkbdChangedStringArg));
- std::memcpy(reply.data() + REPLY_BASE_SIZE + REPLY_UTF8_SIZE + sizeof(SwkbdChangedStringArg),
- &flag, 1);
-
- broker.PushInteractiveDataFromApplet(std::make_shared<IStorage>(system, std::move(reply)));
-}
-
-void SoftwareKeyboard::ReplyMovedCursorUtf8V2() {
- LOG_DEBUG(Service_AM, "Sending Reply: MovedCursorUtf8V2");
-
- std::vector<u8> reply(REPLY_BASE_SIZE + REPLY_UTF8_SIZE + sizeof(SwkbdMovedCursorArg) + 1);
-
- SetReplyBase(reply, swkbd_state, SwkbdReplyType::MovedCursorUtf8V2);
-
- std::string utf8_current_text = Common::UTF16ToUTF8(current_text);
-
- const SwkbdMovedCursorArg moved_cursor_arg{
- .text_length{static_cast<u32>(current_text.size())},
- .cursor_position{current_cursor_position},
- };
-
- constexpr u8 flag = 0;
-
- std::memcpy(reply.data() + REPLY_BASE_SIZE, utf8_current_text.data(), utf8_current_text.size());
- std::memcpy(reply.data() + REPLY_BASE_SIZE + REPLY_UTF8_SIZE, &moved_cursor_arg,
- sizeof(SwkbdMovedCursorArg));
- std::memcpy(reply.data() + REPLY_BASE_SIZE + REPLY_UTF8_SIZE + sizeof(SwkbdMovedCursorArg),
- &flag, 1);
-
- broker.PushInteractiveDataFromApplet(std::make_shared<IStorage>(system, std::move(reply)));
-}
-
-} // namespace Service::AM::Applets
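The Reply* helpers above all share one fixed-offset buffer layout: a small header (keyboard state followed by reply type), then a fixed-size text window (0x3EC bytes for UTF-16, 0x7D4 for UTF-8), then a small argument struct. A self-contained sketch of assembling one such reply, with stand-in enums and struct in place of the real definitions from software_keyboard_types.h (the stand-ins are assumptions made only for illustration):

#include <algorithm>
#include <cstdint>
#include <cstring>
#include <string>
#include <vector>

// Illustrative stand-ins; not the real SwkbdState/SwkbdReplyType/SwkbdChangedStringArg.
enum class State : std::uint32_t { InitializedIsShown = 3 };
enum class ReplyType : std::uint32_t { ChangedString = 2 };
struct ChangedStringArg {
    std::uint32_t text_length;
    std::int32_t dictionary_start_cursor_position;
    std::int32_t dictionary_end_cursor_position;
    std::int32_t cursor_position;
};

constexpr std::size_t kBaseSize = sizeof(State) + sizeof(ReplyType);
constexpr std::size_t kUtf16Window = 0x3EC; // fixed-size UTF-16 text region, as in the applet

std::vector<std::uint8_t> BuildChangedStringReply(const std::u16string& text, std::int32_t cursor) {
    std::vector<std::uint8_t> reply(kBaseSize + kUtf16Window + sizeof(ChangedStringArg));

    // Header: state followed immediately by the reply type.
    const State state = State::InitializedIsShown;
    const ReplyType type = ReplyType::ChangedString;
    std::memcpy(reply.data(), &state, sizeof(state));
    std::memcpy(reply.data() + sizeof(state), &type, sizeof(type));

    // The UTF-16 text occupies a fixed-size window right after the header;
    // clamp so an oversized string cannot overrun the window.
    const std::size_t text_bytes = std::min(text.size() * sizeof(char16_t), kUtf16Window);
    std::memcpy(reply.data() + kBaseSize, text.data(), text_bytes);

    // The argument struct always starts at the same offset, regardless of text length.
    const ChangedStringArg arg{static_cast<std::uint32_t>(text.size()), -1, -1, cursor};
    std::memcpy(reply.data() + kBaseSize + kUtf16Window, &arg, sizeof(arg));
    return reply;
}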
diff --git a/src/core/hle/service/am/applets/software_keyboard.h b/src/core/hle/service/am/applets/software_keyboard.h
deleted file mode 100644
index e3fc733d0..000000000
--- a/src/core/hle/service/am/applets/software_keyboard.h
+++ /dev/null
@@ -1,166 +0,0 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include "common/common_funcs.h"
-#include "common/common_types.h"
-#include "core/hle/result.h"
-#include "core/hle/service/am/applets/applets.h"
-#include "core/hle/service/am/applets/software_keyboard_types.h"
-
-namespace Core {
-class System;
-}
-
-namespace Service::AM::Applets {
-
-class SoftwareKeyboard final : public Applet {
-public:
- explicit SoftwareKeyboard(Core::System& system_, LibraryAppletMode applet_mode_,
- Core::Frontend::SoftwareKeyboardApplet& frontend_);
- ~SoftwareKeyboard() override;
-
- void Initialize() override;
-
- bool TransactionComplete() const override;
- ResultCode GetStatus() const override;
- void ExecuteInteractive() override;
- void Execute() override;
-
- /**
- * Submits the input text to the application.
- * If text checking is enabled, the application will verify the input text.
- * If use_utf8 is enabled, the input text will be converted to UTF-8 prior to being submitted.
- * This should only be used by the normal software keyboard.
- *
- * @param result SwkbdResult enum
- * @param submitted_text UTF-16 encoded string
- */
- void SubmitTextNormal(SwkbdResult result, std::u16string submitted_text);
-
- /**
- * Submits the input text to the application.
- * If utf8_mode is enabled, the input text will be converted to UTF-8 prior to being submitted.
- * This should only be used by the inline software keyboard.
- *
- * @param reply_type SwkbdReplyType enum
- * @param submitted_text UTF-16 encoded string
- * @param cursor_position The current position of the text cursor
- */
- void SubmitTextInline(SwkbdReplyType reply_type, std::u16string submitted_text,
- s32 cursor_position);
-
-private:
- /// Initializes the normal software keyboard.
- void InitializeForeground();
-
- /// Initializes the inline software keyboard.
- void InitializeBackground(LibraryAppletMode library_applet_mode);
-
- /// Processes the text check sent by the application.
- void ProcessTextCheck();
-
- /// Processes the inline software keyboard request command sent by the application.
- void ProcessInlineKeyboardRequest();
-
- /// Submits the input text and exits the applet.
- void SubmitNormalOutputAndExit(SwkbdResult result, std::u16string submitted_text);
-
- /// Submits the input text for text checking.
- void SubmitForTextCheck(std::u16string submitted_text);
-
- /// Sends a reply to the application after processing a request command.
- void SendReply(SwkbdReplyType reply_type);
-
- /// Changes the inline keyboard state.
- void ChangeState(SwkbdState state);
-
- /**
- * Signals the frontend to initialize the software keyboard with common parameters.
- * This initializes either the normal software keyboard or the inline software keyboard
- * depending on the state of is_background.
- * Note that this does not cause the keyboard to appear.
- * Use the respective Show*Keyboard() functions to cause the respective keyboards to appear.
- */
- void InitializeFrontendKeyboard();
-
- /// Signals the frontend to show the normal software keyboard.
- void ShowNormalKeyboard();
-
- /// Signals the frontend to show the text check dialog.
- void ShowTextCheckDialog(SwkbdTextCheckResult text_check_result,
- std::u16string text_check_message);
-
- /// Signals the frontend to show the inline software keyboard.
- void ShowInlineKeyboard();
-
- /// Signals the frontend to hide the inline software keyboard.
- void HideInlineKeyboard();
-
- /// Signals the frontend that the current inline keyboard text has changed.
- void InlineTextChanged();
-
- /// Signals both the frontend and application that the software keyboard is exiting.
- void ExitKeyboard();
-
- // Inline Software Keyboard Requests
-
- void RequestFinalize(const std::vector<u8>& request_data);
- void RequestSetUserWordInfo(const std::vector<u8>& request_data);
- void RequestSetCustomizeDic(const std::vector<u8>& request_data);
- void RequestCalc(const std::vector<u8>& request_data);
- void RequestSetCustomizedDictionaries(const std::vector<u8>& request_data);
- void RequestUnsetCustomizedDictionaries(const std::vector<u8>& request_data);
- void RequestSetChangedStringV2Flag(const std::vector<u8>& request_data);
- void RequestSetMovedCursorV2Flag(const std::vector<u8>& request_data);
-
- // Inline Software Keyboard Replies
-
- void ReplyFinishedInitialize();
- void ReplyDefault();
- void ReplyChangedString();
- void ReplyMovedCursor();
- void ReplyMovedTab();
- void ReplyDecidedEnter();
- void ReplyDecidedCancel();
- void ReplyChangedStringUtf8();
- void ReplyMovedCursorUtf8();
- void ReplyDecidedEnterUtf8();
- void ReplyUnsetCustomizeDic();
- void ReplyReleasedUserWordInfo();
- void ReplyUnsetCustomizedDictionaries();
- void ReplyChangedStringV2();
- void ReplyMovedCursorV2();
- void ReplyChangedStringUtf8V2();
- void ReplyMovedCursorUtf8V2();
-
- Core::Frontend::SoftwareKeyboardApplet& frontend;
- Core::System& system;
-
- SwkbdAppletVersion swkbd_applet_version;
-
- SwkbdConfigCommon swkbd_config_common;
- SwkbdConfigOld swkbd_config_old;
- SwkbdConfigOld2 swkbd_config_old2;
- SwkbdConfigNew swkbd_config_new;
- std::u16string initial_text;
-
- SwkbdState swkbd_state{SwkbdState::NotInitialized};
- SwkbdInitializeArg swkbd_initialize_arg;
- SwkbdCalcArg swkbd_calc_arg;
- bool use_changed_string_v2{false};
- bool use_moved_cursor_v2{false};
- bool inline_use_utf8{false};
- s32 current_cursor_position{};
-
- std::u16string current_text;
-
- bool is_background{false};
-
- bool complete{false};
- ResultCode status{ResultSuccess};
-};
-
-} // namespace Service::AM::Applets
diff --git a/src/core/hle/service/am/applets/web_browser.cpp b/src/core/hle/service/am/applets/web_browser.cpp
deleted file mode 100644
index 3b28e829b..000000000
--- a/src/core/hle/service/am/applets/web_browser.cpp
+++ /dev/null
@@ -1,474 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/fs/file.h"
-#include "common/fs/fs.h"
-#include "common/fs/path_util.h"
-#include "common/logging/log.h"
-#include "common/string_util.h"
-#include "core/core.h"
-#include "core/file_sys/content_archive.h"
-#include "core/file_sys/mode.h"
-#include "core/file_sys/nca_metadata.h"
-#include "core/file_sys/patch_manager.h"
-#include "core/file_sys/registered_cache.h"
-#include "core/file_sys/romfs.h"
-#include "core/file_sys/system_archive/system_archive.h"
-#include "core/file_sys/vfs_vector.h"
-#include "core/frontend/applets/web_browser.h"
-#include "core/hle/kernel/k_process.h"
-#include "core/hle/result.h"
-#include "core/hle/service/am/am.h"
-#include "core/hle/service/am/applets/web_browser.h"
-#include "core/hle/service/filesystem/filesystem.h"
-#include "core/hle/service/ns/pl_u.h"
-
-namespace Service::AM::Applets {
-
-namespace {
-
-template <typename T>
-void ParseRawValue(T& value, const std::vector<u8>& data) {
- static_assert(std::is_trivially_copyable_v<T>,
- "It's undefined behavior to use memcpy with non-trivially copyable objects");
- std::memcpy(&value, data.data(), data.size());
-}
-
-template <typename T>
-T ParseRawValue(const std::vector<u8>& data) {
- T value;
- ParseRawValue(value, data);
- return value;
-}
-
-std::string ParseStringValue(const std::vector<u8>& data) {
- return Common::StringFromFixedZeroTerminatedBuffer(reinterpret_cast<const char*>(data.data()),
- data.size());
-}
-
-std::string GetMainURL(const std::string& url) {
- const auto index = url.find('?');
-
- if (index == std::string::npos) {
- return url;
- }
-
- return url.substr(0, index);
-}
-
-WebArgInputTLVMap ReadWebArgs(const std::vector<u8>& web_arg, WebArgHeader& web_arg_header) {
- std::memcpy(&web_arg_header, web_arg.data(), sizeof(WebArgHeader));
-
- if (web_arg.size() == sizeof(WebArgHeader)) {
- return {};
- }
-
- WebArgInputTLVMap input_tlv_map;
-
- u64 current_offset = sizeof(WebArgHeader);
-
- for (std::size_t i = 0; i < web_arg_header.total_tlv_entries; ++i) {
- if (web_arg.size() < current_offset + sizeof(WebArgInputTLV)) {
- return input_tlv_map;
- }
-
- WebArgInputTLV input_tlv;
- std::memcpy(&input_tlv, web_arg.data() + current_offset, sizeof(WebArgInputTLV));
-
- current_offset += sizeof(WebArgInputTLV);
-
- if (web_arg.size() < current_offset + input_tlv.arg_data_size) {
- return input_tlv_map;
- }
-
- std::vector<u8> data(input_tlv.arg_data_size);
- std::memcpy(data.data(), web_arg.data() + current_offset, input_tlv.arg_data_size);
-
- current_offset += input_tlv.arg_data_size;
-
- input_tlv_map.insert_or_assign(input_tlv.input_tlv_type, std::move(data));
- }
-
- return input_tlv_map;
-}
-
-FileSys::VirtualFile GetOfflineRomFS(Core::System& system, u64 title_id,
- FileSys::ContentRecordType nca_type) {
- if (nca_type == FileSys::ContentRecordType::Data) {
- const auto nca =
- system.GetFileSystemController().GetSystemNANDContents()->GetEntry(title_id, nca_type);
-
- if (nca == nullptr) {
- LOG_ERROR(Service_AM,
- "NCA of type={} with title_id={:016X} is not found in the System NAND!",
- nca_type, title_id);
- return FileSys::SystemArchive::SynthesizeSystemArchive(title_id);
- }
-
- return nca->GetRomFS();
- } else {
- const auto nca = system.GetContentProvider().GetEntry(title_id, nca_type);
-
- if (nca == nullptr) {
- LOG_ERROR(Service_AM,
- "NCA of type={} with title_id={:016X} is not found in the ContentProvider!",
- nca_type, title_id);
- return nullptr;
- }
-
- const FileSys::PatchManager pm{title_id, system.GetFileSystemController(),
- system.GetContentProvider()};
-
- return pm.PatchRomFS(nca->GetRomFS(), nca->GetBaseIVFCOffset(), nca_type);
- }
-}
-
-void ExtractSharedFonts(Core::System& system) {
- static constexpr std::array<const char*, 7> DECRYPTED_SHARED_FONTS{
- "FontStandard.ttf",
- "FontChineseSimplified.ttf",
- "FontExtendedChineseSimplified.ttf",
- "FontChineseTraditional.ttf",
- "FontKorean.ttf",
- "FontNintendoExtended.ttf",
- "FontNintendoExtended2.ttf",
- };
-
- const auto fonts_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::CacheDir) / "fonts";
-
- for (std::size_t i = 0; i < NS::SHARED_FONTS.size(); ++i) {
- const auto font_file_path = fonts_dir / DECRYPTED_SHARED_FONTS[i];
-
- if (Common::FS::Exists(font_file_path)) {
- continue;
- }
-
- const auto font = NS::SHARED_FONTS[i];
- const auto font_title_id = static_cast<u64>(font.first);
-
- const auto nca = system.GetFileSystemController().GetSystemNANDContents()->GetEntry(
- font_title_id, FileSys::ContentRecordType::Data);
-
- FileSys::VirtualFile romfs;
-
- if (!nca) {
- romfs = FileSys::SystemArchive::SynthesizeSystemArchive(font_title_id);
- } else {
- romfs = nca->GetRomFS();
- }
-
- if (!romfs) {
- LOG_ERROR(Service_AM, "SharedFont RomFS with title_id={:016X} cannot be extracted!",
- font_title_id);
- continue;
- }
-
- const auto extracted_romfs = FileSys::ExtractRomFS(romfs);
-
- if (!extracted_romfs) {
- LOG_ERROR(Service_AM, "SharedFont RomFS with title_id={:016X} failed to extract!",
- font_title_id);
- continue;
- }
-
- const auto font_file = extracted_romfs->GetFile(font.second);
-
- if (!font_file) {
- LOG_ERROR(Service_AM, "SharedFont RomFS with title_id={:016X} has no font file \"{}\"!",
- font_title_id, font.second);
- continue;
- }
-
- std::vector<u32> font_data_u32(font_file->GetSize() / sizeof(u32));
- font_file->ReadBytes<u32>(font_data_u32.data(), font_file->GetSize());
-
- std::transform(font_data_u32.begin(), font_data_u32.end(), font_data_u32.begin(),
- Common::swap32);
-
- std::vector<u8> decrypted_data(font_file->GetSize() - 8);
-
- NS::DecryptSharedFontToTTF(font_data_u32, decrypted_data);
-
- FileSys::VirtualFile decrypted_font = std::make_shared<FileSys::VectorVfsFile>(
- std::move(decrypted_data), DECRYPTED_SHARED_FONTS[i]);
-
- const auto temp_dir = system.GetFilesystem()->CreateDirectory(
- Common::FS::PathToUTF8String(fonts_dir), FileSys::Mode::ReadWrite);
-
- const auto out_file = temp_dir->CreateFile(DECRYPTED_SHARED_FONTS[i]);
-
- FileSys::VfsRawCopy(decrypted_font, out_file);
- }
-}
-
-} // namespace
-
-WebBrowser::WebBrowser(Core::System& system_, LibraryAppletMode applet_mode_,
- const Core::Frontend::WebBrowserApplet& frontend_)
- : Applet{system_, applet_mode_}, frontend(frontend_), system{system_} {}
-
-WebBrowser::~WebBrowser() = default;
-
-void WebBrowser::Initialize() {
- Applet::Initialize();
-
- LOG_INFO(Service_AM, "Initializing Web Browser Applet.");
-
- LOG_DEBUG(Service_AM,
- "Initializing Applet with common_args: arg_version={}, lib_version={}, "
- "play_startup_sound={}, size={}, system_tick={}, theme_color={}",
- common_args.arguments_version, common_args.library_version,
- common_args.play_startup_sound, common_args.size, common_args.system_tick,
- common_args.theme_color);
-
- web_applet_version = WebAppletVersion{common_args.library_version};
-
- const auto web_arg_storage = broker.PopNormalDataToApplet();
- ASSERT(web_arg_storage != nullptr);
-
- const auto& web_arg = web_arg_storage->GetData();
- ASSERT_OR_EXECUTE(web_arg.size() >= sizeof(WebArgHeader), { return; });
-
- web_arg_input_tlv_map = ReadWebArgs(web_arg, web_arg_header);
-
- LOG_DEBUG(Service_AM, "WebArgHeader: total_tlv_entries={}, shim_kind={}",
- web_arg_header.total_tlv_entries, web_arg_header.shim_kind);
-
- ExtractSharedFonts(system);
-
- switch (web_arg_header.shim_kind) {
- case ShimKind::Shop:
- InitializeShop();
- break;
- case ShimKind::Login:
- InitializeLogin();
- break;
- case ShimKind::Offline:
- InitializeOffline();
- break;
- case ShimKind::Share:
- InitializeShare();
- break;
- case ShimKind::Web:
- InitializeWeb();
- break;
- case ShimKind::Wifi:
- InitializeWifi();
- break;
- case ShimKind::Lobby:
- InitializeLobby();
- break;
- default:
- UNREACHABLE_MSG("Invalid ShimKind={}", web_arg_header.shim_kind);
- break;
- }
-}
-
-bool WebBrowser::TransactionComplete() const {
- return complete;
-}
-
-ResultCode WebBrowser::GetStatus() const {
- return status;
-}
-
-void WebBrowser::ExecuteInteractive() {
- UNIMPLEMENTED_MSG("WebSession is not implemented");
-}
-
-void WebBrowser::Execute() {
- switch (web_arg_header.shim_kind) {
- case ShimKind::Shop:
- ExecuteShop();
- break;
- case ShimKind::Login:
- ExecuteLogin();
- break;
- case ShimKind::Offline:
- ExecuteOffline();
- break;
- case ShimKind::Share:
- ExecuteShare();
- break;
- case ShimKind::Web:
- ExecuteWeb();
- break;
- case ShimKind::Wifi:
- ExecuteWifi();
- break;
- case ShimKind::Lobby:
- ExecuteLobby();
- break;
- default:
- UNREACHABLE_MSG("Invalid ShimKind={}", web_arg_header.shim_kind);
- WebBrowserExit(WebExitReason::EndButtonPressed);
- break;
- }
-}
-
-void WebBrowser::ExtractOfflineRomFS() {
- LOG_DEBUG(Service_AM, "Extracting RomFS to {}",
- Common::FS::PathToUTF8String(offline_cache_dir));
-
- const auto extracted_romfs_dir =
- FileSys::ExtractRomFS(offline_romfs, FileSys::RomFSExtractionType::SingleDiscard);
-
- const auto temp_dir = system.GetFilesystem()->CreateDirectory(
- Common::FS::PathToUTF8String(offline_cache_dir), FileSys::Mode::ReadWrite);
-
- FileSys::VfsRawCopyD(extracted_romfs_dir, temp_dir);
-}
-
-void WebBrowser::WebBrowserExit(WebExitReason exit_reason, std::string last_url) {
- if ((web_arg_header.shim_kind == ShimKind::Share &&
- web_applet_version >= WebAppletVersion::Version196608) ||
- (web_arg_header.shim_kind == ShimKind::Web &&
- web_applet_version >= WebAppletVersion::Version524288)) {
- // TODO: Push Output TLVs instead of a WebCommonReturnValue
- }
-
- WebCommonReturnValue web_common_return_value;
-
- web_common_return_value.exit_reason = exit_reason;
- std::memcpy(&web_common_return_value.last_url, last_url.data(), last_url.size());
- web_common_return_value.last_url_size = last_url.size();
-
- LOG_DEBUG(Service_AM, "WebCommonReturnValue: exit_reason={}, last_url={}, last_url_size={}",
- exit_reason, last_url, last_url.size());
-
- complete = true;
- std::vector<u8> out_data(sizeof(WebCommonReturnValue));
- std::memcpy(out_data.data(), &web_common_return_value, out_data.size());
- broker.PushNormalDataFromApplet(std::make_shared<IStorage>(system, std::move(out_data)));
- broker.SignalStateChanged();
-}
-
-bool WebBrowser::InputTLVExistsInMap(WebArgInputTLVType input_tlv_type) const {
- return web_arg_input_tlv_map.find(input_tlv_type) != web_arg_input_tlv_map.end();
-}
-
-std::optional<std::vector<u8>> WebBrowser::GetInputTLVData(WebArgInputTLVType input_tlv_type) {
- const auto map_it = web_arg_input_tlv_map.find(input_tlv_type);
-
- if (map_it == web_arg_input_tlv_map.end()) {
- return std::nullopt;
- }
-
- return map_it->second;
-}
-
-void WebBrowser::InitializeShop() {}
-
-void WebBrowser::InitializeLogin() {}
-
-void WebBrowser::InitializeOffline() {
- const auto document_path =
- ParseStringValue(GetInputTLVData(WebArgInputTLVType::DocumentPath).value());
-
- const auto document_kind =
- ParseRawValue<DocumentKind>(GetInputTLVData(WebArgInputTLVType::DocumentKind).value());
-
- std::string additional_paths;
-
- switch (document_kind) {
- case DocumentKind::OfflineHtmlPage:
- default:
- title_id = system.CurrentProcess()->GetTitleID();
- nca_type = FileSys::ContentRecordType::HtmlDocument;
- additional_paths = "html-document";
- break;
- case DocumentKind::ApplicationLegalInformation:
- title_id = ParseRawValue<u64>(GetInputTLVData(WebArgInputTLVType::ApplicationID).value());
- nca_type = FileSys::ContentRecordType::LegalInformation;
- break;
- case DocumentKind::SystemDataPage:
- title_id = ParseRawValue<u64>(GetInputTLVData(WebArgInputTLVType::SystemDataID).value());
- nca_type = FileSys::ContentRecordType::Data;
- break;
- }
-
- static constexpr std::array<const char*, 3> RESOURCE_TYPES{
- "manual",
- "legal_information",
- "system_data",
- };
-
- offline_cache_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::CacheDir) /
- fmt::format("offline_web_applet_{}/{:016X}",
- RESOURCE_TYPES[static_cast<u32>(document_kind) - 1], title_id);
-
- offline_document = Common::FS::ConcatPathSafe(
- offline_cache_dir, fmt::format("{}/{}", additional_paths, document_path));
-}
-
-void WebBrowser::InitializeShare() {}
-
-void WebBrowser::InitializeWeb() {
- external_url = ParseStringValue(GetInputTLVData(WebArgInputTLVType::InitialURL).value());
-}
-
-void WebBrowser::InitializeWifi() {}
-
-void WebBrowser::InitializeLobby() {}
-
-void WebBrowser::ExecuteShop() {
- LOG_WARNING(Service_AM, "(STUBBED) called, Shop Applet is not implemented");
- WebBrowserExit(WebExitReason::EndButtonPressed);
-}
-
-void WebBrowser::ExecuteLogin() {
- LOG_WARNING(Service_AM, "(STUBBED) called, Login Applet is not implemented");
- WebBrowserExit(WebExitReason::EndButtonPressed);
-}
-
-void WebBrowser::ExecuteOffline() {
- const auto main_url = GetMainURL(Common::FS::PathToUTF8String(offline_document));
-
- if (!Common::FS::Exists(main_url)) {
- offline_romfs = GetOfflineRomFS(system, title_id, nca_type);
-
- if (offline_romfs == nullptr) {
- LOG_ERROR(Service_AM,
- "RomFS with title_id={:016X} and nca_type={} cannot be extracted!", title_id,
- nca_type);
- WebBrowserExit(WebExitReason::WindowClosed);
- return;
- }
- }
-
- LOG_INFO(Service_AM, "Opening offline document at {}",
- Common::FS::PathToUTF8String(offline_document));
-
- frontend.OpenLocalWebPage(
- Common::FS::PathToUTF8String(offline_document), [this] { ExtractOfflineRomFS(); },
- [this](WebExitReason exit_reason, std::string last_url) {
- WebBrowserExit(exit_reason, last_url);
- });
-}
-
-void WebBrowser::ExecuteShare() {
- LOG_WARNING(Service_AM, "(STUBBED) called, Share Applet is not implemented");
- WebBrowserExit(WebExitReason::EndButtonPressed);
-}
-
-void WebBrowser::ExecuteWeb() {
- LOG_INFO(Service_AM, "Opening external URL at {}", external_url);
-
- frontend.OpenExternalWebPage(external_url,
- [this](WebExitReason exit_reason, std::string last_url) {
- WebBrowserExit(exit_reason, last_url);
- });
-}
-
-void WebBrowser::ExecuteWifi() {
- LOG_WARNING(Service_AM, "(STUBBED) called, Wifi Applet is not implemented");
- WebBrowserExit(WebExitReason::EndButtonPressed);
-}
-
-void WebBrowser::ExecuteLobby() {
- LOG_WARNING(Service_AM, "(STUBBED) called, Lobby Applet is not implemented");
- WebBrowserExit(WebExitReason::EndButtonPressed);
-}
-} // namespace Service::AM::Applets
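ReadWebArgs above walks a small TLV (type-length-value) container: a header carrying the entry count and shim kind, followed by that many {entry header, payload} records, returning whatever was parsed so far if the buffer is shorter than the next record claims. A self-contained sketch of that parsing loop, using illustrative layouts in place of WebArgHeader/WebArgInputTLV (the real definitions live in web_types.h; field widths here are assumptions for the example):

#include <cstdint>
#include <cstring>
#include <map>
#include <vector>

// Illustrative TLV layouts; not the real WebArgHeader/WebArgInputTLV.
struct TlvHeader {
    std::uint16_t total_entries;
    std::uint16_t padding;
    std::uint32_t shim_kind;
};
struct TlvEntry {
    std::uint16_t type;
    std::uint16_t size;
    std::uint32_t padding;
};

std::map<std::uint16_t, std::vector<std::uint8_t>> ParseTlvs(const std::vector<std::uint8_t>& buf) {
    std::map<std::uint16_t, std::vector<std::uint8_t>> out;
    if (buf.size() < sizeof(TlvHeader)) {
        return out;
    }
    TlvHeader header{};
    std::memcpy(&header, buf.data(), sizeof(header));

    std::size_t offset = sizeof(TlvHeader);
    for (std::uint16_t i = 0; i < header.total_entries; ++i) {
        if (buf.size() < offset + sizeof(TlvEntry)) {
            break; // truncated entry header: keep what was parsed so far
        }
        TlvEntry entry{};
        std::memcpy(&entry, buf.data() + offset, sizeof(entry));
        offset += sizeof(TlvEntry);

        if (buf.size() < offset + entry.size) {
            break; // truncated payload
        }
        std::vector<std::uint8_t> data(entry.size);
        std::memcpy(data.data(), buf.data() + offset, entry.size);
        offset += entry.size;

        // Later entries of the same type overwrite earlier ones, as in the applet.
        out.insert_or_assign(entry.type, std::move(data));
    }
    return out;
}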
diff --git a/src/core/hle/service/am/applets/web_browser.h b/src/core/hle/service/am/applets/web_browser.h
deleted file mode 100644
index 9f81214b6..000000000
--- a/src/core/hle/service/am/applets/web_browser.h
+++ /dev/null
@@ -1,88 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <filesystem>
-#include <optional>
-
-#include "common/common_funcs.h"
-#include "common/common_types.h"
-#include "core/file_sys/vfs_types.h"
-#include "core/hle/result.h"
-#include "core/hle/service/am/applets/applets.h"
-#include "core/hle/service/am/applets/web_types.h"
-
-namespace Core {
-class System;
-}
-
-namespace FileSys {
-enum class ContentRecordType : u8;
-}
-
-namespace Service::AM::Applets {
-
-class WebBrowser final : public Applet {
-public:
- WebBrowser(Core::System& system_, LibraryAppletMode applet_mode_,
- const Core::Frontend::WebBrowserApplet& frontend_);
-
- ~WebBrowser() override;
-
- void Initialize() override;
-
- bool TransactionComplete() const override;
- ResultCode GetStatus() const override;
- void ExecuteInteractive() override;
- void Execute() override;
-
- void ExtractOfflineRomFS();
-
- void WebBrowserExit(WebExitReason exit_reason, std::string last_url = "");
-
-private:
- bool InputTLVExistsInMap(WebArgInputTLVType input_tlv_type) const;
-
- std::optional<std::vector<u8>> GetInputTLVData(WebArgInputTLVType input_tlv_type);
-
- // Initializers for the various types of browser applets
- void InitializeShop();
- void InitializeLogin();
- void InitializeOffline();
- void InitializeShare();
- void InitializeWeb();
- void InitializeWifi();
- void InitializeLobby();
-
- // Executors for the various types of browser applets
- void ExecuteShop();
- void ExecuteLogin();
- void ExecuteOffline();
- void ExecuteShare();
- void ExecuteWeb();
- void ExecuteWifi();
- void ExecuteLobby();
-
- const Core::Frontend::WebBrowserApplet& frontend;
-
- bool complete{false};
- ResultCode status{ResultSuccess};
-
- WebAppletVersion web_applet_version{};
- WebArgHeader web_arg_header{};
- WebArgInputTLVMap web_arg_input_tlv_map;
-
- u64 title_id{};
- FileSys::ContentRecordType nca_type{};
- std::filesystem::path offline_cache_dir;
- std::filesystem::path offline_document;
- FileSys::VirtualFile offline_romfs;
-
- std::string external_url;
-
- Core::System& system;
-};
-
-} // namespace Service::AM::Applets
diff --git a/src/core/hle/service/aoc/aoc_u.cpp b/src/core/hle/service/aoc/aoc_u.cpp
index fec704c65..dd945e058 100644
--- a/src/core/hle/service/aoc/aoc_u.cpp
+++ b/src/core/hle/service/aoc/aoc_u.cpp
@@ -117,7 +117,7 @@ AOC_U::AOC_U(Core::System& system_)
{7, &AOC_U::PrepareAddOnContent, "PrepareAddOnContent"},
{8, &AOC_U::GetAddOnContentListChangedEvent, "GetAddOnContentListChangedEvent"},
{9, nullptr, "GetAddOnContentLostErrorCode"},
- {10, nullptr, "GetAddOnContentListChangedEventWithProcessId"},
+ {10, &AOC_U::GetAddOnContentListChangedEventWithProcessId, "GetAddOnContentListChangedEventWithProcessId"},
{100, &AOC_U::CreateEcPurchasedEventManager, "CreateEcPurchasedEventManager"},
{101, &AOC_U::CreatePermanentEcPurchasedEventManager, "CreatePermanentEcPurchasedEventManager"},
{110, nullptr, "CreateContentsServiceManager"},
@@ -257,6 +257,14 @@ void AOC_U::GetAddOnContentListChangedEvent(Kernel::HLERequestContext& ctx) {
rb.PushCopyObjects(aoc_change_event.GetReadableEvent());
}
+void AOC_U::GetAddOnContentListChangedEventWithProcessId(Kernel::HLERequestContext& ctx) {
+ LOG_WARNING(Service_AOC, "(STUBBED) called");
+
+ IPC::ResponseBuilder rb{ctx, 2, 1};
+ rb.Push(ResultSuccess);
+ rb.PushCopyObjects(aoc_change_event.GetReadableEvent());
+}
+
void AOC_U::CreateEcPurchasedEventManager(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_AOC, "(STUBBED) called");
diff --git a/src/core/hle/service/aoc/aoc_u.h b/src/core/hle/service/aoc/aoc_u.h
index 65095baa2..bb6ffb8eb 100644
--- a/src/core/hle/service/aoc/aoc_u.h
+++ b/src/core/hle/service/aoc/aoc_u.h
@@ -28,6 +28,7 @@ private:
void GetAddOnContentBaseId(Kernel::HLERequestContext& ctx);
void PrepareAddOnContent(Kernel::HLERequestContext& ctx);
void GetAddOnContentListChangedEvent(Kernel::HLERequestContext& ctx);
+ void GetAddOnContentListChangedEventWithProcessId(Kernel::HLERequestContext& ctx);
void CreateEcPurchasedEventManager(Kernel::HLERequestContext& ctx);
void CreatePermanentEcPurchasedEventManager(Kernel::HLERequestContext& ctx);
diff --git a/src/core/hle/service/apm/apm.cpp b/src/core/hle/service/apm/apm.cpp
index 97d6619dd..f5ebfe8d6 100644
--- a/src/core/hle/service/apm/apm.cpp
+++ b/src/core/hle/service/apm/apm.cpp
@@ -5,7 +5,7 @@
#include "core/core.h"
#include "core/hle/ipc_helpers.h"
#include "core/hle/service/apm/apm.h"
-#include "core/hle/service/apm/interface.h"
+#include "core/hle/service/apm/apm_interface.h"
namespace Service::APM {
diff --git a/src/core/hle/service/apm/apm_controller.cpp b/src/core/hle/service/apm/apm_controller.cpp
new file mode 100644
index 000000000..98839fe97
--- /dev/null
+++ b/src/core/hle/service/apm/apm_controller.cpp
@@ -0,0 +1,89 @@
+// Copyright 2019 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <utility>
+
+#include "common/logging/log.h"
+#include "common/settings.h"
+#include "core/core_timing.h"
+#include "core/hle/service/apm/apm_controller.h"
+
+namespace Service::APM {
+
+constexpr auto DEFAULT_PERFORMANCE_CONFIGURATION = PerformanceConfiguration::Config7;
+
+Controller::Controller(Core::Timing::CoreTiming& core_timing_)
+ : core_timing{core_timing_}, configs{
+ {PerformanceMode::Handheld, DEFAULT_PERFORMANCE_CONFIGURATION},
+ {PerformanceMode::Docked, DEFAULT_PERFORMANCE_CONFIGURATION},
+ } {}
+
+Controller::~Controller() = default;
+
+void Controller::SetPerformanceConfiguration(PerformanceMode mode,
+ PerformanceConfiguration config) {
+ static constexpr std::array<std::pair<PerformanceConfiguration, u32>, 16> config_to_speed{{
+ {PerformanceConfiguration::Config1, 1020},
+ {PerformanceConfiguration::Config2, 1020},
+ {PerformanceConfiguration::Config3, 1224},
+ {PerformanceConfiguration::Config4, 1020},
+ {PerformanceConfiguration::Config5, 1020},
+ {PerformanceConfiguration::Config6, 1224},
+ {PerformanceConfiguration::Config7, 1020},
+ {PerformanceConfiguration::Config8, 1020},
+ {PerformanceConfiguration::Config9, 1020},
+ {PerformanceConfiguration::Config10, 1020},
+ {PerformanceConfiguration::Config11, 1020},
+ {PerformanceConfiguration::Config12, 1020},
+ {PerformanceConfiguration::Config13, 1785},
+ {PerformanceConfiguration::Config14, 1785},
+ {PerformanceConfiguration::Config15, 1020},
+ {PerformanceConfiguration::Config16, 1020},
+ }};
+
+ const auto iter = std::find_if(config_to_speed.cbegin(), config_to_speed.cend(),
+ [config](const auto& entry) { return entry.first == config; });
+
+ if (iter == config_to_speed.cend()) {
+ LOG_ERROR(Service_APM, "Invalid performance configuration value provided: {}", config);
+ return;
+ }
+
+ SetClockSpeed(iter->second);
+ configs.insert_or_assign(mode, config);
+}
+
+void Controller::SetFromCpuBoostMode(CpuBoostMode mode) {
+ constexpr std::array<PerformanceConfiguration, 3> BOOST_MODE_TO_CONFIG_MAP{{
+ PerformanceConfiguration::Config7,
+ PerformanceConfiguration::Config13,
+ PerformanceConfiguration::Config15,
+ }};
+
+ SetPerformanceConfiguration(PerformanceMode::Docked,
+ BOOST_MODE_TO_CONFIG_MAP.at(static_cast<u32>(mode)));
+}
+
+PerformanceMode Controller::GetCurrentPerformanceMode() const {
+ return Settings::values.use_docked_mode.GetValue() ? PerformanceMode::Docked
+ : PerformanceMode::Handheld;
+}
+
+PerformanceConfiguration Controller::GetCurrentPerformanceConfiguration(PerformanceMode mode) {
+ if (configs.find(mode) == configs.end()) {
+ configs.insert_or_assign(mode, DEFAULT_PERFORMANCE_CONFIGURATION);
+ }
+
+ return configs[mode];
+}
+
+void Controller::SetClockSpeed(u32 mhz) {
+ LOG_INFO(Service_APM, "called, mhz={:08X}", mhz);
+ // TODO(DarkLordZach): Actually signal core_timing to change clock speed.
+ // TODO(Rodrigo): Remove [[maybe_unused]] when core_timing is used.
+}
+
+} // namespace Service::APM
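Taken together, the two tables in apm_controller.cpp above mean SetFromCpuBoostMode resolves boost modes 0, 1 and 2 to Config7, Config13 and Config15, i.e. docked clock requests of 1020, 1785 and 1020 MHz. A minimal standalone sketch of that composition (the table below is derived from the diff and is illustrative only, not part of the change):

    #include <array>
    #include <cstddef>
    #include <cstdint>
    #include <iostream>

    int main() {
        // Clock speed in MHz reached by each CpuBoostMode, composing
        // BOOST_MODE_TO_CONFIG_MAP with config_to_speed:
        // mode 0 -> Config7 -> 1020, mode 1 -> Config13 -> 1785, mode 2 -> Config15 -> 1020.
        constexpr std::array<std::uint32_t, 3> boost_mode_to_mhz{1020, 1785, 1020};
        for (std::size_t mode = 0; mode < boost_mode_to_mhz.size(); ++mode) {
            std::cout << "CpuBoostMode " << mode << " -> " << boost_mode_to_mhz[mode] << " MHz\n";
        }
        return 0;
    }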
diff --git a/src/core/hle/service/apm/controller.h b/src/core/hle/service/apm/apm_controller.h
index 8d48e0104..8d48e0104 100644
--- a/src/core/hle/service/apm/controller.h
+++ b/src/core/hle/service/apm/apm_controller.h
diff --git a/src/core/hle/service/apm/apm_interface.cpp b/src/core/hle/service/apm/apm_interface.cpp
new file mode 100644
index 000000000..e58bad083
--- /dev/null
+++ b/src/core/hle/service/apm/apm_interface.cpp
@@ -0,0 +1,138 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/logging/log.h"
+#include "core/hle/ipc_helpers.h"
+#include "core/hle/service/apm/apm.h"
+#include "core/hle/service/apm/apm_controller.h"
+#include "core/hle/service/apm/apm_interface.h"
+
+namespace Service::APM {
+
+class ISession final : public ServiceFramework<ISession> {
+public:
+ explicit ISession(Core::System& system_, Controller& controller_)
+ : ServiceFramework{system_, "ISession"}, controller{controller_} {
+ static const FunctionInfo functions[] = {
+ {0, &ISession::SetPerformanceConfiguration, "SetPerformanceConfiguration"},
+ {1, &ISession::GetPerformanceConfiguration, "GetPerformanceConfiguration"},
+ {2, nullptr, "SetCpuOverclockEnabled"},
+ };
+ RegisterHandlers(functions);
+ }
+
+private:
+ void SetPerformanceConfiguration(Kernel::HLERequestContext& ctx) {
+ IPC::RequestParser rp{ctx};
+
+ const auto mode = rp.PopEnum<PerformanceMode>();
+ const auto config = rp.PopEnum<PerformanceConfiguration>();
+ LOG_DEBUG(Service_APM, "called mode={} config={}", mode, config);
+
+ controller.SetPerformanceConfiguration(mode, config);
+
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(ResultSuccess);
+ }
+
+ void GetPerformanceConfiguration(Kernel::HLERequestContext& ctx) {
+ IPC::RequestParser rp{ctx};
+
+ const auto mode = rp.PopEnum<PerformanceMode>();
+ LOG_DEBUG(Service_APM, "called mode={}", mode);
+
+ IPC::ResponseBuilder rb{ctx, 3};
+ rb.Push(ResultSuccess);
+ rb.PushEnum(controller.GetCurrentPerformanceConfiguration(mode));
+ }
+
+ Controller& controller;
+};
+
+APM::APM(Core::System& system_, std::shared_ptr<Module> apm_, Controller& controller_,
+ const char* name)
+ : ServiceFramework{system_, name}, apm(std::move(apm_)), controller{controller_} {
+ static const FunctionInfo functions[] = {
+ {0, &APM::OpenSession, "OpenSession"},
+ {1, &APM::GetPerformanceMode, "GetPerformanceMode"},
+ {6, &APM::IsCpuOverclockEnabled, "IsCpuOverclockEnabled"},
+ };
+ RegisterHandlers(functions);
+}
+
+APM::~APM() = default;
+
+void APM::OpenSession(Kernel::HLERequestContext& ctx) {
+ LOG_DEBUG(Service_APM, "called");
+
+ IPC::ResponseBuilder rb{ctx, 2, 0, 1};
+ rb.Push(ResultSuccess);
+ rb.PushIpcInterface<ISession>(system, controller);
+}
+
+void APM::GetPerformanceMode(Kernel::HLERequestContext& ctx) {
+ LOG_DEBUG(Service_APM, "called");
+
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.PushEnum(controller.GetCurrentPerformanceMode());
+}
+
+void APM::IsCpuOverclockEnabled(Kernel::HLERequestContext& ctx) {
+ LOG_WARNING(Service_APM, "(STUBBED) called");
+
+ IPC::ResponseBuilder rb{ctx, 3};
+ rb.Push(ResultSuccess);
+ rb.Push(false);
+}
+
+APM_Sys::APM_Sys(Core::System& system_, Controller& controller_)
+ : ServiceFramework{system_, "apm:sys"}, controller{controller_} {
+ // clang-format off
+ static const FunctionInfo functions[] = {
+ {0, nullptr, "RequestPerformanceMode"},
+ {1, &APM_Sys::GetPerformanceEvent, "GetPerformanceEvent"},
+ {2, nullptr, "GetThrottlingState"},
+ {3, nullptr, "GetLastThrottlingState"},
+ {4, nullptr, "ClearLastThrottlingState"},
+ {5, nullptr, "LoadAndApplySettings"},
+ {6, &APM_Sys::SetCpuBoostMode, "SetCpuBoostMode"},
+ {7, &APM_Sys::GetCurrentPerformanceConfiguration, "GetCurrentPerformanceConfiguration"},
+ };
+ // clang-format on
+
+ RegisterHandlers(functions);
+}
+
+APM_Sys::~APM_Sys() = default;
+
+void APM_Sys::GetPerformanceEvent(Kernel::HLERequestContext& ctx) {
+ LOG_DEBUG(Service_APM, "called");
+
+ IPC::ResponseBuilder rb{ctx, 2, 0, 1};
+ rb.Push(ResultSuccess);
+ rb.PushIpcInterface<ISession>(system, controller);
+}
+
+void APM_Sys::SetCpuBoostMode(Kernel::HLERequestContext& ctx) {
+ IPC::RequestParser rp{ctx};
+ const auto mode = rp.PopEnum<CpuBoostMode>();
+
+ LOG_DEBUG(Service_APM, "called, mode={:08X}", mode);
+
+ controller.SetFromCpuBoostMode(mode);
+
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(ResultSuccess);
+}
+
+void APM_Sys::GetCurrentPerformanceConfiguration(Kernel::HLERequestContext& ctx) {
+ LOG_DEBUG(Service_APM, "called");
+
+ IPC::ResponseBuilder rb{ctx, 3};
+ rb.Push(ResultSuccess);
+ rb.PushEnum(
+ controller.GetCurrentPerformanceConfiguration(controller.GetCurrentPerformanceMode()));
+}
+
+} // namespace Service::APM
diff --git a/src/core/hle/service/apm/interface.h b/src/core/hle/service/apm/apm_interface.h
index 063ad5308..063ad5308 100644
--- a/src/core/hle/service/apm/interface.h
+++ b/src/core/hle/service/apm/apm_interface.h
diff --git a/src/core/hle/service/apm/controller.cpp b/src/core/hle/service/apm/controller.cpp
deleted file mode 100644
index 8bfa7c0e4..000000000
--- a/src/core/hle/service/apm/controller.cpp
+++ /dev/null
@@ -1,89 +0,0 @@
-// Copyright 2019 yuzu emulator team
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <array>
-#include <utility>
-
-#include "common/logging/log.h"
-#include "common/settings.h"
-#include "core/core_timing.h"
-#include "core/hle/service/apm/controller.h"
-
-namespace Service::APM {
-
-constexpr auto DEFAULT_PERFORMANCE_CONFIGURATION = PerformanceConfiguration::Config7;
-
-Controller::Controller(Core::Timing::CoreTiming& core_timing_)
- : core_timing{core_timing_}, configs{
- {PerformanceMode::Handheld, DEFAULT_PERFORMANCE_CONFIGURATION},
- {PerformanceMode::Docked, DEFAULT_PERFORMANCE_CONFIGURATION},
- } {}
-
-Controller::~Controller() = default;
-
-void Controller::SetPerformanceConfiguration(PerformanceMode mode,
- PerformanceConfiguration config) {
- static constexpr std::array<std::pair<PerformanceConfiguration, u32>, 16> config_to_speed{{
- {PerformanceConfiguration::Config1, 1020},
- {PerformanceConfiguration::Config2, 1020},
- {PerformanceConfiguration::Config3, 1224},
- {PerformanceConfiguration::Config4, 1020},
- {PerformanceConfiguration::Config5, 1020},
- {PerformanceConfiguration::Config6, 1224},
- {PerformanceConfiguration::Config7, 1020},
- {PerformanceConfiguration::Config8, 1020},
- {PerformanceConfiguration::Config9, 1020},
- {PerformanceConfiguration::Config10, 1020},
- {PerformanceConfiguration::Config11, 1020},
- {PerformanceConfiguration::Config12, 1020},
- {PerformanceConfiguration::Config13, 1785},
- {PerformanceConfiguration::Config14, 1785},
- {PerformanceConfiguration::Config15, 1020},
- {PerformanceConfiguration::Config16, 1020},
- }};
-
- const auto iter = std::find_if(config_to_speed.cbegin(), config_to_speed.cend(),
- [config](const auto& entry) { return entry.first == config; });
-
- if (iter == config_to_speed.cend()) {
- LOG_ERROR(Service_APM, "Invalid performance configuration value provided: {}", config);
- return;
- }
-
- SetClockSpeed(iter->second);
- configs.insert_or_assign(mode, config);
-}
-
-void Controller::SetFromCpuBoostMode(CpuBoostMode mode) {
- constexpr std::array<PerformanceConfiguration, 3> BOOST_MODE_TO_CONFIG_MAP{{
- PerformanceConfiguration::Config7,
- PerformanceConfiguration::Config13,
- PerformanceConfiguration::Config15,
- }};
-
- SetPerformanceConfiguration(PerformanceMode::Docked,
- BOOST_MODE_TO_CONFIG_MAP.at(static_cast<u32>(mode)));
-}
-
-PerformanceMode Controller::GetCurrentPerformanceMode() const {
- return Settings::values.use_docked_mode.GetValue() ? PerformanceMode::Docked
- : PerformanceMode::Handheld;
-}
-
-PerformanceConfiguration Controller::GetCurrentPerformanceConfiguration(PerformanceMode mode) {
- if (configs.find(mode) == configs.end()) {
- configs.insert_or_assign(mode, DEFAULT_PERFORMANCE_CONFIGURATION);
- }
-
- return configs[mode];
-}
-
-void Controller::SetClockSpeed(u32 mhz) {
- LOG_INFO(Service_APM, "called, mhz={:08X}", mhz);
- // TODO(DarkLordZach): Actually signal core_timing to change clock speed.
- // TODO(Rodrigo): Remove [[maybe_unused]] when core_timing is used.
-}
-
-} // namespace Service::APM
diff --git a/src/core/hle/service/apm/interface.cpp b/src/core/hle/service/apm/interface.cpp
deleted file mode 100644
index d69ddd135..000000000
--- a/src/core/hle/service/apm/interface.cpp
+++ /dev/null
@@ -1,138 +0,0 @@
-// Copyright 2018 yuzu emulator team
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/logging/log.h"
-#include "core/hle/ipc_helpers.h"
-#include "core/hle/service/apm/apm.h"
-#include "core/hle/service/apm/controller.h"
-#include "core/hle/service/apm/interface.h"
-
-namespace Service::APM {
-
-class ISession final : public ServiceFramework<ISession> {
-public:
- explicit ISession(Core::System& system_, Controller& controller_)
- : ServiceFramework{system_, "ISession"}, controller{controller_} {
- static const FunctionInfo functions[] = {
- {0, &ISession::SetPerformanceConfiguration, "SetPerformanceConfiguration"},
- {1, &ISession::GetPerformanceConfiguration, "GetPerformanceConfiguration"},
- {2, nullptr, "SetCpuOverclockEnabled"},
- };
- RegisterHandlers(functions);
- }
-
-private:
- void SetPerformanceConfiguration(Kernel::HLERequestContext& ctx) {
- IPC::RequestParser rp{ctx};
-
- const auto mode = rp.PopEnum<PerformanceMode>();
- const auto config = rp.PopEnum<PerformanceConfiguration>();
- LOG_DEBUG(Service_APM, "called mode={} config={}", mode, config);
-
- controller.SetPerformanceConfiguration(mode, config);
-
- IPC::ResponseBuilder rb{ctx, 2};
- rb.Push(ResultSuccess);
- }
-
- void GetPerformanceConfiguration(Kernel::HLERequestContext& ctx) {
- IPC::RequestParser rp{ctx};
-
- const auto mode = rp.PopEnum<PerformanceMode>();
- LOG_DEBUG(Service_APM, "called mode={}", mode);
-
- IPC::ResponseBuilder rb{ctx, 3};
- rb.Push(ResultSuccess);
- rb.PushEnum(controller.GetCurrentPerformanceConfiguration(mode));
- }
-
- Controller& controller;
-};
-
-APM::APM(Core::System& system_, std::shared_ptr<Module> apm_, Controller& controller_,
- const char* name)
- : ServiceFramework{system_, name}, apm(std::move(apm_)), controller{controller_} {
- static const FunctionInfo functions[] = {
- {0, &APM::OpenSession, "OpenSession"},
- {1, &APM::GetPerformanceMode, "GetPerformanceMode"},
- {6, &APM::IsCpuOverclockEnabled, "IsCpuOverclockEnabled"},
- };
- RegisterHandlers(functions);
-}
-
-APM::~APM() = default;
-
-void APM::OpenSession(Kernel::HLERequestContext& ctx) {
- LOG_DEBUG(Service_APM, "called");
-
- IPC::ResponseBuilder rb{ctx, 2, 0, 1};
- rb.Push(ResultSuccess);
- rb.PushIpcInterface<ISession>(system, controller);
-}
-
-void APM::GetPerformanceMode(Kernel::HLERequestContext& ctx) {
- LOG_DEBUG(Service_APM, "called");
-
- IPC::ResponseBuilder rb{ctx, 2};
- rb.PushEnum(controller.GetCurrentPerformanceMode());
-}
-
-void APM::IsCpuOverclockEnabled(Kernel::HLERequestContext& ctx) {
- LOG_WARNING(Service_APM, "(STUBBED) called");
-
- IPC::ResponseBuilder rb{ctx, 3};
- rb.Push(ResultSuccess);
- rb.Push(false);
-}
-
-APM_Sys::APM_Sys(Core::System& system_, Controller& controller_)
- : ServiceFramework{system_, "apm:sys"}, controller{controller_} {
- // clang-format off
- static const FunctionInfo functions[] = {
- {0, nullptr, "RequestPerformanceMode"},
- {1, &APM_Sys::GetPerformanceEvent, "GetPerformanceEvent"},
- {2, nullptr, "GetThrottlingState"},
- {3, nullptr, "GetLastThrottlingState"},
- {4, nullptr, "ClearLastThrottlingState"},
- {5, nullptr, "LoadAndApplySettings"},
- {6, &APM_Sys::SetCpuBoostMode, "SetCpuBoostMode"},
- {7, &APM_Sys::GetCurrentPerformanceConfiguration, "GetCurrentPerformanceConfiguration"},
- };
- // clang-format on
-
- RegisterHandlers(functions);
-}
-
-APM_Sys::~APM_Sys() = default;
-
-void APM_Sys::GetPerformanceEvent(Kernel::HLERequestContext& ctx) {
- LOG_DEBUG(Service_APM, "called");
-
- IPC::ResponseBuilder rb{ctx, 2, 0, 1};
- rb.Push(ResultSuccess);
- rb.PushIpcInterface<ISession>(system, controller);
-}
-
-void APM_Sys::SetCpuBoostMode(Kernel::HLERequestContext& ctx) {
- IPC::RequestParser rp{ctx};
- const auto mode = rp.PopEnum<CpuBoostMode>();
-
- LOG_DEBUG(Service_APM, "called, mode={:08X}", mode);
-
- controller.SetFromCpuBoostMode(mode);
-
- IPC::ResponseBuilder rb{ctx, 2};
- rb.Push(ResultSuccess);
-}
-
-void APM_Sys::GetCurrentPerformanceConfiguration(Kernel::HLERequestContext& ctx) {
- LOG_DEBUG(Service_APM, "called");
-
- IPC::ResponseBuilder rb{ctx, 3};
- rb.Push(ResultSuccess);
- rb.PushEnum(
- controller.GetCurrentPerformanceConfiguration(controller.GetCurrentPerformanceMode()));
-}
-
-} // namespace Service::APM
diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp
index 804c6b10c..92d4510b1 100644
--- a/src/core/hle/service/audio/audout_u.cpp
+++ b/src/core/hle/service/audio/audout_u.cpp
@@ -58,7 +58,7 @@ public:
{7, &IAudioOut::AppendAudioOutBufferImpl, "AppendAudioOutBufferAuto"},
{8, &IAudioOut::GetReleasedAudioOutBufferImpl, "GetReleasedAudioOutBufferAuto"},
{9, &IAudioOut::GetAudioOutBufferCount, "GetAudioOutBufferCount"},
- {10, nullptr, "GetAudioOutPlayedSampleCount"},
+ {10, &IAudioOut::GetAudioOutPlayedSampleCount, "GetAudioOutPlayedSampleCount"},
{11, &IAudioOut::FlushAudioOutBuffers, "FlushAudioOutBuffers"},
{12, &IAudioOut::SetAudioOutVolume, "SetAudioOutVolume"},
{13, &IAudioOut::GetAudioOutVolume, "GetAudioOutVolume"},
@@ -186,6 +186,14 @@ private:
rb.Push(static_cast<u32>(stream->GetQueueSize()));
}
+ void GetAudioOutPlayedSampleCount(Kernel::HLERequestContext& ctx) {
+ LOG_DEBUG(Service_Audio, "called");
+
+ IPC::ResponseBuilder rb{ctx, 4};
+ rb.Push(ResultSuccess);
+ rb.Push(stream->GetPlayedSampleCount());
+ }
+
void FlushAudioOutBuffers(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_Audio, "called");
diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp
index 800feba6e..b769fe959 100644
--- a/src/core/hle/service/audio/audren_u.cpp
+++ b/src/core/hle/service/audio/audren_u.cpp
@@ -96,7 +96,7 @@ private:
void RequestUpdateImpl(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_Audio, "(STUBBED) called");
- std::vector<u8> output_params(ctx.GetWriteBufferSize());
+ std::vector<u8> output_params(ctx.GetWriteBufferSize(), 0);
auto result = renderer->UpdateAudioRenderer(ctx.ReadBuffer(), output_params);
if (result.IsSuccess()) {
@@ -110,17 +110,19 @@ private:
void Start(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_Audio, "(STUBBED) called");
- IPC::ResponseBuilder rb{ctx, 2};
+ const auto result = renderer->Start();
- rb.Push(ResultSuccess);
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(result);
}
void Stop(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_Audio, "(STUBBED) called");
- IPC::ResponseBuilder rb{ctx, 2};
+ const auto result = renderer->Stop();
- rb.Push(ResultSuccess);
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(result);
}
void QuerySystemEvent(Kernel::HLERequestContext& ctx) {
@@ -288,7 +290,7 @@ private:
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(ResultSuccess);
- rb.Push<u32>(1);
+ rb.Push<u32>(2);
}
// Should be similar to QueryAudioDeviceOutputEvent
diff --git a/src/core/hle/service/audio/hwopus.cpp b/src/core/hle/service/audio/hwopus.cpp
index 10e6f7a64..33a6dbbb6 100644
--- a/src/core/hle/service/audio/hwopus.cpp
+++ b/src/core/hle/service/audio/hwopus.cpp
@@ -253,7 +253,11 @@ void HwOpus::GetWorkBufferSize(Kernel::HLERequestContext& ctx) {
rb.Push<u32>(worker_buffer_sz);
}
-void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) {
+void HwOpus::GetWorkBufferSizeEx(Kernel::HLERequestContext& ctx) {
+ GetWorkBufferSize(ctx);
+}
+
+void HwOpus::OpenHardwareOpusDecoder(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto sample_rate = rp.Pop<u32>();
const auto channel_count = rp.Pop<u32>();
@@ -291,14 +295,47 @@ void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) {
system, OpusDecoderState{std::move(decoder), sample_rate, channel_count});
}
+void HwOpus::OpenHardwareOpusDecoderEx(Kernel::HLERequestContext& ctx) {
+ IPC::RequestParser rp{ctx};
+ const auto sample_rate = rp.Pop<u32>();
+ const auto channel_count = rp.Pop<u32>();
+
+ LOG_CRITICAL(Audio, "called sample_rate={}, channel_count={}", sample_rate, channel_count);
+
+ ASSERT_MSG(sample_rate == 48000 || sample_rate == 24000 || sample_rate == 16000 ||
+ sample_rate == 12000 || sample_rate == 8000,
+ "Invalid sample rate");
+ ASSERT_MSG(channel_count == 1 || channel_count == 2, "Invalid channel count");
+
+ const int num_stereo_streams = channel_count == 2 ? 1 : 0;
+ const auto mapping_table = CreateMappingTable(channel_count);
+
+ int error = 0;
+ OpusDecoderPtr decoder{
+ opus_multistream_decoder_create(sample_rate, static_cast<int>(channel_count), 1,
+ num_stereo_streams, mapping_table.data(), &error)};
+ if (error != OPUS_OK || decoder == nullptr) {
+ LOG_ERROR(Audio, "Failed to create Opus decoder (error={}).", error);
+ IPC::ResponseBuilder rb{ctx, 2};
+ // TODO(ogniK): Use correct error code
+ rb.Push(ResultUnknown);
+ return;
+ }
+
+ IPC::ResponseBuilder rb{ctx, 2, 0, 1};
+ rb.Push(ResultSuccess);
+ rb.PushIpcInterface<IHardwareOpusDecoderManager>(
+ system, OpusDecoderState{std::move(decoder), sample_rate, channel_count});
+}
+
HwOpus::HwOpus(Core::System& system_) : ServiceFramework{system_, "hwopus"} {
static const FunctionInfo functions[] = {
- {0, &HwOpus::OpenOpusDecoder, "OpenOpusDecoder"},
+ {0, &HwOpus::OpenHardwareOpusDecoder, "OpenHardwareOpusDecoder"},
{1, &HwOpus::GetWorkBufferSize, "GetWorkBufferSize"},
{2, nullptr, "OpenOpusDecoderForMultiStream"},
{3, nullptr, "GetWorkBufferSizeForMultiStream"},
- {4, nullptr, "OpenHardwareOpusDecoderEx"},
- {5, nullptr, "GetWorkBufferSizeEx"},
+ {4, &HwOpus::OpenHardwareOpusDecoderEx, "OpenHardwareOpusDecoderEx"},
+ {5, &HwOpus::GetWorkBufferSizeEx, "GetWorkBufferSizeEx"},
{6, nullptr, "OpenHardwareOpusDecoderForMultiStreamEx"},
{7, nullptr, "GetWorkBufferSizeForMultiStreamEx"},
};
diff --git a/src/core/hle/service/audio/hwopus.h b/src/core/hle/service/audio/hwopus.h
index 4f921f18e..b74824ff3 100644
--- a/src/core/hle/service/audio/hwopus.h
+++ b/src/core/hle/service/audio/hwopus.h
@@ -18,8 +18,10 @@ public:
~HwOpus() override;
private:
- void OpenOpusDecoder(Kernel::HLERequestContext& ctx);
+ void OpenHardwareOpusDecoder(Kernel::HLERequestContext& ctx);
+ void OpenHardwareOpusDecoderEx(Kernel::HLERequestContext& ctx);
void GetWorkBufferSize(Kernel::HLERequestContext& ctx);
+ void GetWorkBufferSizeEx(Kernel::HLERequestContext& ctx);
};
} // namespace Service::Audio
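OpenHardwareOpusDecoderEx above feeds a channel-mapping table into opus_multistream_decoder_create(Fs, channels, streams, coupled_streams, mapping, &error), the standard libopus multistream API, with streams fixed at 1 and coupled_streams set to 1 for stereo. The CreateMappingTable helper it calls is not shown in this diff; a plausible sketch, assuming it simply emits the identity mapping for mono or stereo, is:

    #include <cstdint>
    #include <vector>

    // Hypothetical stand-in for the CreateMappingTable helper referenced in
    // hwopus.cpp: output channel i reads decoder slot i.
    std::vector<std::uint8_t> CreateMappingTable(std::uint32_t channel_count) {
        if (channel_count == 2) {
            return {0, 1}; // stereo: the single coupled stream provides slots 0 and 1
        }
        return {0}; // mono: the single uncoupled stream provides slot 0
    }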
diff --git a/src/core/hle/service/bcat/backend/boxcat.cpp b/src/core/hle/service/bcat/backend/boxcat.cpp
index a2844ea8c..7ca7f2aac 100644
--- a/src/core/hle/service/bcat/backend/boxcat.cpp
+++ b/src/core/hle/service/bcat/backend/boxcat.cpp
@@ -7,6 +7,9 @@
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
+#ifndef __clang__
+#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
+#endif
#endif
#include <httplib.h>
#include <mbedtls/sha256.h>
@@ -313,7 +316,7 @@ void SynchronizeInternal(AM::Applets::AppletManager& applet_manager, DirectoryGe
LOG_ERROR(Service_BCAT, "Boxcat synchronization failed with error '{}'!", res);
if (res == DownloadResult::NoMatchBuildId || res == DownloadResult::NoMatchTitleId) {
- void(Common::FS::RemoveFile(zip_path));
+ Common::FS::RemoveFile(zip_path);
}
HandleDownloadDisplayResult(applet_manager, res);
@@ -445,7 +448,7 @@ std::optional<std::vector<u8>> Boxcat::GetLaunchParameter(TitleIDVersion title)
LOG_ERROR(Service_BCAT, "Boxcat synchronization failed with error '{}'!", res);
if (res == DownloadResult::NoMatchBuildId || res == DownloadResult::NoMatchTitleId) {
- void(Common::FS::RemoveFile(bin_file_path));
+ Common::FS::RemoveFile(bin_file_path);
}
HandleDownloadDisplayResult(applet_manager, res);
diff --git a/src/core/hle/service/bcat/bcat.h b/src/core/hle/service/bcat/bcat.h
index d72798980..1eba477da 100644
--- a/src/core/hle/service/bcat/bcat.h
+++ b/src/core/hle/service/bcat/bcat.h
@@ -4,7 +4,7 @@
#pragma once
-#include "core/hle/service/bcat/module.h"
+#include "core/hle/service/bcat/bcat_module.h"
namespace Core {
class System;
diff --git a/src/core/hle/service/bcat/bcat_module.cpp b/src/core/hle/service/bcat/bcat_module.cpp
new file mode 100644
index 000000000..72294eb2e
--- /dev/null
+++ b/src/core/hle/service/bcat/bcat_module.cpp
@@ -0,0 +1,610 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cctype>
+#include <mbedtls/md5.h>
+#include "backend/boxcat.h"
+#include "common/hex_util.h"
+#include "common/logging/log.h"
+#include "common/settings.h"
+#include "common/string_util.h"
+#include "core/core.h"
+#include "core/file_sys/vfs.h"
+#include "core/hle/ipc_helpers.h"
+#include "core/hle/kernel/k_process.h"
+#include "core/hle/kernel/k_readable_event.h"
+#include "core/hle/kernel/k_writable_event.h"
+#include "core/hle/service/bcat/backend/backend.h"
+#include "core/hle/service/bcat/bcat.h"
+#include "core/hle/service/bcat/bcat_module.h"
+#include "core/hle/service/filesystem/filesystem.h"
+
+namespace Service::BCAT {
+
+constexpr ResultCode ERROR_INVALID_ARGUMENT{ErrorModule::BCAT, 1};
+constexpr ResultCode ERROR_FAILED_OPEN_ENTITY{ErrorModule::BCAT, 2};
+constexpr ResultCode ERROR_ENTITY_ALREADY_OPEN{ErrorModule::BCAT, 6};
+constexpr ResultCode ERROR_NO_OPEN_ENTITY{ErrorModule::BCAT, 7};
+
+// The command to clear the delivery cache just calls fs IFileSystem DeleteFile on all of the files,
+// and if any of them returns a non-zero result it simply forwards that result. This is the FS error
+// code for permission denied, which is the closest approximation of this scenario.
+constexpr ResultCode ERROR_FAILED_CLEAR_CACHE{ErrorModule::FS, 6400};
+
+using BCATDigest = std::array<u8, 0x10>;
+
+namespace {
+
+u64 GetCurrentBuildID(const Core::System::CurrentBuildProcessID& id) {
+ u64 out{};
+ std::memcpy(&out, id.data(), sizeof(u64));
+ return out;
+}
+
+// The digest is only used to determine if a file is unique compared to others of the same name.
+// Since the algorithm isn't ever checked in game, MD5 is safe.
+BCATDigest DigestFile(const FileSys::VirtualFile& file) {
+ BCATDigest out{};
+ const auto bytes = file->ReadAllBytes();
+ mbedtls_md5_ret(bytes.data(), bytes.size(), out.data());
+ return out;
+}
+
+// For a name to be valid it must be non-empty, must end with a null terminator as the final
+// character, and may only contain numbers, letters, underscores, plus a hyphen for directories
+// or a period for files.
+bool VerifyNameValidInternal(Kernel::HLERequestContext& ctx, std::array<char, 0x20> name,
+ char match_char) {
+ const auto null_chars = std::count(name.begin(), name.end(), 0);
+ const auto bad_chars = std::count_if(name.begin(), name.end(), [match_char](char c) {
+ return !std::isalnum(static_cast<u8>(c)) && c != '_' && c != match_char && c != '\0';
+ });
+ if (null_chars == 0x20 || null_chars == 0 || bad_chars != 0 || name[0x1F] != '\0') {
+ LOG_ERROR(Service_BCAT, "Name passed was invalid!");
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(ERROR_INVALID_ARGUMENT);
+ return false;
+ }
+
+ return true;
+}
+
+bool VerifyNameValidDir(Kernel::HLERequestContext& ctx, DirectoryName name) {
+ return VerifyNameValidInternal(ctx, name, '-');
+}
+
+bool VerifyNameValidFile(Kernel::HLERequestContext& ctx, FileName name) {
+ return VerifyNameValidInternal(ctx, name, '.');
+}
+
+} // Anonymous namespace
+
+struct DeliveryCacheDirectoryEntry {
+ FileName name;
+ u64 size;
+ BCATDigest digest;
+};
+
+class IDeliveryCacheProgressService final : public ServiceFramework<IDeliveryCacheProgressService> {
+public:
+ explicit IDeliveryCacheProgressService(Core::System& system_, Kernel::KReadableEvent& event_,
+ const DeliveryCacheProgressImpl& impl_)
+ : ServiceFramework{system_, "IDeliveryCacheProgressService"}, event{event_}, impl{impl_} {
+ // clang-format off
+ static const FunctionInfo functions[] = {
+ {0, &IDeliveryCacheProgressService::GetEvent, "GetEvent"},
+ {1, &IDeliveryCacheProgressService::GetImpl, "GetImpl"},
+ };
+ // clang-format on
+
+ RegisterHandlers(functions);
+ }
+
+private:
+ void GetEvent(Kernel::HLERequestContext& ctx) {
+ LOG_DEBUG(Service_BCAT, "called");
+
+ IPC::ResponseBuilder rb{ctx, 2, 1};
+ rb.Push(ResultSuccess);
+ rb.PushCopyObjects(event);
+ }
+
+ void GetImpl(Kernel::HLERequestContext& ctx) {
+ LOG_DEBUG(Service_BCAT, "called");
+
+ ctx.WriteBuffer(impl);
+
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(ResultSuccess);
+ }
+
+ Kernel::KReadableEvent& event;
+ const DeliveryCacheProgressImpl& impl;
+};
+
+class IBcatService final : public ServiceFramework<IBcatService> {
+public:
+ explicit IBcatService(Core::System& system_, Backend& backend_)
+ : ServiceFramework{system_, "IBcatService"}, backend{backend_},
+ progress{{
+ ProgressServiceBackend{system_.Kernel(), "Normal"},
+ ProgressServiceBackend{system_.Kernel(), "Directory"},
+ }} {
+ // clang-format off
+ static const FunctionInfo functions[] = {
+ {10100, &IBcatService::RequestSyncDeliveryCache, "RequestSyncDeliveryCache"},
+ {10101, &IBcatService::RequestSyncDeliveryCacheWithDirectoryName, "RequestSyncDeliveryCacheWithDirectoryName"},
+ {10200, nullptr, "CancelSyncDeliveryCacheRequest"},
+ {20100, nullptr, "RequestSyncDeliveryCacheWithApplicationId"},
+ {20101, nullptr, "RequestSyncDeliveryCacheWithApplicationIdAndDirectoryName"},
+ {20300, nullptr, "GetDeliveryCacheStorageUpdateNotifier"},
+ {20301, nullptr, "RequestSuspendDeliveryTask"},
+ {20400, nullptr, "RegisterSystemApplicationDeliveryTask"},
+ {20401, nullptr, "UnregisterSystemApplicationDeliveryTask"},
+ {20410, nullptr, "SetSystemApplicationDeliveryTaskTimer"},
+ {30100, &IBcatService::SetPassphrase, "SetPassphrase"},
+ {30101, nullptr, "Unknown"},
+ {30102, nullptr, "Unknown2"},
+ {30200, nullptr, "RegisterBackgroundDeliveryTask"},
+ {30201, nullptr, "UnregisterBackgroundDeliveryTask"},
+ {30202, nullptr, "BlockDeliveryTask"},
+ {30203, nullptr, "UnblockDeliveryTask"},
+ {30210, nullptr, "SetDeliveryTaskTimer"},
+ {30300, nullptr, "RegisterSystemApplicationDeliveryTasks"},
+ {90100, nullptr, "EnumerateBackgroundDeliveryTask"},
+ {90101, nullptr, "Unknown90101"},
+ {90200, nullptr, "GetDeliveryList"},
+ {90201, &IBcatService::ClearDeliveryCacheStorage, "ClearDeliveryCacheStorage"},
+ {90202, nullptr, "ClearDeliveryTaskSubscriptionStatus"},
+ {90300, nullptr, "GetPushNotificationLog"},
+ {90301, nullptr, "Unknown90301"},
+ };
+ // clang-format on
+ RegisterHandlers(functions);
+ }
+
+private:
+ enum class SyncType {
+ Normal,
+ Directory,
+ Count,
+ };
+
+ std::shared_ptr<IDeliveryCacheProgressService> CreateProgressService(SyncType type) {
+ auto& progress_backend{GetProgressBackend(type)};
+ return std::make_shared<IDeliveryCacheProgressService>(system, progress_backend.GetEvent(),
+ progress_backend.GetImpl());
+ }
+
+ void RequestSyncDeliveryCache(Kernel::HLERequestContext& ctx) {
+ LOG_DEBUG(Service_BCAT, "called");
+
+ backend.Synchronize({system.CurrentProcess()->GetTitleID(),
+ GetCurrentBuildID(system.GetCurrentProcessBuildID())},
+ GetProgressBackend(SyncType::Normal));
+
+ IPC::ResponseBuilder rb{ctx, 2, 0, 1};
+ rb.Push(ResultSuccess);
+ rb.PushIpcInterface(CreateProgressService(SyncType::Normal));
+ }
+
+ void RequestSyncDeliveryCacheWithDirectoryName(Kernel::HLERequestContext& ctx) {
+ IPC::RequestParser rp{ctx};
+ const auto name_raw = rp.PopRaw<DirectoryName>();
+ const auto name =
+ Common::StringFromFixedZeroTerminatedBuffer(name_raw.data(), name_raw.size());
+
+ LOG_DEBUG(Service_BCAT, "called, name={}", name);
+
+ backend.SynchronizeDirectory({system.CurrentProcess()->GetTitleID(),
+ GetCurrentBuildID(system.GetCurrentProcessBuildID())},
+ name, GetProgressBackend(SyncType::Directory));
+
+ IPC::ResponseBuilder rb{ctx, 2, 0, 1};
+ rb.Push(ResultSuccess);
+ rb.PushIpcInterface(CreateProgressService(SyncType::Directory));
+ }
+
+ void SetPassphrase(Kernel::HLERequestContext& ctx) {
+ IPC::RequestParser rp{ctx};
+ const auto title_id = rp.PopRaw<u64>();
+
+ const auto passphrase_raw = ctx.ReadBuffer();
+
+ LOG_DEBUG(Service_BCAT, "called, title_id={:016X}, passphrase={}", title_id,
+ Common::HexToString(passphrase_raw));
+
+ if (title_id == 0) {
+ LOG_ERROR(Service_BCAT, "Invalid title ID!");
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(ERROR_INVALID_ARGUMENT);
+ return; // avoid falling through and writing a second response below
+ }
+
+ if (passphrase_raw.size() > 0x40) {
+ LOG_ERROR(Service_BCAT, "Passphrase too large!");
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(ERROR_INVALID_ARGUMENT);
+ return;
+ }
+
+ Passphrase passphrase{};
+ std::memcpy(passphrase.data(), passphrase_raw.data(),
+ std::min(passphrase.size(), passphrase_raw.size()));
+
+ backend.SetPassphrase(title_id, passphrase);
+
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(ResultSuccess);
+ }
+
+ void ClearDeliveryCacheStorage(Kernel::HLERequestContext& ctx) {
+ IPC::RequestParser rp{ctx};
+ const auto title_id = rp.PopRaw<u64>();
+
+ LOG_DEBUG(Service_BCAT, "called, title_id={:016X}", title_id);
+
+ if (title_id == 0) {
+ LOG_ERROR(Service_BCAT, "Invalid title ID!");
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(ERROR_INVALID_ARGUMENT);
+ return;
+ }
+
+ if (!backend.Clear(title_id)) {
+ LOG_ERROR(Service_BCAT, "Could not clear the directory successfully!");
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(ERROR_FAILED_CLEAR_CACHE);
+ return;
+ }
+
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(ResultSuccess);
+ }
+
+ ProgressServiceBackend& GetProgressBackend(SyncType type) {
+ return progress.at(static_cast<size_t>(type));
+ }
+
+ const ProgressServiceBackend& GetProgressBackend(SyncType type) const {
+ return progress.at(static_cast<size_t>(type));
+ }
+
+ Backend& backend;
+ std::array<ProgressServiceBackend, static_cast<size_t>(SyncType::Count)> progress;
+};
+
+void Module::Interface::CreateBcatService(Kernel::HLERequestContext& ctx) {
+ LOG_DEBUG(Service_BCAT, "called");
+
+ IPC::ResponseBuilder rb{ctx, 2, 0, 1};
+ rb.Push(ResultSuccess);
+ rb.PushIpcInterface<IBcatService>(system, *backend);
+}
+
+class IDeliveryCacheFileService final : public ServiceFramework<IDeliveryCacheFileService> {
+public:
+ explicit IDeliveryCacheFileService(Core::System& system_, FileSys::VirtualDir root_)
+ : ServiceFramework{system_, "IDeliveryCacheFileService"}, root(std::move(root_)) {
+ // clang-format off
+ static const FunctionInfo functions[] = {
+ {0, &IDeliveryCacheFileService::Open, "Open"},
+ {1, &IDeliveryCacheFileService::Read, "Read"},
+ {2, &IDeliveryCacheFileService::GetSize, "GetSize"},
+ {3, &IDeliveryCacheFileService::GetDigest, "GetDigest"},
+ };
+ // clang-format on
+
+ RegisterHandlers(functions);
+ }
+
+private:
+ void Open(Kernel::HLERequestContext& ctx) {
+ IPC::RequestParser rp{ctx};
+ const auto dir_name_raw = rp.PopRaw<DirectoryName>();
+ const auto file_name_raw = rp.PopRaw<FileName>();
+
+ const auto dir_name =
+ Common::StringFromFixedZeroTerminatedBuffer(dir_name_raw.data(), dir_name_raw.size());
+ const auto file_name =
+ Common::StringFromFixedZeroTerminatedBuffer(file_name_raw.data(), file_name_raw.size());
+
+ LOG_DEBUG(Service_BCAT, "called, dir_name={}, file_name={}", dir_name, file_name);
+
+ if (!VerifyNameValidDir(ctx, dir_name_raw) || !VerifyNameValidFile(ctx, file_name_raw))
+ return;
+
+ if (current_file != nullptr) {
+ LOG_ERROR(Service_BCAT, "A file has already been opened on this interface!");
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(ERROR_ENTITY_ALREADY_OPEN);
+ return;
+ }
+
+ const auto dir = root->GetSubdirectory(dir_name);
+
+ if (dir == nullptr) {
+ LOG_ERROR(Service_BCAT, "The directory of name={} couldn't be opened!", dir_name);
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(ERROR_FAILED_OPEN_ENTITY);
+ return;
+ }
+
+ current_file = dir->GetFile(file_name);
+
+ if (current_file == nullptr) {
+ LOG_ERROR(Service_BCAT, "The file of name={} couldn't be opened!", file_name);
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(ERROR_FAILED_OPEN_ENTITY);
+ return;
+ }
+
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(ResultSuccess);
+ }
+
+ void Read(Kernel::HLERequestContext& ctx) {
+ IPC::RequestParser rp{ctx};
+ const auto offset{rp.PopRaw<u64>()};
+
+ auto size = ctx.GetWriteBufferSize();
+
+ LOG_DEBUG(Service_BCAT, "called, offset={:016X}, size={:016X}", offset, size);
+
+ if (current_file == nullptr) {
+ LOG_ERROR(Service_BCAT, "There is no file currently open!");
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(ERROR_NO_OPEN_ENTITY);
+ return; // avoid dereferencing the null current_file below
+ }
+
+ size = std::min<u64>(current_file->GetSize() - offset, size);
+ const auto buffer = current_file->ReadBytes(size, offset);
+ ctx.WriteBuffer(buffer);
+
+ IPC::ResponseBuilder rb{ctx, 4};
+ rb.Push(ResultSuccess);
+ rb.Push<u64>(buffer.size());
+ }
+
+ void GetSize(Kernel::HLERequestContext& ctx) {
+ LOG_DEBUG(Service_BCAT, "called");
+
+ if (current_file == nullptr) {
+ LOG_ERROR(Service_BCAT, "There is no file currently open!");
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(ERROR_NO_OPEN_ENTITY);
+ return; // avoid dereferencing the null current_file below
+ }
+
+ IPC::ResponseBuilder rb{ctx, 4};
+ rb.Push(ResultSuccess);
+ rb.Push<u64>(current_file->GetSize());
+ }
+
+ void GetDigest(Kernel::HLERequestContext& ctx) {
+ LOG_DEBUG(Service_BCAT, "called");
+
+ if (current_file == nullptr) {
+ LOG_ERROR(Service_BCAT, "There is no file currently open!");
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(ERROR_NO_OPEN_ENTITY);
+ return; // avoid dereferencing the null current_file below
+ }
+
+ IPC::ResponseBuilder rb{ctx, 6};
+ rb.Push(ResultSuccess);
+ rb.PushRaw(DigestFile(current_file));
+ }
+
+ FileSys::VirtualDir root;
+ FileSys::VirtualFile current_file;
+};
+
+class IDeliveryCacheDirectoryService final
+ : public ServiceFramework<IDeliveryCacheDirectoryService> {
+public:
+ explicit IDeliveryCacheDirectoryService(Core::System& system_, FileSys::VirtualDir root_)
+ : ServiceFramework{system_, "IDeliveryCacheDirectoryService"}, root(std::move(root_)) {
+ // clang-format off
+ static const FunctionInfo functions[] = {
+ {0, &IDeliveryCacheDirectoryService::Open, "Open"},
+ {1, &IDeliveryCacheDirectoryService::Read, "Read"},
+ {2, &IDeliveryCacheDirectoryService::GetCount, "GetCount"},
+ };
+ // clang-format on
+
+ RegisterHandlers(functions);
+ }
+
+private:
+ void Open(Kernel::HLERequestContext& ctx) {
+ IPC::RequestParser rp{ctx};
+ const auto name_raw = rp.PopRaw<DirectoryName>();
+ const auto name =
+ Common::StringFromFixedZeroTerminatedBuffer(name_raw.data(), name_raw.size());
+
+ LOG_DEBUG(Service_BCAT, "called, name={}", name);
+
+ if (!VerifyNameValidDir(ctx, name_raw))
+ return;
+
+ if (current_dir != nullptr) {
+ LOG_ERROR(Service_BCAT, "A file has already been opened on this interface!");
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(ERROR_ENTITY_ALREADY_OPEN);
+ return;
+ }
+
+ current_dir = root->GetSubdirectory(name);
+
+ if (current_dir == nullptr) {
+ LOG_ERROR(Service_BCAT, "Failed to open the directory name={}!", name);
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(ERROR_FAILED_OPEN_ENTITY);
+ return;
+ }
+
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(ResultSuccess);
+ }
+
+ void Read(Kernel::HLERequestContext& ctx) {
+ auto write_size = ctx.GetWriteBufferSize() / sizeof(DeliveryCacheDirectoryEntry);
+
+ LOG_DEBUG(Service_BCAT, "called, write_size={:016X}", write_size);
+
+ if (current_dir == nullptr) {
+ LOG_ERROR(Service_BCAT, "There is no open directory!");
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(ERROR_NO_OPEN_ENTITY);
+ return;
+ }
+
+ const auto files = current_dir->GetFiles();
+ write_size = std::min<u64>(write_size, files.size());
+ std::vector<DeliveryCacheDirectoryEntry> entries(write_size);
+ std::transform(
+ files.begin(), files.begin() + write_size, entries.begin(), [](const auto& file) {
+ FileName name{};
+ std::memcpy(name.data(), file->GetName().data(),
+ std::min(file->GetName().size(), name.size()));
+ return DeliveryCacheDirectoryEntry{name, file->GetSize(), DigestFile(file)};
+ });
+
+ ctx.WriteBuffer(entries);
+
+ IPC::ResponseBuilder rb{ctx, 3};
+ rb.Push(ResultSuccess);
+ rb.Push(static_cast<u32>(write_size * sizeof(DeliveryCacheDirectoryEntry)));
+ }
+
+ void GetCount(Kernel::HLERequestContext& ctx) {
+ LOG_DEBUG(Service_BCAT, "called");
+
+ if (current_dir == nullptr) {
+ LOG_ERROR(Service_BCAT, "There is no open directory!");
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(ERROR_NO_OPEN_ENTITY);
+ return;
+ }
+
+ const auto files = current_dir->GetFiles();
+
+ IPC::ResponseBuilder rb{ctx, 3};
+ rb.Push(ResultSuccess);
+ rb.Push(static_cast<u32>(files.size()));
+ }
+
+ FileSys::VirtualDir root;
+ FileSys::VirtualDir current_dir;
+};
+
+class IDeliveryCacheStorageService final : public ServiceFramework<IDeliveryCacheStorageService> {
+public:
+ explicit IDeliveryCacheStorageService(Core::System& system_, FileSys::VirtualDir root_)
+ : ServiceFramework{system_, "IDeliveryCacheStorageService"}, root(std::move(root_)) {
+ // clang-format off
+ static const FunctionInfo functions[] = {
+ {0, &IDeliveryCacheStorageService::CreateFileService, "CreateFileService"},
+ {1, &IDeliveryCacheStorageService::CreateDirectoryService, "CreateDirectoryService"},
+ {10, &IDeliveryCacheStorageService::EnumerateDeliveryCacheDirectory, "EnumerateDeliveryCacheDirectory"},
+ };
+ // clang-format on
+
+ RegisterHandlers(functions);
+
+ for (const auto& subdir : root->GetSubdirectories()) {
+ DirectoryName name{};
+ std::memcpy(name.data(), subdir->GetName().data(),
+ std::min(sizeof(DirectoryName) - 1, subdir->GetName().size()));
+ entries.push_back(name);
+ }
+ }
+
+private:
+ void CreateFileService(Kernel::HLERequestContext& ctx) {
+ LOG_DEBUG(Service_BCAT, "called");
+
+ IPC::ResponseBuilder rb{ctx, 2, 0, 1};
+ rb.Push(ResultSuccess);
+ rb.PushIpcInterface<IDeliveryCacheFileService>(system, root);
+ }
+
+ void CreateDirectoryService(Kernel::HLERequestContext& ctx) {
+ LOG_DEBUG(Service_BCAT, "called");
+
+ IPC::ResponseBuilder rb{ctx, 2, 0, 1};
+ rb.Push(ResultSuccess);
+ rb.PushIpcInterface<IDeliveryCacheDirectoryService>(system, root);
+ }
+
+ void EnumerateDeliveryCacheDirectory(Kernel::HLERequestContext& ctx) {
+ auto size = ctx.GetWriteBufferSize() / sizeof(DirectoryName);
+
+ LOG_DEBUG(Service_BCAT, "called, size={:016X}", size);
+
+ size = std::min<u64>(size, entries.size() - next_read_index);
+ ctx.WriteBuffer(entries.data() + next_read_index, size * sizeof(DirectoryName));
+ next_read_index += size;
+
+ IPC::ResponseBuilder rb{ctx, 3};
+ rb.Push(ResultSuccess);
+ rb.Push(static_cast<u32>(size));
+ }
+
+ FileSys::VirtualDir root;
+ std::vector<DirectoryName> entries;
+ u64 next_read_index = 0;
+};
+
+void Module::Interface::CreateDeliveryCacheStorageService(Kernel::HLERequestContext& ctx) {
+ LOG_DEBUG(Service_BCAT, "called");
+
+ const auto title_id = system.CurrentProcess()->GetTitleID();
+ IPC::ResponseBuilder rb{ctx, 2, 0, 1};
+ rb.Push(ResultSuccess);
+ rb.PushIpcInterface<IDeliveryCacheStorageService>(system, fsc.GetBCATDirectory(title_id));
+}
+
+void Module::Interface::CreateDeliveryCacheStorageServiceWithApplicationId(
+ Kernel::HLERequestContext& ctx) {
+ IPC::RequestParser rp{ctx};
+ const auto title_id = rp.PopRaw<u64>();
+
+ LOG_DEBUG(Service_BCAT, "called, title_id={:016X}", title_id);
+
+ IPC::ResponseBuilder rb{ctx, 2, 0, 1};
+ rb.Push(ResultSuccess);
+ rb.PushIpcInterface<IDeliveryCacheStorageService>(system, fsc.GetBCATDirectory(title_id));
+}
+
+std::unique_ptr<Backend> CreateBackendFromSettings([[maybe_unused]] Core::System& system,
+ DirectoryGetter getter) {
+#ifdef YUZU_ENABLE_BOXCAT
+ if (Settings::values.bcat_backend.GetValue() == "boxcat") {
+ return std::make_unique<Boxcat>(system.GetAppletManager(), std::move(getter));
+ }
+#endif
+
+ return std::make_unique<NullBackend>(std::move(getter));
+}
+
+Module::Interface::Interface(Core::System& system_, std::shared_ptr<Module> module_,
+ FileSystem::FileSystemController& fsc_, const char* name)
+ : ServiceFramework{system_, name}, fsc{fsc_}, module{std::move(module_)},
+ backend{CreateBackendFromSettings(system_,
+ [&fsc_](u64 tid) { return fsc_.GetBCATDirectory(tid); })} {}
+
+Module::Interface::~Interface() = default;
+
+void InstallInterfaces(Core::System& system) {
+ auto module = std::make_shared<Module>();
+ std::make_shared<BCAT>(system, module, system.GetFileSystemController(), "bcat:a")
+ ->InstallAsService(system.ServiceManager());
+ std::make_shared<BCAT>(system, module, system.GetFileSystemController(), "bcat:m")
+ ->InstallAsService(system.ServiceManager());
+ std::make_shared<BCAT>(system, module, system.GetFileSystemController(), "bcat:u")
+ ->InstallAsService(system.ServiceManager());
+ std::make_shared<BCAT>(system, module, system.GetFileSystemController(), "bcat:s")
+ ->InstallAsService(system.ServiceManager());
+}
+
+} // namespace Service::BCAT
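For reference, IDeliveryCacheDirectoryService::Read above writes one DeliveryCacheDirectoryEntry per file into the client's buffer. Assuming FileName is std::array<char, 0x20> (as implied by the VerifyNameValidInternal signature) and with BCATDigest defined as std::array<u8, 0x10>, each entry spans 0x38 bytes with no padding; a standalone sanity check of that layout:

    #include <array>
    #include <cstdint>

    using FileName = std::array<char, 0x20>;           // assumed size, matching VerifyNameValidInternal
    using BCATDigest = std::array<std::uint8_t, 0x10>;

    struct DeliveryCacheDirectoryEntry {
        FileName name;       // 0x20 bytes
        std::uint64_t size;  // 0x08 bytes, naturally aligned after the name
        BCATDigest digest;   // 0x10 bytes
    };

    static_assert(sizeof(DeliveryCacheDirectoryEntry) == 0x38,
                  "each entry written by Read() should span 0x38 bytes");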
diff --git a/src/core/hle/service/bcat/module.h b/src/core/hle/service/bcat/bcat_module.h
index 738731c06..738731c06 100644
--- a/src/core/hle/service/bcat/module.h
+++ b/src/core/hle/service/bcat/bcat_module.h
diff --git a/src/core/hle/service/bcat/module.cpp b/src/core/hle/service/bcat/module.cpp
deleted file mode 100644
index 44e4d0509..000000000
--- a/src/core/hle/service/bcat/module.cpp
+++ /dev/null
@@ -1,610 +0,0 @@
-// Copyright 2018 yuzu emulator team
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <cctype>
-#include <mbedtls/md5.h>
-#include "backend/boxcat.h"
-#include "common/hex_util.h"
-#include "common/logging/log.h"
-#include "common/settings.h"
-#include "common/string_util.h"
-#include "core/core.h"
-#include "core/file_sys/vfs.h"
-#include "core/hle/ipc_helpers.h"
-#include "core/hle/kernel/k_process.h"
-#include "core/hle/kernel/k_readable_event.h"
-#include "core/hle/kernel/k_writable_event.h"
-#include "core/hle/service/bcat/backend/backend.h"
-#include "core/hle/service/bcat/bcat.h"
-#include "core/hle/service/bcat/module.h"
-#include "core/hle/service/filesystem/filesystem.h"
-
-namespace Service::BCAT {
-
-constexpr ResultCode ERROR_INVALID_ARGUMENT{ErrorModule::BCAT, 1};
-constexpr ResultCode ERROR_FAILED_OPEN_ENTITY{ErrorModule::BCAT, 2};
-constexpr ResultCode ERROR_ENTITY_ALREADY_OPEN{ErrorModule::BCAT, 6};
-constexpr ResultCode ERROR_NO_OPEN_ENTITY{ErrorModule::BCAT, 7};
-
-// The command to clear the delivery cache just calls fs IFileSystem DeleteFile on all of the files
-// and if any of them have a non-zero result it just forwards that result. This is the FS error code
-// for permission denied, which is the closest approximation of this scenario.
-constexpr ResultCode ERROR_FAILED_CLEAR_CACHE{ErrorModule::FS, 6400};
-
-using BCATDigest = std::array<u8, 0x10>;
-
-namespace {
-
-u64 GetCurrentBuildID(const Core::System::CurrentBuildProcessID& id) {
- u64 out{};
- std::memcpy(&out, id.data(), sizeof(u64));
- return out;
-}
-
-// The digest is only used to determine if a file is unique compared to others of the same name.
-// Since the algorithm isn't ever checked in game, MD5 is safe.
-BCATDigest DigestFile(const FileSys::VirtualFile& file) {
- BCATDigest out{};
- const auto bytes = file->ReadAllBytes();
- mbedtls_md5_ret(bytes.data(), bytes.size(), out.data());
- return out;
-}
-
-// For a name to be valid it must be non-empty, must have a null terminating character as the final
-// char, can only contain numbers, letters, underscores and a hyphen if directory and a period if
-// file.
-bool VerifyNameValidInternal(Kernel::HLERequestContext& ctx, std::array<char, 0x20> name,
- char match_char) {
- const auto null_chars = std::count(name.begin(), name.end(), 0);
- const auto bad_chars = std::count_if(name.begin(), name.end(), [match_char](char c) {
- return !std::isalnum(static_cast<u8>(c)) && c != '_' && c != match_char && c != '\0';
- });
- if (null_chars == 0x20 || null_chars == 0 || bad_chars != 0 || name[0x1F] != '\0') {
- LOG_ERROR(Service_BCAT, "Name passed was invalid!");
- IPC::ResponseBuilder rb{ctx, 2};
- rb.Push(ERROR_INVALID_ARGUMENT);
- return false;
- }
-
- return true;
-}
-
-bool VerifyNameValidDir(Kernel::HLERequestContext& ctx, DirectoryName name) {
- return VerifyNameValidInternal(ctx, name, '-');
-}
-
-bool VerifyNameValidFile(Kernel::HLERequestContext& ctx, FileName name) {
- return VerifyNameValidInternal(ctx, name, '.');
-}
-
-} // Anonymous namespace
-
-struct DeliveryCacheDirectoryEntry {
- FileName name;
- u64 size;
- BCATDigest digest;
-};
-
-class IDeliveryCacheProgressService final : public ServiceFramework<IDeliveryCacheProgressService> {
-public:
- explicit IDeliveryCacheProgressService(Core::System& system_, Kernel::KReadableEvent& event_,
- const DeliveryCacheProgressImpl& impl_)
- : ServiceFramework{system_, "IDeliveryCacheProgressService"}, event{event_}, impl{impl_} {
- // clang-format off
- static const FunctionInfo functions[] = {
- {0, &IDeliveryCacheProgressService::GetEvent, "GetEvent"},
- {1, &IDeliveryCacheProgressService::GetImpl, "GetImpl"},
- };
- // clang-format on
-
- RegisterHandlers(functions);
- }
-
-private:
- void GetEvent(Kernel::HLERequestContext& ctx) {
- LOG_DEBUG(Service_BCAT, "called");
-
- IPC::ResponseBuilder rb{ctx, 2, 1};
- rb.Push(ResultSuccess);
- rb.PushCopyObjects(event);
- }
-
- void GetImpl(Kernel::HLERequestContext& ctx) {
- LOG_DEBUG(Service_BCAT, "called");
-
- ctx.WriteBuffer(impl);
-
- IPC::ResponseBuilder rb{ctx, 2};
- rb.Push(ResultSuccess);
- }
-
- Kernel::KReadableEvent& event;
- const DeliveryCacheProgressImpl& impl;
-};
-
-class IBcatService final : public ServiceFramework<IBcatService> {
-public:
- explicit IBcatService(Core::System& system_, Backend& backend_)
- : ServiceFramework{system_, "IBcatService"}, backend{backend_},
- progress{{
- ProgressServiceBackend{system_.Kernel(), "Normal"},
- ProgressServiceBackend{system_.Kernel(), "Directory"},
- }} {
- // clang-format off
- static const FunctionInfo functions[] = {
- {10100, &IBcatService::RequestSyncDeliveryCache, "RequestSyncDeliveryCache"},
- {10101, &IBcatService::RequestSyncDeliveryCacheWithDirectoryName, "RequestSyncDeliveryCacheWithDirectoryName"},
- {10200, nullptr, "CancelSyncDeliveryCacheRequest"},
- {20100, nullptr, "RequestSyncDeliveryCacheWithApplicationId"},
- {20101, nullptr, "RequestSyncDeliveryCacheWithApplicationIdAndDirectoryName"},
- {20300, nullptr, "GetDeliveryCacheStorageUpdateNotifier"},
- {20301, nullptr, "RequestSuspendDeliveryTask"},
- {20400, nullptr, "RegisterSystemApplicationDeliveryTask"},
- {20401, nullptr, "UnregisterSystemApplicationDeliveryTask"},
- {20410, nullptr, "SetSystemApplicationDeliveryTaskTimer"},
- {30100, &IBcatService::SetPassphrase, "SetPassphrase"},
- {30101, nullptr, "Unknown"},
- {30102, nullptr, "Unknown2"},
- {30200, nullptr, "RegisterBackgroundDeliveryTask"},
- {30201, nullptr, "UnregisterBackgroundDeliveryTask"},
- {30202, nullptr, "BlockDeliveryTask"},
- {30203, nullptr, "UnblockDeliveryTask"},
- {30210, nullptr, "SetDeliveryTaskTimer"},
- {30300, nullptr, "RegisterSystemApplicationDeliveryTasks"},
- {90100, nullptr, "EnumerateBackgroundDeliveryTask"},
- {90101, nullptr, "Unknown90101"},
- {90200, nullptr, "GetDeliveryList"},
- {90201, &IBcatService::ClearDeliveryCacheStorage, "ClearDeliveryCacheStorage"},
- {90202, nullptr, "ClearDeliveryTaskSubscriptionStatus"},
- {90300, nullptr, "GetPushNotificationLog"},
- {90301, nullptr, "Unknown90301"},
- };
- // clang-format on
- RegisterHandlers(functions);
- }
-
-private:
- enum class SyncType {
- Normal,
- Directory,
- Count,
- };
-
- std::shared_ptr<IDeliveryCacheProgressService> CreateProgressService(SyncType type) {
- auto& progress_backend{GetProgressBackend(type)};
- return std::make_shared<IDeliveryCacheProgressService>(system, progress_backend.GetEvent(),
- progress_backend.GetImpl());
- }
-
- void RequestSyncDeliveryCache(Kernel::HLERequestContext& ctx) {
- LOG_DEBUG(Service_BCAT, "called");
-
- backend.Synchronize({system.CurrentProcess()->GetTitleID(),
- GetCurrentBuildID(system.GetCurrentProcessBuildID())},
- GetProgressBackend(SyncType::Normal));
-
- IPC::ResponseBuilder rb{ctx, 2, 0, 1};
- rb.Push(ResultSuccess);
- rb.PushIpcInterface(CreateProgressService(SyncType::Normal));
- }
-
- void RequestSyncDeliveryCacheWithDirectoryName(Kernel::HLERequestContext& ctx) {
- IPC::RequestParser rp{ctx};
- const auto name_raw = rp.PopRaw<DirectoryName>();
- const auto name =
- Common::StringFromFixedZeroTerminatedBuffer(name_raw.data(), name_raw.size());
-
- LOG_DEBUG(Service_BCAT, "called, name={}", name);
-
- backend.SynchronizeDirectory({system.CurrentProcess()->GetTitleID(),
- GetCurrentBuildID(system.GetCurrentProcessBuildID())},
- name, GetProgressBackend(SyncType::Directory));
-
- IPC::ResponseBuilder rb{ctx, 2, 0, 1};
- rb.Push(ResultSuccess);
- rb.PushIpcInterface(CreateProgressService(SyncType::Directory));
- }
-
- void SetPassphrase(Kernel::HLERequestContext& ctx) {
- IPC::RequestParser rp{ctx};
- const auto title_id = rp.PopRaw<u64>();
-
- const auto passphrase_raw = ctx.ReadBuffer();
-
- LOG_DEBUG(Service_BCAT, "called, title_id={:016X}, passphrase={}", title_id,
- Common::HexToString(passphrase_raw));
-
- if (title_id == 0) {
- LOG_ERROR(Service_BCAT, "Invalid title ID!");
- IPC::ResponseBuilder rb{ctx, 2};
- rb.Push(ERROR_INVALID_ARGUMENT);
- }
-
- if (passphrase_raw.size() > 0x40) {
- LOG_ERROR(Service_BCAT, "Passphrase too large!");
- IPC::ResponseBuilder rb{ctx, 2};
- rb.Push(ERROR_INVALID_ARGUMENT);
- return;
- }
-
- Passphrase passphrase{};
- std::memcpy(passphrase.data(), passphrase_raw.data(),
- std::min(passphrase.size(), passphrase_raw.size()));
-
- backend.SetPassphrase(title_id, passphrase);
-
- IPC::ResponseBuilder rb{ctx, 2};
- rb.Push(ResultSuccess);
- }
-
- void ClearDeliveryCacheStorage(Kernel::HLERequestContext& ctx) {
- IPC::RequestParser rp{ctx};
- const auto title_id = rp.PopRaw<u64>();
-
- LOG_DEBUG(Service_BCAT, "called, title_id={:016X}", title_id);
-
- if (title_id == 0) {
- LOG_ERROR(Service_BCAT, "Invalid title ID!");
- IPC::ResponseBuilder rb{ctx, 2};
- rb.Push(ERROR_INVALID_ARGUMENT);
- return;
- }
-
- if (!backend.Clear(title_id)) {
- LOG_ERROR(Service_BCAT, "Could not clear the directory successfully!");
- IPC::ResponseBuilder rb{ctx, 2};
- rb.Push(ERROR_FAILED_CLEAR_CACHE);
- return;
- }
-
- IPC::ResponseBuilder rb{ctx, 2};
- rb.Push(ResultSuccess);
- }
-
- ProgressServiceBackend& GetProgressBackend(SyncType type) {
- return progress.at(static_cast<size_t>(type));
- }
-
- const ProgressServiceBackend& GetProgressBackend(SyncType type) const {
- return progress.at(static_cast<size_t>(type));
- }
-
- Backend& backend;
- std::array<ProgressServiceBackend, static_cast<size_t>(SyncType::Count)> progress;
-};
-
-void Module::Interface::CreateBcatService(Kernel::HLERequestContext& ctx) {
- LOG_DEBUG(Service_BCAT, "called");
-
- IPC::ResponseBuilder rb{ctx, 2, 0, 1};
- rb.Push(ResultSuccess);
- rb.PushIpcInterface<IBcatService>(system, *backend);
-}
-
-class IDeliveryCacheFileService final : public ServiceFramework<IDeliveryCacheFileService> {
-public:
- explicit IDeliveryCacheFileService(Core::System& system_, FileSys::VirtualDir root_)
- : ServiceFramework{system_, "IDeliveryCacheFileService"}, root(std::move(root_)) {
- // clang-format off
- static const FunctionInfo functions[] = {
- {0, &IDeliveryCacheFileService::Open, "Open"},
- {1, &IDeliveryCacheFileService::Read, "Read"},
- {2, &IDeliveryCacheFileService::GetSize, "GetSize"},
- {3, &IDeliveryCacheFileService::GetDigest, "GetDigest"},
- };
- // clang-format on
-
- RegisterHandlers(functions);
- }
-
-private:
- void Open(Kernel::HLERequestContext& ctx) {
- IPC::RequestParser rp{ctx};
- const auto dir_name_raw = rp.PopRaw<DirectoryName>();
- const auto file_name_raw = rp.PopRaw<FileName>();
-
- const auto dir_name =
- Common::StringFromFixedZeroTerminatedBuffer(dir_name_raw.data(), dir_name_raw.size());
- const auto file_name =
- Common::StringFromFixedZeroTerminatedBuffer(file_name_raw.data(), file_name_raw.size());
-
- LOG_DEBUG(Service_BCAT, "called, dir_name={}, file_name={}", dir_name, file_name);
-
- if (!VerifyNameValidDir(ctx, dir_name_raw) || !VerifyNameValidFile(ctx, file_name_raw))
- return;
-
- if (current_file != nullptr) {
- LOG_ERROR(Service_BCAT, "A file has already been opened on this interface!");
- IPC::ResponseBuilder rb{ctx, 2};
- rb.Push(ERROR_ENTITY_ALREADY_OPEN);
- return;
- }
-
- const auto dir = root->GetSubdirectory(dir_name);
-
- if (dir == nullptr) {
- LOG_ERROR(Service_BCAT, "The directory of name={} couldn't be opened!", dir_name);
- IPC::ResponseBuilder rb{ctx, 2};
- rb.Push(ERROR_FAILED_OPEN_ENTITY);
- return;
- }
-
- current_file = dir->GetFile(file_name);
-
- if (current_file == nullptr) {
- LOG_ERROR(Service_BCAT, "The file of name={} couldn't be opened!", file_name);
- IPC::ResponseBuilder rb{ctx, 2};
- rb.Push(ERROR_FAILED_OPEN_ENTITY);
- return;
- }
-
- IPC::ResponseBuilder rb{ctx, 2};
- rb.Push(ResultSuccess);
- }
-
- void Read(Kernel::HLERequestContext& ctx) {
- IPC::RequestParser rp{ctx};
- const auto offset{rp.PopRaw<u64>()};
-
- auto size = ctx.GetWriteBufferSize();
-
- LOG_DEBUG(Service_BCAT, "called, offset={:016X}, size={:016X}", offset, size);
-
- if (current_file == nullptr) {
- LOG_ERROR(Service_BCAT, "There is no file currently open!");
- IPC::ResponseBuilder rb{ctx, 2};
- rb.Push(ERROR_NO_OPEN_ENTITY);
- }
-
- size = std::min<u64>(current_file->GetSize() - offset, size);
- const auto buffer = current_file->ReadBytes(size, offset);
- ctx.WriteBuffer(buffer);
-
- IPC::ResponseBuilder rb{ctx, 4};
- rb.Push(ResultSuccess);
- rb.Push<u64>(buffer.size());
- }
-
- void GetSize(Kernel::HLERequestContext& ctx) {
- LOG_DEBUG(Service_BCAT, "called");
-
- if (current_file == nullptr) {
- LOG_ERROR(Service_BCAT, "There is no file currently open!");
- IPC::ResponseBuilder rb{ctx, 2};
- rb.Push(ERROR_NO_OPEN_ENTITY);
- }
-
- IPC::ResponseBuilder rb{ctx, 4};
- rb.Push(ResultSuccess);
- rb.Push<u64>(current_file->GetSize());
- }
-
- void GetDigest(Kernel::HLERequestContext& ctx) {
- LOG_DEBUG(Service_BCAT, "called");
-
- if (current_file == nullptr) {
- LOG_ERROR(Service_BCAT, "There is no file currently open!");
- IPC::ResponseBuilder rb{ctx, 2};
- rb.Push(ERROR_NO_OPEN_ENTITY);
- }
-
- IPC::ResponseBuilder rb{ctx, 6};
- rb.Push(ResultSuccess);
- rb.PushRaw(DigestFile(current_file));
- }
-
- FileSys::VirtualDir root;
- FileSys::VirtualFile current_file;
-};
-
-class IDeliveryCacheDirectoryService final
- : public ServiceFramework<IDeliveryCacheDirectoryService> {
-public:
- explicit IDeliveryCacheDirectoryService(Core::System& system_, FileSys::VirtualDir root_)
- : ServiceFramework{system_, "IDeliveryCacheDirectoryService"}, root(std::move(root_)) {
- // clang-format off
- static const FunctionInfo functions[] = {
- {0, &IDeliveryCacheDirectoryService::Open, "Open"},
- {1, &IDeliveryCacheDirectoryService::Read, "Read"},
- {2, &IDeliveryCacheDirectoryService::GetCount, "GetCount"},
- };
- // clang-format on
-
- RegisterHandlers(functions);
- }
-
-private:
- void Open(Kernel::HLERequestContext& ctx) {
- IPC::RequestParser rp{ctx};
- const auto name_raw = rp.PopRaw<DirectoryName>();
- const auto name =
- Common::StringFromFixedZeroTerminatedBuffer(name_raw.data(), name_raw.size());
-
- LOG_DEBUG(Service_BCAT, "called, name={}", name);
-
- if (!VerifyNameValidDir(ctx, name_raw))
- return;
-
- if (current_dir != nullptr) {
- LOG_ERROR(Service_BCAT, "A file has already been opened on this interface!");
- IPC::ResponseBuilder rb{ctx, 2};
- rb.Push(ERROR_ENTITY_ALREADY_OPEN);
- return;
- }
-
- current_dir = root->GetSubdirectory(name);
-
- if (current_dir == nullptr) {
- LOG_ERROR(Service_BCAT, "Failed to open the directory name={}!", name);
- IPC::ResponseBuilder rb{ctx, 2};
- rb.Push(ERROR_FAILED_OPEN_ENTITY);
- return;
- }
-
- IPC::ResponseBuilder rb{ctx, 2};
- rb.Push(ResultSuccess);
- }
-
- void Read(Kernel::HLERequestContext& ctx) {
- auto write_size = ctx.GetWriteBufferSize() / sizeof(DeliveryCacheDirectoryEntry);
-
- LOG_DEBUG(Service_BCAT, "called, write_size={:016X}", write_size);
-
- if (current_dir == nullptr) {
- LOG_ERROR(Service_BCAT, "There is no open directory!");
- IPC::ResponseBuilder rb{ctx, 2};
- rb.Push(ERROR_NO_OPEN_ENTITY);
- return;
- }
-
- const auto files = current_dir->GetFiles();
- write_size = std::min<u64>(write_size, files.size());
- std::vector<DeliveryCacheDirectoryEntry> entries(write_size);
- std::transform(
- files.begin(), files.begin() + write_size, entries.begin(), [](const auto& file) {
- FileName name{};
- std::memcpy(name.data(), file->GetName().data(),
- std::min(file->GetName().size(), name.size()));
- return DeliveryCacheDirectoryEntry{name, file->GetSize(), DigestFile(file)};
- });
-
- ctx.WriteBuffer(entries);
-
- IPC::ResponseBuilder rb{ctx, 3};
- rb.Push(ResultSuccess);
- rb.Push(static_cast<u32>(write_size * sizeof(DeliveryCacheDirectoryEntry)));
- }
-
- void GetCount(Kernel::HLERequestContext& ctx) {
- LOG_DEBUG(Service_BCAT, "called");
-
- if (current_dir == nullptr) {
- LOG_ERROR(Service_BCAT, "There is no open directory!");
- IPC::ResponseBuilder rb{ctx, 2};
- rb.Push(ERROR_NO_OPEN_ENTITY);
- return;
- }
-
- const auto files = current_dir->GetFiles();
-
- IPC::ResponseBuilder rb{ctx, 3};
- rb.Push(ResultSuccess);
- rb.Push(static_cast<u32>(files.size()));
- }
-
- FileSys::VirtualDir root;
- FileSys::VirtualDir current_dir;
-};
-
-class IDeliveryCacheStorageService final : public ServiceFramework<IDeliveryCacheStorageService> {
-public:
- explicit IDeliveryCacheStorageService(Core::System& system_, FileSys::VirtualDir root_)
- : ServiceFramework{system_, "IDeliveryCacheStorageService"}, root(std::move(root_)) {
- // clang-format off
- static const FunctionInfo functions[] = {
- {0, &IDeliveryCacheStorageService::CreateFileService, "CreateFileService"},
- {1, &IDeliveryCacheStorageService::CreateDirectoryService, "CreateDirectoryService"},
- {10, &IDeliveryCacheStorageService::EnumerateDeliveryCacheDirectory, "EnumerateDeliveryCacheDirectory"},
- };
- // clang-format on
-
- RegisterHandlers(functions);
-
- for (const auto& subdir : root->GetSubdirectories()) {
- DirectoryName name{};
- std::memcpy(name.data(), subdir->GetName().data(),
- std::min(sizeof(DirectoryName) - 1, subdir->GetName().size()));
- entries.push_back(name);
- }
- }
-
-private:
- void CreateFileService(Kernel::HLERequestContext& ctx) {
- LOG_DEBUG(Service_BCAT, "called");
-
- IPC::ResponseBuilder rb{ctx, 2, 0, 1};
- rb.Push(ResultSuccess);
- rb.PushIpcInterface<IDeliveryCacheFileService>(system, root);
- }
-
- void CreateDirectoryService(Kernel::HLERequestContext& ctx) {
- LOG_DEBUG(Service_BCAT, "called");
-
- IPC::ResponseBuilder rb{ctx, 2, 0, 1};
- rb.Push(ResultSuccess);
- rb.PushIpcInterface<IDeliveryCacheDirectoryService>(system, root);
- }
-
- void EnumerateDeliveryCacheDirectory(Kernel::HLERequestContext& ctx) {
- auto size = ctx.GetWriteBufferSize() / sizeof(DirectoryName);
-
- LOG_DEBUG(Service_BCAT, "called, size={:016X}", size);
-
- size = std::min<u64>(size, entries.size() - next_read_index);
- ctx.WriteBuffer(entries.data() + next_read_index, size * sizeof(DirectoryName));
- next_read_index += size;
-
- IPC::ResponseBuilder rb{ctx, 3};
- rb.Push(ResultSuccess);
- rb.Push(static_cast<u32>(size));
- }
-
- FileSys::VirtualDir root;
- std::vector<DirectoryName> entries;
- u64 next_read_index = 0;
-};
-
-void Module::Interface::CreateDeliveryCacheStorageService(Kernel::HLERequestContext& ctx) {
- LOG_DEBUG(Service_BCAT, "called");
-
- const auto title_id = system.CurrentProcess()->GetTitleID();
- IPC::ResponseBuilder rb{ctx, 2, 0, 1};
- rb.Push(ResultSuccess);
- rb.PushIpcInterface<IDeliveryCacheStorageService>(system, fsc.GetBCATDirectory(title_id));
-}
-
-void Module::Interface::CreateDeliveryCacheStorageServiceWithApplicationId(
- Kernel::HLERequestContext& ctx) {
- IPC::RequestParser rp{ctx};
- const auto title_id = rp.PopRaw<u64>();
-
- LOG_DEBUG(Service_BCAT, "called, title_id={:016X}", title_id);
-
- IPC::ResponseBuilder rb{ctx, 2, 0, 1};
- rb.Push(ResultSuccess);
- rb.PushIpcInterface<IDeliveryCacheStorageService>(system, fsc.GetBCATDirectory(title_id));
-}
-
-std::unique_ptr<Backend> CreateBackendFromSettings([[maybe_unused]] Core::System& system,
- DirectoryGetter getter) {
-#ifdef YUZU_ENABLE_BOXCAT
- if (Settings::values.bcat_backend == "boxcat") {
- return std::make_unique<Boxcat>(system.GetAppletManager(), std::move(getter));
- }
-#endif
-
- return std::make_unique<NullBackend>(std::move(getter));
-}
-
-Module::Interface::Interface(Core::System& system_, std::shared_ptr<Module> module_,
- FileSystem::FileSystemController& fsc_, const char* name)
- : ServiceFramework{system_, name}, fsc{fsc_}, module{std::move(module_)},
- backend{CreateBackendFromSettings(system_,
- [&fsc_](u64 tid) { return fsc_.GetBCATDirectory(tid); })} {}
-
-Module::Interface::~Interface() = default;
-
-void InstallInterfaces(Core::System& system) {
- auto module = std::make_shared<Module>();
- std::make_shared<BCAT>(system, module, system.GetFileSystemController(), "bcat:a")
- ->InstallAsService(system.ServiceManager());
- std::make_shared<BCAT>(system, module, system.GetFileSystemController(), "bcat:m")
- ->InstallAsService(system.ServiceManager());
- std::make_shared<BCAT>(system, module, system.GetFileSystemController(), "bcat:u")
- ->InstallAsService(system.ServiceManager());
- std::make_shared<BCAT>(system, module, system.GetFileSystemController(), "bcat:s")
- ->InstallAsService(system.ServiceManager());
-}
-
-} // namespace Service::BCAT
diff --git a/src/core/hle/service/filesystem/filesystem.cpp b/src/core/hle/service/filesystem/filesystem.cpp
index 3c16fe6c7..4a9b13e45 100644
--- a/src/core/hle/service/filesystem/filesystem.cpp
+++ b/src/core/hle/service/filesystem/filesystem.cpp
@@ -703,6 +703,16 @@ FileSys::VirtualDir FileSystemController::GetModificationLoadRoot(u64 title_id)
return bis_factory->GetModificationLoadRoot(title_id);
}
+FileSys::VirtualDir FileSystemController::GetSDMCModificationLoadRoot(u64 title_id) const {
+ LOG_TRACE(Service_FS, "Opening SDMC mod load root for tid={:016X}", title_id);
+
+ if (sdmc_factory == nullptr) {
+ return nullptr;
+ }
+
+ return sdmc_factory->GetSDMCModificationLoadRoot(title_id);
+}
+
FileSys::VirtualDir FileSystemController::GetModificationDumpRoot(u64 title_id) const {
LOG_TRACE(Service_FS, "Opening mod dump root for tid={:016X}", title_id);
@@ -733,20 +743,23 @@ void FileSystemController::CreateFactories(FileSys::VfsFilesystem& vfs, bool ove
}
using YuzuPath = Common::FS::YuzuPath;
+ const auto sdmc_dir_path = Common::FS::GetYuzuPath(YuzuPath::SDMCDir);
+ const auto sdmc_load_dir_path = sdmc_dir_path / "atmosphere/contents";
const auto rw_mode = FileSys::Mode::ReadWrite;
auto nand_directory =
vfs.OpenDirectory(Common::FS::GetYuzuPathString(YuzuPath::NANDDir), rw_mode);
- auto sd_directory =
- vfs.OpenDirectory(Common::FS::GetYuzuPathString(YuzuPath::SDMCDir), rw_mode);
+ auto sd_directory = vfs.OpenDirectory(Common::FS::PathToUTF8String(sdmc_dir_path), rw_mode);
auto load_directory =
vfs.OpenDirectory(Common::FS::GetYuzuPathString(YuzuPath::LoadDir), FileSys::Mode::Read);
+ auto sd_load_directory =
+ vfs.OpenDirectory(Common::FS::PathToUTF8String(sdmc_load_dir_path), FileSys::Mode::Read);
auto dump_directory =
vfs.OpenDirectory(Common::FS::GetYuzuPathString(YuzuPath::DumpDir), rw_mode);
if (bis_factory == nullptr) {
- bis_factory =
- std::make_unique<FileSys::BISFactory>(nand_directory, load_directory, dump_directory);
+ bis_factory = std::make_unique<FileSys::BISFactory>(
+ nand_directory, std::move(load_directory), std::move(dump_directory));
system.RegisterContentProvider(FileSys::ContentProviderUnionSlot::SysNAND,
bis_factory->GetSystemNANDContents());
system.RegisterContentProvider(FileSys::ContentProviderUnionSlot::UserNAND,
@@ -759,7 +772,8 @@ void FileSystemController::CreateFactories(FileSys::VfsFilesystem& vfs, bool ove
}
if (sdmc_factory == nullptr) {
- sdmc_factory = std::make_unique<FileSys::SDMCFactory>(std::move(sd_directory));
+ sdmc_factory = std::make_unique<FileSys::SDMCFactory>(std::move(sd_directory),
+ std::move(sd_load_directory));
system.RegisterContentProvider(FileSys::ContentProviderUnionSlot::SDMC,
sdmc_factory->GetSDMCContents());
}
diff --git a/src/core/hle/service/filesystem/filesystem.h b/src/core/hle/service/filesystem/filesystem.h
index b6b1b9220..d387af3cb 100644
--- a/src/core/hle/service/filesystem/filesystem.h
+++ b/src/core/hle/service/filesystem/filesystem.h
@@ -115,6 +115,7 @@ public:
FileSys::VirtualDir GetContentDirectory(ContentStorageId id) const;
FileSys::VirtualDir GetImageDirectory(ImageDirectoryId id) const;
+ FileSys::VirtualDir GetSDMCModificationLoadRoot(u64 title_id) const;
FileSys::VirtualDir GetModificationLoadRoot(u64 title_id) const;
FileSys::VirtualDir GetModificationDumpRoot(u64 title_id) const;
diff --git a/src/core/hle/service/friend/friend.cpp b/src/core/hle/service/friend/friend.cpp
index c5f88bce7..a3c939c0c 100644
--- a/src/core/hle/service/friend/friend.cpp
+++ b/src/core/hle/service/friend/friend.cpp
@@ -12,7 +12,7 @@
#include "core/hle/kernel/k_writable_event.h"
#include "core/hle/service/friend/errors.h"
#include "core/hle/service/friend/friend.h"
-#include "core/hle/service/friend/interface.h"
+#include "core/hle/service/friend/friend_interface.h"
namespace Service::Friend {
diff --git a/src/core/hle/service/friend/friend_interface.cpp b/src/core/hle/service/friend/friend_interface.cpp
new file mode 100644
index 000000000..9b18b2a32
--- /dev/null
+++ b/src/core/hle/service/friend/friend_interface.cpp
@@ -0,0 +1,21 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "core/hle/service/friend/friend_interface.h"
+
+namespace Service::Friend {
+
+Friend::Friend(std::shared_ptr<Module> module_, Core::System& system_, const char* name)
+ : Interface(std::move(module_), system_, name) {
+ static const FunctionInfo functions[] = {
+ {0, &Friend::CreateFriendService, "CreateFriendService"},
+ {1, &Friend::CreateNotificationService, "CreateNotificationService"},
+ {2, nullptr, "CreateDaemonSuspendSessionService"},
+ };
+ RegisterHandlers(functions);
+}
+
+Friend::~Friend() = default;
+
+} // namespace Service::Friend
diff --git a/src/core/hle/service/friend/interface.h b/src/core/hle/service/friend/friend_interface.h
index 43d914b32..43d914b32 100644
--- a/src/core/hle/service/friend/interface.h
+++ b/src/core/hle/service/friend/friend_interface.h
diff --git a/src/core/hle/service/friend/interface.cpp b/src/core/hle/service/friend/interface.cpp
deleted file mode 100644
index 7368ccec2..000000000
--- a/src/core/hle/service/friend/interface.cpp
+++ /dev/null
@@ -1,21 +0,0 @@
-// Copyright 2018 yuzu emulator team
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "core/hle/service/friend/interface.h"
-
-namespace Service::Friend {
-
-Friend::Friend(std::shared_ptr<Module> module_, Core::System& system_, const char* name)
- : Interface(std::move(module_), system_, name) {
- static const FunctionInfo functions[] = {
- {0, &Friend::CreateFriendService, "CreateFriendService"},
- {1, &Friend::CreateNotificationService, "CreateNotificationService"},
- {2, nullptr, "CreateDaemonSuspendSessionService"},
- };
- RegisterHandlers(functions);
-}
-
-Friend::~Friend() = default;
-
-} // namespace Service::Friend
diff --git a/src/core/hle/service/glue/arp.cpp b/src/core/hle/service/glue/arp.cpp
index ca25df67e..5a3b54cc1 100644
--- a/src/core/hle/service/glue/arp.cpp
+++ b/src/core/hle/service/glue/arp.cpp
@@ -13,7 +13,7 @@
#include "core/hle/kernel/kernel.h"
#include "core/hle/service/glue/arp.h"
#include "core/hle/service/glue/errors.h"
-#include "core/hle/service/glue/manager.h"
+#include "core/hle/service/glue/glue_manager.h"
#include "core/hle/service/service.h"
namespace Service::Glue {
diff --git a/src/core/hle/service/glue/glue_manager.cpp b/src/core/hle/service/glue/glue_manager.cpp
new file mode 100644
index 000000000..aa9d48c0c
--- /dev/null
+++ b/src/core/hle/service/glue/glue_manager.cpp
@@ -0,0 +1,78 @@
+// Copyright 2019 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "core/hle/service/glue/errors.h"
+#include "core/hle/service/glue/glue_manager.h"
+
+namespace Service::Glue {
+
+struct ARPManager::MapEntry {
+ ApplicationLaunchProperty launch;
+ std::vector<u8> control;
+};
+
+ARPManager::ARPManager() = default;
+
+ARPManager::~ARPManager() = default;
+
+ResultVal<ApplicationLaunchProperty> ARPManager::GetLaunchProperty(u64 title_id) const {
+ if (title_id == 0) {
+ return ERR_INVALID_PROCESS_ID;
+ }
+
+ const auto iter = entries.find(title_id);
+ if (iter == entries.end()) {
+ return ERR_NOT_REGISTERED;
+ }
+
+ return MakeResult<ApplicationLaunchProperty>(iter->second.launch);
+}
+
+ResultVal<std::vector<u8>> ARPManager::GetControlProperty(u64 title_id) const {
+ if (title_id == 0) {
+ return ERR_INVALID_PROCESS_ID;
+ }
+
+ const auto iter = entries.find(title_id);
+ if (iter == entries.end()) {
+ return ERR_NOT_REGISTERED;
+ }
+
+ return MakeResult<std::vector<u8>>(iter->second.control);
+}
+
+ResultCode ARPManager::Register(u64 title_id, ApplicationLaunchProperty launch,
+ std::vector<u8> control) {
+ if (title_id == 0) {
+ return ERR_INVALID_PROCESS_ID;
+ }
+
+ const auto iter = entries.find(title_id);
+ if (iter != entries.end()) {
+ return ERR_INVALID_ACCESS;
+ }
+
+ entries.insert_or_assign(title_id, MapEntry{launch, std::move(control)});
+ return ResultSuccess;
+}
+
+ResultCode ARPManager::Unregister(u64 title_id) {
+ if (title_id == 0) {
+ return ERR_INVALID_PROCESS_ID;
+ }
+
+ const auto iter = entries.find(title_id);
+ if (iter == entries.end()) {
+ return ERR_NOT_REGISTERED;
+ }
+
+ entries.erase(iter);
+ return ResultSuccess;
+}
+
+void ARPManager::ResetAll() {
+ entries.clear();
+}
+
+} // namespace Service::Glue
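A minimal usage sketch of the relocated ARPManager API above (everything here is illustrative and not taken from the patch: the function name, the title ID, and the control blob are invented; only Register/GetLaunchProperty/Unregister and the result helpers come from the code shown).

// Hedged sketch: exercises the per-title registry declared in glue_manager.h.
#include <vector>
#include "common/common_types.h"
#include "core/hle/service/glue/glue_manager.h"

namespace {

void ArpManagerUsageSketch() {
    Service::Glue::ARPManager manager;

    constexpr u64 example_title_id = 0x0100000000010000;  // illustrative value only
    Service::Glue::ApplicationLaunchProperty launch{};    // zero-initialized for the sketch
    std::vector<u8> control(0x20);                        // placeholder control property

    // Register rejects a zero title ID and refuses to overwrite an existing
    // entry (ERR_INVALID_ACCESS); otherwise both properties are stored.
    if (manager.Register(example_title_id, launch, std::move(control)).IsSuccess()) {
        const auto launch_result = manager.GetLaunchProperty(example_title_id);
        if (launch_result.Succeeded()) {
            // *launch_result is the ApplicationLaunchProperty registered above.
        }
        manager.Unregister(example_title_id);
    }
}

} // namespace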
diff --git a/src/core/hle/service/glue/manager.h b/src/core/hle/service/glue/glue_manager.h
index a7f5ce3ee..a7f5ce3ee 100644
--- a/src/core/hle/service/glue/manager.h
+++ b/src/core/hle/service/glue/glue_manager.h
diff --git a/src/core/hle/service/glue/manager.cpp b/src/core/hle/service/glue/manager.cpp
deleted file mode 100644
index 9b1754cf8..000000000
--- a/src/core/hle/service/glue/manager.cpp
+++ /dev/null
@@ -1,78 +0,0 @@
-// Copyright 2019 yuzu emulator team
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "core/hle/service/glue/errors.h"
-#include "core/hle/service/glue/manager.h"
-
-namespace Service::Glue {
-
-struct ARPManager::MapEntry {
- ApplicationLaunchProperty launch;
- std::vector<u8> control;
-};
-
-ARPManager::ARPManager() = default;
-
-ARPManager::~ARPManager() = default;
-
-ResultVal<ApplicationLaunchProperty> ARPManager::GetLaunchProperty(u64 title_id) const {
- if (title_id == 0) {
- return ERR_INVALID_PROCESS_ID;
- }
-
- const auto iter = entries.find(title_id);
- if (iter == entries.end()) {
- return ERR_NOT_REGISTERED;
- }
-
- return MakeResult<ApplicationLaunchProperty>(iter->second.launch);
-}
-
-ResultVal<std::vector<u8>> ARPManager::GetControlProperty(u64 title_id) const {
- if (title_id == 0) {
- return ERR_INVALID_PROCESS_ID;
- }
-
- const auto iter = entries.find(title_id);
- if (iter == entries.end()) {
- return ERR_NOT_REGISTERED;
- }
-
- return MakeResult<std::vector<u8>>(iter->second.control);
-}
-
-ResultCode ARPManager::Register(u64 title_id, ApplicationLaunchProperty launch,
- std::vector<u8> control) {
- if (title_id == 0) {
- return ERR_INVALID_PROCESS_ID;
- }
-
- const auto iter = entries.find(title_id);
- if (iter != entries.end()) {
- return ERR_INVALID_ACCESS;
- }
-
- entries.insert_or_assign(title_id, MapEntry{launch, std::move(control)});
- return ResultSuccess;
-}
-
-ResultCode ARPManager::Unregister(u64 title_id) {
- if (title_id == 0) {
- return ERR_INVALID_PROCESS_ID;
- }
-
- const auto iter = entries.find(title_id);
- if (iter == entries.end()) {
- return ERR_NOT_REGISTERED;
- }
-
- entries.erase(iter);
- return ResultSuccess;
-}
-
-void ARPManager::ResetAll() {
- entries.clear();
-}
-
-} // namespace Service::Glue
diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp
index 7acad3798..b7f551e40 100644
--- a/src/core/hle/service/hid/controllers/npad.cpp
+++ b/src/core/hle/service/hid/controllers/npad.cpp
@@ -18,6 +18,7 @@
#include "core/hle/kernel/k_writable_event.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/service/hid/controllers/npad.h"
+#include "core/hle/service/kernel_helpers.h"
namespace Service::HID {
constexpr s32 HID_JOYSTICK_MAX = 0x7fff;
@@ -147,7 +148,9 @@ bool Controller_NPad::IsDeviceHandleValid(const DeviceHandle& device_handle) {
device_handle.device_index < DeviceIndex::MaxDeviceIndex;
}
-Controller_NPad::Controller_NPad(Core::System& system_) : ControllerBase{system_} {
+Controller_NPad::Controller_NPad(Core::System& system_,
+ KernelHelpers::ServiceContext& service_context_)
+ : ControllerBase{system_}, service_context{service_context_} {
latest_vibration_values.fill({DEFAULT_VIBRATION_VALUE, DEFAULT_VIBRATION_VALUE});
}
@@ -251,10 +254,9 @@ void Controller_NPad::InitNewlyAddedController(std::size_t controller_idx) {
}
void Controller_NPad::OnInit() {
- auto& kernel = system.Kernel();
for (std::size_t i = 0; i < styleset_changed_events.size(); ++i) {
- styleset_changed_events[i] = Kernel::KEvent::Create(kernel);
- styleset_changed_events[i]->Initialize(fmt::format("npad:NpadStyleSetChanged_{}", i));
+ styleset_changed_events[i] =
+ service_context.CreateEvent(fmt::format("npad:NpadStyleSetChanged_{}", i));
}
if (!IsControllerActivated()) {
@@ -314,6 +316,8 @@ void Controller_NPad::OnInit() {
void Controller_NPad::OnLoadInputDevices() {
const auto& players = Settings::values.players.GetValue();
+
+ std::lock_guard lock{mutex};
for (std::size_t i = 0; i < players.size(); ++i) {
std::transform(players[i].buttons.begin() + Settings::NativeButton::BUTTON_HID_BEGIN,
players[i].buttons.begin() + Settings::NativeButton::BUTTON_HID_END,
@@ -342,12 +346,13 @@ void Controller_NPad::OnRelease() {
}
for (std::size_t i = 0; i < styleset_changed_events.size(); ++i) {
- styleset_changed_events[i]->Close();
- styleset_changed_events[i] = nullptr;
+ service_context.CloseEvent(styleset_changed_events[i]);
}
}
void Controller_NPad::RequestPadStateUpdate(u32 npad_id) {
+ std::lock_guard lock{mutex};
+
const auto controller_idx = NPadIdToIndex(npad_id);
const auto controller_type = connected_controllers[controller_idx].type;
if (!connected_controllers[controller_idx].is_connected) {
@@ -937,6 +942,11 @@ void Controller_NPad::InitializeVibrationDevice(const DeviceHandle& vibration_de
void Controller_NPad::InitializeVibrationDeviceAtIndex(std::size_t npad_index,
std::size_t device_index) {
+ if (!Settings::values.vibration_enabled.GetValue()) {
+ vibration_devices_mounted[npad_index][device_index] = false;
+ return;
+ }
+
if (vibrations[npad_index][device_index]) {
vibration_devices_mounted[npad_index][device_index] =
vibrations[npad_index][device_index]->GetStatus() == 1;
diff --git a/src/core/hle/service/hid/controllers/npad.h b/src/core/hle/service/hid/controllers/npad.h
index c050c9a44..4fcc6f93a 100644
--- a/src/core/hle/service/hid/controllers/npad.h
+++ b/src/core/hle/service/hid/controllers/npad.h
@@ -6,6 +6,8 @@
#include <array>
#include <atomic>
+#include <mutex>
+
#include "common/bit_field.h"
#include "common/common_types.h"
#include "common/quaternion.h"
@@ -18,6 +20,10 @@ class KEvent;
class KReadableEvent;
} // namespace Kernel
+namespace Service::KernelHelpers {
+class ServiceContext;
+}
+
namespace Service::HID {
constexpr u32 NPAD_HANDHELD = 32;
@@ -25,7 +31,8 @@ constexpr u32 NPAD_UNKNOWN = 16; // TODO(ogniK): What is this?
class Controller_NPad final : public ControllerBase {
public:
- explicit Controller_NPad(Core::System& system_);
+ explicit Controller_NPad(Core::System& system_,
+ KernelHelpers::ServiceContext& service_context_);
~Controller_NPad() override;
// Called when the controller is initialized
@@ -563,6 +570,9 @@ private:
using MotionArray = std::array<
std::array<std::unique_ptr<Input::MotionDevice>, Settings::NativeMotion::NUM_MOTIONS_HID>,
10>;
+
+ KernelHelpers::ServiceContext& service_context;
+ std::mutex mutex;
ButtonArray buttons;
StickArray sticks;
VibrationArray vibrations;
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp
index d68b023d0..b8b80570d 100644
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -46,8 +46,9 @@ constexpr auto pad_update_ns = std::chrono::nanoseconds{1000 * 1000}; //
constexpr auto motion_update_ns = std::chrono::nanoseconds{15 * 1000 * 1000}; // (15ms, 66.666Hz)
constexpr std::size_t SHARED_MEMORY_SIZE = 0x40000;
-IAppletResource::IAppletResource(Core::System& system_)
- : ServiceFramework{system_, "IAppletResource"} {
+IAppletResource::IAppletResource(Core::System& system_,
+ KernelHelpers::ServiceContext& service_context_)
+ : ServiceFramework{system_, "IAppletResource"}, service_context{service_context_} {
static const FunctionInfo functions[] = {
{0, &IAppletResource::GetSharedMemoryHandle, "GetSharedMemoryHandle"},
};
@@ -63,7 +64,7 @@ IAppletResource::IAppletResource(Core::System& system_)
MakeController<Controller_Stubbed>(HidController::CaptureButton);
MakeController<Controller_Stubbed>(HidController::InputDetector);
MakeController<Controller_Stubbed>(HidController::UniquePad);
- MakeController<Controller_NPad>(HidController::NPad);
+ MakeControllerWithServiceContext<Controller_NPad>(HidController::NPad);
MakeController<Controller_Gesture>(HidController::Gesture);
MakeController<Controller_ConsoleSixAxis>(HidController::ConsoleSixAxisSensor);
@@ -191,13 +192,14 @@ private:
std::shared_ptr<IAppletResource> Hid::GetAppletResource() {
if (applet_resource == nullptr) {
- applet_resource = std::make_shared<IAppletResource>(system);
+ applet_resource = std::make_shared<IAppletResource>(system, service_context);
}
return applet_resource;
}
-Hid::Hid(Core::System& system_) : ServiceFramework{system_, "hid"} {
+Hid::Hid(Core::System& system_)
+ : ServiceFramework{system_, "hid"}, service_context{system_, service_name} {
// clang-format off
static const FunctionInfo functions[] = {
{0, &Hid::CreateAppletResource, "CreateAppletResource"},
@@ -347,7 +349,7 @@ void Hid::CreateAppletResource(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_HID, "called, applet_resource_user_id={}", applet_resource_user_id);
if (applet_resource == nullptr) {
- applet_resource = std::make_shared<IAppletResource>(system);
+ applet_resource = std::make_shared<IAppletResource>(system, service_context);
}
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
diff --git a/src/core/hle/service/hid/hid.h b/src/core/hle/service/hid/hid.h
index 83fc2ea1d..9c5c7f252 100644
--- a/src/core/hle/service/hid/hid.h
+++ b/src/core/hle/service/hid/hid.h
@@ -7,6 +7,7 @@
#include <chrono>
#include "core/hle/service/hid/controllers/controller_base.h"
+#include "core/hle/service/kernel_helpers.h"
#include "core/hle/service/service.h"
namespace Core::Timing {
@@ -39,7 +40,8 @@ enum class HidController : std::size_t {
class IAppletResource final : public ServiceFramework<IAppletResource> {
public:
- explicit IAppletResource(Core::System& system_);
+ explicit IAppletResource(Core::System& system_,
+ KernelHelpers::ServiceContext& service_context_);
~IAppletResource() override;
void ActivateController(HidController controller);
@@ -60,11 +62,18 @@ private:
void MakeController(HidController controller) {
controllers[static_cast<std::size_t>(controller)] = std::make_unique<T>(system);
}
+ template <typename T>
+ void MakeControllerWithServiceContext(HidController controller) {
+ controllers[static_cast<std::size_t>(controller)] =
+ std::make_unique<T>(system, service_context);
+ }
void GetSharedMemoryHandle(Kernel::HLERequestContext& ctx);
void UpdateControllers(std::uintptr_t user_data, std::chrono::nanoseconds ns_late);
void UpdateMotion(std::uintptr_t user_data, std::chrono::nanoseconds ns_late);
+ KernelHelpers::ServiceContext& service_context;
+
std::shared_ptr<Core::Timing::EventType> pad_update_event;
std::shared_ptr<Core::Timing::EventType> motion_update_event;
@@ -176,6 +185,8 @@ private:
static_assert(sizeof(VibrationDeviceInfo) == 0x8, "VibrationDeviceInfo has incorrect size.");
std::shared_ptr<IAppletResource> applet_resource;
+
+ KernelHelpers::ServiceContext service_context;
};
/// Reload input devices. Used when input configuration changed
diff --git a/src/core/hle/service/kernel_helpers.cpp b/src/core/hle/service/kernel_helpers.cpp
new file mode 100644
index 000000000..62f4cdfb2
--- /dev/null
+++ b/src/core/hle/service/kernel_helpers.cpp
@@ -0,0 +1,62 @@
+// Copyright 2021 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "core/core.h"
+#include "core/hle/kernel/k_event.h"
+#include "core/hle/kernel/k_process.h"
+#include "core/hle/kernel/k_readable_event.h"
+#include "core/hle/kernel/k_resource_limit.h"
+#include "core/hle/kernel/k_scoped_resource_reservation.h"
+#include "core/hle/kernel/k_writable_event.h"
+#include "core/hle/service/kernel_helpers.h"
+
+namespace Service::KernelHelpers {
+
+ServiceContext::ServiceContext(Core::System& system_, std::string name_)
+ : kernel(system_.Kernel()) {
+ process = Kernel::KProcess::Create(kernel);
+ ASSERT(Kernel::KProcess::Initialize(process, system_, std::move(name_),
+ Kernel::KProcess::ProcessType::Userland)
+ .IsSuccess());
+}
+
+ServiceContext::~ServiceContext() {
+ process->Close();
+ process = nullptr;
+}
+
+Kernel::KEvent* ServiceContext::CreateEvent(std::string&& name) {
+ // Reserve a new event from the process resource limit
+ Kernel::KScopedResourceReservation event_reservation(process,
+ Kernel::LimitableResource::Events);
+ if (!event_reservation.Succeeded()) {
+ LOG_CRITICAL(Service, "Resource limit reached!");
+ return {};
+ }
+
+ // Create a new event.
+ auto* event = Kernel::KEvent::Create(kernel);
+ if (!event) {
+ LOG_CRITICAL(Service, "Unable to create event!");
+ return {};
+ }
+
+ // Initialize the event.
+ event->Initialize(std::move(name));
+
+    // Commit the event reservation.
+ event_reservation.Commit();
+
+ // Register the event.
+ Kernel::KEvent::Register(kernel, event);
+
+ return event;
+}
+
+void ServiceContext::CloseEvent(Kernel::KEvent* event) {
+ event->GetReadableEvent().Close();
+ event->GetWritableEvent().Close();
+}
+
+} // namespace Service::KernelHelpers
diff --git a/src/core/hle/service/kernel_helpers.h b/src/core/hle/service/kernel_helpers.h
new file mode 100644
index 000000000..4f3e95f67
--- /dev/null
+++ b/src/core/hle/service/kernel_helpers.h
@@ -0,0 +1,35 @@
+// Copyright 2021 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <string>
+
+namespace Core {
+class System;
+}
+
+namespace Kernel {
+class KernelCore;
+class KEvent;
+class KProcess;
+} // namespace Kernel
+
+namespace Service::KernelHelpers {
+
+class ServiceContext {
+public:
+ ServiceContext(Core::System& system_, std::string name_);
+ ~ServiceContext();
+
+ Kernel::KEvent* CreateEvent(std::string&& name);
+
+ void CloseEvent(Kernel::KEvent* event);
+
+private:
+ Kernel::KernelCore& kernel;
+ Kernel::KProcess* process{};
+};
+
+} // namespace Service::KernelHelpers
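A condensed sketch of the lifecycle the new ServiceContext enables, mirroring what Controller_NPad does above. The class name, member names, and event name are illustrative only; CreateEvent and CloseEvent are the helpers defined in kernel_helpers.cpp.

// Hedged sketch: a service owning a ServiceContext and one kernel event.
// "ExampleService", "update_event", and "example:UpdateEvent" are invented.
#include "core/hle/service/kernel_helpers.h"

class ExampleService {
public:
    explicit ExampleService(Core::System& system_)
        : service_context{system_, "example"} {
        // CreateEvent reserves the event against the context's backing process,
        // initializes it, and registers it with the kernel.
        update_event = service_context.CreateEvent("example:UpdateEvent");
    }

    ~ExampleService() {
        // CloseEvent closes both the readable and writable halves of the event.
        service_context.CloseEvent(update_event);
    }

private:
    Service::KernelHelpers::ServiceContext service_context;
    Kernel::KEvent* update_event{};
};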
diff --git a/src/core/hle/service/mii/manager.cpp b/src/core/hle/service/mii/manager.cpp
deleted file mode 100644
index 114aff31c..000000000
--- a/src/core/hle/service/mii/manager.cpp
+++ /dev/null
@@ -1,464 +0,0 @@
-// Copyright 2020 yuzu emulator team
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <cstring>
-#include <random>
-
-#include "common/assert.h"
-#include "common/logging/log.h"
-#include "common/string_util.h"
-
-#include "core/hle/service/acc/profile_manager.h"
-#include "core/hle/service/mii/manager.h"
-#include "core/hle/service/mii/raw_data.h"
-#include "core/hle/service/mii/types.h"
-
-namespace Service::Mii {
-
-namespace {
-
-constexpr ResultCode ERROR_CANNOT_FIND_ENTRY{ErrorModule::Mii, 4};
-
-constexpr std::size_t DefaultMiiCount{RawData::DefaultMii.size()};
-
-constexpr MiiStoreData::Name DefaultMiiName{u'y', u'u', u'z', u'u'};
-constexpr std::array<u8, 8> HairColorLookup{8, 1, 2, 3, 4, 5, 6, 7};
-constexpr std::array<u8, 6> EyeColorLookup{8, 9, 10, 11, 12, 13};
-constexpr std::array<u8, 5> MouthColorLookup{19, 20, 21, 22, 23};
-constexpr std::array<u8, 7> GlassesColorLookup{8, 14, 15, 16, 17, 18, 0};
-constexpr std::array<u8, 62> EyeRotateLookup{
- {0x03, 0x04, 0x04, 0x04, 0x03, 0x04, 0x04, 0x04, 0x03, 0x04, 0x04, 0x04, 0x04, 0x03, 0x03, 0x04,
- 0x04, 0x04, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x03, 0x04, 0x04,
- 0x04, 0x03, 0x03, 0x03, 0x04, 0x04, 0x03, 0x03, 0x03, 0x04, 0x04, 0x03, 0x03, 0x03, 0x03, 0x03,
- 0x03, 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 0x04, 0x03, 0x04, 0x04, 0x03, 0x04, 0x04}};
-constexpr std::array<u8, 24> EyebrowRotateLookup{{0x06, 0x06, 0x05, 0x07, 0x06, 0x07, 0x06, 0x07,
- 0x04, 0x07, 0x06, 0x08, 0x05, 0x05, 0x06, 0x06,
- 0x07, 0x07, 0x06, 0x06, 0x05, 0x06, 0x07, 0x05}};
-
-template <typename T, std::size_t SourceArraySize, std::size_t DestArraySize>
-std::array<T, DestArraySize> ResizeArray(const std::array<T, SourceArraySize>& in) {
- std::array<T, DestArraySize> out{};
- std::memcpy(out.data(), in.data(), sizeof(T) * std::min(SourceArraySize, DestArraySize));
- return out;
-}
-
-MiiInfo ConvertStoreDataToInfo(const MiiStoreData& data) {
- MiiStoreBitFields bf;
- std::memcpy(&bf, data.data.data.data(), sizeof(MiiStoreBitFields));
-
- return {
- .uuid = data.data.uuid,
- .name = ResizeArray<char16_t, 10, 11>(data.data.name),
- .font_region = static_cast<u8>(bf.font_region.Value()),
- .favorite_color = static_cast<u8>(bf.favorite_color.Value()),
- .gender = static_cast<u8>(bf.gender.Value()),
- .height = static_cast<u8>(bf.height.Value()),
- .build = static_cast<u8>(bf.build.Value()),
- .type = static_cast<u8>(bf.type.Value()),
- .region_move = static_cast<u8>(bf.region_move.Value()),
- .faceline_type = static_cast<u8>(bf.faceline_type.Value()),
- .faceline_color = static_cast<u8>(bf.faceline_color.Value()),
- .faceline_wrinkle = static_cast<u8>(bf.faceline_wrinkle.Value()),
- .faceline_make = static_cast<u8>(bf.faceline_makeup.Value()),
- .hair_type = static_cast<u8>(bf.hair_type.Value()),
- .hair_color = static_cast<u8>(bf.hair_color.Value()),
- .hair_flip = static_cast<u8>(bf.hair_flip.Value()),
- .eye_type = static_cast<u8>(bf.eye_type.Value()),
- .eye_color = static_cast<u8>(bf.eye_color.Value()),
- .eye_scale = static_cast<u8>(bf.eye_scale.Value()),
- .eye_aspect = static_cast<u8>(bf.eye_aspect.Value()),
- .eye_rotate = static_cast<u8>(bf.eye_rotate.Value()),
- .eye_x = static_cast<u8>(bf.eye_x.Value()),
- .eye_y = static_cast<u8>(bf.eye_y.Value()),
- .eyebrow_type = static_cast<u8>(bf.eyebrow_type.Value()),
- .eyebrow_color = static_cast<u8>(bf.eyebrow_color.Value()),
- .eyebrow_scale = static_cast<u8>(bf.eyebrow_scale.Value()),
- .eyebrow_aspect = static_cast<u8>(bf.eyebrow_aspect.Value()),
- .eyebrow_rotate = static_cast<u8>(bf.eyebrow_rotate.Value()),
- .eyebrow_x = static_cast<u8>(bf.eyebrow_x.Value()),
- .eyebrow_y = static_cast<u8>(bf.eyebrow_y.Value() + 3),
- .nose_type = static_cast<u8>(bf.nose_type.Value()),
- .nose_scale = static_cast<u8>(bf.nose_scale.Value()),
- .nose_y = static_cast<u8>(bf.nose_y.Value()),
- .mouth_type = static_cast<u8>(bf.mouth_type.Value()),
- .mouth_color = static_cast<u8>(bf.mouth_color.Value()),
- .mouth_scale = static_cast<u8>(bf.mouth_scale.Value()),
- .mouth_aspect = static_cast<u8>(bf.mouth_aspect.Value()),
- .mouth_y = static_cast<u8>(bf.mouth_y.Value()),
- .beard_color = static_cast<u8>(bf.beard_color.Value()),
- .beard_type = static_cast<u8>(bf.beard_type.Value()),
- .mustache_type = static_cast<u8>(bf.mustache_type.Value()),
- .mustache_scale = static_cast<u8>(bf.mustache_scale.Value()),
- .mustache_y = static_cast<u8>(bf.mustache_y.Value()),
- .glasses_type = static_cast<u8>(bf.glasses_type.Value()),
- .glasses_color = static_cast<u8>(bf.glasses_color.Value()),
- .glasses_scale = static_cast<u8>(bf.glasses_scale.Value()),
- .glasses_y = static_cast<u8>(bf.glasses_y.Value()),
- .mole_type = static_cast<u8>(bf.mole_type.Value()),
- .mole_scale = static_cast<u8>(bf.mole_scale.Value()),
- .mole_x = static_cast<u8>(bf.mole_x.Value()),
- .mole_y = static_cast<u8>(bf.mole_y.Value()),
- .padding = 0,
- };
-}
-
-u16 GenerateCrc16(const void* data, std::size_t size) {
- s32 crc{};
- for (std::size_t i = 0; i < size; i++) {
- crc ^= static_cast<const u8*>(data)[i] << 8;
- for (std::size_t j = 0; j < 8; j++) {
- crc <<= 1;
- if ((crc & 0x10000) != 0) {
- crc = (crc ^ 0x1021) & 0xFFFF;
- }
- }
- }
- return Common::swap16(static_cast<u16>(crc));
-}
-
-Common::UUID GenerateValidUUID() {
- auto uuid{Common::UUID::Generate()};
-
- // Bit 7 must be set, and bit 6 unset for the UUID to be valid
- uuid.uuid[1] &= 0xFFFFFFFFFFFFFF3FULL;
- uuid.uuid[1] |= 0x0000000000000080ULL;
-
- return uuid;
-}
-
-template <typename T>
-T GetRandomValue(T min, T max) {
- std::random_device device;
- std::mt19937 gen(device());
- std::uniform_int_distribution<u64> distribution(static_cast<u64>(min), static_cast<u64>(max));
- return static_cast<T>(distribution(gen));
-}
-
-template <typename T>
-T GetRandomValue(T max) {
- return GetRandomValue<T>({}, max);
-}
-
-MiiStoreData BuildRandomStoreData(Age age, Gender gender, Race race, const Common::UUID& user_id) {
- MiiStoreBitFields bf{};
-
- if (gender == Gender::All) {
- gender = GetRandomValue<Gender>(Gender::Maximum);
- }
-
- bf.gender.Assign(gender);
- bf.favorite_color.Assign(GetRandomValue<u8>(11));
- bf.region_move.Assign(0);
- bf.font_region.Assign(FontRegion::Standard);
- bf.type.Assign(0);
- bf.height.Assign(64);
- bf.build.Assign(64);
-
- if (age == Age::All) {
- const auto temp{GetRandomValue<int>(10)};
- if (temp >= 8) {
- age = Age::Old;
- } else if (temp >= 4) {
- age = Age::Normal;
- } else {
- age = Age::Young;
- }
- }
-
- if (race == Race::All) {
- const auto temp{GetRandomValue<int>(10)};
- if (temp >= 8) {
- race = Race::Black;
- } else if (temp >= 4) {
- race = Race::White;
- } else {
- race = Race::Asian;
- }
- }
-
- u32 axis_y{};
- if (gender == Gender::Female && age == Age::Young) {
- axis_y = GetRandomValue<u32>(3);
- }
-
- const std::size_t index{3 * static_cast<std::size_t>(age) +
- 9 * static_cast<std::size_t>(gender) + static_cast<std::size_t>(race)};
-
- const auto faceline_type_info{RawData::RandomMiiFaceline.at(index)};
- const auto faceline_color_info{RawData::RandomMiiFacelineColor.at(
- 3 * static_cast<std::size_t>(gender) + static_cast<std::size_t>(race))};
- const auto faceline_wrinkle_info{RawData::RandomMiiFacelineWrinkle.at(index)};
- const auto faceline_makeup_info{RawData::RandomMiiFacelineMakeup.at(index)};
- const auto hair_type_info{RawData::RandomMiiHairType.at(index)};
- const auto hair_color_info{RawData::RandomMiiHairColor.at(3 * static_cast<std::size_t>(race) +
- static_cast<std::size_t>(age))};
- const auto eye_type_info{RawData::RandomMiiEyeType.at(index)};
- const auto eye_color_info{RawData::RandomMiiEyeColor.at(static_cast<std::size_t>(race))};
- const auto eyebrow_type_info{RawData::RandomMiiEyebrowType.at(index)};
- const auto nose_type_info{RawData::RandomMiiNoseType.at(index)};
- const auto mouth_type_info{RawData::RandomMiiMouthType.at(index)};
- const auto glasses_type_info{RawData::RandomMiiGlassType.at(static_cast<std::size_t>(age))};
-
- bf.faceline_type.Assign(
- faceline_type_info.values[GetRandomValue<std::size_t>(faceline_type_info.values_count)]);
- bf.faceline_color.Assign(
- faceline_color_info.values[GetRandomValue<std::size_t>(faceline_color_info.values_count)]);
- bf.faceline_wrinkle.Assign(
- faceline_wrinkle_info
- .values[GetRandomValue<std::size_t>(faceline_wrinkle_info.values_count)]);
- bf.faceline_makeup.Assign(
- faceline_makeup_info
- .values[GetRandomValue<std::size_t>(faceline_makeup_info.values_count)]);
-
- bf.hair_type.Assign(
- hair_type_info.values[GetRandomValue<std::size_t>(hair_type_info.values_count)]);
- bf.hair_color.Assign(
- HairColorLookup[hair_color_info
- .values[GetRandomValue<std::size_t>(hair_color_info.values_count)]]);
- bf.hair_flip.Assign(GetRandomValue<HairFlip>(HairFlip::Maximum));
-
- bf.eye_type.Assign(
- eye_type_info.values[GetRandomValue<std::size_t>(eye_type_info.values_count)]);
-
- const auto eye_rotate_1{gender != Gender::Male ? 4 : 2};
- const auto eye_rotate_2{gender != Gender::Male ? 3 : 4};
- const auto eye_rotate_offset{32 - EyeRotateLookup[eye_rotate_1] + eye_rotate_2};
- const auto eye_rotate{32 - EyeRotateLookup[bf.eye_type]};
-
- bf.eye_color.Assign(
- EyeColorLookup[eye_color_info
- .values[GetRandomValue<std::size_t>(eye_color_info.values_count)]]);
- bf.eye_scale.Assign(4);
- bf.eye_aspect.Assign(3);
- bf.eye_rotate.Assign(eye_rotate_offset - eye_rotate);
- bf.eye_x.Assign(2);
- bf.eye_y.Assign(axis_y + 12);
-
- bf.eyebrow_type.Assign(
- eyebrow_type_info.values[GetRandomValue<std::size_t>(eyebrow_type_info.values_count)]);
-
- const auto eyebrow_rotate_1{race == Race::Asian ? 6 : 0};
- const auto eyebrow_y{race == Race::Asian ? 9 : 10};
- const auto eyebrow_rotate_offset{32 - EyebrowRotateLookup[eyebrow_rotate_1] + 6};
- const auto eyebrow_rotate{
- 32 - EyebrowRotateLookup[static_cast<std::size_t>(bf.eyebrow_type.Value())]};
-
- bf.eyebrow_color.Assign(bf.hair_color);
- bf.eyebrow_scale.Assign(4);
- bf.eyebrow_aspect.Assign(3);
- bf.eyebrow_rotate.Assign(eyebrow_rotate_offset - eyebrow_rotate);
- bf.eyebrow_x.Assign(2);
- bf.eyebrow_y.Assign(axis_y + eyebrow_y);
-
- const auto nose_scale{gender == Gender::Female ? 3 : 4};
-
- bf.nose_type.Assign(
- nose_type_info.values[GetRandomValue<std::size_t>(nose_type_info.values_count)]);
- bf.nose_scale.Assign(nose_scale);
- bf.nose_y.Assign(axis_y + 9);
-
- const auto mouth_color{gender == Gender::Female ? GetRandomValue<int>(4) : 0};
-
- bf.mouth_type.Assign(
- mouth_type_info.values[GetRandomValue<std::size_t>(mouth_type_info.values_count)]);
- bf.mouth_color.Assign(MouthColorLookup[mouth_color]);
- bf.mouth_scale.Assign(4);
- bf.mouth_aspect.Assign(3);
- bf.mouth_y.Assign(axis_y + 13);
-
- bf.beard_color.Assign(bf.hair_color);
- bf.mustache_scale.Assign(4);
-
- if (gender == Gender::Male && age != Age::Young && GetRandomValue<int>(10) < 2) {
- const auto mustache_and_beard_flag{
- GetRandomValue<BeardAndMustacheFlag>(BeardAndMustacheFlag::All)};
-
- auto beard_type{BeardType::None};
- auto mustache_type{MustacheType::None};
-
- if ((mustache_and_beard_flag & BeardAndMustacheFlag::Beard) ==
- BeardAndMustacheFlag::Beard) {
- beard_type = GetRandomValue<BeardType>(BeardType::Beard1, BeardType::Beard5);
- }
-
- if ((mustache_and_beard_flag & BeardAndMustacheFlag::Mustache) ==
- BeardAndMustacheFlag::Mustache) {
- mustache_type =
- GetRandomValue<MustacheType>(MustacheType::Mustache1, MustacheType::Mustache5);
- }
-
- bf.mustache_type.Assign(mustache_type);
- bf.beard_type.Assign(beard_type);
- bf.mustache_y.Assign(10);
- } else {
- bf.mustache_type.Assign(MustacheType::None);
- bf.beard_type.Assign(BeardType::None);
- bf.mustache_y.Assign(axis_y + 10);
- }
-
- const auto glasses_type_start{GetRandomValue<std::size_t>(100)};
- u8 glasses_type{};
- while (glasses_type_start < glasses_type_info.values[glasses_type]) {
- if (++glasses_type >= glasses_type_info.values_count) {
- UNREACHABLE();
- break;
- }
- }
-
- bf.glasses_type.Assign(glasses_type);
- bf.glasses_color.Assign(GlassesColorLookup[0]);
- bf.glasses_scale.Assign(4);
- bf.glasses_y.Assign(axis_y + 10);
-
- bf.mole_type.Assign(0);
- bf.mole_scale.Assign(4);
- bf.mole_x.Assign(2);
- bf.mole_y.Assign(20);
-
- return {DefaultMiiName, bf, user_id};
-}
-
-MiiStoreData BuildDefaultStoreData(const DefaultMii& info, const Common::UUID& user_id) {
- MiiStoreBitFields bf{};
-
- bf.font_region.Assign(info.font_region);
- bf.favorite_color.Assign(info.favorite_color);
- bf.gender.Assign(info.gender);
- bf.height.Assign(info.height);
- bf.build.Assign(info.weight);
- bf.type.Assign(info.type);
- bf.region_move.Assign(info.region);
- bf.faceline_type.Assign(info.face_type);
- bf.faceline_color.Assign(info.face_color);
- bf.faceline_wrinkle.Assign(info.face_wrinkle);
- bf.faceline_makeup.Assign(info.face_makeup);
- bf.hair_type.Assign(info.hair_type);
- bf.hair_color.Assign(HairColorLookup[info.hair_color]);
- bf.hair_flip.Assign(static_cast<HairFlip>(info.hair_flip));
- bf.eye_type.Assign(info.eye_type);
- bf.eye_color.Assign(EyeColorLookup[info.eye_color]);
- bf.eye_scale.Assign(info.eye_scale);
- bf.eye_aspect.Assign(info.eye_aspect);
- bf.eye_rotate.Assign(info.eye_rotate);
- bf.eye_x.Assign(info.eye_x);
- bf.eye_y.Assign(info.eye_y);
- bf.eyebrow_type.Assign(info.eyebrow_type);
- bf.eyebrow_color.Assign(HairColorLookup[info.eyebrow_color]);
- bf.eyebrow_scale.Assign(info.eyebrow_scale);
- bf.eyebrow_aspect.Assign(info.eyebrow_aspect);
- bf.eyebrow_rotate.Assign(info.eyebrow_rotate);
- bf.eyebrow_x.Assign(info.eyebrow_x);
- bf.eyebrow_y.Assign(info.eyebrow_y - 3);
- bf.nose_type.Assign(info.nose_type);
- bf.nose_scale.Assign(info.nose_scale);
- bf.nose_y.Assign(info.nose_y);
- bf.mouth_type.Assign(info.mouth_type);
- bf.mouth_color.Assign(MouthColorLookup[info.mouth_color]);
- bf.mouth_scale.Assign(info.mouth_scale);
- bf.mouth_aspect.Assign(info.mouth_aspect);
- bf.mouth_y.Assign(info.mouth_y);
- bf.beard_color.Assign(HairColorLookup[info.beard_color]);
- bf.beard_type.Assign(static_cast<BeardType>(info.beard_type));
- bf.mustache_type.Assign(static_cast<MustacheType>(info.mustache_type));
- bf.mustache_scale.Assign(info.mustache_scale);
- bf.mustache_y.Assign(info.mustache_y);
- bf.glasses_type.Assign(info.glasses_type);
- bf.glasses_color.Assign(GlassesColorLookup[info.glasses_color]);
- bf.glasses_scale.Assign(info.glasses_scale);
- bf.glasses_y.Assign(info.glasses_y);
- bf.mole_type.Assign(info.mole_type);
- bf.mole_scale.Assign(info.mole_scale);
- bf.mole_x.Assign(info.mole_x);
- bf.mole_y.Assign(info.mole_y);
-
- return {DefaultMiiName, bf, user_id};
-}
-
-} // namespace
-
-MiiStoreData::MiiStoreData() = default;
-
-MiiStoreData::MiiStoreData(const MiiStoreData::Name& name, const MiiStoreBitFields& bit_fields,
- const Common::UUID& user_id) {
- data.name = name;
- data.uuid = GenerateValidUUID();
-
- std::memcpy(data.data.data(), &bit_fields, sizeof(MiiStoreBitFields));
- data_crc = GenerateCrc16(data.data.data(), sizeof(data));
- device_crc = GenerateCrc16(&user_id, sizeof(Common::UUID));
-}
-
-MiiManager::MiiManager() : user_id{Service::Account::ProfileManager().GetLastOpenedUser()} {}
-
-bool MiiManager::CheckAndResetUpdateCounter(SourceFlag source_flag, u64& current_update_counter) {
- if ((source_flag & SourceFlag::Database) == SourceFlag::None) {
- return false;
- }
-
- const bool result{current_update_counter != update_counter};
-
- current_update_counter = update_counter;
-
- return result;
-}
-
-bool MiiManager::IsFullDatabase() const {
- // TODO(bunnei): We don't implement the Mii database, so it cannot be full
- return false;
-}
-
-u32 MiiManager::GetCount(SourceFlag source_flag) const {
- std::size_t count{};
- if ((source_flag & SourceFlag::Database) != SourceFlag::None) {
- // TODO(bunnei): We don't implement the Mii database, but when we do, update this
- count += 0;
- }
- if ((source_flag & SourceFlag::Default) != SourceFlag::None) {
- count += DefaultMiiCount;
- }
- return static_cast<u32>(count);
-}
-
-ResultVal<MiiInfo> MiiManager::UpdateLatest([[maybe_unused]] const MiiInfo& info,
- SourceFlag source_flag) {
- if ((source_flag & SourceFlag::Database) == SourceFlag::None) {
- return ERROR_CANNOT_FIND_ENTRY;
- }
-
- // TODO(bunnei): We don't implement the Mii database, so we can't have an entry
- return ERROR_CANNOT_FIND_ENTRY;
-}
-
-MiiInfo MiiManager::BuildRandom(Age age, Gender gender, Race race) {
- return ConvertStoreDataToInfo(BuildRandomStoreData(age, gender, race, user_id));
-}
-
-MiiInfo MiiManager::BuildDefault(std::size_t index) {
- return ConvertStoreDataToInfo(BuildDefaultStoreData(RawData::DefaultMii.at(index), user_id));
-}
-
-ResultVal<std::vector<MiiInfoElement>> MiiManager::GetDefault(SourceFlag source_flag) {
- std::vector<MiiInfoElement> result;
-
- if ((source_flag & SourceFlag::Default) == SourceFlag::None) {
- return MakeResult(std::move(result));
- }
-
- for (std::size_t index = 0; index < DefaultMiiCount; index++) {
- result.emplace_back(BuildDefault(index), Source::Default);
- }
-
- return MakeResult(std::move(result));
-}
-
-ResultCode MiiManager::GetIndex([[maybe_unused]] const MiiInfo& info, u32& index) {
- constexpr u32 INVALID_INDEX{0xFFFFFFFF};
-
- index = INVALID_INDEX;
-
- // TODO(bunnei): We don't implement the Mii database, so we can't have an index
- return ERROR_CANNOT_FIND_ENTRY;
-}
-
-} // namespace Service::Mii
diff --git a/src/core/hle/service/mii/manager.h b/src/core/hle/service/mii/manager.h
deleted file mode 100644
index ec7efa5f7..000000000
--- a/src/core/hle/service/mii/manager.h
+++ /dev/null
@@ -1,331 +0,0 @@
-// Copyright 2020 yuzu emulator team
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include "common/bit_field.h"
-#include "common/common_funcs.h"
-#include "common/uuid.h"
-#include "core/hle/result.h"
-#include "core/hle/service/mii/types.h"
-
-namespace Service::Mii {
-
-enum class Source : u32 {
- Database = 0,
- Default = 1,
- Account = 2,
- Friend = 3,
-};
-
-enum class SourceFlag : u32 {
- None = 0,
- Database = 1 << 0,
- Default = 1 << 1,
-};
-DECLARE_ENUM_FLAG_OPERATORS(SourceFlag);
-
-struct MiiInfo {
- Common::UUID uuid;
- std::array<char16_t, 11> name;
- u8 font_region;
- u8 favorite_color;
- u8 gender;
- u8 height;
- u8 build;
- u8 type;
- u8 region_move;
- u8 faceline_type;
- u8 faceline_color;
- u8 faceline_wrinkle;
- u8 faceline_make;
- u8 hair_type;
- u8 hair_color;
- u8 hair_flip;
- u8 eye_type;
- u8 eye_color;
- u8 eye_scale;
- u8 eye_aspect;
- u8 eye_rotate;
- u8 eye_x;
- u8 eye_y;
- u8 eyebrow_type;
- u8 eyebrow_color;
- u8 eyebrow_scale;
- u8 eyebrow_aspect;
- u8 eyebrow_rotate;
- u8 eyebrow_x;
- u8 eyebrow_y;
- u8 nose_type;
- u8 nose_scale;
- u8 nose_y;
- u8 mouth_type;
- u8 mouth_color;
- u8 mouth_scale;
- u8 mouth_aspect;
- u8 mouth_y;
- u8 beard_color;
- u8 beard_type;
- u8 mustache_type;
- u8 mustache_scale;
- u8 mustache_y;
- u8 glasses_type;
- u8 glasses_color;
- u8 glasses_scale;
- u8 glasses_y;
- u8 mole_type;
- u8 mole_scale;
- u8 mole_x;
- u8 mole_y;
- u8 padding;
-
- std::u16string Name() const;
-};
-static_assert(sizeof(MiiInfo) == 0x58, "MiiInfo has incorrect size.");
-static_assert(std::has_unique_object_representations_v<MiiInfo>,
- "All bits of MiiInfo must contribute to its value.");
-
-#pragma pack(push, 4)
-
-struct MiiInfoElement {
- MiiInfoElement(const MiiInfo& info_, Source source_) : info{info_}, source{source_} {}
-
- MiiInfo info{};
- Source source{};
-};
-static_assert(sizeof(MiiInfoElement) == 0x5c, "MiiInfoElement has incorrect size.");
-
-struct MiiStoreBitFields {
- union {
- u32 word_0{};
-
- BitField<0, 8, u32> hair_type;
- BitField<8, 7, u32> height;
- BitField<15, 1, u32> mole_type;
- BitField<16, 7, u32> build;
- BitField<23, 1, HairFlip> hair_flip;
- BitField<24, 7, u32> hair_color;
- BitField<31, 1, u32> type;
- };
-
- union {
- u32 word_1{};
-
- BitField<0, 7, u32> eye_color;
- BitField<7, 1, Gender> gender;
- BitField<8, 7, u32> eyebrow_color;
- BitField<16, 7, u32> mouth_color;
- BitField<24, 7, u32> beard_color;
- };
-
- union {
- u32 word_2{};
-
- BitField<0, 7, u32> glasses_color;
- BitField<8, 6, u32> eye_type;
- BitField<14, 2, u32> region_move;
- BitField<16, 6, u32> mouth_type;
- BitField<22, 2, FontRegion> font_region;
- BitField<24, 5, u32> eye_y;
- BitField<29, 3, u32> glasses_scale;
- };
-
- union {
- u32 word_3{};
-
- BitField<0, 5, u32> eyebrow_type;
- BitField<5, 3, MustacheType> mustache_type;
- BitField<8, 5, u32> nose_type;
- BitField<13, 3, BeardType> beard_type;
- BitField<16, 5, u32> nose_y;
- BitField<21, 3, u32> mouth_aspect;
- BitField<24, 5, u32> mouth_y;
- BitField<29, 3, u32> eyebrow_aspect;
- };
-
- union {
- u32 word_4{};
-
- BitField<0, 5, u32> mustache_y;
- BitField<5, 3, u32> eye_rotate;
- BitField<8, 5, u32> glasses_y;
- BitField<13, 3, u32> eye_aspect;
- BitField<16, 5, u32> mole_x;
- BitField<21, 3, u32> eye_scale;
- BitField<24, 5, u32> mole_y;
- };
-
- union {
- u32 word_5{};
-
- BitField<0, 5, u32> glasses_type;
- BitField<8, 4, u32> favorite_color;
- BitField<12, 4, u32> faceline_type;
- BitField<16, 4, u32> faceline_color;
- BitField<20, 4, u32> faceline_wrinkle;
- BitField<24, 4, u32> faceline_makeup;
- BitField<28, 4, u32> eye_x;
- };
-
- union {
- u32 word_6{};
-
- BitField<0, 4, u32> eyebrow_scale;
- BitField<4, 4, u32> eyebrow_rotate;
- BitField<8, 4, u32> eyebrow_x;
- BitField<12, 4, u32> eyebrow_y;
- BitField<16, 4, u32> nose_scale;
- BitField<20, 4, u32> mouth_scale;
- BitField<24, 4, u32> mustache_scale;
- BitField<28, 4, u32> mole_scale;
- };
-};
-static_assert(sizeof(MiiStoreBitFields) == 0x1c, "MiiStoreBitFields has incorrect size.");
-static_assert(std::is_trivially_copyable_v<MiiStoreBitFields>,
- "MiiStoreBitFields is not trivially copyable.");
-
-struct MiiStoreData {
- using Name = std::array<char16_t, 10>;
-
- MiiStoreData();
- MiiStoreData(const Name& name, const MiiStoreBitFields& bit_fields,
- const Common::UUID& user_id);
-
- // This corresponds to the above structure MiiStoreBitFields. I did it like this because the
- // BitField<> type makes this (and any thing that contains it) not trivially copyable, which is
- // not suitable for our uses.
- struct {
- std::array<u8, 0x1C> data{};
- static_assert(sizeof(MiiStoreBitFields) == sizeof(data), "data field has incorrect size.");
-
- Name name{};
- Common::UUID uuid{Common::INVALID_UUID};
- } data;
-
- u16 data_crc{};
- u16 device_crc{};
-};
-static_assert(sizeof(MiiStoreData) == 0x44, "MiiStoreData has incorrect size.");
-
-struct MiiStoreDataElement {
- MiiStoreData data{};
- Source source{};
-};
-static_assert(sizeof(MiiStoreDataElement) == 0x48, "MiiStoreDataElement has incorrect size.");
-
-struct MiiDatabase {
- u32 magic{}; // 'NFDB'
- std::array<MiiStoreData, 0x64> miis{};
- INSERT_PADDING_BYTES(1);
- u8 count{};
- u16 crc{};
-};
-static_assert(sizeof(MiiDatabase) == 0x1A98, "MiiDatabase has incorrect size.");
-
-struct RandomMiiValues {
- std::array<u8, 0xbc> values{};
-};
-static_assert(sizeof(RandomMiiValues) == 0xbc, "RandomMiiValues has incorrect size.");
-
-struct RandomMiiData4 {
- Gender gender{};
- Age age{};
- Race race{};
- u32 values_count{};
- std::array<u32, 47> values{};
-};
-static_assert(sizeof(RandomMiiData4) == 0xcc, "RandomMiiData4 has incorrect size.");
-
-struct RandomMiiData3 {
- u32 arg_1;
- u32 arg_2;
- u32 values_count;
- std::array<u32, 47> values{};
-};
-static_assert(sizeof(RandomMiiData3) == 0xc8, "RandomMiiData3 has incorrect size.");
-
-struct RandomMiiData2 {
- u32 arg_1;
- u32 values_count;
- std::array<u32, 47> values{};
-};
-static_assert(sizeof(RandomMiiData2) == 0xc4, "RandomMiiData2 has incorrect size.");
-
-struct DefaultMii {
- u32 face_type{};
- u32 face_color{};
- u32 face_wrinkle{};
- u32 face_makeup{};
- u32 hair_type{};
- u32 hair_color{};
- u32 hair_flip{};
- u32 eye_type{};
- u32 eye_color{};
- u32 eye_scale{};
- u32 eye_aspect{};
- u32 eye_rotate{};
- u32 eye_x{};
- u32 eye_y{};
- u32 eyebrow_type{};
- u32 eyebrow_color{};
- u32 eyebrow_scale{};
- u32 eyebrow_aspect{};
- u32 eyebrow_rotate{};
- u32 eyebrow_x{};
- u32 eyebrow_y{};
- u32 nose_type{};
- u32 nose_scale{};
- u32 nose_y{};
- u32 mouth_type{};
- u32 mouth_color{};
- u32 mouth_scale{};
- u32 mouth_aspect{};
- u32 mouth_y{};
- u32 mustache_type{};
- u32 beard_type{};
- u32 beard_color{};
- u32 mustache_scale{};
- u32 mustache_y{};
- u32 glasses_type{};
- u32 glasses_color{};
- u32 glasses_scale{};
- u32 glasses_y{};
- u32 mole_type{};
- u32 mole_scale{};
- u32 mole_x{};
- u32 mole_y{};
- u32 height{};
- u32 weight{};
- Gender gender{};
- u32 favorite_color{};
- u32 region{};
- FontRegion font_region{};
- u32 type{};
- INSERT_PADDING_WORDS(5);
-};
-static_assert(sizeof(DefaultMii) == 0xd8, "MiiStoreData has incorrect size.");
-
-#pragma pack(pop)
-
-// The Mii manager is responsible for loading and storing the Miis to the database in NAND along
-// with providing an easy interface for HLE emulation of the mii service.
-class MiiManager {
-public:
- MiiManager();
-
- bool CheckAndResetUpdateCounter(SourceFlag source_flag, u64& current_update_counter);
- bool IsFullDatabase() const;
- u32 GetCount(SourceFlag source_flag) const;
- ResultVal<MiiInfo> UpdateLatest(const MiiInfo& info, SourceFlag source_flag);
- MiiInfo BuildRandom(Age age, Gender gender, Race race);
- MiiInfo BuildDefault(std::size_t index);
- ResultVal<std::vector<MiiInfoElement>> GetDefault(SourceFlag source_flag);
- ResultCode GetIndex(const MiiInfo& info, u32& index);
-
-private:
- const Common::UUID user_id{Common::INVALID_UUID};
- u64 update_counter{};
-};
-
-}; // namespace Service::Mii
diff --git a/src/core/hle/service/mii/mii.cpp b/src/core/hle/service/mii/mii.cpp
index bbd81a88a..9d863486a 100644
--- a/src/core/hle/service/mii/mii.cpp
+++ b/src/core/hle/service/mii/mii.cpp
@@ -7,8 +7,8 @@
#include "common/logging/log.h"
#include "core/hle/ipc_helpers.h"
#include "core/hle/kernel/hle_ipc.h"
-#include "core/hle/service/mii/manager.h"
#include "core/hle/service/mii/mii.h"
+#include "core/hle/service/mii/mii_manager.h"
#include "core/hle/service/service.h"
#include "core/hle/service/sm/sm.h"
diff --git a/src/core/hle/service/mii/mii_manager.cpp b/src/core/hle/service/mii/mii_manager.cpp
new file mode 100644
index 000000000..4fef2aea4
--- /dev/null
+++ b/src/core/hle/service/mii/mii_manager.cpp
@@ -0,0 +1,465 @@
+// Copyright 2020 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cstring>
+#include <random>
+
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "common/string_util.h"
+
+#include "core/hle/service/acc/profile_manager.h"
+#include "core/hle/service/mii/mii_manager.h"
+#include "core/hle/service/mii/raw_data.h"
+#include "core/hle/service/mii/types.h"
+
+namespace Service::Mii {
+
+namespace {
+
+constexpr ResultCode ERROR_CANNOT_FIND_ENTRY{ErrorModule::Mii, 4};
+
+constexpr std::size_t BaseMiiCount{2};
+constexpr std::size_t DefaultMiiCount{RawData::DefaultMii.size()};
+
+constexpr MiiStoreData::Name DefaultMiiName{u'y', u'u', u'z', u'u'};
+constexpr std::array<u8, 8> HairColorLookup{8, 1, 2, 3, 4, 5, 6, 7};
+constexpr std::array<u8, 6> EyeColorLookup{8, 9, 10, 11, 12, 13};
+constexpr std::array<u8, 5> MouthColorLookup{19, 20, 21, 22, 23};
+constexpr std::array<u8, 7> GlassesColorLookup{8, 14, 15, 16, 17, 18, 0};
+constexpr std::array<u8, 62> EyeRotateLookup{
+ {0x03, 0x04, 0x04, 0x04, 0x03, 0x04, 0x04, 0x04, 0x03, 0x04, 0x04, 0x04, 0x04, 0x03, 0x03, 0x04,
+ 0x04, 0x04, 0x03, 0x03, 0x04, 0x03, 0x04, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x03, 0x04, 0x04,
+ 0x04, 0x03, 0x03, 0x03, 0x04, 0x04, 0x03, 0x03, 0x03, 0x04, 0x04, 0x03, 0x03, 0x03, 0x03, 0x03,
+ 0x03, 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 0x04, 0x03, 0x04, 0x04, 0x03, 0x04, 0x04}};
+constexpr std::array<u8, 24> EyebrowRotateLookup{{0x06, 0x06, 0x05, 0x07, 0x06, 0x07, 0x06, 0x07,
+ 0x04, 0x07, 0x06, 0x08, 0x05, 0x05, 0x06, 0x06,
+ 0x07, 0x07, 0x06, 0x06, 0x05, 0x06, 0x07, 0x05}};
+
+template <typename T, std::size_t SourceArraySize, std::size_t DestArraySize>
+std::array<T, DestArraySize> ResizeArray(const std::array<T, SourceArraySize>& in) {
+ std::array<T, DestArraySize> out{};
+ std::memcpy(out.data(), in.data(), sizeof(T) * std::min(SourceArraySize, DestArraySize));
+ return out;
+}
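
[Editor's note] A minimal standalone sketch of how the ResizeArray helper above behaves when growing or shrinking an array; everything below is illustrative only (the char16_t name arrays are the only real use in this file):

    #include <algorithm>
    #include <array>
    #include <cstddef>
    #include <cstring>

    // Standalone copy of the helper, shown only to illustrate truncation/zero-padding.
    template <typename T, std::size_t SourceArraySize, std::size_t DestArraySize>
    std::array<T, DestArraySize> ResizeArray(const std::array<T, SourceArraySize>& in) {
        std::array<T, DestArraySize> out{};
        std::memcpy(out.data(), in.data(), sizeof(T) * std::min(SourceArraySize, DestArraySize));
        return out;
    }

    int main() {
        const std::array<char16_t, 10> name{u'y', u'u', u'z', u'u'}; // remaining slots are 0
        const auto grown = ResizeArray<char16_t, 10, 11>(name);  // extra slot value-initialized to 0
        const auto shrunk = ResizeArray<char16_t, 10, 4>(name);  // trailing elements dropped
        return (grown[10] == 0 && shrunk[3] == u'u') ? 0 : 1;
    }
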
+
+MiiInfo ConvertStoreDataToInfo(const MiiStoreData& data) {
+ MiiStoreBitFields bf;
+ std::memcpy(&bf, data.data.data.data(), sizeof(MiiStoreBitFields));
+
+ return {
+ .uuid = data.data.uuid,
+ .name = ResizeArray<char16_t, 10, 11>(data.data.name),
+ .font_region = static_cast<u8>(bf.font_region.Value()),
+ .favorite_color = static_cast<u8>(bf.favorite_color.Value()),
+ .gender = static_cast<u8>(bf.gender.Value()),
+ .height = static_cast<u8>(bf.height.Value()),
+ .build = static_cast<u8>(bf.build.Value()),
+ .type = static_cast<u8>(bf.type.Value()),
+ .region_move = static_cast<u8>(bf.region_move.Value()),
+ .faceline_type = static_cast<u8>(bf.faceline_type.Value()),
+ .faceline_color = static_cast<u8>(bf.faceline_color.Value()),
+ .faceline_wrinkle = static_cast<u8>(bf.faceline_wrinkle.Value()),
+ .faceline_make = static_cast<u8>(bf.faceline_makeup.Value()),
+ .hair_type = static_cast<u8>(bf.hair_type.Value()),
+ .hair_color = static_cast<u8>(bf.hair_color.Value()),
+ .hair_flip = static_cast<u8>(bf.hair_flip.Value()),
+ .eye_type = static_cast<u8>(bf.eye_type.Value()),
+ .eye_color = static_cast<u8>(bf.eye_color.Value()),
+ .eye_scale = static_cast<u8>(bf.eye_scale.Value()),
+ .eye_aspect = static_cast<u8>(bf.eye_aspect.Value()),
+ .eye_rotate = static_cast<u8>(bf.eye_rotate.Value()),
+ .eye_x = static_cast<u8>(bf.eye_x.Value()),
+ .eye_y = static_cast<u8>(bf.eye_y.Value()),
+ .eyebrow_type = static_cast<u8>(bf.eyebrow_type.Value()),
+ .eyebrow_color = static_cast<u8>(bf.eyebrow_color.Value()),
+ .eyebrow_scale = static_cast<u8>(bf.eyebrow_scale.Value()),
+ .eyebrow_aspect = static_cast<u8>(bf.eyebrow_aspect.Value()),
+ .eyebrow_rotate = static_cast<u8>(bf.eyebrow_rotate.Value()),
+ .eyebrow_x = static_cast<u8>(bf.eyebrow_x.Value()),
+ .eyebrow_y = static_cast<u8>(bf.eyebrow_y.Value() + 3),
+ .nose_type = static_cast<u8>(bf.nose_type.Value()),
+ .nose_scale = static_cast<u8>(bf.nose_scale.Value()),
+ .nose_y = static_cast<u8>(bf.nose_y.Value()),
+ .mouth_type = static_cast<u8>(bf.mouth_type.Value()),
+ .mouth_color = static_cast<u8>(bf.mouth_color.Value()),
+ .mouth_scale = static_cast<u8>(bf.mouth_scale.Value()),
+ .mouth_aspect = static_cast<u8>(bf.mouth_aspect.Value()),
+ .mouth_y = static_cast<u8>(bf.mouth_y.Value()),
+ .beard_color = static_cast<u8>(bf.beard_color.Value()),
+ .beard_type = static_cast<u8>(bf.beard_type.Value()),
+ .mustache_type = static_cast<u8>(bf.mustache_type.Value()),
+ .mustache_scale = static_cast<u8>(bf.mustache_scale.Value()),
+ .mustache_y = static_cast<u8>(bf.mustache_y.Value()),
+ .glasses_type = static_cast<u8>(bf.glasses_type.Value()),
+ .glasses_color = static_cast<u8>(bf.glasses_color.Value()),
+ .glasses_scale = static_cast<u8>(bf.glasses_scale.Value()),
+ .glasses_y = static_cast<u8>(bf.glasses_y.Value()),
+ .mole_type = static_cast<u8>(bf.mole_type.Value()),
+ .mole_scale = static_cast<u8>(bf.mole_scale.Value()),
+ .mole_x = static_cast<u8>(bf.mole_x.Value()),
+ .mole_y = static_cast<u8>(bf.mole_y.Value()),
+ .padding = 0,
+ };
+}
+
+u16 GenerateCrc16(const void* data, std::size_t size) {
+ s32 crc{};
+ for (std::size_t i = 0; i < size; i++) {
+ crc ^= static_cast<const u8*>(data)[i] << 8;
+ for (std::size_t j = 0; j < 8; j++) {
+ crc <<= 1;
+ if ((crc & 0x10000) != 0) {
+ crc = (crc ^ 0x1021) & 0xFFFF;
+ }
+ }
+ }
+ return Common::swap16(static_cast<u16>(crc));
+}
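
[Editor's note] The routine above appears to be CRC-16/XMODEM (polynomial 0x1021, zero initial value) with the result byte-swapped for storage. A self-contained re-implementation checking the commonly cited test vector; the 0x31C3 check value for "123456789" is the standard one for this polynomial/init combination and is stated here as an assumption, not taken from this change:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // Same loop as GenerateCrc16 above, without the final byte swap.
    static std::uint16_t Crc16Xmodem(const void* data, std::size_t size) {
        std::uint32_t crc = 0;
        const auto* bytes = static_cast<const std::uint8_t*>(data);
        for (std::size_t i = 0; i < size; ++i) {
            crc ^= static_cast<std::uint32_t>(bytes[i]) << 8;
            for (int j = 0; j < 8; ++j) {
                crc <<= 1;
                if ((crc & 0x10000) != 0) {
                    crc = (crc ^ 0x1021) & 0xFFFF;
                }
            }
        }
        return static_cast<std::uint16_t>(crc);
    }

    int main() {
        const char check[] = "123456789";
        // Standard CRC-16/XMODEM check value.
        assert(Crc16Xmodem(check, std::strlen(check)) == 0x31C3);
    }
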
+
+Common::UUID GenerateValidUUID() {
+ auto uuid{Common::UUID::Generate()};
+
+ // Bit 7 must be set, and bit 6 unset for the UUID to be valid
+ uuid.uuid[1] &= 0xFFFFFFFFFFFFFF3FULL;
+ uuid.uuid[1] |= 0x0000000000000080ULL;
+
+ return uuid;
+}
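
[Editor's note] Isolating the bit math above: the mask clears bits 6 and 7 of the low byte of uuid[1] and the OR sets bit 7, leaving the two high bits of that byte as binary 10 (the RFC 4122 variant pattern the comment refers to). Which byte of the canonical UUID text form this corresponds to depends on Common::UUID's layout, so the sketch below only verifies the byte-level transform:

    #include <cstdint>

    // For any input byte, (b & 0x3F) | 0x80 clears bit 6 and sets bit 7.
    constexpr std::uint8_t FixVariantByte(std::uint8_t b) {
        return static_cast<std::uint8_t>((b & 0x3F) | 0x80);
    }

    static_assert((FixVariantByte(0x00) & 0xC0) == 0x80, "bit 7 set, bit 6 clear");
    static_assert((FixVariantByte(0xFF) & 0xC0) == 0x80, "bit 7 set, bit 6 clear");
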
+
+template <typename T>
+T GetRandomValue(T min, T max) {
+ std::random_device device;
+ std::mt19937 gen(device());
+ std::uniform_int_distribution<u64> distribution(static_cast<u64>(min), static_cast<u64>(max));
+ return static_cast<T>(distribution(gen));
+}
+
+template <typename T>
+T GetRandomValue(T max) {
+ return GetRandomValue<T>({}, max);
+}
+
+MiiStoreData BuildRandomStoreData(Age age, Gender gender, Race race, const Common::UUID& user_id) {
+ MiiStoreBitFields bf{};
+
+ if (gender == Gender::All) {
+ gender = GetRandomValue<Gender>(Gender::Maximum);
+ }
+
+ bf.gender.Assign(gender);
+ bf.favorite_color.Assign(GetRandomValue<u8>(11));
+ bf.region_move.Assign(0);
+ bf.font_region.Assign(FontRegion::Standard);
+ bf.type.Assign(0);
+ bf.height.Assign(64);
+ bf.build.Assign(64);
+
+ if (age == Age::All) {
+ const auto temp{GetRandomValue<int>(10)};
+ if (temp >= 8) {
+ age = Age::Old;
+ } else if (temp >= 4) {
+ age = Age::Normal;
+ } else {
+ age = Age::Young;
+ }
+ }
+
+ if (race == Race::All) {
+ const auto temp{GetRandomValue<int>(10)};
+ if (temp >= 8) {
+ race = Race::Black;
+ } else if (temp >= 4) {
+ race = Race::White;
+ } else {
+ race = Race::Asian;
+ }
+ }
+
+ u32 axis_y{};
+ if (gender == Gender::Female && age == Age::Young) {
+ axis_y = GetRandomValue<u32>(3);
+ }
+
+ const std::size_t index{3 * static_cast<std::size_t>(age) +
+ 9 * static_cast<std::size_t>(gender) + static_cast<std::size_t>(race)};
+
+ const auto faceline_type_info{RawData::RandomMiiFaceline.at(index)};
+ const auto faceline_color_info{RawData::RandomMiiFacelineColor.at(
+ 3 * static_cast<std::size_t>(gender) + static_cast<std::size_t>(race))};
+ const auto faceline_wrinkle_info{RawData::RandomMiiFacelineWrinkle.at(index)};
+ const auto faceline_makeup_info{RawData::RandomMiiFacelineMakeup.at(index)};
+ const auto hair_type_info{RawData::RandomMiiHairType.at(index)};
+ const auto hair_color_info{RawData::RandomMiiHairColor.at(3 * static_cast<std::size_t>(race) +
+ static_cast<std::size_t>(age))};
+ const auto eye_type_info{RawData::RandomMiiEyeType.at(index)};
+ const auto eye_color_info{RawData::RandomMiiEyeColor.at(static_cast<std::size_t>(race))};
+ const auto eyebrow_type_info{RawData::RandomMiiEyebrowType.at(index)};
+ const auto nose_type_info{RawData::RandomMiiNoseType.at(index)};
+ const auto mouth_type_info{RawData::RandomMiiMouthType.at(index)};
+ const auto glasses_type_info{RawData::RandomMiiGlassType.at(static_cast<std::size_t>(age))};
+
+ bf.faceline_type.Assign(
+ faceline_type_info.values[GetRandomValue<std::size_t>(faceline_type_info.values_count)]);
+ bf.faceline_color.Assign(
+ faceline_color_info.values[GetRandomValue<std::size_t>(faceline_color_info.values_count)]);
+ bf.faceline_wrinkle.Assign(
+ faceline_wrinkle_info
+ .values[GetRandomValue<std::size_t>(faceline_wrinkle_info.values_count)]);
+ bf.faceline_makeup.Assign(
+ faceline_makeup_info
+ .values[GetRandomValue<std::size_t>(faceline_makeup_info.values_count)]);
+
+ bf.hair_type.Assign(
+ hair_type_info.values[GetRandomValue<std::size_t>(hair_type_info.values_count)]);
+ bf.hair_color.Assign(
+ HairColorLookup[hair_color_info
+ .values[GetRandomValue<std::size_t>(hair_color_info.values_count)]]);
+ bf.hair_flip.Assign(GetRandomValue<HairFlip>(HairFlip::Maximum));
+
+ bf.eye_type.Assign(
+ eye_type_info.values[GetRandomValue<std::size_t>(eye_type_info.values_count)]);
+
+ const auto eye_rotate_1{gender != Gender::Male ? 4 : 2};
+ const auto eye_rotate_2{gender != Gender::Male ? 3 : 4};
+ const auto eye_rotate_offset{32 - EyeRotateLookup[eye_rotate_1] + eye_rotate_2};
+ const auto eye_rotate{32 - EyeRotateLookup[bf.eye_type]};
+
+ bf.eye_color.Assign(
+ EyeColorLookup[eye_color_info
+ .values[GetRandomValue<std::size_t>(eye_color_info.values_count)]]);
+ bf.eye_scale.Assign(4);
+ bf.eye_aspect.Assign(3);
+ bf.eye_rotate.Assign(eye_rotate_offset - eye_rotate);
+ bf.eye_x.Assign(2);
+ bf.eye_y.Assign(axis_y + 12);
+
+ bf.eyebrow_type.Assign(
+ eyebrow_type_info.values[GetRandomValue<std::size_t>(eyebrow_type_info.values_count)]);
+
+ const auto eyebrow_rotate_1{race == Race::Asian ? 6 : 0};
+ const auto eyebrow_y{race == Race::Asian ? 9 : 10};
+ const auto eyebrow_rotate_offset{32 - EyebrowRotateLookup[eyebrow_rotate_1] + 6};
+ const auto eyebrow_rotate{
+ 32 - EyebrowRotateLookup[static_cast<std::size_t>(bf.eyebrow_type.Value())]};
+
+ bf.eyebrow_color.Assign(bf.hair_color);
+ bf.eyebrow_scale.Assign(4);
+ bf.eyebrow_aspect.Assign(3);
+ bf.eyebrow_rotate.Assign(eyebrow_rotate_offset - eyebrow_rotate);
+ bf.eyebrow_x.Assign(2);
+ bf.eyebrow_y.Assign(axis_y + eyebrow_y);
+
+ const auto nose_scale{gender == Gender::Female ? 3 : 4};
+
+ bf.nose_type.Assign(
+ nose_type_info.values[GetRandomValue<std::size_t>(nose_type_info.values_count)]);
+ bf.nose_scale.Assign(nose_scale);
+ bf.nose_y.Assign(axis_y + 9);
+
+ const auto mouth_color{gender == Gender::Female ? GetRandomValue<int>(4) : 0};
+
+ bf.mouth_type.Assign(
+ mouth_type_info.values[GetRandomValue<std::size_t>(mouth_type_info.values_count)]);
+ bf.mouth_color.Assign(MouthColorLookup[mouth_color]);
+ bf.mouth_scale.Assign(4);
+ bf.mouth_aspect.Assign(3);
+ bf.mouth_y.Assign(axis_y + 13);
+
+ bf.beard_color.Assign(bf.hair_color);
+ bf.mustache_scale.Assign(4);
+
+ if (gender == Gender::Male && age != Age::Young && GetRandomValue<int>(10) < 2) {
+ const auto mustache_and_beard_flag{
+ GetRandomValue<BeardAndMustacheFlag>(BeardAndMustacheFlag::All)};
+
+ auto beard_type{BeardType::None};
+ auto mustache_type{MustacheType::None};
+
+ if ((mustache_and_beard_flag & BeardAndMustacheFlag::Beard) ==
+ BeardAndMustacheFlag::Beard) {
+ beard_type = GetRandomValue<BeardType>(BeardType::Beard1, BeardType::Beard5);
+ }
+
+ if ((mustache_and_beard_flag & BeardAndMustacheFlag::Mustache) ==
+ BeardAndMustacheFlag::Mustache) {
+ mustache_type =
+ GetRandomValue<MustacheType>(MustacheType::Mustache1, MustacheType::Mustache5);
+ }
+
+ bf.mustache_type.Assign(mustache_type);
+ bf.beard_type.Assign(beard_type);
+ bf.mustache_y.Assign(10);
+ } else {
+ bf.mustache_type.Assign(MustacheType::None);
+ bf.beard_type.Assign(BeardType::None);
+ bf.mustache_y.Assign(axis_y + 10);
+ }
+
+ const auto glasses_type_start{GetRandomValue<std::size_t>(100)};
+ u8 glasses_type{};
+ while (glasses_type_start < glasses_type_info.values[glasses_type]) {
+ if (++glasses_type >= glasses_type_info.values_count) {
+ UNREACHABLE();
+ break;
+ }
+ }
+
+ bf.glasses_type.Assign(glasses_type);
+ bf.glasses_color.Assign(GlassesColorLookup[0]);
+ bf.glasses_scale.Assign(4);
+ bf.glasses_y.Assign(axis_y + 10);
+
+ bf.mole_type.Assign(0);
+ bf.mole_scale.Assign(4);
+ bf.mole_x.Assign(2);
+ bf.mole_y.Assign(20);
+
+ return {DefaultMiiName, bf, user_id};
+}
+
+MiiStoreData BuildDefaultStoreData(const DefaultMii& info, const Common::UUID& user_id) {
+ MiiStoreBitFields bf{};
+
+ bf.font_region.Assign(info.font_region);
+ bf.favorite_color.Assign(info.favorite_color);
+ bf.gender.Assign(info.gender);
+ bf.height.Assign(info.height);
+ bf.build.Assign(info.weight);
+ bf.type.Assign(info.type);
+ bf.region_move.Assign(info.region);
+ bf.faceline_type.Assign(info.face_type);
+ bf.faceline_color.Assign(info.face_color);
+ bf.faceline_wrinkle.Assign(info.face_wrinkle);
+ bf.faceline_makeup.Assign(info.face_makeup);
+ bf.hair_type.Assign(info.hair_type);
+ bf.hair_color.Assign(HairColorLookup[info.hair_color]);
+ bf.hair_flip.Assign(static_cast<HairFlip>(info.hair_flip));
+ bf.eye_type.Assign(info.eye_type);
+ bf.eye_color.Assign(EyeColorLookup[info.eye_color]);
+ bf.eye_scale.Assign(info.eye_scale);
+ bf.eye_aspect.Assign(info.eye_aspect);
+ bf.eye_rotate.Assign(info.eye_rotate);
+ bf.eye_x.Assign(info.eye_x);
+ bf.eye_y.Assign(info.eye_y);
+ bf.eyebrow_type.Assign(info.eyebrow_type);
+ bf.eyebrow_color.Assign(HairColorLookup[info.eyebrow_color]);
+ bf.eyebrow_scale.Assign(info.eyebrow_scale);
+ bf.eyebrow_aspect.Assign(info.eyebrow_aspect);
+ bf.eyebrow_rotate.Assign(info.eyebrow_rotate);
+ bf.eyebrow_x.Assign(info.eyebrow_x);
+ bf.eyebrow_y.Assign(info.eyebrow_y - 3);
+ bf.nose_type.Assign(info.nose_type);
+ bf.nose_scale.Assign(info.nose_scale);
+ bf.nose_y.Assign(info.nose_y);
+ bf.mouth_type.Assign(info.mouth_type);
+ bf.mouth_color.Assign(MouthColorLookup[info.mouth_color]);
+ bf.mouth_scale.Assign(info.mouth_scale);
+ bf.mouth_aspect.Assign(info.mouth_aspect);
+ bf.mouth_y.Assign(info.mouth_y);
+ bf.beard_color.Assign(HairColorLookup[info.beard_color]);
+ bf.beard_type.Assign(static_cast<BeardType>(info.beard_type));
+ bf.mustache_type.Assign(static_cast<MustacheType>(info.mustache_type));
+ bf.mustache_scale.Assign(info.mustache_scale);
+ bf.mustache_y.Assign(info.mustache_y);
+ bf.glasses_type.Assign(info.glasses_type);
+ bf.glasses_color.Assign(GlassesColorLookup[info.glasses_color]);
+ bf.glasses_scale.Assign(info.glasses_scale);
+ bf.glasses_y.Assign(info.glasses_y);
+ bf.mole_type.Assign(info.mole_type);
+ bf.mole_scale.Assign(info.mole_scale);
+ bf.mole_x.Assign(info.mole_x);
+ bf.mole_y.Assign(info.mole_y);
+
+ return {DefaultMiiName, bf, user_id};
+}
+
+} // namespace
+
+MiiStoreData::MiiStoreData() = default;
+
+MiiStoreData::MiiStoreData(const MiiStoreData::Name& name, const MiiStoreBitFields& bit_fields,
+ const Common::UUID& user_id) {
+ data.name = name;
+ data.uuid = GenerateValidUUID();
+
+ std::memcpy(data.data.data(), &bit_fields, sizeof(MiiStoreBitFields));
+ data_crc = GenerateCrc16(data.data.data(), sizeof(data));
+ device_crc = GenerateCrc16(&user_id, sizeof(Common::UUID));
+}
+
+MiiManager::MiiManager() : user_id{Service::Account::ProfileManager().GetLastOpenedUser()} {}
+
+bool MiiManager::CheckAndResetUpdateCounter(SourceFlag source_flag, u64& current_update_counter) {
+ if ((source_flag & SourceFlag::Database) == SourceFlag::None) {
+ return false;
+ }
+
+ const bool result{current_update_counter != update_counter};
+
+ current_update_counter = update_counter;
+
+ return result;
+}
+
+bool MiiManager::IsFullDatabase() const {
+ // TODO(bunnei): We don't implement the Mii database, so it cannot be full
+ return false;
+}
+
+u32 MiiManager::GetCount(SourceFlag source_flag) const {
+ std::size_t count{};
+ if ((source_flag & SourceFlag::Database) != SourceFlag::None) {
+ // TODO(bunnei): We don't implement the Mii database, but when we do, update this
+ count += 0;
+ }
+ if ((source_flag & SourceFlag::Default) != SourceFlag::None) {
+ count += (DefaultMiiCount - BaseMiiCount);
+ }
+ return static_cast<u32>(count);
+}
+
+ResultVal<MiiInfo> MiiManager::UpdateLatest([[maybe_unused]] const MiiInfo& info,
+ SourceFlag source_flag) {
+ if ((source_flag & SourceFlag::Database) == SourceFlag::None) {
+ return ERROR_CANNOT_FIND_ENTRY;
+ }
+
+ // TODO(bunnei): We don't implement the Mii database, so we can't have an entry
+ return ERROR_CANNOT_FIND_ENTRY;
+}
+
+MiiInfo MiiManager::BuildRandom(Age age, Gender gender, Race race) {
+ return ConvertStoreDataToInfo(BuildRandomStoreData(age, gender, race, user_id));
+}
+
+MiiInfo MiiManager::BuildDefault(std::size_t index) {
+ return ConvertStoreDataToInfo(BuildDefaultStoreData(RawData::DefaultMii.at(index), user_id));
+}
+
+ResultVal<std::vector<MiiInfoElement>> MiiManager::GetDefault(SourceFlag source_flag) {
+ std::vector<MiiInfoElement> result;
+
+ if ((source_flag & SourceFlag::Default) == SourceFlag::None) {
+ return MakeResult(std::move(result));
+ }
+
+ for (std::size_t index = BaseMiiCount; index < DefaultMiiCount; index++) {
+ result.emplace_back(BuildDefault(index), Source::Default);
+ }
+
+ return MakeResult(std::move(result));
+}
+
+ResultCode MiiManager::GetIndex([[maybe_unused]] const MiiInfo& info, u32& index) {
+ constexpr u32 INVALID_INDEX{0xFFFFFFFF};
+
+ index = INVALID_INDEX;
+
+ // TODO(bunnei): We don't implement the Mii database, so we can't have an index
+ return ERROR_CANNOT_FIND_ENTRY;
+}
+
+} // namespace Service::Mii
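
[Editor's note] For orientation, a hedged usage sketch of the MiiManager API defined in this file. It is not compilable in isolation (it omits the Core::System and account/profile plumbing that the default constructor relies on); only the member signatures declared in mii_manager.h below are assumed, and the IPC handlers in mii.cpp are the real callers:

    // Sketch only, not yuzu code.
    Service::Mii::MiiManager manager; // reads the last-opened account UUID internally

    // Number of built-in Miis exposed when only the Default source is requested.
    const u32 default_count = manager.GetCount(Service::Mii::SourceFlag::Default);

    // Build a random Mii with unconstrained age/gender/race, and one built-in default.
    const auto random_info = manager.BuildRandom(Service::Mii::Age::All,
                                                 Service::Mii::Gender::All,
                                                 Service::Mii::Race::All);
    const auto default_info = manager.BuildDefault(2); // indices below BaseMiiCount are
                                                       // skipped by GetDefault
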
diff --git a/src/core/hle/service/mii/mii_manager.h b/src/core/hle/service/mii/mii_manager.h
new file mode 100644
index 000000000..8e048fc56
--- /dev/null
+++ b/src/core/hle/service/mii/mii_manager.h
@@ -0,0 +1,333 @@
+// Copyright 2020 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <vector>
+#include "common/bit_field.h"
+#include "common/common_funcs.h"
+#include "common/uuid.h"
+#include "core/hle/result.h"
+#include "core/hle/service/mii/types.h"
+
+namespace Service::Mii {
+
+enum class Source : u32 {
+ Database = 0,
+ Default = 1,
+ Account = 2,
+ Friend = 3,
+};
+
+enum class SourceFlag : u32 {
+ None = 0,
+ Database = 1 << 0,
+ Default = 1 << 1,
+};
+DECLARE_ENUM_FLAG_OPERATORS(SourceFlag);
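
[Editor's note] DECLARE_ENUM_FLAG_OPERATORS supplies the bitwise operators that make the `(flags & SourceFlag::Database) != SourceFlag::None` tests in mii_manager.cpp work on this scoped enum. A self-contained stand-in is sketched below; ExampleFlag and its hand-written operators are invented for illustration, only the test idiom mirrors the code in this change:

    #include <cstdint>

    enum class ExampleFlag : std::uint32_t {
        None = 0,
        Database = 1 << 0,
        Default = 1 << 1,
    };

    // Hand-written equivalents of what DECLARE_ENUM_FLAG_OPERATORS generates.
    constexpr ExampleFlag operator|(ExampleFlag a, ExampleFlag b) {
        return static_cast<ExampleFlag>(static_cast<std::uint32_t>(a) | static_cast<std::uint32_t>(b));
    }
    constexpr ExampleFlag operator&(ExampleFlag a, ExampleFlag b) {
        return static_cast<ExampleFlag>(static_cast<std::uint32_t>(a) & static_cast<std::uint32_t>(b));
    }

    constexpr bool HasDatabase(ExampleFlag flags) {
        return (flags & ExampleFlag::Database) != ExampleFlag::None;
    }

    static_assert(HasDatabase(ExampleFlag::Database | ExampleFlag::Default), "both sources requested");
    static_assert(!HasDatabase(ExampleFlag::Default), "default-only request skips the database");
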
+
+struct MiiInfo {
+ Common::UUID uuid;
+ std::array<char16_t, 11> name;
+ u8 font_region;
+ u8 favorite_color;
+ u8 gender;
+ u8 height;
+ u8 build;
+ u8 type;
+ u8 region_move;
+ u8 faceline_type;
+ u8 faceline_color;
+ u8 faceline_wrinkle;
+ u8 faceline_make;
+ u8 hair_type;
+ u8 hair_color;
+ u8 hair_flip;
+ u8 eye_type;
+ u8 eye_color;
+ u8 eye_scale;
+ u8 eye_aspect;
+ u8 eye_rotate;
+ u8 eye_x;
+ u8 eye_y;
+ u8 eyebrow_type;
+ u8 eyebrow_color;
+ u8 eyebrow_scale;
+ u8 eyebrow_aspect;
+ u8 eyebrow_rotate;
+ u8 eyebrow_x;
+ u8 eyebrow_y;
+ u8 nose_type;
+ u8 nose_scale;
+ u8 nose_y;
+ u8 mouth_type;
+ u8 mouth_color;
+ u8 mouth_scale;
+ u8 mouth_aspect;
+ u8 mouth_y;
+ u8 beard_color;
+ u8 beard_type;
+ u8 mustache_type;
+ u8 mustache_scale;
+ u8 mustache_y;
+ u8 glasses_type;
+ u8 glasses_color;
+ u8 glasses_scale;
+ u8 glasses_y;
+ u8 mole_type;
+ u8 mole_scale;
+ u8 mole_x;
+ u8 mole_y;
+ u8 padding;
+
+ std::u16string Name() const;
+};
+static_assert(sizeof(MiiInfo) == 0x58, "MiiInfo has incorrect size.");
+static_assert(std::has_unique_object_representations_v<MiiInfo>,
+ "All bits of MiiInfo must contribute to its value.");
+
+#pragma pack(push, 4)
+
+struct MiiInfoElement {
+ MiiInfoElement(const MiiInfo& info_, Source source_) : info{info_}, source{source_} {}
+
+ MiiInfo info{};
+ Source source{};
+};
+static_assert(sizeof(MiiInfoElement) == 0x5c, "MiiInfoElement has incorrect size.");
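
[Editor's note] The 0x5c size only holds because of the pack(4) region opened above: MiiInfo contains 8-byte members (Common::UUID), so on typical ABIs the trailing Source field would otherwise be padded out to 0x60. A miniature, self-contained illustration of the same effect (types below are invented for the example):

    #include <cstdint>

    // An 8-byte member followed by a 4-byte one, mirroring MiiInfo + Source in miniature.
    struct Unpacked {
        std::uint64_t payload;
        std::uint32_t tag;
    };
    static_assert(sizeof(Unpacked) == 16, "padded to 8-byte alignment on typical ABIs");

    #pragma pack(push, 4)
    struct Packed {
        std::uint64_t payload;
        std::uint32_t tag; // no trailing padding once alignment is capped at 4
    };
    #pragma pack(pop)
    static_assert(sizeof(Packed) == 12, "pack(4) removes the trailing padding");
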
+
+struct MiiStoreBitFields {
+ union {
+ u32 word_0{};
+
+ BitField<0, 8, u32> hair_type;
+ BitField<8, 7, u32> height;
+ BitField<15, 1, u32> mole_type;
+ BitField<16, 7, u32> build;
+ BitField<23, 1, HairFlip> hair_flip;
+ BitField<24, 7, u32> hair_color;
+ BitField<31, 1, u32> type;
+ };
+
+ union {
+ u32 word_1{};
+
+ BitField<0, 7, u32> eye_color;
+ BitField<7, 1, Gender> gender;
+ BitField<8, 7, u32> eyebrow_color;
+ BitField<16, 7, u32> mouth_color;
+ BitField<24, 7, u32> beard_color;
+ };
+
+ union {
+ u32 word_2{};
+
+ BitField<0, 7, u32> glasses_color;
+ BitField<8, 6, u32> eye_type;
+ BitField<14, 2, u32> region_move;
+ BitField<16, 6, u32> mouth_type;
+ BitField<22, 2, FontRegion> font_region;
+ BitField<24, 5, u32> eye_y;
+ BitField<29, 3, u32> glasses_scale;
+ };
+
+ union {
+ u32 word_3{};
+
+ BitField<0, 5, u32> eyebrow_type;
+ BitField<5, 3, MustacheType> mustache_type;
+ BitField<8, 5, u32> nose_type;
+ BitField<13, 3, BeardType> beard_type;
+ BitField<16, 5, u32> nose_y;
+ BitField<21, 3, u32> mouth_aspect;
+ BitField<24, 5, u32> mouth_y;
+ BitField<29, 3, u32> eyebrow_aspect;
+ };
+
+ union {
+ u32 word_4{};
+
+ BitField<0, 5, u32> mustache_y;
+ BitField<5, 3, u32> eye_rotate;
+ BitField<8, 5, u32> glasses_y;
+ BitField<13, 3, u32> eye_aspect;
+ BitField<16, 5, u32> mole_x;
+ BitField<21, 3, u32> eye_scale;
+ BitField<24, 5, u32> mole_y;
+ };
+
+ union {
+ u32 word_5{};
+
+ BitField<0, 5, u32> glasses_type;
+ BitField<8, 4, u32> favorite_color;
+ BitField<12, 4, u32> faceline_type;
+ BitField<16, 4, u32> faceline_color;
+ BitField<20, 4, u32> faceline_wrinkle;
+ BitField<24, 4, u32> faceline_makeup;
+ BitField<28, 4, u32> eye_x;
+ };
+
+ union {
+ u32 word_6{};
+
+ BitField<0, 4, u32> eyebrow_scale;
+ BitField<4, 4, u32> eyebrow_rotate;
+ BitField<8, 4, u32> eyebrow_x;
+ BitField<12, 4, u32> eyebrow_y;
+ BitField<16, 4, u32> nose_scale;
+ BitField<20, 4, u32> mouth_scale;
+ BitField<24, 4, u32> mustache_scale;
+ BitField<28, 4, u32> mole_scale;
+ };
+};
+static_assert(sizeof(MiiStoreBitFields) == 0x1c, "MiiStoreBitFields has incorrect size.");
+static_assert(std::is_trivially_copyable_v<MiiStoreBitFields>,
+ "MiiStoreBitFields is not trivially copyable.");
+
+struct MiiStoreData {
+ using Name = std::array<char16_t, 10>;
+
+ MiiStoreData();
+ MiiStoreData(const Name& name, const MiiStoreBitFields& bit_fields,
+ const Common::UUID& user_id);
+
+ // This corresponds to the above structure MiiStoreBitFields. I did it like this because the
+ // BitField<> type makes this (and anything that contains it) not trivially copyable, which is
+ // not suitable for our uses.
+ struct {
+ std::array<u8, 0x1C> data{};
+ static_assert(sizeof(MiiStoreBitFields) == sizeof(data), "data field has incorrect size.");
+
+ Name name{};
+ Common::UUID uuid{Common::INVALID_UUID};
+ } data;
+
+ u16 data_crc{};
+ u16 device_crc{};
+};
+static_assert(sizeof(MiiStoreData) == 0x44, "MiiStoreData has incorrect size.");
+
+struct MiiStoreDataElement {
+ MiiStoreData data{};
+ Source source{};
+};
+static_assert(sizeof(MiiStoreDataElement) == 0x48, "MiiStoreDataElement has incorrect size.");
+
+struct MiiDatabase {
+ u32 magic{}; // 'NFDB'
+ std::array<MiiStoreData, 0x64> miis{};
+ INSERT_PADDING_BYTES(1);
+ u8 count{};
+ u16 crc{};
+};
+static_assert(sizeof(MiiDatabase) == 0x1A98, "MiiDatabase has incorrect size.");
+
+struct RandomMiiValues {
+ std::array<u8, 0xbc> values{};
+};
+static_assert(sizeof(RandomMiiValues) == 0xbc, "RandomMiiValues has incorrect size.");
+
+struct RandomMiiData4 {
+ Gender gender{};
+ Age age{};
+ Race race{};
+ u32 values_count{};
+ std::array<u32, 47> values{};
+};
+static_assert(sizeof(RandomMiiData4) == 0xcc, "RandomMiiData4 has incorrect size.");
+
+struct RandomMiiData3 {
+ u32 arg_1;
+ u32 arg_2;
+ u32 values_count;
+ std::array<u32, 47> values{};
+};
+static_assert(sizeof(RandomMiiData3) == 0xc8, "RandomMiiData3 has incorrect size.");
+
+struct RandomMiiData2 {
+ u32 arg_1;
+ u32 values_count;
+ std::array<u32, 47> values{};
+};
+static_assert(sizeof(RandomMiiData2) == 0xc4, "RandomMiiData2 has incorrect size.");
+
+struct DefaultMii {
+ u32 face_type{};
+ u32 face_color{};
+ u32 face_wrinkle{};
+ u32 face_makeup{};
+ u32 hair_type{};
+ u32 hair_color{};
+ u32 hair_flip{};
+ u32 eye_type{};
+ u32 eye_color{};
+ u32 eye_scale{};
+ u32 eye_aspect{};
+ u32 eye_rotate{};
+ u32 eye_x{};
+ u32 eye_y{};
+ u32 eyebrow_type{};
+ u32 eyebrow_color{};
+ u32 eyebrow_scale{};
+ u32 eyebrow_aspect{};
+ u32 eyebrow_rotate{};
+ u32 eyebrow_x{};
+ u32 eyebrow_y{};
+ u32 nose_type{};
+ u32 nose_scale{};
+ u32 nose_y{};
+ u32 mouth_type{};
+ u32 mouth_color{};
+ u32 mouth_scale{};
+ u32 mouth_aspect{};
+ u32 mouth_y{};
+ u32 mustache_type{};
+ u32 beard_type{};
+ u32 beard_color{};
+ u32 mustache_scale{};
+ u32 mustache_y{};
+ u32 glasses_type{};
+ u32 glasses_color{};
+ u32 glasses_scale{};
+ u32 glasses_y{};
+ u32 mole_type{};
+ u32 mole_scale{};
+ u32 mole_x{};
+ u32 mole_y{};
+ u32 height{};
+ u32 weight{};
+ Gender gender{};
+ u32 favorite_color{};
+ u32 region{};
+ FontRegion font_region{};
+ u32 type{};
+ INSERT_PADDING_WORDS(5);
+};
+static_assert(sizeof(DefaultMii) == 0xd8, "DefaultMii has incorrect size.");
+
+#pragma pack(pop)
+
+// The Mii manager is responsible for loading and storing the Miis to the database in NAND along
+// with providing an easy interface for HLE emulation of the mii service.
+class MiiManager {
+public:
+ MiiManager();
+
+ bool CheckAndResetUpdateCounter(SourceFlag source_flag, u64& current_update_counter);
+ bool IsFullDatabase() const;
+ u32 GetCount(SourceFlag source_flag) const;
+ ResultVal<MiiInfo> UpdateLatest(const MiiInfo& info, SourceFlag source_flag);
+ MiiInfo BuildRandom(Age age, Gender gender, Race race);
+ MiiInfo BuildDefault(std::size_t index);
+ ResultVal<std::vector<MiiInfoElement>> GetDefault(SourceFlag source_flag);
+ ResultCode GetIndex(const MiiInfo& info, u32& index);
+
+private:
+ const Common::UUID user_id{Common::INVALID_UUID};
+ u64 update_counter{};
+};
+
+} // namespace Service::Mii
diff --git a/src/core/hle/service/mii/raw_data.h b/src/core/hle/service/mii/raw_data.h
index 0e35d69d2..a0d2b9d3a 100644
--- a/src/core/hle/service/mii/raw_data.h
+++ b/src/core/hle/service/mii/raw_data.h
@@ -7,7 +7,7 @@
#include <array>
#include "common/common_types.h"
-#include "core/hle/service/mii/manager.h"
+#include "core/hle/service/mii/mii_manager.h"
namespace Service::Mii::RawData {
diff --git a/src/core/hle/service/nifm/nifm.cpp b/src/core/hle/service/nifm/nifm.cpp
index f03b2666a..e742db48f 100644
--- a/src/core/hle/service/nifm/nifm.cpp
+++ b/src/core/hle/service/nifm/nifm.cpp
@@ -179,7 +179,7 @@ private:
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(ResultSuccess);
- if (Settings::values.bcat_backend == "none") {
+ if (Settings::values.bcat_backend.GetValue() == "none") {
rb.PushEnum(RequestState::NotSubmitted);
} else {
rb.PushEnum(RequestState::Connected);
@@ -384,7 +384,7 @@ private:
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(ResultSuccess);
- if (Settings::values.bcat_backend == "none") {
+ if (Settings::values.bcat_backend.GetValue() == "none") {
rb.Push<u8>(0);
} else {
rb.Push<u8>(1);
@@ -395,7 +395,7 @@ private:
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(ResultSuccess);
- if (Settings::values.bcat_backend == "none") {
+ if (Settings::values.bcat_backend.GetValue() == "none") {
rb.Push<u8>(0);
} else {
rb.Push<u8>(1);
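
[Editor's note] The .GetValue() calls above reflect bcat_backend becoming a wrapped setting rather than a plain std::string (the actual wrapper lives in common/settings.h, outside this excerpt). A hedged, self-contained sketch of the kind of wrapper that makes the explicit accessor necessary; Setting here is a stand-in, not yuzu's class:

    #include <string>
    #include <utility>

    // Stand-in wrapper: comparisons now go through GetValue() instead of
    // comparing the wrapper object itself.
    template <typename T>
    class Setting {
    public:
        explicit Setting(T default_value) : value{std::move(default_value)} {}
        const T& GetValue() const { return value; }
        void SetValue(T new_value) { value = std::move(new_value); }

    private:
        T value;
    };

    int main() {
        const Setting<std::string> bcat_backend{"none"};
        return bcat_backend.GetValue() == "none" ? 0 : 1;
    }
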
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
index 2cc0da124..ce6065db2 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
@@ -54,7 +54,7 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3
system.GetPerfStats().EndSystemFrame();
system.GPU().SwapBuffers(&framebuffer);
- system.FrameLimiter().DoFrameLimiting(system.CoreTiming().GetGlobalTimeUs());
+ system.SpeedLimiter().DoSpeedLimiting(system.CoreTiming().GetGlobalTimeUs());
system.GetPerfStats().BeginSystemFrame();
}
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
index 98e6296f1..1403a39d0 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
@@ -19,26 +19,29 @@
namespace Service::Nvidia::Devices {
namespace {
-// Splice vectors will copy count amount of type T from the input vector into the dst vector.
+// Copies count elements of type T from the input vector into the dst vector.
+// Returns the number of bytes written into dst.
template <typename T>
-std::size_t SpliceVectors(const std::vector<u8>& input, std::vector<T>& dst, std::size_t count,
- std::size_t offset) {
- if (!dst.empty()) {
- std::memcpy(dst.data(), input.data() + offset, count * sizeof(T));
+std::size_t SliceVectors(const std::vector<u8>& input, std::vector<T>& dst, std::size_t count,
+ std::size_t offset) {
+ if (dst.empty()) {
+ return 0;
}
- return 0;
+ const size_t bytes_copied = count * sizeof(T);
+ std::memcpy(dst.data(), input.data() + offset, bytes_copied);
+ return bytes_copied;
}
-// Write vectors will write data to the output buffer
+// Writes the data in src to an offset into the dst vector. The offset is specified in bytes.
+// Returns the number of bytes written into dst.
template <typename T>
std::size_t WriteVectors(std::vector<u8>& dst, const std::vector<T>& src, std::size_t offset) {
if (src.empty()) {
return 0;
- } else {
- std::memcpy(dst.data() + offset, src.data(), src.size() * sizeof(T));
- offset += src.size() * sizeof(T);
- return offset;
}
+ const size_t bytes_copied = src.size() * sizeof(T);
+ std::memcpy(dst.data() + offset, src.data(), bytes_copied);
+ return bytes_copied;
}
} // Anonymous namespace
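
[Editor's note] Shown in isolation, the new contract (return the byte count, let the caller accumulate the offset with +=) composes as below. The buffer sizes and the 8-byte header are illustrative stand-ins for the IoctlSubmit layout, not values taken from the driver:

    #include <cstdint>
    #include <cstring>
    #include <vector>

    template <typename T>
    std::size_t SliceVectors(const std::vector<std::uint8_t>& input, std::vector<T>& dst,
                             std::size_t count, std::size_t offset) {
        if (dst.empty()) {
            return 0;
        }
        const std::size_t bytes_copied = count * sizeof(T);
        std::memcpy(dst.data(), input.data() + offset, bytes_copied);
        return bytes_copied;
    }

    int main() {
        const std::vector<std::uint8_t> input(64, 0xAB); // stand-in for the ioctl input buffer
        std::vector<std::uint32_t> first(4);
        std::vector<std::uint16_t> second(2);

        std::size_t offset = 8; // stand-in for sizeof(IoctlSubmit)
        offset += SliceVectors(input, first, first.size(), offset);   // +16 bytes
        offset += SliceVectors(input, second, second.size(), offset); // +4 bytes
        return offset == 28 ? 0 : 1;
    }
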
@@ -62,7 +65,6 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u
LOG_DEBUG(Service_NVDRV, "called NVDEC Submit, cmd_buffer_count={}", params.cmd_buffer_count);
// Instantiate param buffers
- std::size_t offset = sizeof(IoctlSubmit);
std::vector<CommandBuffer> command_buffers(params.cmd_buffer_count);
std::vector<Reloc> relocs(params.relocation_count);
std::vector<u32> reloc_shifts(params.relocation_count);
@@ -70,13 +72,14 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u
std::vector<SyncptIncr> wait_checks(params.syncpoint_count);
std::vector<Fence> fences(params.fence_count);
- // Splice input into their respective buffers
- offset = SpliceVectors(input, command_buffers, params.cmd_buffer_count, offset);
- offset = SpliceVectors(input, relocs, params.relocation_count, offset);
- offset = SpliceVectors(input, reloc_shifts, params.relocation_count, offset);
- offset = SpliceVectors(input, syncpt_increments, params.syncpoint_count, offset);
- offset = SpliceVectors(input, wait_checks, params.syncpoint_count, offset);
- offset = SpliceVectors(input, fences, params.fence_count, offset);
+ // Slice input into their respective buffers
+ std::size_t offset = sizeof(IoctlSubmit);
+ offset += SliceVectors(input, command_buffers, params.cmd_buffer_count, offset);
+ offset += SliceVectors(input, relocs, params.relocation_count, offset);
+ offset += SliceVectors(input, reloc_shifts, params.relocation_count, offset);
+ offset += SliceVectors(input, syncpt_increments, params.syncpoint_count, offset);
+ offset += SliceVectors(input, wait_checks, params.syncpoint_count, offset);
+ offset += SliceVectors(input, fences, params.fence_count, offset);
auto& gpu = system.GPU();
if (gpu.UseNvdec()) {
@@ -88,35 +91,27 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u
}
}
for (const auto& cmd_buffer : command_buffers) {
- auto object = nvmap_dev->GetObject(cmd_buffer.memory_id);
+ const auto object = nvmap_dev->GetObject(cmd_buffer.memory_id);
ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;);
- const auto map = FindBufferMap(object->dma_map_addr);
- if (!map) {
- LOG_ERROR(Service_NVDRV, "Tried to submit an invalid offset 0x{:X} dma 0x{:X}",
- object->addr, object->dma_map_addr);
- return NvResult::Success;
- }
Tegra::ChCommandHeaderList cmdlist(cmd_buffer.word_count);
- gpu.MemoryManager().ReadBlock(map->StartAddr() + cmd_buffer.offset, cmdlist.data(),
- cmdlist.size() * sizeof(u32));
+ system.Memory().ReadBlock(object->addr + cmd_buffer.offset, cmdlist.data(),
+ cmdlist.size() * sizeof(u32));
gpu.PushCommandBuffer(cmdlist);
}
if (gpu.UseNvdec()) {
-
fences[0].value = syncpoint_manager.IncreaseSyncpoint(fences[0].id, 1);
-
Tegra::ChCommandHeaderList cmdlist{{(4 << 28) | fences[0].id}};
gpu.PushCommandBuffer(cmdlist);
}
std::memcpy(output.data(), &params, sizeof(IoctlSubmit));
// Some games expect command_buffers to be written back
offset = sizeof(IoctlSubmit);
- offset = WriteVectors(output, command_buffers, offset);
- offset = WriteVectors(output, relocs, offset);
- offset = WriteVectors(output, reloc_shifts, offset);
- offset = WriteVectors(output, syncpt_increments, offset);
- offset = WriteVectors(output, wait_checks, offset);
- offset = WriteVectors(output, fences, offset);
+ offset += WriteVectors(output, command_buffers, offset);
+ offset += WriteVectors(output, relocs, offset);
+ offset += WriteVectors(output, reloc_shifts, offset);
+ offset += WriteVectors(output, syncpt_increments, offset);
+ offset += WriteVectors(output, wait_checks, offset);
+ offset += WriteVectors(output, fences, offset);
return NvResult::Success;
}
@@ -148,14 +143,14 @@ NvResult nvhost_nvdec_common::MapBuffer(const std::vector<u8>& input, std::vecto
std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer));
std::vector<MapBufferEntry> cmd_buffer_handles(params.num_entries);
- SpliceVectors(input, cmd_buffer_handles, params.num_entries, sizeof(IoctlMapBuffer));
+ SliceVectors(input, cmd_buffer_handles, params.num_entries, sizeof(IoctlMapBuffer));
auto& gpu = system.GPU();
- for (auto& cmf_buff : cmd_buffer_handles) {
- auto object{nvmap_dev->GetObject(cmf_buff.map_handle)};
+ for (auto& cmd_buffer : cmd_buffer_handles) {
+ auto object{nvmap_dev->GetObject(cmd_buffer.map_handle)};
if (!object) {
- LOG_ERROR(Service_NVDRV, "invalid cmd_buffer nvmap_handle={:X}", cmf_buff.map_handle);
+ LOG_ERROR(Service_NVDRV, "invalid cmd_buffer nvmap_handle={:X}", cmd_buffer.map_handle);
std::memcpy(output.data(), &params, output.size());
return NvResult::InvalidState;
}
@@ -170,7 +165,7 @@ NvResult nvhost_nvdec_common::MapBuffer(const std::vector<u8>& input, std::vecto
if (!object->dma_map_addr) {
LOG_ERROR(Service_NVDRV, "failed to map size={}", object->size);
} else {
- cmf_buff.map_address = object->dma_map_addr;
+ cmd_buffer.map_address = object->dma_map_addr;
AddBufferMap(object->dma_map_addr, object->size, object->addr,
object->status == nvmap::Object::Status::Allocated);
}
@@ -186,14 +181,14 @@ NvResult nvhost_nvdec_common::UnmapBuffer(const std::vector<u8>& input, std::vec
IoctlMapBuffer params{};
std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer));
std::vector<MapBufferEntry> cmd_buffer_handles(params.num_entries);
- SpliceVectors(input, cmd_buffer_handles, params.num_entries, sizeof(IoctlMapBuffer));
+ SliceVectors(input, cmd_buffer_handles, params.num_entries, sizeof(IoctlMapBuffer));
auto& gpu = system.GPU();
- for (auto& cmf_buff : cmd_buffer_handles) {
- const auto object{nvmap_dev->GetObject(cmf_buff.map_handle)};
+ for (auto& cmd_buffer : cmd_buffer_handles) {
+ const auto object{nvmap_dev->GetObject(cmd_buffer.map_handle)};
if (!object) {
- LOG_ERROR(Service_NVDRV, "invalid cmd_buffer nvmap_handle={:X}", cmf_buff.map_handle);
+ LOG_ERROR(Service_NVDRV, "invalid cmd_buffer nvmap_handle={:X}", cmd_buffer.map_handle);
std::memcpy(output.data(), &params, output.size());
return NvResult::InvalidState;
}
diff --git a/src/core/hle/service/nvdrv/interface.cpp b/src/core/hle/service/nvdrv/interface.cpp
deleted file mode 100644
index e4d495000..000000000
--- a/src/core/hle/service/nvdrv/interface.cpp
+++ /dev/null
@@ -1,259 +0,0 @@
-// Copyright 2018 yuzu emulator team
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <cinttypes>
-#include "common/logging/log.h"
-#include "core/core.h"
-#include "core/hle/ipc_helpers.h"
-#include "core/hle/kernel/k_readable_event.h"
-#include "core/hle/kernel/k_thread.h"
-#include "core/hle/kernel/k_writable_event.h"
-#include "core/hle/kernel/kernel.h"
-#include "core/hle/service/nvdrv/interface.h"
-#include "core/hle/service/nvdrv/nvdata.h"
-#include "core/hle/service/nvdrv/nvdrv.h"
-
-namespace Service::Nvidia {
-
-void NVDRV::SignalGPUInterruptSyncpt(const u32 syncpoint_id, const u32 value) {
- nvdrv->SignalSyncpt(syncpoint_id, value);
-}
-
-void NVDRV::Open(Kernel::HLERequestContext& ctx) {
- LOG_DEBUG(Service_NVDRV, "called");
- IPC::ResponseBuilder rb{ctx, 4};
- rb.Push(ResultSuccess);
-
- if (!is_initialized) {
- rb.Push<DeviceFD>(0);
- rb.PushEnum(NvResult::NotInitialized);
-
- LOG_ERROR(Service_NVDRV, "NvServices is not initalized!");
- return;
- }
-
- const auto& buffer = ctx.ReadBuffer();
- const std::string device_name(buffer.begin(), buffer.end());
-
- if (device_name == "/dev/nvhost-prof-gpu") {
- rb.Push<DeviceFD>(0);
- rb.PushEnum(NvResult::NotSupported);
-
- LOG_WARNING(Service_NVDRV, "/dev/nvhost-prof-gpu cannot be opened in production");
- return;
- }
-
- DeviceFD fd = nvdrv->Open(device_name);
-
- rb.Push<DeviceFD>(fd);
- rb.PushEnum(fd != INVALID_NVDRV_FD ? NvResult::Success : NvResult::FileOperationFailed);
-}
-
-void NVDRV::ServiceError(Kernel::HLERequestContext& ctx, NvResult result) {
- IPC::ResponseBuilder rb{ctx, 3};
- rb.Push(ResultSuccess);
- rb.PushEnum(result);
-}
-
-void NVDRV::Ioctl1(Kernel::HLERequestContext& ctx) {
- IPC::RequestParser rp{ctx};
- const auto fd = rp.Pop<DeviceFD>();
- const auto command = rp.PopRaw<Ioctl>();
- LOG_DEBUG(Service_NVDRV, "called fd={}, ioctl=0x{:08X}", fd, command.raw);
-
- if (!is_initialized) {
- ServiceError(ctx, NvResult::NotInitialized);
- LOG_ERROR(Service_NVDRV, "NvServices is not initalized!");
- return;
- }
-
- // Check device
- std::vector<u8> output_buffer(ctx.GetWriteBufferSize(0));
- const auto input_buffer = ctx.ReadBuffer(0);
-
- const auto nv_result = nvdrv->Ioctl1(fd, command, input_buffer, output_buffer);
- if (command.is_out != 0) {
- ctx.WriteBuffer(output_buffer);
- }
-
- IPC::ResponseBuilder rb{ctx, 3};
- rb.Push(ResultSuccess);
- rb.PushEnum(nv_result);
-}
-
-void NVDRV::Ioctl2(Kernel::HLERequestContext& ctx) {
- IPC::RequestParser rp{ctx};
- const auto fd = rp.Pop<DeviceFD>();
- const auto command = rp.PopRaw<Ioctl>();
- LOG_DEBUG(Service_NVDRV, "called fd={}, ioctl=0x{:08X}", fd, command.raw);
-
- if (!is_initialized) {
- ServiceError(ctx, NvResult::NotInitialized);
- LOG_ERROR(Service_NVDRV, "NvServices is not initalized!");
- return;
- }
-
- const auto input_buffer = ctx.ReadBuffer(0);
- const auto input_inlined_buffer = ctx.ReadBuffer(1);
- std::vector<u8> output_buffer(ctx.GetWriteBufferSize(0));
-
- const auto nv_result =
- nvdrv->Ioctl2(fd, command, input_buffer, input_inlined_buffer, output_buffer);
- if (command.is_out != 0) {
- ctx.WriteBuffer(output_buffer);
- }
-
- IPC::ResponseBuilder rb{ctx, 3};
- rb.Push(ResultSuccess);
- rb.PushEnum(nv_result);
-}
-
-void NVDRV::Ioctl3(Kernel::HLERequestContext& ctx) {
- IPC::RequestParser rp{ctx};
- const auto fd = rp.Pop<DeviceFD>();
- const auto command = rp.PopRaw<Ioctl>();
- LOG_DEBUG(Service_NVDRV, "called fd={}, ioctl=0x{:08X}", fd, command.raw);
-
- if (!is_initialized) {
- ServiceError(ctx, NvResult::NotInitialized);
- LOG_ERROR(Service_NVDRV, "NvServices is not initalized!");
- return;
- }
-
- const auto input_buffer = ctx.ReadBuffer(0);
- std::vector<u8> output_buffer(ctx.GetWriteBufferSize(0));
- std::vector<u8> output_buffer_inline(ctx.GetWriteBufferSize(1));
-
- const auto nv_result =
- nvdrv->Ioctl3(fd, command, input_buffer, output_buffer, output_buffer_inline);
- if (command.is_out != 0) {
- ctx.WriteBuffer(output_buffer, 0);
- ctx.WriteBuffer(output_buffer_inline, 1);
- }
-
- IPC::ResponseBuilder rb{ctx, 3};
- rb.Push(ResultSuccess);
- rb.PushEnum(nv_result);
-}
-
-void NVDRV::Close(Kernel::HLERequestContext& ctx) {
- LOG_DEBUG(Service_NVDRV, "called");
-
- if (!is_initialized) {
- ServiceError(ctx, NvResult::NotInitialized);
- LOG_ERROR(Service_NVDRV, "NvServices is not initalized!");
- return;
- }
-
- IPC::RequestParser rp{ctx};
- const auto fd = rp.Pop<DeviceFD>();
- const auto result = nvdrv->Close(fd);
-
- IPC::ResponseBuilder rb{ctx, 3};
- rb.Push(ResultSuccess);
- rb.PushEnum(result);
-}
-
-void NVDRV::Initialize(Kernel::HLERequestContext& ctx) {
- LOG_WARNING(Service_NVDRV, "(STUBBED) called");
-
- is_initialized = true;
-
- IPC::ResponseBuilder rb{ctx, 3};
- rb.Push(ResultSuccess);
- rb.PushEnum(NvResult::Success);
-}
-
-void NVDRV::QueryEvent(Kernel::HLERequestContext& ctx) {
- IPC::RequestParser rp{ctx};
- const auto fd = rp.Pop<DeviceFD>();
- const auto event_id = rp.Pop<u32>() & 0x00FF;
- LOG_WARNING(Service_NVDRV, "(STUBBED) called, fd={:X}, event_id={:X}", fd, event_id);
-
- if (!is_initialized) {
- ServiceError(ctx, NvResult::NotInitialized);
- LOG_ERROR(Service_NVDRV, "NvServices is not initalized!");
- return;
- }
-
- const auto nv_result = nvdrv->VerifyFD(fd);
- if (nv_result != NvResult::Success) {
- LOG_ERROR(Service_NVDRV, "Invalid FD specified DeviceFD={}!", fd);
- ServiceError(ctx, nv_result);
- return;
- }
-
- if (event_id < MaxNvEvents) {
- IPC::ResponseBuilder rb{ctx, 3, 1};
- rb.Push(ResultSuccess);
- auto& event = nvdrv->GetEvent(event_id);
- event.Clear();
- rb.PushCopyObjects(event);
- rb.PushEnum(NvResult::Success);
- } else {
- IPC::ResponseBuilder rb{ctx, 3};
- rb.Push(ResultSuccess);
- rb.PushEnum(NvResult::BadParameter);
- }
-}
-
-void NVDRV::SetAruid(Kernel::HLERequestContext& ctx) {
- IPC::RequestParser rp{ctx};
- pid = rp.Pop<u64>();
- LOG_WARNING(Service_NVDRV, "(STUBBED) called, pid=0x{:X}", pid);
-
- IPC::ResponseBuilder rb{ctx, 3};
- rb.Push(ResultSuccess);
- rb.PushEnum(NvResult::Success);
-}
-
-void NVDRV::SetGraphicsFirmwareMemoryMarginEnabled(Kernel::HLERequestContext& ctx) {
- LOG_WARNING(Service_NVDRV, "(STUBBED) called");
-
- IPC::ResponseBuilder rb{ctx, 2};
- rb.Push(ResultSuccess);
-}
-
-void NVDRV::GetStatus(Kernel::HLERequestContext& ctx) {
- LOG_WARNING(Service_NVDRV, "(STUBBED) called");
-
- IPC::ResponseBuilder rb{ctx, 3};
- rb.Push(ResultSuccess);
- rb.PushEnum(NvResult::Success);
-}
-
-void NVDRV::DumpGraphicsMemoryInfo(Kernel::HLERequestContext& ctx) {
- // According to SwitchBrew, this has no inputs and no outputs, so effectively does nothing on
- // retail hardware.
- LOG_DEBUG(Service_NVDRV, "called");
-
- IPC::ResponseBuilder rb{ctx, 2};
- rb.Push(ResultSuccess);
-}
-
-NVDRV::NVDRV(Core::System& system_, std::shared_ptr<Module> nvdrv_, const char* name)
- : ServiceFramework{system_, name}, nvdrv{std::move(nvdrv_)} {
- static const FunctionInfo functions[] = {
- {0, &NVDRV::Open, "Open"},
- {1, &NVDRV::Ioctl1, "Ioctl"},
- {2, &NVDRV::Close, "Close"},
- {3, &NVDRV::Initialize, "Initialize"},
- {4, &NVDRV::QueryEvent, "QueryEvent"},
- {5, nullptr, "MapSharedMem"},
- {6, &NVDRV::GetStatus, "GetStatus"},
- {7, nullptr, "SetAruidForTest"},
- {8, &NVDRV::SetAruid, "SetAruid"},
- {9, &NVDRV::DumpGraphicsMemoryInfo, "DumpGraphicsMemoryInfo"},
- {10, nullptr, "InitializeDevtools"},
- {11, &NVDRV::Ioctl2, "Ioctl2"},
- {12, &NVDRV::Ioctl3, "Ioctl3"},
- {13, &NVDRV::SetGraphicsFirmwareMemoryMarginEnabled,
- "SetGraphicsFirmwareMemoryMarginEnabled"},
- };
- RegisterHandlers(functions);
-}
-
-NVDRV::~NVDRV() = default;
-
-} // namespace Service::Nvidia
diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp
index 74796dce1..ff405099a 100644
--- a/src/core/hle/service/nvdrv/nvdrv.cpp
+++ b/src/core/hle/service/nvdrv/nvdrv.cpp
@@ -20,8 +20,8 @@
#include "core/hle/service/nvdrv/devices/nvhost_nvjpg.h"
#include "core/hle/service/nvdrv/devices/nvhost_vic.h"
#include "core/hle/service/nvdrv/devices/nvmap.h"
-#include "core/hle/service/nvdrv/interface.h"
#include "core/hle/service/nvdrv/nvdrv.h"
+#include "core/hle/service/nvdrv/nvdrv_interface.h"
#include "core/hle/service/nvdrv/nvmemp.h"
#include "core/hle/service/nvdrv/syncpoint_manager.h"
#include "core/hle/service/nvflinger/nvflinger.h"
@@ -39,11 +39,11 @@ void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger
nvflinger.SetNVDrvInstance(module_);
}
-Module::Module(Core::System& system) : syncpoint_manager{system.GPU()} {
- auto& kernel = system.Kernel();
+Module::Module(Core::System& system)
+ : syncpoint_manager{system.GPU()}, service_context{system, "nvdrv"} {
for (u32 i = 0; i < MaxNvEvents; i++) {
- events_interface.events[i].event = Kernel::KEvent::Create(kernel);
- events_interface.events[i].event->Initialize(fmt::format("NVDRV::NvEvent_{}", i));
+ events_interface.events[i].event =
+ service_context.CreateEvent(fmt::format("NVDRV::NvEvent_{}", i));
events_interface.status[i] = EventState::Free;
events_interface.registered[i] = false;
}
@@ -65,8 +65,7 @@ Module::Module(Core::System& system) : syncpoint_manager{system.GPU()} {
Module::~Module() {
for (u32 i = 0; i < MaxNvEvents; i++) {
- events_interface.events[i].event->Close();
- events_interface.events[i].event = nullptr;
+ service_context.CloseEvent(events_interface.events[i].event);
}
}
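
[Editor's note] The ownership pattern introduced here, reduced to a self-contained mock: the per-service context creates the events and is the only thing that releases them. MockEvent and MockServiceContext are invented for illustration; only the CreateEvent/CloseEvent call shape is taken from the diff above:

    #include <array>
    #include <cstddef>
    #include <string>
    #include <utility>

    struct MockEvent {
        std::string name;
    };

    class MockServiceContext {
    public:
        MockEvent* CreateEvent(std::string name) { return new MockEvent{std::move(name)}; }
        void CloseEvent(MockEvent* event) { delete event; }
    };

    class ExampleModule {
    public:
        ExampleModule() {
            for (std::size_t i = 0; i < events.size(); ++i) {
                events[i] = service_context.CreateEvent("Example::Event_" + std::to_string(i));
            }
        }
        ~ExampleModule() {
            for (auto* event : events) {
                service_context.CloseEvent(event);
            }
        }

    private:
        MockServiceContext service_context;
        std::array<MockEvent*, 4> events{};
    };

    int main() {
        ExampleModule module; // events created in the ctor, released via the context in the dtor
    }
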
diff --git a/src/core/hle/service/nvdrv/nvdrv.h b/src/core/hle/service/nvdrv/nvdrv.h
index a43ceb7ae..e2a1dde5b 100644
--- a/src/core/hle/service/nvdrv/nvdrv.h
+++ b/src/core/hle/service/nvdrv/nvdrv.h
@@ -9,6 +9,7 @@
#include <vector>
#include "common/common_types.h"
+#include "core/hle/service/kernel_helpers.h"
#include "core/hle/service/nvdrv/nvdata.h"
#include "core/hle/service/nvdrv/syncpoint_manager.h"
#include "core/hle/service/service.h"
@@ -154,6 +155,8 @@ private:
std::unordered_map<std::string, std::shared_ptr<Devices::nvdevice>> devices;
EventInterface events_interface;
+
+ KernelHelpers::ServiceContext service_context;
};
/// Registers all NVDRV services with the specified service manager.
diff --git a/src/core/hle/service/nvdrv/nvdrv_interface.cpp b/src/core/hle/service/nvdrv/nvdrv_interface.cpp
new file mode 100644
index 000000000..d61fb73dc
--- /dev/null
+++ b/src/core/hle/service/nvdrv/nvdrv_interface.cpp
@@ -0,0 +1,259 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cinttypes>
+#include "common/logging/log.h"
+#include "core/core.h"
+#include "core/hle/ipc_helpers.h"
+#include "core/hle/kernel/k_readable_event.h"
+#include "core/hle/kernel/k_thread.h"
+#include "core/hle/kernel/k_writable_event.h"
+#include "core/hle/kernel/kernel.h"
+#include "core/hle/service/nvdrv/nvdata.h"
+#include "core/hle/service/nvdrv/nvdrv.h"
+#include "core/hle/service/nvdrv/nvdrv_interface.h"
+
+namespace Service::Nvidia {
+
+void NVDRV::SignalGPUInterruptSyncpt(const u32 syncpoint_id, const u32 value) {
+ nvdrv->SignalSyncpt(syncpoint_id, value);
+}
+
+void NVDRV::Open(Kernel::HLERequestContext& ctx) {
+ LOG_DEBUG(Service_NVDRV, "called");
+ IPC::ResponseBuilder rb{ctx, 4};
+ rb.Push(ResultSuccess);
+
+ if (!is_initialized) {
+ rb.Push<DeviceFD>(0);
+ rb.PushEnum(NvResult::NotInitialized);
+
+ LOG_ERROR(Service_NVDRV, "NvServices is not initalized!");
+ return;
+ }
+
+ const auto& buffer = ctx.ReadBuffer();
+ const std::string device_name(buffer.begin(), buffer.end());
+
+ if (device_name == "/dev/nvhost-prof-gpu") {
+ rb.Push<DeviceFD>(0);
+ rb.PushEnum(NvResult::NotSupported);
+
+ LOG_WARNING(Service_NVDRV, "/dev/nvhost-prof-gpu cannot be opened in production");
+ return;
+ }
+
+ DeviceFD fd = nvdrv->Open(device_name);
+
+ rb.Push<DeviceFD>(fd);
+ rb.PushEnum(fd != INVALID_NVDRV_FD ? NvResult::Success : NvResult::FileOperationFailed);
+}
+
+void NVDRV::ServiceError(Kernel::HLERequestContext& ctx, NvResult result) {
+ IPC::ResponseBuilder rb{ctx, 3};
+ rb.Push(ResultSuccess);
+ rb.PushEnum(result);
+}
+
+void NVDRV::Ioctl1(Kernel::HLERequestContext& ctx) {
+ IPC::RequestParser rp{ctx};
+ const auto fd = rp.Pop<DeviceFD>();
+ const auto command = rp.PopRaw<Ioctl>();
+ LOG_DEBUG(Service_NVDRV, "called fd={}, ioctl=0x{:08X}", fd, command.raw);
+
+ if (!is_initialized) {
+ ServiceError(ctx, NvResult::NotInitialized);
+ LOG_ERROR(Service_NVDRV, "NvServices is not initalized!");
+ return;
+ }
+
+ // Check device
+ std::vector<u8> output_buffer(ctx.GetWriteBufferSize(0));
+ const auto input_buffer = ctx.ReadBuffer(0);
+
+ const auto nv_result = nvdrv->Ioctl1(fd, command, input_buffer, output_buffer);
+ if (command.is_out != 0) {
+ ctx.WriteBuffer(output_buffer);
+ }
+
+ IPC::ResponseBuilder rb{ctx, 3};
+ rb.Push(ResultSuccess);
+ rb.PushEnum(nv_result);
+}
+
+void NVDRV::Ioctl2(Kernel::HLERequestContext& ctx) {
+ IPC::RequestParser rp{ctx};
+ const auto fd = rp.Pop<DeviceFD>();
+ const auto command = rp.PopRaw<Ioctl>();
+ LOG_DEBUG(Service_NVDRV, "called fd={}, ioctl=0x{:08X}", fd, command.raw);
+
+ if (!is_initialized) {
+ ServiceError(ctx, NvResult::NotInitialized);
+ LOG_ERROR(Service_NVDRV, "NvServices is not initalized!");
+ return;
+ }
+
+ const auto input_buffer = ctx.ReadBuffer(0);
+ const auto input_inlined_buffer = ctx.ReadBuffer(1);
+ std::vector<u8> output_buffer(ctx.GetWriteBufferSize(0));
+
+ const auto nv_result =
+ nvdrv->Ioctl2(fd, command, input_buffer, input_inlined_buffer, output_buffer);
+ if (command.is_out != 0) {
+ ctx.WriteBuffer(output_buffer);
+ }
+
+ IPC::ResponseBuilder rb{ctx, 3};
+ rb.Push(ResultSuccess);
+ rb.PushEnum(nv_result);
+}
+
+void NVDRV::Ioctl3(Kernel::HLERequestContext& ctx) {
+ IPC::RequestParser rp{ctx};
+ const auto fd = rp.Pop<DeviceFD>();
+ const auto command = rp.PopRaw<Ioctl>();
+ LOG_DEBUG(Service_NVDRV, "called fd={}, ioctl=0x{:08X}", fd, command.raw);
+
+ if (!is_initialized) {
+ ServiceError(ctx, NvResult::NotInitialized);
+ LOG_ERROR(Service_NVDRV, "NvServices is not initalized!");
+ return;
+ }
+
+ const auto input_buffer = ctx.ReadBuffer(0);
+ std::vector<u8> output_buffer(ctx.GetWriteBufferSize(0));
+ std::vector<u8> output_buffer_inline(ctx.GetWriteBufferSize(1));
+
+ const auto nv_result =
+ nvdrv->Ioctl3(fd, command, input_buffer, output_buffer, output_buffer_inline);
+ if (command.is_out != 0) {
+ ctx.WriteBuffer(output_buffer, 0);
+ ctx.WriteBuffer(output_buffer_inline, 1);
+ }
+
+ IPC::ResponseBuilder rb{ctx, 3};
+ rb.Push(ResultSuccess);
+ rb.PushEnum(nv_result);
+}
+
+void NVDRV::Close(Kernel::HLERequestContext& ctx) {
+ LOG_DEBUG(Service_NVDRV, "called");
+
+ if (!is_initialized) {
+ ServiceError(ctx, NvResult::NotInitialized);
+ LOG_ERROR(Service_NVDRV, "NvServices is not initalized!");
+ return;
+ }
+
+ IPC::RequestParser rp{ctx};
+ const auto fd = rp.Pop<DeviceFD>();
+ const auto result = nvdrv->Close(fd);
+
+ IPC::ResponseBuilder rb{ctx, 3};
+ rb.Push(ResultSuccess);
+ rb.PushEnum(result);
+}
+
+void NVDRV::Initialize(Kernel::HLERequestContext& ctx) {
+ LOG_WARNING(Service_NVDRV, "(STUBBED) called");
+
+ is_initialized = true;
+
+ IPC::ResponseBuilder rb{ctx, 3};
+ rb.Push(ResultSuccess);
+ rb.PushEnum(NvResult::Success);
+}
+
+void NVDRV::QueryEvent(Kernel::HLERequestContext& ctx) {
+ IPC::RequestParser rp{ctx};
+ const auto fd = rp.Pop<DeviceFD>();
+ const auto event_id = rp.Pop<u32>() & 0x00FF;
+ LOG_WARNING(Service_NVDRV, "(STUBBED) called, fd={:X}, event_id={:X}", fd, event_id);
+
+ if (!is_initialized) {
+ ServiceError(ctx, NvResult::NotInitialized);
+ LOG_ERROR(Service_NVDRV, "NvServices is not initalized!");
+ return;
+ }
+
+ const auto nv_result = nvdrv->VerifyFD(fd);
+ if (nv_result != NvResult::Success) {
+ LOG_ERROR(Service_NVDRV, "Invalid FD specified DeviceFD={}!", fd);
+ ServiceError(ctx, nv_result);
+ return;
+ }
+
+ if (event_id < MaxNvEvents) {
+ IPC::ResponseBuilder rb{ctx, 3, 1};
+ rb.Push(ResultSuccess);
+ auto& event = nvdrv->GetEvent(event_id);
+ event.Clear();
+ rb.PushCopyObjects(event);
+ rb.PushEnum(NvResult::Success);
+ } else {
+ IPC::ResponseBuilder rb{ctx, 3};
+ rb.Push(ResultSuccess);
+ rb.PushEnum(NvResult::BadParameter);
+ }
+}
+
+void NVDRV::SetAruid(Kernel::HLERequestContext& ctx) {
+ IPC::RequestParser rp{ctx};
+ pid = rp.Pop<u64>();
+ LOG_WARNING(Service_NVDRV, "(STUBBED) called, pid=0x{:X}", pid);
+
+ IPC::ResponseBuilder rb{ctx, 3};
+ rb.Push(ResultSuccess);
+ rb.PushEnum(NvResult::Success);
+}
+
+void NVDRV::SetGraphicsFirmwareMemoryMarginEnabled(Kernel::HLERequestContext& ctx) {
+ LOG_WARNING(Service_NVDRV, "(STUBBED) called");
+
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(ResultSuccess);
+}
+
+void NVDRV::GetStatus(Kernel::HLERequestContext& ctx) {
+ LOG_WARNING(Service_NVDRV, "(STUBBED) called");
+
+ IPC::ResponseBuilder rb{ctx, 3};
+ rb.Push(ResultSuccess);
+ rb.PushEnum(NvResult::Success);
+}
+
+void NVDRV::DumpGraphicsMemoryInfo(Kernel::HLERequestContext& ctx) {
+ // According to SwitchBrew, this has no inputs and no outputs, so effectively does nothing on
+ // retail hardware.
+ LOG_DEBUG(Service_NVDRV, "called");
+
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(ResultSuccess);
+}
+
+NVDRV::NVDRV(Core::System& system_, std::shared_ptr<Module> nvdrv_, const char* name)
+ : ServiceFramework{system_, name}, nvdrv{std::move(nvdrv_)} {
+ static const FunctionInfo functions[] = {
+ {0, &NVDRV::Open, "Open"},
+ {1, &NVDRV::Ioctl1, "Ioctl"},
+ {2, &NVDRV::Close, "Close"},
+ {3, &NVDRV::Initialize, "Initialize"},
+ {4, &NVDRV::QueryEvent, "QueryEvent"},
+ {5, nullptr, "MapSharedMem"},
+ {6, &NVDRV::GetStatus, "GetStatus"},
+ {7, nullptr, "SetAruidForTest"},
+ {8, &NVDRV::SetAruid, "SetAruid"},
+ {9, &NVDRV::DumpGraphicsMemoryInfo, "DumpGraphicsMemoryInfo"},
+ {10, nullptr, "InitializeDevtools"},
+ {11, &NVDRV::Ioctl2, "Ioctl2"},
+ {12, &NVDRV::Ioctl3, "Ioctl3"},
+ {13, &NVDRV::SetGraphicsFirmwareMemoryMarginEnabled,
+ "SetGraphicsFirmwareMemoryMarginEnabled"},
+ };
+ RegisterHandlers(functions);
+}
+
+NVDRV::~NVDRV() = default;
+
+} // namespace Service::Nvidia
diff --git a/src/core/hle/service/nvdrv/interface.h b/src/core/hle/service/nvdrv/nvdrv_interface.h
index 0e764c53f..0e764c53f 100644
--- a/src/core/hle/service/nvdrv/interface.h
+++ b/src/core/hle/service/nvdrv/nvdrv_interface.h
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index d1dbc659b..941748970 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -307,8 +307,12 @@ void NVFlinger::Compose() {
}
s64 NVFlinger::GetNextTicks() const {
- constexpr s64 max_hertz = 120LL;
- return (1000000000 * (1LL << swap_interval)) / max_hertz;
+ static constexpr s64 max_hertz = 120LL;
+
+ const auto& settings = Settings::values;
+ const bool unlocked_fps = settings.disable_fps_limit.GetValue();
+ const s64 fps_cap = unlocked_fps ? static_cast<s64>(settings.fps_cap.GetValue()) : 1;
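+ // With the FPS limit disabled, fps_cap scales the 120 Hz base rate, shortening the frame period accordingly.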
+ return (1000000000 * (1LL << swap_interval)) / (max_hertz * fps_cap);
}
} // namespace Service::NVFlinger
diff --git a/src/core/hle/service/pctl/module.cpp b/src/core/hle/service/pctl/module.cpp
deleted file mode 100644
index 1e31d05a6..000000000
--- a/src/core/hle/service/pctl/module.cpp
+++ /dev/null
@@ -1,406 +0,0 @@
-// Copyright 2018 yuzu emulator team
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/logging/log.h"
-#include "core/core.h"
-#include "core/file_sys/control_metadata.h"
-#include "core/file_sys/patch_manager.h"
-#include "core/hle/ipc_helpers.h"
-#include "core/hle/kernel/k_process.h"
-#include "core/hle/service/pctl/module.h"
-#include "core/hle/service/pctl/pctl.h"
-
-namespace Service::PCTL {
-
-namespace Error {
-
-constexpr ResultCode ResultNoFreeCommunication{ErrorModule::PCTL, 101};
-constexpr ResultCode ResultStereoVisionRestricted{ErrorModule::PCTL, 104};
-constexpr ResultCode ResultNoCapability{ErrorModule::PCTL, 131};
-constexpr ResultCode ResultNoRestrictionEnabled{ErrorModule::PCTL, 181};
-
-} // namespace Error
-
-class IParentalControlService final : public ServiceFramework<IParentalControlService> {
-public:
- explicit IParentalControlService(Core::System& system_, Capability capability_)
- : ServiceFramework{system_, "IParentalControlService"}, capability{capability_} {
- // clang-format off
- static const FunctionInfo functions[] = {
- {1, &IParentalControlService::Initialize, "Initialize"},
- {1001, &IParentalControlService::CheckFreeCommunicationPermission, "CheckFreeCommunicationPermission"},
- {1002, nullptr, "ConfirmLaunchApplicationPermission"},
- {1003, nullptr, "ConfirmResumeApplicationPermission"},
- {1004, nullptr, "ConfirmSnsPostPermission"},
- {1005, nullptr, "ConfirmSystemSettingsPermission"},
- {1006, nullptr, "IsRestrictionTemporaryUnlocked"},
- {1007, nullptr, "RevertRestrictionTemporaryUnlocked"},
- {1008, nullptr, "EnterRestrictedSystemSettings"},
- {1009, nullptr, "LeaveRestrictedSystemSettings"},
- {1010, nullptr, "IsRestrictedSystemSettingsEntered"},
- {1011, nullptr, "RevertRestrictedSystemSettingsEntered"},
- {1012, nullptr, "GetRestrictedFeatures"},
- {1013, &IParentalControlService::ConfirmStereoVisionPermission, "ConfirmStereoVisionPermission"},
- {1014, nullptr, "ConfirmPlayableApplicationVideoOld"},
- {1015, nullptr, "ConfirmPlayableApplicationVideo"},
- {1016, nullptr, "ConfirmShowNewsPermission"},
- {1017, nullptr, "EndFreeCommunication"},
- {1018, &IParentalControlService::IsFreeCommunicationAvailable, "IsFreeCommunicationAvailable"},
- {1031, &IParentalControlService::IsRestrictionEnabled, "IsRestrictionEnabled"},
- {1032, nullptr, "GetSafetyLevel"},
- {1033, nullptr, "SetSafetyLevel"},
- {1034, nullptr, "GetSafetyLevelSettings"},
- {1035, nullptr, "GetCurrentSettings"},
- {1036, nullptr, "SetCustomSafetyLevelSettings"},
- {1037, nullptr, "GetDefaultRatingOrganization"},
- {1038, nullptr, "SetDefaultRatingOrganization"},
- {1039, nullptr, "GetFreeCommunicationApplicationListCount"},
- {1042, nullptr, "AddToFreeCommunicationApplicationList"},
- {1043, nullptr, "DeleteSettings"},
- {1044, nullptr, "GetFreeCommunicationApplicationList"},
- {1045, nullptr, "UpdateFreeCommunicationApplicationList"},
- {1046, nullptr, "DisableFeaturesForReset"},
- {1047, nullptr, "NotifyApplicationDownloadStarted"},
- {1048, nullptr, "NotifyNetworkProfileCreated"},
- {1049, nullptr, "ResetFreeCommunicationApplicationList"},
- {1061, &IParentalControlService::ConfirmStereoVisionRestrictionConfigurable, "ConfirmStereoVisionRestrictionConfigurable"},
- {1062, &IParentalControlService::GetStereoVisionRestriction, "GetStereoVisionRestriction"},
- {1063, &IParentalControlService::SetStereoVisionRestriction, "SetStereoVisionRestriction"},
- {1064, &IParentalControlService::ResetConfirmedStereoVisionPermission, "ResetConfirmedStereoVisionPermission"},
- {1065, &IParentalControlService::IsStereoVisionPermitted, "IsStereoVisionPermitted"},
- {1201, nullptr, "UnlockRestrictionTemporarily"},
- {1202, nullptr, "UnlockSystemSettingsRestriction"},
- {1203, nullptr, "SetPinCode"},
- {1204, nullptr, "GenerateInquiryCode"},
- {1205, nullptr, "CheckMasterKey"},
- {1206, nullptr, "GetPinCodeLength"},
- {1207, nullptr, "GetPinCodeChangedEvent"},
- {1208, nullptr, "GetPinCode"},
- {1403, nullptr, "IsPairingActive"},
- {1406, nullptr, "GetSettingsLastUpdated"},
- {1411, nullptr, "GetPairingAccountInfo"},
- {1421, nullptr, "GetAccountNickname"},
- {1424, nullptr, "GetAccountState"},
- {1425, nullptr, "RequestPostEvents"},
- {1426, nullptr, "GetPostEventInterval"},
- {1427, nullptr, "SetPostEventInterval"},
- {1432, nullptr, "GetSynchronizationEvent"},
- {1451, nullptr, "StartPlayTimer"},
- {1452, nullptr, "StopPlayTimer"},
- {1453, nullptr, "IsPlayTimerEnabled"},
- {1454, nullptr, "GetPlayTimerRemainingTime"},
- {1455, nullptr, "IsRestrictedByPlayTimer"},
- {1456, nullptr, "GetPlayTimerSettings"},
- {1457, nullptr, "GetPlayTimerEventToRequestSuspension"},
- {1458, nullptr, "IsPlayTimerAlarmDisabled"},
- {1471, nullptr, "NotifyWrongPinCodeInputManyTimes"},
- {1472, nullptr, "CancelNetworkRequest"},
- {1473, nullptr, "GetUnlinkedEvent"},
- {1474, nullptr, "ClearUnlinkedEvent"},
- {1601, nullptr, "DisableAllFeatures"},
- {1602, nullptr, "PostEnableAllFeatures"},
- {1603, nullptr, "IsAllFeaturesDisabled"},
- {1901, nullptr, "DeleteFromFreeCommunicationApplicationListForDebug"},
- {1902, nullptr, "ClearFreeCommunicationApplicationListForDebug"},
- {1903, nullptr, "GetExemptApplicationListCountForDebug"},
- {1904, nullptr, "GetExemptApplicationListForDebug"},
- {1905, nullptr, "UpdateExemptApplicationListForDebug"},
- {1906, nullptr, "AddToExemptApplicationListForDebug"},
- {1907, nullptr, "DeleteFromExemptApplicationListForDebug"},
- {1908, nullptr, "ClearExemptApplicationListForDebug"},
- {1941, nullptr, "DeletePairing"},
- {1951, nullptr, "SetPlayTimerSettingsForDebug"},
- {1952, nullptr, "GetPlayTimerSpentTimeForTest"},
- {1953, nullptr, "SetPlayTimerAlarmDisabledForDebug"},
- {2001, nullptr, "RequestPairingAsync"},
- {2002, nullptr, "FinishRequestPairing"},
- {2003, nullptr, "AuthorizePairingAsync"},
- {2004, nullptr, "FinishAuthorizePairing"},
- {2005, nullptr, "RetrievePairingInfoAsync"},
- {2006, nullptr, "FinishRetrievePairingInfo"},
- {2007, nullptr, "UnlinkPairingAsync"},
- {2008, nullptr, "FinishUnlinkPairing"},
- {2009, nullptr, "GetAccountMiiImageAsync"},
- {2010, nullptr, "FinishGetAccountMiiImage"},
- {2011, nullptr, "GetAccountMiiImageContentTypeAsync"},
- {2012, nullptr, "FinishGetAccountMiiImageContentType"},
- {2013, nullptr, "SynchronizeParentalControlSettingsAsync"},
- {2014, nullptr, "FinishSynchronizeParentalControlSettings"},
- {2015, nullptr, "FinishSynchronizeParentalControlSettingsWithLastUpdated"},
- {2016, nullptr, "RequestUpdateExemptionListAsync"},
- };
- // clang-format on
- RegisterHandlers(functions);
- }
-
-private:
- bool CheckFreeCommunicationPermissionImpl() const {
- if (states.temporary_unlocked) {
- return true;
- }
- if ((states.application_info.parental_control_flag & 1) == 0) {
- return true;
- }
- if (pin_code[0] == '\0') {
- return true;
- }
- if (!settings.is_free_communication_default_on) {
- return true;
- }
- // TODO(ogniK): Check for blacklisted/exempted applications. Return false can happen here
- // but as we don't have multiproceses support yet, we can just assume our application is
- // valid for the time being
- return true;
- }
-
- bool ConfirmStereoVisionPermissionImpl() const {
- if (states.temporary_unlocked) {
- return true;
- }
- if (pin_code[0] == '\0') {
- return true;
- }
- if (!settings.is_stero_vision_restricted) {
- return false;
- }
- return true;
- }
-
- void SetStereoVisionRestrictionImpl(bool is_restricted) {
- if (settings.disabled) {
- return;
- }
-
- if (pin_code[0] == '\0') {
- return;
- }
- settings.is_stero_vision_restricted = is_restricted;
- }
-
- void Initialize(Kernel::HLERequestContext& ctx) {
- LOG_DEBUG(Service_PCTL, "called");
- IPC::ResponseBuilder rb{ctx, 2};
-
- if (False(capability & (Capability::Application | Capability::System))) {
- LOG_ERROR(Service_PCTL, "Invalid capability! capability={:X}", capability);
- return;
- }
-
- // TODO(ogniK): Recovery flag initialization for pctl:r
-
- const auto tid = system.CurrentProcess()->GetTitleID();
- if (tid != 0) {
- const FileSys::PatchManager pm{tid, system.GetFileSystemController(),
- system.GetContentProvider()};
- const auto control = pm.GetControlMetadata();
- if (control.first) {
- states.tid_from_event = 0;
- states.launch_time_valid = false;
- states.is_suspended = false;
- states.free_communication = false;
- states.stereo_vision = false;
- states.application_info = ApplicationInfo{
- .tid = tid,
- .age_rating = control.first->GetRatingAge(),
- .parental_control_flag = control.first->GetParentalControlFlag(),
- .capability = capability,
- };
-
- if (False(capability & (Capability::System | Capability::Recovery))) {
- // TODO(ogniK): Signal application launch event
- }
- }
- }
-
- rb.Push(ResultSuccess);
- }
-
- void CheckFreeCommunicationPermission(Kernel::HLERequestContext& ctx) {
- LOG_DEBUG(Service_PCTL, "called");
-
- IPC::ResponseBuilder rb{ctx, 2};
- if (!CheckFreeCommunicationPermissionImpl()) {
- rb.Push(Error::ResultNoFreeCommunication);
- } else {
- rb.Push(ResultSuccess);
- }
-
- states.free_communication = true;
- }
-
- void ConfirmStereoVisionPermission(Kernel::HLERequestContext& ctx) {
- LOG_DEBUG(Service_PCTL, "called");
- states.stereo_vision = true;
-
- IPC::ResponseBuilder rb{ctx, 2};
- rb.Push(ResultSuccess);
- }
-
- void IsFreeCommunicationAvailable(Kernel::HLERequestContext& ctx) {
- LOG_WARNING(Service_PCTL, "(STUBBED) called");
-
- IPC::ResponseBuilder rb{ctx, 2};
- if (!CheckFreeCommunicationPermissionImpl()) {
- rb.Push(Error::ResultNoFreeCommunication);
- } else {
- rb.Push(ResultSuccess);
- }
- }
-
- void IsRestrictionEnabled(Kernel::HLERequestContext& ctx) {
- LOG_DEBUG(Service_PCTL, "called");
-
- IPC::ResponseBuilder rb{ctx, 3};
- if (False(capability & (Capability::Status | Capability::Recovery))) {
- LOG_ERROR(Service_PCTL, "Application does not have Status or Recovery capabilities!");
- rb.Push(Error::ResultNoCapability);
- rb.Push(false);
- return;
- }
-
- rb.Push(pin_code[0] != '\0');
- }
-
- void ConfirmStereoVisionRestrictionConfigurable(Kernel::HLERequestContext& ctx) {
- LOG_DEBUG(Service_PCTL, "called");
-
- IPC::ResponseBuilder rb{ctx, 2};
-
- if (False(capability & Capability::StereoVision)) {
- LOG_ERROR(Service_PCTL, "Application does not have StereoVision capability!");
- rb.Push(Error::ResultNoCapability);
- return;
- }
-
- if (pin_code[0] == '\0') {
- rb.Push(Error::ResultNoRestrictionEnabled);
- return;
- }
-
- rb.Push(ResultSuccess);
- }
-
- void IsStereoVisionPermitted(Kernel::HLERequestContext& ctx) {
- LOG_DEBUG(Service_PCTL, "called");
-
- IPC::ResponseBuilder rb{ctx, 3};
- if (!ConfirmStereoVisionPermissionImpl()) {
- rb.Push(Error::ResultStereoVisionRestricted);
- rb.Push(false);
- } else {
- rb.Push(ResultSuccess);
- rb.Push(true);
- }
- }
-
- void SetStereoVisionRestriction(Kernel::HLERequestContext& ctx) {
- IPC::RequestParser rp{ctx};
- const auto can_use = rp.Pop<bool>();
- LOG_DEBUG(Service_PCTL, "called, can_use={}", can_use);
-
- IPC::ResponseBuilder rb{ctx, 2};
- if (False(capability & Capability::StereoVision)) {
- LOG_ERROR(Service_PCTL, "Application does not have StereoVision capability!");
- rb.Push(Error::ResultNoCapability);
- return;
- }
-
- SetStereoVisionRestrictionImpl(can_use);
- rb.Push(ResultSuccess);
- }
-
- void GetStereoVisionRestriction(Kernel::HLERequestContext& ctx) {
- LOG_DEBUG(Service_PCTL, "called");
-
- IPC::ResponseBuilder rb{ctx, 3};
- if (False(capability & Capability::StereoVision)) {
- LOG_ERROR(Service_PCTL, "Application does not have StereoVision capability!");
- rb.Push(Error::ResultNoCapability);
- rb.Push(false);
- return;
- }
-
- rb.Push(ResultSuccess);
- rb.Push(settings.is_stero_vision_restricted);
- }
-
- void ResetConfirmedStereoVisionPermission(Kernel::HLERequestContext& ctx) {
- LOG_DEBUG(Service_PCTL, "called");
-
- states.stereo_vision = false;
-
- IPC::ResponseBuilder rb{ctx, 2};
- rb.Push(ResultSuccess);
- }
-
- struct ApplicationInfo {
- u64 tid{};
- std::array<u8, 32> age_rating{};
- u32 parental_control_flag{};
- Capability capability{};
- };
-
- struct States {
- u64 current_tid{};
- ApplicationInfo application_info{};
- u64 tid_from_event{};
- bool launch_time_valid{};
- bool is_suspended{};
- bool temporary_unlocked{};
- bool free_communication{};
- bool stereo_vision{};
- };
-
- struct ParentalControlSettings {
- bool is_stero_vision_restricted{};
- bool is_free_communication_default_on{};
- bool disabled{};
- };
-
- States states{};
- ParentalControlSettings settings{};
- std::array<char, 8> pin_code{};
- Capability capability{};
-};
-
-void Module::Interface::CreateService(Kernel::HLERequestContext& ctx) {
- LOG_DEBUG(Service_PCTL, "called");
-
- IPC::ResponseBuilder rb{ctx, 2, 0, 1};
- rb.Push(ResultSuccess);
- // TODO(ogniK): Get TID from process
-
- rb.PushIpcInterface<IParentalControlService>(system, capability);
-}
-
-void Module::Interface::CreateServiceWithoutInitialize(Kernel::HLERequestContext& ctx) {
- LOG_DEBUG(Service_PCTL, "called");
-
- IPC::ResponseBuilder rb{ctx, 2, 0, 1};
- rb.Push(ResultSuccess);
- rb.PushIpcInterface<IParentalControlService>(system, capability);
-}
-
-Module::Interface::Interface(Core::System& system_, std::shared_ptr<Module> module_,
- const char* name_, Capability capability_)
- : ServiceFramework{system_, name_}, module{std::move(module_)}, capability{capability_} {}
-
-Module::Interface::~Interface() = default;
-
-void InstallInterfaces(SM::ServiceManager& service_manager, Core::System& system) {
- auto module = std::make_shared<Module>();
- std::make_shared<PCTL>(system, module, "pctl",
- Capability::Application | Capability::SnsPost | Capability::Status |
- Capability::StereoVision)
- ->InstallAsService(service_manager);
- // TODO(ogniK): Implement remaining capabilities
- std::make_shared<PCTL>(system, module, "pctl:a", Capability::None)
- ->InstallAsService(service_manager);
- std::make_shared<PCTL>(system, module, "pctl:r", Capability::None)
- ->InstallAsService(service_manager);
- std::make_shared<PCTL>(system, module, "pctl:s", Capability::None)
- ->InstallAsService(service_manager);
-}
-
-} // namespace Service::PCTL
diff --git a/src/core/hle/service/pctl/pctl.h b/src/core/hle/service/pctl/pctl.h
index ea3b97823..1d28900b2 100644
--- a/src/core/hle/service/pctl/pctl.h
+++ b/src/core/hle/service/pctl/pctl.h
@@ -4,7 +4,7 @@
#pragma once
-#include "core/hle/service/pctl/module.h"
+#include "core/hle/service/pctl/pctl_module.h"
namespace Core {
class System;
diff --git a/src/core/hle/service/pctl/pctl_module.cpp b/src/core/hle/service/pctl/pctl_module.cpp
new file mode 100644
index 000000000..6949fcf3b
--- /dev/null
+++ b/src/core/hle/service/pctl/pctl_module.cpp
@@ -0,0 +1,406 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/logging/log.h"
+#include "core/core.h"
+#include "core/file_sys/control_metadata.h"
+#include "core/file_sys/patch_manager.h"
+#include "core/hle/ipc_helpers.h"
+#include "core/hle/kernel/k_process.h"
+#include "core/hle/service/pctl/pctl.h"
+#include "core/hle/service/pctl/pctl_module.h"
+
+namespace Service::PCTL {
+
+namespace Error {
+
+constexpr ResultCode ResultNoFreeCommunication{ErrorModule::PCTL, 101};
+constexpr ResultCode ResultStereoVisionRestricted{ErrorModule::PCTL, 104};
+constexpr ResultCode ResultNoCapability{ErrorModule::PCTL, 131};
+constexpr ResultCode ResultNoRestrictionEnabled{ErrorModule::PCTL, 181};
+
+} // namespace Error
+
+class IParentalControlService final : public ServiceFramework<IParentalControlService> {
+public:
+ explicit IParentalControlService(Core::System& system_, Capability capability_)
+ : ServiceFramework{system_, "IParentalControlService"}, capability{capability_} {
+ // clang-format off
+ static const FunctionInfo functions[] = {
+ {1, &IParentalControlService::Initialize, "Initialize"},
+ {1001, &IParentalControlService::CheckFreeCommunicationPermission, "CheckFreeCommunicationPermission"},
+ {1002, nullptr, "ConfirmLaunchApplicationPermission"},
+ {1003, nullptr, "ConfirmResumeApplicationPermission"},
+ {1004, nullptr, "ConfirmSnsPostPermission"},
+ {1005, nullptr, "ConfirmSystemSettingsPermission"},
+ {1006, nullptr, "IsRestrictionTemporaryUnlocked"},
+ {1007, nullptr, "RevertRestrictionTemporaryUnlocked"},
+ {1008, nullptr, "EnterRestrictedSystemSettings"},
+ {1009, nullptr, "LeaveRestrictedSystemSettings"},
+ {1010, nullptr, "IsRestrictedSystemSettingsEntered"},
+ {1011, nullptr, "RevertRestrictedSystemSettingsEntered"},
+ {1012, nullptr, "GetRestrictedFeatures"},
+ {1013, &IParentalControlService::ConfirmStereoVisionPermission, "ConfirmStereoVisionPermission"},
+ {1014, nullptr, "ConfirmPlayableApplicationVideoOld"},
+ {1015, nullptr, "ConfirmPlayableApplicationVideo"},
+ {1016, nullptr, "ConfirmShowNewsPermission"},
+ {1017, nullptr, "EndFreeCommunication"},
+ {1018, &IParentalControlService::IsFreeCommunicationAvailable, "IsFreeCommunicationAvailable"},
+ {1031, &IParentalControlService::IsRestrictionEnabled, "IsRestrictionEnabled"},
+ {1032, nullptr, "GetSafetyLevel"},
+ {1033, nullptr, "SetSafetyLevel"},
+ {1034, nullptr, "GetSafetyLevelSettings"},
+ {1035, nullptr, "GetCurrentSettings"},
+ {1036, nullptr, "SetCustomSafetyLevelSettings"},
+ {1037, nullptr, "GetDefaultRatingOrganization"},
+ {1038, nullptr, "SetDefaultRatingOrganization"},
+ {1039, nullptr, "GetFreeCommunicationApplicationListCount"},
+ {1042, nullptr, "AddToFreeCommunicationApplicationList"},
+ {1043, nullptr, "DeleteSettings"},
+ {1044, nullptr, "GetFreeCommunicationApplicationList"},
+ {1045, nullptr, "UpdateFreeCommunicationApplicationList"},
+ {1046, nullptr, "DisableFeaturesForReset"},
+ {1047, nullptr, "NotifyApplicationDownloadStarted"},
+ {1048, nullptr, "NotifyNetworkProfileCreated"},
+ {1049, nullptr, "ResetFreeCommunicationApplicationList"},
+ {1061, &IParentalControlService::ConfirmStereoVisionRestrictionConfigurable, "ConfirmStereoVisionRestrictionConfigurable"},
+ {1062, &IParentalControlService::GetStereoVisionRestriction, "GetStereoVisionRestriction"},
+ {1063, &IParentalControlService::SetStereoVisionRestriction, "SetStereoVisionRestriction"},
+ {1064, &IParentalControlService::ResetConfirmedStereoVisionPermission, "ResetConfirmedStereoVisionPermission"},
+ {1065, &IParentalControlService::IsStereoVisionPermitted, "IsStereoVisionPermitted"},
+ {1201, nullptr, "UnlockRestrictionTemporarily"},
+ {1202, nullptr, "UnlockSystemSettingsRestriction"},
+ {1203, nullptr, "SetPinCode"},
+ {1204, nullptr, "GenerateInquiryCode"},
+ {1205, nullptr, "CheckMasterKey"},
+ {1206, nullptr, "GetPinCodeLength"},
+ {1207, nullptr, "GetPinCodeChangedEvent"},
+ {1208, nullptr, "GetPinCode"},
+ {1403, nullptr, "IsPairingActive"},
+ {1406, nullptr, "GetSettingsLastUpdated"},
+ {1411, nullptr, "GetPairingAccountInfo"},
+ {1421, nullptr, "GetAccountNickname"},
+ {1424, nullptr, "GetAccountState"},
+ {1425, nullptr, "RequestPostEvents"},
+ {1426, nullptr, "GetPostEventInterval"},
+ {1427, nullptr, "SetPostEventInterval"},
+ {1432, nullptr, "GetSynchronizationEvent"},
+ {1451, nullptr, "StartPlayTimer"},
+ {1452, nullptr, "StopPlayTimer"},
+ {1453, nullptr, "IsPlayTimerEnabled"},
+ {1454, nullptr, "GetPlayTimerRemainingTime"},
+ {1455, nullptr, "IsRestrictedByPlayTimer"},
+ {1456, nullptr, "GetPlayTimerSettings"},
+ {1457, nullptr, "GetPlayTimerEventToRequestSuspension"},
+ {1458, nullptr, "IsPlayTimerAlarmDisabled"},
+ {1471, nullptr, "NotifyWrongPinCodeInputManyTimes"},
+ {1472, nullptr, "CancelNetworkRequest"},
+ {1473, nullptr, "GetUnlinkedEvent"},
+ {1474, nullptr, "ClearUnlinkedEvent"},
+ {1601, nullptr, "DisableAllFeatures"},
+ {1602, nullptr, "PostEnableAllFeatures"},
+ {1603, nullptr, "IsAllFeaturesDisabled"},
+ {1901, nullptr, "DeleteFromFreeCommunicationApplicationListForDebug"},
+ {1902, nullptr, "ClearFreeCommunicationApplicationListForDebug"},
+ {1903, nullptr, "GetExemptApplicationListCountForDebug"},
+ {1904, nullptr, "GetExemptApplicationListForDebug"},
+ {1905, nullptr, "UpdateExemptApplicationListForDebug"},
+ {1906, nullptr, "AddToExemptApplicationListForDebug"},
+ {1907, nullptr, "DeleteFromExemptApplicationListForDebug"},
+ {1908, nullptr, "ClearExemptApplicationListForDebug"},
+ {1941, nullptr, "DeletePairing"},
+ {1951, nullptr, "SetPlayTimerSettingsForDebug"},
+ {1952, nullptr, "GetPlayTimerSpentTimeForTest"},
+ {1953, nullptr, "SetPlayTimerAlarmDisabledForDebug"},
+ {2001, nullptr, "RequestPairingAsync"},
+ {2002, nullptr, "FinishRequestPairing"},
+ {2003, nullptr, "AuthorizePairingAsync"},
+ {2004, nullptr, "FinishAuthorizePairing"},
+ {2005, nullptr, "RetrievePairingInfoAsync"},
+ {2006, nullptr, "FinishRetrievePairingInfo"},
+ {2007, nullptr, "UnlinkPairingAsync"},
+ {2008, nullptr, "FinishUnlinkPairing"},
+ {2009, nullptr, "GetAccountMiiImageAsync"},
+ {2010, nullptr, "FinishGetAccountMiiImage"},
+ {2011, nullptr, "GetAccountMiiImageContentTypeAsync"},
+ {2012, nullptr, "FinishGetAccountMiiImageContentType"},
+ {2013, nullptr, "SynchronizeParentalControlSettingsAsync"},
+ {2014, nullptr, "FinishSynchronizeParentalControlSettings"},
+ {2015, nullptr, "FinishSynchronizeParentalControlSettingsWithLastUpdated"},
+ {2016, nullptr, "RequestUpdateExemptionListAsync"},
+ };
+ // clang-format on
+ RegisterHandlers(functions);
+ }
+
+private:
+ bool CheckFreeCommunicationPermissionImpl() const {
+ if (states.temporary_unlocked) {
+ return true;
+ }
+ if ((states.application_info.parental_control_flag & 1) == 0) {
+ return true;
+ }
+ if (pin_code[0] == '\0') {
+ return true;
+ }
+ if (!settings.is_free_communication_default_on) {
+ return true;
+ }
+ // TODO(ogniK): Check for blacklisted/exempted applications. Returning false is possible here,
+ // but since we don't have multiprocess support yet, we can just assume our application is
+ // valid for the time being.
+ return true;
+ }
+
+ bool ConfirmStereoVisionPermissionImpl() const {
+ if (states.temporary_unlocked) {
+ return true;
+ }
+ if (pin_code[0] == '\0') {
+ return true;
+ }
+ if (!settings.is_stero_vision_restricted) {
+ return false;
+ }
+ return true;
+ }
+
+ void SetStereoVisionRestrictionImpl(bool is_restricted) {
+ if (settings.disabled) {
+ return;
+ }
+
+ if (pin_code[0] == '\0') {
+ return;
+ }
+ settings.is_stero_vision_restricted = is_restricted;
+ }
+
+ void Initialize(Kernel::HLERequestContext& ctx) {
+ LOG_DEBUG(Service_PCTL, "called");
+ IPC::ResponseBuilder rb{ctx, 2};
+
+ if (False(capability & (Capability::Application | Capability::System))) {
+ LOG_ERROR(Service_PCTL, "Invalid capability! capability={:X}", capability);
+ return;
+ }
+
+ // TODO(ogniK): Recovery flag initialization for pctl:r
+
+ const auto tid = system.CurrentProcess()->GetTitleID();
+ if (tid != 0) {
+ const FileSys::PatchManager pm{tid, system.GetFileSystemController(),
+ system.GetContentProvider()};
+ const auto control = pm.GetControlMetadata();
+ if (control.first) {
+ states.tid_from_event = 0;
+ states.launch_time_valid = false;
+ states.is_suspended = false;
+ states.free_communication = false;
+ states.stereo_vision = false;
+ states.application_info = ApplicationInfo{
+ .tid = tid,
+ .age_rating = control.first->GetRatingAge(),
+ .parental_control_flag = control.first->GetParentalControlFlag(),
+ .capability = capability,
+ };
+
+ if (False(capability & (Capability::System | Capability::Recovery))) {
+ // TODO(ogniK): Signal application launch event
+ }
+ }
+ }
+
+ rb.Push(ResultSuccess);
+ }
+
+ void CheckFreeCommunicationPermission(Kernel::HLERequestContext& ctx) {
+ LOG_DEBUG(Service_PCTL, "called");
+
+ IPC::ResponseBuilder rb{ctx, 2};
+ if (!CheckFreeCommunicationPermissionImpl()) {
+ rb.Push(Error::ResultNoFreeCommunication);
+ } else {
+ rb.Push(ResultSuccess);
+ }
+
+ states.free_communication = true;
+ }
+
+ void ConfirmStereoVisionPermission(Kernel::HLERequestContext& ctx) {
+ LOG_DEBUG(Service_PCTL, "called");
+ states.stereo_vision = true;
+
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(ResultSuccess);
+ }
+
+ void IsFreeCommunicationAvailable(Kernel::HLERequestContext& ctx) {
+ LOG_WARNING(Service_PCTL, "(STUBBED) called");
+
+ IPC::ResponseBuilder rb{ctx, 2};
+ if (!CheckFreeCommunicationPermissionImpl()) {
+ rb.Push(Error::ResultNoFreeCommunication);
+ } else {
+ rb.Push(ResultSuccess);
+ }
+ }
+
+ void IsRestrictionEnabled(Kernel::HLERequestContext& ctx) {
+ LOG_DEBUG(Service_PCTL, "called");
+
+ IPC::ResponseBuilder rb{ctx, 3};
+ if (False(capability & (Capability::Status | Capability::Recovery))) {
+ LOG_ERROR(Service_PCTL, "Application does not have Status or Recovery capabilities!");
+ rb.Push(Error::ResultNoCapability);
+ rb.Push(false);
+ return;
+ }
+
+ rb.Push(ResultSuccess);
+ rb.Push(pin_code[0] != '\0');
+ }
+
+ void ConfirmStereoVisionRestrictionConfigurable(Kernel::HLERequestContext& ctx) {
+ LOG_DEBUG(Service_PCTL, "called");
+
+ IPC::ResponseBuilder rb{ctx, 2};
+
+ if (False(capability & Capability::StereoVision)) {
+ LOG_ERROR(Service_PCTL, "Application does not have StereoVision capability!");
+ rb.Push(Error::ResultNoCapability);
+ return;
+ }
+
+ if (pin_code[0] == '\0') {
+ rb.Push(Error::ResultNoRestrictionEnabled);
+ return;
+ }
+
+ rb.Push(ResultSuccess);
+ }
+
+ void IsStereoVisionPermitted(Kernel::HLERequestContext& ctx) {
+ LOG_DEBUG(Service_PCTL, "called");
+
+ IPC::ResponseBuilder rb{ctx, 3};
+ if (!ConfirmStereoVisionPermissionImpl()) {
+ rb.Push(Error::ResultStereoVisionRestricted);
+ rb.Push(false);
+ } else {
+ rb.Push(ResultSuccess);
+ rb.Push(true);
+ }
+ }
+
+ void SetStereoVisionRestriction(Kernel::HLERequestContext& ctx) {
+ IPC::RequestParser rp{ctx};
+ const auto can_use = rp.Pop<bool>();
+ LOG_DEBUG(Service_PCTL, "called, can_use={}", can_use);
+
+ IPC::ResponseBuilder rb{ctx, 2};
+ if (False(capability & Capability::StereoVision)) {
+ LOG_ERROR(Service_PCTL, "Application does not have StereoVision capability!");
+ rb.Push(Error::ResultNoCapability);
+ return;
+ }
+
+ SetStereoVisionRestrictionImpl(can_use);
+ rb.Push(ResultSuccess);
+ }
+
+ void GetStereoVisionRestriction(Kernel::HLERequestContext& ctx) {
+ LOG_DEBUG(Service_PCTL, "called");
+
+ IPC::ResponseBuilder rb{ctx, 3};
+ if (False(capability & Capability::StereoVision)) {
+ LOG_ERROR(Service_PCTL, "Application does not have StereoVision capability!");
+ rb.Push(Error::ResultNoCapability);
+ rb.Push(false);
+ return;
+ }
+
+ rb.Push(ResultSuccess);
+ rb.Push(settings.is_stero_vision_restricted);
+ }
+
+ void ResetConfirmedStereoVisionPermission(Kernel::HLERequestContext& ctx) {
+ LOG_DEBUG(Service_PCTL, "called");
+
+ states.stereo_vision = false;
+
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(ResultSuccess);
+ }
+
+ struct ApplicationInfo {
+ u64 tid{};
+ std::array<u8, 32> age_rating{};
+ u32 parental_control_flag{};
+ Capability capability{};
+ };
+
+ struct States {
+ u64 current_tid{};
+ ApplicationInfo application_info{};
+ u64 tid_from_event{};
+ bool launch_time_valid{};
+ bool is_suspended{};
+ bool temporary_unlocked{};
+ bool free_communication{};
+ bool stereo_vision{};
+ };
+
+ struct ParentalControlSettings {
+ bool is_stero_vision_restricted{};
+ bool is_free_communication_default_on{};
+ bool disabled{};
+ };
+
+ States states{};
+ ParentalControlSettings settings{};
+ std::array<char, 8> pin_code{};
+ Capability capability{};
+};
+
+void Module::Interface::CreateService(Kernel::HLERequestContext& ctx) {
+ LOG_DEBUG(Service_PCTL, "called");
+
+ IPC::ResponseBuilder rb{ctx, 2, 0, 1};
+ rb.Push(ResultSuccess);
+ // TODO(ogniK): Get TID from process
+
+ rb.PushIpcInterface<IParentalControlService>(system, capability);
+}
+
+void Module::Interface::CreateServiceWithoutInitialize(Kernel::HLERequestContext& ctx) {
+ LOG_DEBUG(Service_PCTL, "called");
+
+ IPC::ResponseBuilder rb{ctx, 2, 0, 1};
+ rb.Push(ResultSuccess);
+ rb.PushIpcInterface<IParentalControlService>(system, capability);
+}
+
+Module::Interface::Interface(Core::System& system_, std::shared_ptr<Module> module_,
+ const char* name_, Capability capability_)
+ : ServiceFramework{system_, name_}, module{std::move(module_)}, capability{capability_} {}
+
+Module::Interface::~Interface() = default;
+
+void InstallInterfaces(SM::ServiceManager& service_manager, Core::System& system) {
+ auto module = std::make_shared<Module>();
+ std::make_shared<PCTL>(system, module, "pctl",
+ Capability::Application | Capability::SnsPost | Capability::Status |
+ Capability::StereoVision)
+ ->InstallAsService(service_manager);
+ // TODO(ogniK): Implement remaining capabilities
+ std::make_shared<PCTL>(system, module, "pctl:a", Capability::None)
+ ->InstallAsService(service_manager);
+ std::make_shared<PCTL>(system, module, "pctl:r", Capability::None)
+ ->InstallAsService(service_manager);
+ std::make_shared<PCTL>(system, module, "pctl:s", Capability::None)
+ ->InstallAsService(service_manager);
+}
+
+} // namespace Service::PCTL
diff --git a/src/core/hle/service/pctl/module.h b/src/core/hle/service/pctl/pctl_module.h
index f25c5c557..f25c5c557 100644
--- a/src/core/hle/service/pctl/module.h
+++ b/src/core/hle/service/pctl/pctl_module.h
diff --git a/src/core/hle/service/service.cpp b/src/core/hle/service/service.cpp
index 4e1541630..b3e50433b 100644
--- a/src/core/hle/service/service.cpp
+++ b/src/core/hle/service/service.cpp
@@ -21,7 +21,7 @@
#include "core/hle/service/aoc/aoc_u.h"
#include "core/hle/service/apm/apm.h"
#include "core/hle/service/audio/audio.h"
-#include "core/hle/service/bcat/module.h"
+#include "core/hle/service/bcat/bcat_module.h"
#include "core/hle/service/bpc/bpc.h"
#include "core/hle/service/btdrv/btdrv.h"
#include "core/hle/service/btm/btm.h"
@@ -54,7 +54,7 @@
#include "core/hle/service/nvflinger/nvflinger.h"
#include "core/hle/service/olsc/olsc.h"
#include "core/hle/service/pcie/pcie.h"
-#include "core/hle/service/pctl/module.h"
+#include "core/hle/service/pctl/pctl_module.h"
#include "core/hle/service/pcv/pcv.h"
#include "core/hle/service/pm/pm.h"
#include "core/hle/service/prepo/prepo.h"
@@ -64,7 +64,7 @@
#include "core/hle/service/set/settings.h"
#include "core/hle/service/sm/sm.h"
#include "core/hle/service/sockets/sockets.h"
-#include "core/hle/service/spl/module.h"
+#include "core/hle/service/spl/spl_module.h"
#include "core/hle/service/ssl/ssl.h"
#include "core/hle/service/time/time.h"
#include "core/hle/service/usb/usb.h"
@@ -104,23 +104,22 @@ ServiceFrameworkBase::~ServiceFrameworkBase() {
void ServiceFrameworkBase::InstallAsService(SM::ServiceManager& service_manager) {
const auto guard = LockService();
- ASSERT(!port_installed);
+ ASSERT(!service_registered);
- auto port = service_manager.RegisterService(service_name, max_sessions).Unwrap();
- port->SetSessionHandler(shared_from_this());
- port_installed = true;
+ service_manager.RegisterService(service_name, max_sessions, shared_from_this());
+ service_registered = true;
}
Kernel::KClientPort& ServiceFrameworkBase::CreatePort() {
const auto guard = LockService();
- ASSERT(!port_installed);
+ ASSERT(!service_registered);
auto* port = Kernel::KPort::Create(kernel);
port->Initialize(max_sessions, false, service_name);
port->GetServerPort().SetSessionHandler(shared_from_this());
- port_installed = true;
+ service_registered = true;
return port->GetClientPort();
}
@@ -149,10 +148,10 @@ void ServiceFrameworkBase::ReportUnimplementedFunction(Kernel::HLERequestContext
std::string function_name = info == nullptr ? fmt::format("{}", ctx.GetCommand()) : info->name;
fmt::memory_buffer buf;
- fmt::format_to(buf, "function '{}': port='{}' cmd_buf={{[0]=0x{:X}", function_name,
- service_name, cmd_buf[0]);
+ fmt::format_to(std::back_inserter(buf), "function '{}': port='{}' cmd_buf={{[0]=0x{:X}",
+ function_name, service_name, cmd_buf[0]);
for (int i = 1; i <= 8; ++i) {
- fmt::format_to(buf, ", [{}]=0x{:X}", i, cmd_buf[i]);
+ fmt::format_to(std::back_inserter(buf), ", [{}]=0x{:X}", i, cmd_buf[i]);
}
buf.push_back('}');
diff --git a/src/core/hle/service/service.h b/src/core/hle/service/service.h
index e078ac176..c9d6b879d 100644
--- a/src/core/hle/service/service.h
+++ b/src/core/hle/service/service.h
@@ -96,6 +96,9 @@ protected:
/// System context that the service operates under.
Core::System& system;
+ /// Identifier string used to connect to the service.
+ std::string service_name;
+
private:
template <typename T>
friend class ServiceFramework;
@@ -117,14 +120,12 @@ private:
void RegisterHandlersBaseTipc(const FunctionInfoBase* functions, std::size_t n);
void ReportUnimplementedFunction(Kernel::HLERequestContext& ctx, const FunctionInfoBase* info);
- /// Identifier string used to connect to the service.
- std::string service_name;
/// Maximum number of concurrent sessions that this service can handle.
u32 max_sessions;
/// Flag to store if a port was already created/installed to detect multiple install attempts,
/// which is not supported.
- bool port_installed = false;
+ bool service_registered = false;
/// Function used to safely up-cast pointers to the derived class before invoking a handler.
InvokerFn* handler_invoker;
diff --git a/src/core/hle/service/set/set.cpp b/src/core/hle/service/set/set.cpp
index ece2a74c6..522a604a5 100644
--- a/src/core/hle/service/set/set.cpp
+++ b/src/core/hle/service/set/set.cpp
@@ -160,7 +160,7 @@ void SET::GetQuestFlag(Kernel::HLERequestContext& ctx) {
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(ResultSuccess);
- rb.Push(static_cast<u32>(Settings::values.quest_flag));
+ rb.Push(static_cast<u32>(Settings::values.quest_flag.GetValue()));
}
void SET::GetLanguageCode(Kernel::HLERequestContext& ctx) {
diff --git a/src/core/hle/service/sm/controller.cpp b/src/core/hle/service/sm/controller.cpp
deleted file mode 100644
index 8b9418e0f..000000000
--- a/src/core/hle/service/sm/controller.cpp
+++ /dev/null
@@ -1,80 +0,0 @@
-// Copyright 2018 yuzu emulator team
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/logging/log.h"
-#include "core/core.h"
-#include "core/hle/ipc_helpers.h"
-#include "core/hle/kernel/k_client_port.h"
-#include "core/hle/kernel/k_client_session.h"
-#include "core/hle/kernel/k_port.h"
-#include "core/hle/kernel/k_scoped_resource_reservation.h"
-#include "core/hle/kernel/k_server_port.h"
-#include "core/hle/kernel/k_server_session.h"
-#include "core/hle/kernel/k_session.h"
-#include "core/hle/service/sm/controller.h"
-
-namespace Service::SM {
-
-void Controller::ConvertCurrentObjectToDomain(Kernel::HLERequestContext& ctx) {
- ASSERT_MSG(!ctx.Session()->IsDomain(), "Session is already a domain");
- LOG_DEBUG(Service, "called, server_session={}", ctx.Session()->GetId());
- ctx.Session()->ConvertToDomain();
-
- IPC::ResponseBuilder rb{ctx, 3};
- rb.Push(ResultSuccess);
- rb.Push<u32>(1); // Converted sessions start with 1 request handler
-}
-
-void Controller::CloneCurrentObject(Kernel::HLERequestContext& ctx) {
- LOG_DEBUG(Service, "called");
-
- auto& parent_session = *ctx.Session()->GetParent();
- auto& parent_port = parent_session.GetParent()->GetParent()->GetClientPort();
- auto& session_manager = parent_session.GetServerSession().GetSessionRequestManager();
-
- // Create a session.
- Kernel::KClientSession* session{};
- const ResultCode result = parent_port.CreateSession(std::addressof(session), session_manager);
- if (result.IsError()) {
- LOG_CRITICAL(Service, "CreateSession failed with error 0x{:08X}", result.raw);
- IPC::ResponseBuilder rb{ctx, 2};
- rb.Push(result);
- }
-
- // We succeeded.
- IPC::ResponseBuilder rb{ctx, 2, 0, 1, IPC::ResponseBuilder::Flags::AlwaysMoveHandles};
- rb.Push(ResultSuccess);
- rb.PushMoveObjects(session);
-}
-
-void Controller::CloneCurrentObjectEx(Kernel::HLERequestContext& ctx) {
- LOG_DEBUG(Service, "called");
-
- CloneCurrentObject(ctx);
-}
-
-void Controller::QueryPointerBufferSize(Kernel::HLERequestContext& ctx) {
- LOG_WARNING(Service, "(STUBBED) called");
-
- IPC::ResponseBuilder rb{ctx, 3};
- rb.Push(ResultSuccess);
- rb.Push<u16>(0x8000);
-}
-
-// https://switchbrew.org/wiki/IPC_Marshalling
-Controller::Controller(Core::System& system_) : ServiceFramework{system_, "IpcController"} {
- static const FunctionInfo functions[] = {
- {0, &Controller::ConvertCurrentObjectToDomain, "ConvertCurrentObjectToDomain"},
- {1, nullptr, "CopyFromCurrentDomain"},
- {2, &Controller::CloneCurrentObject, "CloneCurrentObject"},
- {3, &Controller::QueryPointerBufferSize, "QueryPointerBufferSize"},
- {4, &Controller::CloneCurrentObjectEx, "CloneCurrentObjectEx"},
- };
- RegisterHandlers(functions);
-}
-
-Controller::~Controller() = default;
-
-} // namespace Service::SM
diff --git a/src/core/hle/service/sm/sm.cpp b/src/core/hle/service/sm/sm.cpp
index c7828c3bd..ae4dc4a75 100644
--- a/src/core/hle/service/sm/sm.cpp
+++ b/src/core/hle/service/sm/sm.cpp
@@ -4,6 +4,7 @@
#include <tuple>
#include "common/assert.h"
+#include "common/scope_exit.h"
#include "core/core.h"
#include "core/hle/ipc_helpers.h"
#include "core/hle/kernel/k_client_port.h"
@@ -14,8 +15,8 @@
#include "core/hle/kernel/k_server_session.h"
#include "core/hle/kernel/k_session.h"
#include "core/hle/result.h"
-#include "core/hle/service/sm/controller.h"
#include "core/hle/service/sm/sm.h"
+#include "core/hle/service/sm/sm_controller.h"
namespace Service::SM {
@@ -40,17 +41,13 @@ static ResultCode ValidateServiceName(const std::string& name) {
}
Kernel::KClientPort& ServiceManager::InterfaceFactory(ServiceManager& self, Core::System& system) {
- ASSERT(self.sm_interface.expired());
-
- auto sm = std::make_shared<SM>(self, system);
- self.sm_interface = sm;
+ self.sm_interface = std::make_shared<SM>(self, system);
self.controller_interface = std::make_unique<Controller>(system);
-
- return sm->CreatePort();
+ return self.sm_interface->CreatePort();
}
-ResultVal<Kernel::KServerPort*> ServiceManager::RegisterService(std::string name,
- u32 max_sessions) {
+ResultCode ServiceManager::RegisterService(std::string name, u32 max_sessions,
+ Kernel::SessionRequestHandlerPtr handler) {
CASCADE_CODE(ValidateServiceName(name));
@@ -59,12 +56,9 @@ ResultVal<Kernel::KServerPort*> ServiceManager::RegisterService(std::string name
return ERR_ALREADY_REGISTERED;
}
- auto* port = Kernel::KPort::Create(kernel);
- port->Initialize(max_sessions, false, name);
+ registered_services.emplace(std::move(name), handler);
- registered_services.emplace(std::move(name), port);
-
- return MakeResult(&port->GetServerPort());
+ return ResultSuccess;
}
ResultCode ServiceManager::UnregisterService(const std::string& name) {
@@ -76,14 +70,11 @@ ResultCode ServiceManager::UnregisterService(const std::string& name) {
return ERR_SERVICE_NOT_REGISTERED;
}
- iter->second->Close();
-
registered_services.erase(iter);
return ResultSuccess;
}
ResultVal<Kernel::KPort*> ServiceManager::GetServicePort(const std::string& name) {
-
CASCADE_CODE(ValidateServiceName(name));
auto it = registered_services.find(name);
if (it == registered_services.end()) {
@@ -91,10 +82,13 @@ ResultVal<Kernel::KPort*> ServiceManager::GetServicePort(const std::string& name
return ERR_SERVICE_NOT_REGISTERED;
}
- return MakeResult(it->second);
-}
+ auto* port = Kernel::KPort::Create(kernel);
+ port->Initialize(ServerSessionCountMax, false, name);
+ auto handler = it->second;
+ port->GetServerPort().SetSessionHandler(std::move(handler));
-SM::~SM() = default;
+ return MakeResult(port);
+}
/**
* SM::Initialize service function
@@ -156,11 +150,15 @@ ResultVal<Kernel::KClientSession*> SM::GetServiceImpl(Kernel::HLERequestContext&
LOG_ERROR(Service_SM, "called service={} -> error 0x{:08X}", name, port_result.Code().raw);
return port_result.Code();
}
- auto& port = port_result.Unwrap()->GetClientPort();
+ auto& port = port_result.Unwrap();
+ SCOPE_EXIT({ port->GetClientPort().Close(); });
+
+ server_ports.emplace_back(&port->GetServerPort());
// Create a new session.
Kernel::KClientSession* session{};
- if (const auto result = port.CreateSession(std::addressof(session)); result.IsError()) {
+ if (const auto result = port->GetClientPort().CreateSession(std::addressof(session));
+ result.IsError()) {
LOG_ERROR(Service_SM, "called service={} -> error 0x{:08X}", name, result.raw);
return result;
}
@@ -180,20 +178,21 @@ void SM::RegisterService(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_SM, "called with name={}, max_session_count={}, is_light={}", name,
max_session_count, is_light);
- auto handle = service_manager.RegisterService(name, max_session_count);
- if (handle.Failed()) {
- LOG_ERROR(Service_SM, "failed to register service with error_code={:08X}",
- handle.Code().raw);
+ if (const auto result = service_manager.RegisterService(name, max_session_count, nullptr);
+ result.IsError()) {
+ LOG_ERROR(Service_SM, "failed to register service with error_code={:08X}", result.raw);
IPC::ResponseBuilder rb{ctx, 2};
- rb.Push(handle.Code());
+ rb.Push(result);
return;
}
- IPC::ResponseBuilder rb{ctx, 2, 0, 1, IPC::ResponseBuilder::Flags::AlwaysMoveHandles};
- rb.Push(handle.Code());
+ auto* port = Kernel::KPort::Create(kernel);
+ port->Initialize(ServerSessionCountMax, is_light, name);
+ SCOPE_EXIT({ port->GetClientPort().Close(); });
- auto server_port = handle.Unwrap();
- rb.PushMoveObjects(server_port);
+ IPC::ResponseBuilder rb{ctx, 2, 0, 1, IPC::ResponseBuilder::Flags::AlwaysMoveHandles};
+ rb.Push(ResultSuccess);
+ rb.PushMoveObjects(port->GetServerPort());
}
void SM::UnregisterService(Kernel::HLERequestContext& ctx) {
@@ -225,4 +224,10 @@ SM::SM(ServiceManager& service_manager_, Core::System& system_)
});
}
+SM::~SM() {
+ for (auto& server_port : server_ports) {
+ server_port->Close();
+ }
+}
+
} // namespace Service::SM
diff --git a/src/core/hle/service/sm/sm.h b/src/core/hle/service/sm/sm.h
index ea37f11d4..068c78588 100644
--- a/src/core/hle/service/sm/sm.h
+++ b/src/core/hle/service/sm/sm.h
@@ -49,6 +49,7 @@ private:
ServiceManager& service_manager;
bool is_initialized{};
Kernel::KernelCore& kernel;
+ std::vector<Kernel::KServerPort*> server_ports;
};
class ServiceManager {
@@ -58,7 +59,8 @@ public:
explicit ServiceManager(Kernel::KernelCore& kernel_);
~ServiceManager();
- ResultVal<Kernel::KServerPort*> RegisterService(std::string name, u32 max_sessions);
+ ResultCode RegisterService(std::string name, u32 max_sessions,
+ Kernel::SessionRequestHandlerPtr handler);
ResultCode UnregisterService(const std::string& name);
ResultVal<Kernel::KPort*> GetServicePort(const std::string& name);
@@ -69,21 +71,17 @@ public:
LOG_DEBUG(Service, "Can't find service: {}", service_name);
return nullptr;
}
- auto* port = service->second;
- if (port == nullptr) {
- return nullptr;
- }
- return std::static_pointer_cast<T>(port->GetServerPort().GetSessionRequestHandler());
+ return std::static_pointer_cast<T>(service->second);
}
void InvokeControlRequest(Kernel::HLERequestContext& context);
private:
- std::weak_ptr<SM> sm_interface;
+ std::shared_ptr<SM> sm_interface;
std::unique_ptr<Controller> controller_interface;
/// Map of registered services, retrieved using GetServicePort.
- std::unordered_map<std::string, Kernel::KPort*> registered_services;
+ std::unordered_map<std::string, Kernel::SessionRequestHandlerPtr> registered_services;
/// Kernel context
Kernel::KernelCore& kernel;
diff --git a/src/core/hle/service/sm/sm_controller.cpp b/src/core/hle/service/sm/sm_controller.cpp
new file mode 100644
index 000000000..b5fbc4569
--- /dev/null
+++ b/src/core/hle/service/sm/sm_controller.cpp
@@ -0,0 +1,80 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "core/core.h"
+#include "core/hle/ipc_helpers.h"
+#include "core/hle/kernel/k_client_port.h"
+#include "core/hle/kernel/k_client_session.h"
+#include "core/hle/kernel/k_port.h"
+#include "core/hle/kernel/k_scoped_resource_reservation.h"
+#include "core/hle/kernel/k_server_port.h"
+#include "core/hle/kernel/k_server_session.h"
+#include "core/hle/kernel/k_session.h"
+#include "core/hle/service/sm/sm_controller.h"
+
+namespace Service::SM {
+
+void Controller::ConvertCurrentObjectToDomain(Kernel::HLERequestContext& ctx) {
+ ASSERT_MSG(!ctx.Session()->IsDomain(), "Session is already a domain");
+ LOG_DEBUG(Service, "called, server_session={}", ctx.Session()->GetId());
+ ctx.Session()->ConvertToDomain();
+
+ IPC::ResponseBuilder rb{ctx, 3};
+ rb.Push(ResultSuccess);
+ rb.Push<u32>(1); // Converted sessions start with 1 request handler
+}
+
+void Controller::CloneCurrentObject(Kernel::HLERequestContext& ctx) {
+ LOG_DEBUG(Service, "called");
+
+ auto& parent_session = *ctx.Session()->GetParent();
+ auto& parent_port = parent_session.GetParent()->GetParent()->GetClientPort();
+ auto& session_manager = parent_session.GetServerSession().GetSessionRequestManager();
+
+ // Create a session.
+ Kernel::KClientSession* session{};
+ const ResultCode result = parent_port.CreateSession(std::addressof(session), session_manager);
+ if (result.IsError()) {
+ LOG_CRITICAL(Service, "CreateSession failed with error 0x{:08X}", result.raw);
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(result);
+ return;
+ }
+
+ // We succeeded.
+ IPC::ResponseBuilder rb{ctx, 2, 0, 1, IPC::ResponseBuilder::Flags::AlwaysMoveHandles};
+ rb.Push(ResultSuccess);
+ rb.PushMoveObjects(session);
+}
+
+void Controller::CloneCurrentObjectEx(Kernel::HLERequestContext& ctx) {
+ LOG_DEBUG(Service, "called");
+
+ CloneCurrentObject(ctx);
+}
+
+void Controller::QueryPointerBufferSize(Kernel::HLERequestContext& ctx) {
+ LOG_WARNING(Service, "(STUBBED) called");
+
+ IPC::ResponseBuilder rb{ctx, 3};
+ rb.Push(ResultSuccess);
+ rb.Push<u16>(0x8000);
+}
+
+// https://switchbrew.org/wiki/IPC_Marshalling
+Controller::Controller(Core::System& system_) : ServiceFramework{system_, "IpcController"} {
+ static const FunctionInfo functions[] = {
+ {0, &Controller::ConvertCurrentObjectToDomain, "ConvertCurrentObjectToDomain"},
+ {1, nullptr, "CopyFromCurrentDomain"},
+ {2, &Controller::CloneCurrentObject, "CloneCurrentObject"},
+ {3, &Controller::QueryPointerBufferSize, "QueryPointerBufferSize"},
+ {4, &Controller::CloneCurrentObjectEx, "CloneCurrentObjectEx"},
+ };
+ RegisterHandlers(functions);
+}
+
+Controller::~Controller() = default;
+
+} // namespace Service::SM
diff --git a/src/core/hle/service/sm/controller.h b/src/core/hle/service/sm/sm_controller.h
index 7494f898d..7494f898d 100644
--- a/src/core/hle/service/sm/controller.h
+++ b/src/core/hle/service/sm/sm_controller.h
diff --git a/src/core/hle/service/spl/csrng.cpp b/src/core/hle/service/spl/csrng.cpp
index 1beca417c..9c7f89475 100644
--- a/src/core/hle/service/spl/csrng.cpp
+++ b/src/core/hle/service/spl/csrng.cpp
@@ -9,7 +9,7 @@ namespace Service::SPL {
CSRNG::CSRNG(Core::System& system_, std::shared_ptr<Module> module_)
: Interface(system_, std::move(module_), "csrng") {
static const FunctionInfo functions[] = {
- {0, &CSRNG::GetRandomBytes, "GetRandomBytes"},
+ {0, &CSRNG::GenerateRandomBytes, "GenerateRandomBytes"},
};
RegisterHandlers(functions);
}
diff --git a/src/core/hle/service/spl/csrng.h b/src/core/hle/service/spl/csrng.h
index 5c0bd2199..0d03cc6cb 100644
--- a/src/core/hle/service/spl/csrng.h
+++ b/src/core/hle/service/spl/csrng.h
@@ -4,7 +4,7 @@
#pragma once
-#include "core/hle/service/spl/module.h"
+#include "core/hle/service/spl/spl_module.h"
namespace Core {
class System;
diff --git a/src/core/hle/service/spl/module.cpp b/src/core/hle/service/spl/module.cpp
deleted file mode 100644
index 0b5e2b7c3..000000000
--- a/src/core/hle/service/spl/module.cpp
+++ /dev/null
@@ -1,53 +0,0 @@
-// Copyright 2018 yuzu emulator team
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <chrono>
-#include <cstdlib>
-#include <ctime>
-#include <functional>
-#include <vector>
-#include "common/logging/log.h"
-#include "common/settings.h"
-#include "core/hle/ipc_helpers.h"
-#include "core/hle/service/spl/csrng.h"
-#include "core/hle/service/spl/module.h"
-#include "core/hle/service/spl/spl.h"
-
-namespace Service::SPL {
-
-Module::Interface::Interface(Core::System& system_, std::shared_ptr<Module> module_,
- const char* name)
- : ServiceFramework{system_, name}, module{std::move(module_)},
- rng(Settings::values.rng_seed.GetValue().value_or(std::time(nullptr))) {}
-
-Module::Interface::~Interface() = default;
-
-void Module::Interface::GetRandomBytes(Kernel::HLERequestContext& ctx) {
- LOG_DEBUG(Service_SPL, "called");
-
- const std::size_t size = ctx.GetWriteBufferSize();
-
- std::uniform_int_distribution<u16> distribution(0, std::numeric_limits<u8>::max());
- std::vector<u8> data(size);
- std::generate(data.begin(), data.end(), [&] { return static_cast<u8>(distribution(rng)); });
-
- ctx.WriteBuffer(data);
-
- IPC::ResponseBuilder rb{ctx, 2};
- rb.Push(ResultSuccess);
-}
-
-void InstallInterfaces(SM::ServiceManager& service_manager, Core::System& system) {
- auto module = std::make_shared<Module>();
- std::make_shared<CSRNG>(system, module)->InstallAsService(service_manager);
- std::make_shared<SPL>(system, module)->InstallAsService(service_manager);
- std::make_shared<SPL_MIG>(system, module)->InstallAsService(service_manager);
- std::make_shared<SPL_FS>(system, module)->InstallAsService(service_manager);
- std::make_shared<SPL_SSL>(system, module)->InstallAsService(service_manager);
- std::make_shared<SPL_ES>(system, module)->InstallAsService(service_manager);
- std::make_shared<SPL_MANU>(system, module)->InstallAsService(service_manager);
-}
-
-} // namespace Service::SPL
diff --git a/src/core/hle/service/spl/module.h b/src/core/hle/service/spl/module.h
deleted file mode 100644
index 71855c1bf..000000000
--- a/src/core/hle/service/spl/module.h
+++ /dev/null
@@ -1,37 +0,0 @@
-// Copyright 2018 yuzu emulator team
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <random>
-#include "core/hle/service/service.h"
-
-namespace Core {
-class System;
-}
-
-namespace Service::SPL {
-
-class Module final {
-public:
- class Interface : public ServiceFramework<Interface> {
- public:
- explicit Interface(Core::System& system_, std::shared_ptr<Module> module_,
- const char* name);
- ~Interface() override;
-
- void GetRandomBytes(Kernel::HLERequestContext& ctx);
-
- protected:
- std::shared_ptr<Module> module;
-
- private:
- std::mt19937 rng;
- };
-};
-
-/// Registers all SPL services with the specified service manager.
-void InstallInterfaces(SM::ServiceManager& service_manager, Core::System& system);
-
-} // namespace Service::SPL
diff --git a/src/core/hle/service/spl/spl.cpp b/src/core/hle/service/spl/spl.cpp
index fff3f3c42..20384042f 100644
--- a/src/core/hle/service/spl/spl.cpp
+++ b/src/core/hle/service/spl/spl.cpp
@@ -10,13 +10,13 @@ SPL::SPL(Core::System& system_, std::shared_ptr<Module> module_)
: Interface(system_, std::move(module_), "spl:") {
// clang-format off
static const FunctionInfo functions[] = {
- {0, nullptr, "GetConfig"},
- {1, nullptr, "ModularExponentiate"},
- {5, nullptr, "SetConfig"},
- {7, &SPL::GetRandomBytes, "GetRandomBytes"},
- {11, nullptr, "IsDevelopment"},
- {24, nullptr, "SetBootReason"},
- {25, nullptr, "GetBootReason"},
+ {0, &SPL::GetConfig, "GetConfig"},
+ {1, &SPL::ModularExponentiate, "ModularExponentiate"},
+ {5, &SPL::SetConfig, "SetConfig"},
+ {7, &SPL::GenerateRandomBytes, "GenerateRandomBytes"},
+ {11, &SPL::IsDevelopment, "IsDevelopment"},
+ {24, &SPL::SetBootReason, "SetBootReason"},
+ {25, &SPL::GetBootReason, "GetBootReason"},
};
// clang-format on
@@ -27,22 +27,22 @@ SPL_MIG::SPL_MIG(Core::System& system_, std::shared_ptr<Module> module_)
: Interface(system_, std::move(module_), "spl:mig") {
// clang-format off
static const FunctionInfo functions[] = {
- {0, nullptr, "GetConfig"},
- {1, nullptr, "ModularExponentiate"},
+ {0, &SPL::GetConfig, "GetConfig"},
+ {1, &SPL::ModularExponentiate, "ModularExponentiate"},
{2, nullptr, "GenerateAesKek"},
{3, nullptr, "LoadAesKey"},
{4, nullptr, "GenerateAesKey"},
- {5, nullptr, "SetConfig"},
- {7, &SPL::GetRandomBytes, "GenerateRandomBytes"},
- {11, nullptr, "IsDevelopment"},
+ {5, &SPL::SetConfig, "SetConfig"},
+ {7, &SPL::GenerateRandomBytes, "GenerateRandomBytes"},
+ {11, &SPL::IsDevelopment, "IsDevelopment"},
{14, nullptr, "DecryptAesKey"},
{15, nullptr, "CryptAesCtr"},
{16, nullptr, "ComputeCmac"},
{21, nullptr, "AllocateAesKeyslot"},
{22, nullptr, "DeallocateAesKeySlot"},
{23, nullptr, "GetAesKeyslotAvailableEvent"},
- {24, nullptr, "SetBootReason"},
- {25, nullptr, "GetBootReason"},
+ {24, &SPL::SetBootReason, "SetBootReason"},
+ {25, &SPL::GetBootReason, "GetBootReason"},
};
// clang-format on
@@ -53,16 +53,16 @@ SPL_FS::SPL_FS(Core::System& system_, std::shared_ptr<Module> module_)
: Interface(system_, std::move(module_), "spl:fs") {
// clang-format off
static const FunctionInfo functions[] = {
- {0, nullptr, "GetConfig"},
- {1, nullptr, "ModularExponentiate"},
+ {0, &SPL::GetConfig, "GetConfig"},
+ {1, &SPL::ModularExponentiate, "ModularExponentiate"},
{2, nullptr, "GenerateAesKek"},
{3, nullptr, "LoadAesKey"},
{4, nullptr, "GenerateAesKey"},
- {5, nullptr, "SetConfig"},
- {7, &SPL::GetRandomBytes, "GenerateRandomBytes"},
+ {5, &SPL::SetConfig, "SetConfig"},
+ {7, &SPL::GenerateRandomBytes, "GenerateRandomBytes"},
{9, nullptr, "ImportLotusKey"},
{10, nullptr, "DecryptLotusMessage"},
- {11, nullptr, "IsDevelopment"},
+ {11, &SPL::IsDevelopment, "IsDevelopment"},
{12, nullptr, "GenerateSpecificAesKey"},
{14, nullptr, "DecryptAesKey"},
{15, nullptr, "CryptAesCtr"},
@@ -71,8 +71,8 @@ SPL_FS::SPL_FS(Core::System& system_, std::shared_ptr<Module> module_)
{21, nullptr, "AllocateAesKeyslot"},
{22, nullptr, "DeallocateAesKeySlot"},
{23, nullptr, "GetAesKeyslotAvailableEvent"},
- {24, nullptr, "SetBootReason"},
- {25, nullptr, "GetBootReason"},
+ {24, &SPL::SetBootReason, "SetBootReason"},
+ {25, &SPL::GetBootReason, "GetBootReason"},
{31, nullptr, "GetPackage2Hash"},
};
// clang-format on
@@ -84,14 +84,14 @@ SPL_SSL::SPL_SSL(Core::System& system_, std::shared_ptr<Module> module_)
: Interface(system_, std::move(module_), "spl:ssl") {
// clang-format off
static const FunctionInfo functions[] = {
- {0, nullptr, "GetConfig"},
- {1, nullptr, "ModularExponentiate"},
+ {0, &SPL::GetConfig, "GetConfig"},
+ {1, &SPL::ModularExponentiate, "ModularExponentiate"},
{2, nullptr, "GenerateAesKek"},
{3, nullptr, "LoadAesKey"},
{4, nullptr, "GenerateAesKey"},
- {5, nullptr, "SetConfig"},
- {7, &SPL::GetRandomBytes, "GetRandomBytes"},
- {11, nullptr, "IsDevelopment"},
+ {5, &SPL::SetConfig, "SetConfig"},
+ {7, &SPL::GenerateRandomBytes, "GenerateRandomBytes"},
+ {11, &SPL::IsDevelopment, "IsDevelopment"},
{13, nullptr, "DecryptDeviceUniqueData"},
{14, nullptr, "DecryptAesKey"},
{15, nullptr, "CryptAesCtr"},
@@ -99,8 +99,8 @@ SPL_SSL::SPL_SSL(Core::System& system_, std::shared_ptr<Module> module_)
{21, nullptr, "AllocateAesKeyslot"},
{22, nullptr, "DeallocateAesKeySlot"},
{23, nullptr, "GetAesKeyslotAvailableEvent"},
- {24, nullptr, "SetBootReason"},
- {25, nullptr, "GetBootReason"},
+ {24, &SPL::SetBootReason, "SetBootReason"},
+ {25, &SPL::GetBootReason, "GetBootReason"},
{26, nullptr, "DecryptAndStoreSslClientCertKey"},
{27, nullptr, "ModularExponentiateWithSslClientCertKey"},
};
@@ -113,14 +113,14 @@ SPL_ES::SPL_ES(Core::System& system_, std::shared_ptr<Module> module_)
: Interface(system_, std::move(module_), "spl:es") {
// clang-format off
static const FunctionInfo functions[] = {
- {0, nullptr, "GetConfig"},
- {1, nullptr, "ModularExponentiate"},
+ {0, &SPL::GetConfig, "GetConfig"},
+ {1, &SPL::ModularExponentiate, "ModularExponentiate"},
{2, nullptr, "GenerateAesKek"},
{3, nullptr, "LoadAesKey"},
{4, nullptr, "GenerateAesKey"},
- {5, nullptr, "SetConfig"},
- {7, &SPL::GetRandomBytes, "GenerateRandomBytes"},
- {11, nullptr, "IsDevelopment"},
+ {5, &SPL::SetConfig, "SetConfig"},
+ {7, &SPL::GenerateRandomBytes, "GenerateRandomBytes"},
+ {11, &SPL::IsDevelopment, "IsDevelopment"},
{13, nullptr, "DecryptDeviceUniqueData"},
{14, nullptr, "DecryptAesKey"},
{15, nullptr, "CryptAesCtr"},
@@ -131,8 +131,8 @@ SPL_ES::SPL_ES(Core::System& system_, std::shared_ptr<Module> module_)
{21, nullptr, "AllocateAesKeyslot"},
{22, nullptr, "DeallocateAesKeySlot"},
{23, nullptr, "GetAesKeyslotAvailableEvent"},
- {24, nullptr, "SetBootReason"},
- {25, nullptr, "GetBootReason"},
+ {24, &SPL::SetBootReason, "SetBootReason"},
+ {25, &SPL::GetBootReason, "GetBootReason"},
{28, nullptr, "DecryptAndStoreDrmDeviceCertKey"},
{29, nullptr, "ModularExponentiateWithDrmDeviceCertKey"},
{31, nullptr, "PrepareEsArchiveKey"},
@@ -147,14 +147,14 @@ SPL_MANU::SPL_MANU(Core::System& system_, std::shared_ptr<Module> module_)
: Interface(system_, std::move(module_), "spl:manu") {
// clang-format off
static const FunctionInfo functions[] = {
- {0, nullptr, "GetConfig"},
- {1, nullptr, "ModularExponentiate"},
+ {0, &SPL::GetConfig, "GetConfig"},
+ {1, &SPL::ModularExponentiate, "ModularExponentiate"},
{2, nullptr, "GenerateAesKek"},
{3, nullptr, "LoadAesKey"},
{4, nullptr, "GenerateAesKey"},
- {5, nullptr, "SetConfig"},
- {7, &SPL::GetRandomBytes, "GetRandomBytes"},
- {11, nullptr, "IsDevelopment"},
+ {5, &SPL::SetConfig, "SetConfig"},
+ {7, &SPL::GenerateRandomBytes, "GenerateRandomBytes"},
+ {11, &SPL::IsDevelopment, "IsDevelopment"},
{13, nullptr, "DecryptDeviceUniqueData"},
{14, nullptr, "DecryptAesKey"},
{15, nullptr, "CryptAesCtr"},
@@ -162,8 +162,8 @@ SPL_MANU::SPL_MANU(Core::System& system_, std::shared_ptr<Module> module_)
{21, nullptr, "AllocateAesKeyslot"},
{22, nullptr, "DeallocateAesKeySlot"},
{23, nullptr, "GetAesKeyslotAvailableEvent"},
- {24, nullptr, "SetBootReason"},
- {25, nullptr, "GetBootReason"},
+ {24, &SPL::SetBootReason, "SetBootReason"},
+ {25, &SPL::GetBootReason, "GetBootReason"},
{30, nullptr, "ReencryptDeviceUniqueData"},
};
// clang-format on
diff --git a/src/core/hle/service/spl/spl.h b/src/core/hle/service/spl/spl.h
index 9b35012ed..5599c0c01 100644
--- a/src/core/hle/service/spl/spl.h
+++ b/src/core/hle/service/spl/spl.h
@@ -4,7 +4,7 @@
#pragma once
-#include "core/hle/service/spl/module.h"
+#include "core/hle/service/spl/spl_module.h"
namespace Core {
class System;
diff --git a/src/core/hle/service/spl/spl_module.cpp b/src/core/hle/service/spl/spl_module.cpp
new file mode 100644
index 000000000..918633af5
--- /dev/null
+++ b/src/core/hle/service/spl/spl_module.cpp
@@ -0,0 +1,176 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <chrono>
+#include <cstdlib>
+#include <ctime>
+#include <functional>
+#include <vector>
+#include "common/logging/log.h"
+#include "common/settings.h"
+#include "core/hle/api_version.h"
+#include "core/hle/ipc_helpers.h"
+#include "core/hle/service/spl/csrng.h"
+#include "core/hle/service/spl/spl.h"
+#include "core/hle/service/spl/spl_module.h"
+
+namespace Service::SPL {
+
+Module::Interface::Interface(Core::System& system_, std::shared_ptr<Module> module_,
+ const char* name)
+ : ServiceFramework{system_, name}, module{std::move(module_)},
+ rng(Settings::values.rng_seed.GetValue().value_or(std::time(nullptr))) {}
+
+Module::Interface::~Interface() = default;
+
+void Module::Interface::GetConfig(Kernel::HLERequestContext& ctx) {
+ IPC::RequestParser rp{ctx};
+ const auto config_item = rp.PopEnum<ConfigItem>();
+
+ // This should call svcCallSecureMonitor with the appropriate args.
+ // Since we do not have it implemented yet, we will use this for now.
+ const auto smc_result = GetConfigImpl(config_item);
+ const auto result_code = smc_result.Code();
+
+ if (smc_result.Failed()) {
+ LOG_ERROR(Service_SPL, "called, config_item={}, result_code={}", config_item,
+ result_code.raw);
+
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(result_code);
+ return;
+ }
+
+ LOG_DEBUG(Service_SPL, "called, config_item={}, result_code={}, smc_result={}", config_item,
+ result_code.raw, *smc_result);
+
+ IPC::ResponseBuilder rb{ctx, 4};
+ rb.Push(result_code);
+ rb.Push(*smc_result);
+}
+
+void Module::Interface::ModularExponentiate(Kernel::HLERequestContext& ctx) {
+ UNIMPLEMENTED_MSG("ModularExponentiate is not implemented!");
+
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(ResultSecureMonitorNotImplemented);
+}
+
+void Module::Interface::SetConfig(Kernel::HLERequestContext& ctx) {
+ UNIMPLEMENTED_MSG("SetConfig is not implemented!");
+
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(ResultSecureMonitorNotImplemented);
+}
+
+void Module::Interface::GenerateRandomBytes(Kernel::HLERequestContext& ctx) {
+ LOG_DEBUG(Service_SPL, "called");
+
+ const std::size_t size = ctx.GetWriteBufferSize();
+
+ std::uniform_int_distribution<u16> distribution(0, std::numeric_limits<u8>::max());
+ std::vector<u8> data(size);
+ std::generate(data.begin(), data.end(), [&] { return static_cast<u8>(distribution(rng)); });
+
+ ctx.WriteBuffer(data);
+
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(ResultSuccess);
+}
+
+void Module::Interface::IsDevelopment(Kernel::HLERequestContext& ctx) {
+ UNIMPLEMENTED_MSG("IsDevelopment is not implemented!");
+
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(ResultSecureMonitorNotImplemented);
+}
+
+void Module::Interface::SetBootReason(Kernel::HLERequestContext& ctx) {
+ UNIMPLEMENTED_MSG("SetBootReason is not implemented!");
+
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(ResultSecureMonitorNotImplemented);
+}
+
+void Module::Interface::GetBootReason(Kernel::HLERequestContext& ctx) {
+ UNIMPLEMENTED_MSG("GetBootReason is not implemented!");
+
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(ResultSecureMonitorNotImplemented);
+}
+
+ResultVal<u64> Module::Interface::GetConfigImpl(ConfigItem config_item) const {
+ switch (config_item) {
+ case ConfigItem::DisableProgramVerification:
+ case ConfigItem::DramId:
+ case ConfigItem::SecurityEngineInterruptNumber:
+ case ConfigItem::FuseVersion:
+ case ConfigItem::HardwareType:
+ case ConfigItem::HardwareState:
+ case ConfigItem::IsRecoveryBoot:
+ case ConfigItem::DeviceId:
+ case ConfigItem::BootReason:
+ case ConfigItem::MemoryMode:
+ case ConfigItem::IsDevelopmentFunctionEnabled:
+ case ConfigItem::KernelConfiguration:
+ case ConfigItem::IsChargerHiZModeEnabled:
+ case ConfigItem::QuestState:
+ case ConfigItem::RegulatorType:
+ case ConfigItem::DeviceUniqueKeyGeneration:
+ case ConfigItem::Package2Hash:
+ return ResultSecureMonitorNotImplemented;
+ case ConfigItem::ExosphereApiVersion:
+ // Get information about the current exosphere version.
+ return MakeResult((u64{HLE::ApiVersion::ATMOSPHERE_RELEASE_VERSION_MAJOR} << 56) |
+ (u64{HLE::ApiVersion::ATMOSPHERE_RELEASE_VERSION_MINOR} << 48) |
+ (u64{HLE::ApiVersion::ATMOSPHERE_RELEASE_VERSION_MICRO} << 40) |
+ (static_cast<u64>(HLE::ApiVersion::GetTargetFirmware())));
+ case ConfigItem::ExosphereNeedsReboot:
+ // We are executing, so we aren't in the process of rebooting.
+ return MakeResult(u64{0});
+ case ConfigItem::ExosphereNeedsShutdown:
+ // We are executing, so we aren't in the process of shutting down.
+ return MakeResult(u64{0});
+ case ConfigItem::ExosphereGitCommitHash:
+ // Get information about the current exosphere git commit hash.
+ return MakeResult(u64{0});
+ case ConfigItem::ExosphereHasRcmBugPatch:
+ // Get information about whether this unit has the RCM bug patched.
+ return MakeResult(u64{0});
+ case ConfigItem::ExosphereBlankProdInfo:
+ // Get whether this unit should simulate a "blanked" PRODINFO.
+ return MakeResult(u64{0});
+ case ConfigItem::ExosphereAllowCalWrites:
+ // Get whether this unit should allow writing to the calibration partition.
+ return MakeResult(u64{0});
+ case ConfigItem::ExosphereEmummcType:
+ // Get what kind of emummc this unit has active.
+ return MakeResult(u64{0});
+ case ConfigItem::ExospherePayloadAddress:
+ // Gets the physical address of the reboot payload buffer, if one exists.
+ return ResultSecureMonitorNotInitialized;
+ case ConfigItem::ExosphereLogConfiguration:
+ // Get the log configuration.
+ return MakeResult(u64{0});
+ case ConfigItem::ExosphereForceEnableUsb30:
+ // Get whether usb 3.0 should be force-enabled.
+ return MakeResult(u64{0});
+ default:
+ return ResultSecureMonitorInvalidArgument;
+ }
+}
+
+void InstallInterfaces(SM::ServiceManager& service_manager, Core::System& system) {
+ auto module = std::make_shared<Module>();
+ std::make_shared<CSRNG>(system, module)->InstallAsService(service_manager);
+ std::make_shared<SPL>(system, module)->InstallAsService(service_manager);
+ std::make_shared<SPL_MIG>(system, module)->InstallAsService(service_manager);
+ std::make_shared<SPL_FS>(system, module)->InstallAsService(service_manager);
+ std::make_shared<SPL_SSL>(system, module)->InstallAsService(service_manager);
+ std::make_shared<SPL_ES>(system, module)->InstallAsService(service_manager);
+ std::make_shared<SPL_MANU>(system, module)->InstallAsService(service_manager);
+}
+
+} // namespace Service::SPL
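
Note on GenerateRandomBytes above: std::uniform_int_distribution is not required to support 8-bit types, which is why the implementation samples a u16 distribution bounded to the byte range and narrows each value. A self-contained sketch of that pattern follows (illustrative only, not part of this commit; the function name is made up):

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <random>
#include <vector>

// Illustrative only: mirrors the sampling pattern of GenerateRandomBytes above.
std::vector<std::uint8_t> SampleBytes(std::mt19937& rng, std::size_t size) {
    // uniform_int_distribution is not specified for 8-bit types, so sample
    // 16-bit values bounded to the byte range and narrow each one.
    std::uniform_int_distribution<std::uint16_t> dist(0, std::numeric_limits<std::uint8_t>::max());
    std::vector<std::uint8_t> data(size);
    std::generate(data.begin(), data.end(), [&] { return static_cast<std::uint8_t>(dist(rng)); });
    return data;
}
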
diff --git a/src/core/hle/service/spl/spl_module.h b/src/core/hle/service/spl/spl_module.h
new file mode 100644
index 000000000..61630df80
--- /dev/null
+++ b/src/core/hle/service/spl/spl_module.h
@@ -0,0 +1,48 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <random>
+#include "core/hle/service/service.h"
+#include "core/hle/service/spl/spl_results.h"
+#include "core/hle/service/spl/spl_types.h"
+
+namespace Core {
+class System;
+}
+
+namespace Service::SPL {
+
+class Module final {
+public:
+ class Interface : public ServiceFramework<Interface> {
+ public:
+ explicit Interface(Core::System& system_, std::shared_ptr<Module> module_,
+ const char* name);
+ ~Interface() override;
+
+ // General
+ void GetConfig(Kernel::HLERequestContext& ctx);
+ void ModularExponentiate(Kernel::HLERequestContext& ctx);
+ void SetConfig(Kernel::HLERequestContext& ctx);
+ void GenerateRandomBytes(Kernel::HLERequestContext& ctx);
+ void IsDevelopment(Kernel::HLERequestContext& ctx);
+ void SetBootReason(Kernel::HLERequestContext& ctx);
+ void GetBootReason(Kernel::HLERequestContext& ctx);
+
+ protected:
+ std::shared_ptr<Module> module;
+
+ private:
+ ResultVal<u64> GetConfigImpl(ConfigItem config_item) const;
+
+ std::mt19937 rng;
+ };
+};
+
+/// Registers all SPL services with the specified service manager.
+void InstallInterfaces(SM::ServiceManager& service_manager, Core::System& system);
+
+} // namespace Service::SPL
diff --git a/src/core/hle/service/spl/spl_results.h b/src/core/hle/service/spl/spl_results.h
new file mode 100644
index 000000000..a07c61409
--- /dev/null
+++ b/src/core/hle/service/spl/spl_results.h
@@ -0,0 +1,31 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "core/hle/result.h"
+
+namespace Service::SPL {
+
+// Description 0 - 99
+constexpr ResultCode ResultSecureMonitorError{ErrorModule::SPL, 0};
+constexpr ResultCode ResultSecureMonitorNotImplemented{ErrorModule::SPL, 1};
+constexpr ResultCode ResultSecureMonitorInvalidArgument{ErrorModule::SPL, 2};
+constexpr ResultCode ResultSecureMonitorBusy{ErrorModule::SPL, 3};
+constexpr ResultCode ResultSecureMonitorNoAsyncOperation{ErrorModule::SPL, 4};
+constexpr ResultCode ResultSecureMonitorInvalidAsyncOperation{ErrorModule::SPL, 5};
+constexpr ResultCode ResultSecureMonitorNotPermitted{ErrorModule::SPL, 6};
+constexpr ResultCode ResultSecureMonitorNotInitialized{ErrorModule::SPL, 7};
+
+constexpr ResultCode ResultInvalidSize{ErrorModule::SPL, 100};
+constexpr ResultCode ResultUnknownSecureMonitorError{ErrorModule::SPL, 101};
+constexpr ResultCode ResultDecryptionFailed{ErrorModule::SPL, 102};
+
+constexpr ResultCode ResultOutOfKeySlots{ErrorModule::SPL, 104};
+constexpr ResultCode ResultInvalidKeySlot{ErrorModule::SPL, 105};
+constexpr ResultCode ResultBootReasonAlreadySet{ErrorModule::SPL, 106};
+constexpr ResultCode ResultBootReasonNotSet{ErrorModule::SPL, 107};
+constexpr ResultCode ResultInvalidArgument{ErrorModule::SPL, 108};
+
+} // namespace Service::SPL
diff --git a/src/core/hle/service/spl/spl_types.h b/src/core/hle/service/spl/spl_types.h
new file mode 100644
index 000000000..a654e7556
--- /dev/null
+++ b/src/core/hle/service/spl/spl_types.h
@@ -0,0 +1,232 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <span>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+
+namespace Service::SPL {
+
+constexpr size_t AES_128_KEY_SIZE = 0x10;
+
+namespace Smc {
+
+enum class FunctionId : u32 {
+ SetConfig = 0xC3000401,
+ GetConfig = 0xC3000002,
+ GetResult = 0xC3000003,
+ GetResultData = 0xC3000404,
+ ModularExponentiate = 0xC3000E05,
+ GenerateRandomBytes = 0xC3000006,
+ GenerateAesKek = 0xC3000007,
+ LoadAesKey = 0xC3000008,
+ ComputeAes = 0xC3000009,
+ GenerateSpecificAesKey = 0xC300000A,
+ ComputeCmac = 0xC300040B,
+ ReencryptDeviceUniqueData = 0xC300D60C,
+ DecryptDeviceUniqueData = 0xC300100D,
+
+ ModularExponentiateWithStorageKey = 0xC300060F,
+ PrepareEsDeviceUniqueKey = 0xC3000610,
+ LoadPreparedAesKey = 0xC3000011,
+ PrepareCommonEsTitleKey = 0xC3000012,
+
+ // Deprecated functions.
+ LoadEsDeviceKey = 0xC300100C,
+ DecryptAndStoreGcKey = 0xC300100E,
+
+ // Atmosphere functions.
+ AtmosphereIramCopy = 0xF0000201,
+ AtmosphereReadWriteRegister = 0xF0000002,
+
+ AtmosphereGetEmummcConfig = 0xF0000404,
+};
+
+enum class CipherMode {
+ CbcEncrypt = 0,
+ CbcDecrypt = 1,
+ Ctr = 2,
+};
+
+enum class DeviceUniqueDataMode {
+ DecryptDeviceUniqueData = 0,
+ DecryptAndStoreGcKey = 1,
+ DecryptAndStoreEsDeviceKey = 2,
+ DecryptAndStoreSslKey = 3,
+ DecryptAndStoreDrmDeviceCertKey = 4,
+};
+
+enum class ModularExponentiateWithStorageKeyMode {
+ Gc = 0,
+ Ssl = 1,
+ DrmDeviceCert = 2,
+};
+
+enum class EsCommonKeyType {
+ TitleKey = 0,
+ ArchiveKey = 1,
+};
+
+struct AsyncOperationKey {
+ u64 value;
+};
+
+} // namespace Smc
+
+enum class HardwareType {
+ Icosa = 0,
+ Copper = 1,
+ Hoag = 2,
+ Iowa = 3,
+ Calcio = 4,
+ Aula = 5,
+};
+
+enum class SocType {
+ Erista = 0,
+ Mariko = 1,
+};
+
+enum class HardwareState {
+ Development = 0,
+ Production = 1,
+};
+
+enum class MemoryArrangement {
+ Standard = 0,
+ StandardForAppletDev = 1,
+ StandardForSystemDev = 2,
+ Expanded = 3,
+ ExpandedForAppletDev = 4,
+
+ // Note: Dynamic is not official.
+ // Atmosphere uses it to maintain compatibility with firmwares prior to 6.0.0,
+ // which removed the explicit retrieval of memory arrangement from PM.
+ Dynamic = 5,
+ Count,
+};
+
+enum class BootReason {
+ Unknown = 0,
+ AcOk = 1,
+ OnKey = 2,
+ RtcAlarm1 = 3,
+ RtcAlarm2 = 4,
+};
+
+struct BootReasonValue {
+ union {
+ u32 value{};
+
+ BitField<0, 8, u32> power_intr;
+ BitField<8, 8, u32> rtc_intr;
+ BitField<16, 8, u32> nv_erc;
+ BitField<24, 8, u32> boot_reason;
+ };
+};
+static_assert(sizeof(BootReasonValue) == sizeof(u32), "BootReasonValue definition!");
+
+struct AesKey {
+ std::array<u64, AES_128_KEY_SIZE / sizeof(u64)> data64{};
+
+ std::span<u8> AsBytes() {
+ return std::span{reinterpret_cast<u8*>(data64.data()), AES_128_KEY_SIZE};
+ }
+
+ std::span<const u8> AsBytes() const {
+ return std::span{reinterpret_cast<const u8*>(data64.data()), AES_128_KEY_SIZE};
+ }
+};
+static_assert(sizeof(AesKey) == AES_128_KEY_SIZE, "AesKey definition!");
+
+struct IvCtr {
+ std::array<u64, AES_128_KEY_SIZE / sizeof(u64)> data64{};
+
+ std::span<u8> AsBytes() {
+ return std::span{reinterpret_cast<u8*>(data64.data()), AES_128_KEY_SIZE};
+ }
+
+ std::span<const u8> AsBytes() const {
+ return std::span{reinterpret_cast<const u8*>(data64.data()), AES_128_KEY_SIZE};
+ }
+};
+static_assert(sizeof(AesKey) == AES_128_KEY_SIZE, "IvCtr definition!");
+
+struct Cmac {
+ std::array<u64, AES_128_KEY_SIZE / sizeof(u64)> data64{};
+
+ std::span<u8> AsBytes() {
+ return std::span{reinterpret_cast<u8*>(data64.data()), AES_128_KEY_SIZE};
+ }
+
+ std::span<const u8> AsBytes() const {
+ return std::span{reinterpret_cast<const u8*>(data64.data()), AES_128_KEY_SIZE};
+ }
+};
+static_assert(sizeof(AesKey) == AES_128_KEY_SIZE, "Cmac definition!");
+
+struct AccessKey {
+ std::array<u64, AES_128_KEY_SIZE / sizeof(u64)> data64{};
+
+ std::span<u8> AsBytes() {
+ return std::span{reinterpret_cast<u8*>(data64.data()), AES_128_KEY_SIZE};
+ }
+
+ std::span<const u8> AsBytes() const {
+ return std::span{reinterpret_cast<const u8*>(data64.data()), AES_128_KEY_SIZE};
+ }
+};
+static_assert(sizeof(AesKey) == AES_128_KEY_SIZE, "AccessKey definition!");
+
+struct KeySource {
+ std::array<u64, AES_128_KEY_SIZE / sizeof(u64)> data64{};
+
+ std::span<u8> AsBytes() {
+ return std::span{reinterpret_cast<u8*>(data64.data()), AES_128_KEY_SIZE};
+ }
+
+ std::span<const u8> AsBytes() const {
+ return std::span{reinterpret_cast<const u8*>(data64.data()), AES_128_KEY_SIZE};
+ }
+};
+static_assert(sizeof(AesKey) == AES_128_KEY_SIZE, "KeySource definition!");
+
+enum class ConfigItem : u32 {
+ // Standard config items.
+ DisableProgramVerification = 1,
+ DramId = 2,
+ SecurityEngineInterruptNumber = 3,
+ FuseVersion = 4,
+ HardwareType = 5,
+ HardwareState = 6,
+ IsRecoveryBoot = 7,
+ DeviceId = 8,
+ BootReason = 9,
+ MemoryMode = 10,
+ IsDevelopmentFunctionEnabled = 11,
+ KernelConfiguration = 12,
+ IsChargerHiZModeEnabled = 13,
+ QuestState = 14,
+ RegulatorType = 15,
+ DeviceUniqueKeyGeneration = 16,
+ Package2Hash = 17,
+
+ // Extension config items for exosphere.
+ ExosphereApiVersion = 65000,
+ ExosphereNeedsReboot = 65001,
+ ExosphereNeedsShutdown = 65002,
+ ExosphereGitCommitHash = 65003,
+ ExosphereHasRcmBugPatch = 65004,
+ ExosphereBlankProdInfo = 65005,
+ ExosphereAllowCalWrites = 65006,
+ ExosphereEmummcType = 65007,
+ ExospherePayloadAddress = 65008,
+ ExosphereLogConfiguration = 65009,
+ ExosphereForceEnableUsb30 = 65010,
+};
+
+} // namespace Service::SPL
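
For reference, the ExosphereApiVersion value returned by GetConfigImpl packs the release major, minor, and micro numbers into the top three bytes of a u64, with the target firmware value in the low bits. A small illustrative sketch of that layout (hypothetical version numbers, not taken from the codebase):

#include <cstdint>

// Illustrative packing of the ExosphereApiVersion config value.
constexpr std::uint64_t PackApiVersion(std::uint64_t major, std::uint64_t minor,
                                       std::uint64_t micro, std::uint64_t target_firmware) {
    return (major << 56) | (minor << 48) | (micro << 40) | target_firmware;
}

// Hypothetical version numbers, used only to show that each field is recoverable.
constexpr std::uint64_t packed = PackApiVersion(1, 2, 3, 0x0B0000);
static_assert((packed >> 56) == 1, "major lives in the top byte");
static_assert(((packed >> 48) & 0xFF) == 2, "minor lives in the next byte");
static_assert(((packed >> 40) & 0xFF) == 3, "micro lives in the third byte");
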
diff --git a/src/core/hle/service/time/interface.cpp b/src/core/hle/service/time/interface.cpp
deleted file mode 100644
index 53a204796..000000000
--- a/src/core/hle/service/time/interface.cpp
+++ /dev/null
@@ -1,42 +0,0 @@
-// Copyright 2019 yuzu emulator team
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "core/hle/service/time/interface.h"
-
-namespace Service::Time {
-
-Time::Time(std::shared_ptr<Module> module_, Core::System& system_, const char* name_)
- : Interface{std::move(module_), system_, name_} {
- // clang-format off
- static const FunctionInfo functions[] = {
- {0, &Time::GetStandardUserSystemClock, "GetStandardUserSystemClock"},
- {1, &Time::GetStandardNetworkSystemClock, "GetStandardNetworkSystemClock"},
- {2, &Time::GetStandardSteadyClock, "GetStandardSteadyClock"},
- {3, &Time::GetTimeZoneService, "GetTimeZoneService"},
- {4, &Time::GetStandardLocalSystemClock, "GetStandardLocalSystemClock"},
- {5, nullptr, "GetEphemeralNetworkSystemClock"},
- {20, &Time::GetSharedMemoryNativeHandle, "GetSharedMemoryNativeHandle"},
- {30, nullptr, "GetStandardNetworkClockOperationEventReadableHandle"},
- {31, nullptr, "GetEphemeralNetworkClockOperationEventReadableHandle"},
- {50, nullptr, "SetStandardSteadyClockInternalOffset"},
- {51, nullptr, "GetStandardSteadyClockRtcValue"},
- {100, nullptr, "IsStandardUserSystemClockAutomaticCorrectionEnabled"},
- {101, nullptr, "SetStandardUserSystemClockAutomaticCorrectionEnabled"},
- {102, nullptr, "GetStandardUserSystemClockInitialYear"},
- {200, &Time::IsStandardNetworkSystemClockAccuracySufficient, "IsStandardNetworkSystemClockAccuracySufficient"},
- {201, nullptr, "GetStandardUserSystemClockAutomaticCorrectionUpdatedTime"},
- {300, &Time::CalculateMonotonicSystemClockBaseTimePoint, "CalculateMonotonicSystemClockBaseTimePoint"},
- {400, &Time::GetClockSnapshot, "GetClockSnapshot"},
- {401, &Time::GetClockSnapshotFromSystemClockContext, "GetClockSnapshotFromSystemClockContext"},
- {500, &Time::CalculateStandardUserSystemClockDifferenceByUser, "CalculateStandardUserSystemClockDifferenceByUser"},
- {501, &Time::CalculateSpanBetween, "CalculateSpanBetween"},
- };
- // clang-format on
-
- RegisterHandlers(functions);
-}
-
-Time::~Time() = default;
-
-} // namespace Service::Time
diff --git a/src/core/hle/service/time/time.cpp b/src/core/hle/service/time/time.cpp
index d6f710eba..8fdd5076f 100644
--- a/src/core/hle/service/time/time.cpp
+++ b/src/core/hle/service/time/time.cpp
@@ -11,8 +11,8 @@
#include "core/hle/kernel/k_client_port.h"
#include "core/hle/kernel/k_scheduler.h"
#include "core/hle/kernel/kernel.h"
-#include "core/hle/service/time/interface.h"
#include "core/hle/service/time/time.h"
+#include "core/hle/service/time/time_interface.h"
#include "core/hle/service/time/time_sharedmemory.h"
#include "core/hle/service/time/time_zone_service.h"
diff --git a/src/core/hle/service/time/time_interface.cpp b/src/core/hle/service/time/time_interface.cpp
new file mode 100644
index 000000000..bb7b6b5c1
--- /dev/null
+++ b/src/core/hle/service/time/time_interface.cpp
@@ -0,0 +1,42 @@
+// Copyright 2019 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "core/hle/service/time/time_interface.h"
+
+namespace Service::Time {
+
+Time::Time(std::shared_ptr<Module> module_, Core::System& system_, const char* name_)
+ : Interface{std::move(module_), system_, name_} {
+ // clang-format off
+ static const FunctionInfo functions[] = {
+ {0, &Time::GetStandardUserSystemClock, "GetStandardUserSystemClock"},
+ {1, &Time::GetStandardNetworkSystemClock, "GetStandardNetworkSystemClock"},
+ {2, &Time::GetStandardSteadyClock, "GetStandardSteadyClock"},
+ {3, &Time::GetTimeZoneService, "GetTimeZoneService"},
+ {4, &Time::GetStandardLocalSystemClock, "GetStandardLocalSystemClock"},
+ {5, nullptr, "GetEphemeralNetworkSystemClock"},
+ {20, &Time::GetSharedMemoryNativeHandle, "GetSharedMemoryNativeHandle"},
+ {30, nullptr, "GetStandardNetworkClockOperationEventReadableHandle"},
+ {31, nullptr, "GetEphemeralNetworkClockOperationEventReadableHandle"},
+ {50, nullptr, "SetStandardSteadyClockInternalOffset"},
+ {51, nullptr, "GetStandardSteadyClockRtcValue"},
+ {100, nullptr, "IsStandardUserSystemClockAutomaticCorrectionEnabled"},
+ {101, nullptr, "SetStandardUserSystemClockAutomaticCorrectionEnabled"},
+ {102, nullptr, "GetStandardUserSystemClockInitialYear"},
+ {200, &Time::IsStandardNetworkSystemClockAccuracySufficient, "IsStandardNetworkSystemClockAccuracySufficient"},
+ {201, nullptr, "GetStandardUserSystemClockAutomaticCorrectionUpdatedTime"},
+ {300, &Time::CalculateMonotonicSystemClockBaseTimePoint, "CalculateMonotonicSystemClockBaseTimePoint"},
+ {400, &Time::GetClockSnapshot, "GetClockSnapshot"},
+ {401, &Time::GetClockSnapshotFromSystemClockContext, "GetClockSnapshotFromSystemClockContext"},
+ {500, &Time::CalculateStandardUserSystemClockDifferenceByUser, "CalculateStandardUserSystemClockDifferenceByUser"},
+ {501, &Time::CalculateSpanBetween, "CalculateSpanBetween"},
+ };
+ // clang-format on
+
+ RegisterHandlers(functions);
+}
+
+Time::~Time() = default;
+
+} // namespace Service::Time
diff --git a/src/core/hle/service/time/interface.h b/src/core/hle/service/time/time_interface.h
index c41766f1a..c41766f1a 100644
--- a/src/core/hle/service/time/interface.h
+++ b/src/core/hle/service/time/time_interface.h
diff --git a/src/core/hle/service/time/time_zone_content_manager.cpp b/src/core/hle/service/time/time_zone_content_manager.cpp
index bf4402308..c634b6abd 100644
--- a/src/core/hle/service/time/time_zone_content_manager.cpp
+++ b/src/core/hle/service/time/time_zone_content_manager.cpp
@@ -125,7 +125,7 @@ ResultCode TimeZoneContentManager::GetTimeZoneInfoFile(const std::string& locati
return ERROR_TIME_NOT_FOUND;
}
- vfs_file = zoneinfo_dir->GetFile(location_name);
+ vfs_file = zoneinfo_dir->GetFileRelative(location_name);
if (!vfs_file) {
LOG_ERROR(Service_Time, "{:016X} has no file \"{}\"! Using default timezone.",
time_zone_binary_titleid, location_name);
diff --git a/src/core/loader/loader.cpp b/src/core/loader/loader.cpp
index 228dc6389..199e69e89 100644
--- a/src/core/loader/loader.cpp
+++ b/src/core/loader/loader.cpp
@@ -206,7 +206,8 @@ AppLoader::~AppLoader() = default;
* @return std::unique_ptr<AppLoader> a pointer to a loader object; nullptr for unsupported type
*/
static std::unique_ptr<AppLoader> GetFileLoader(Core::System& system, FileSys::VirtualFile file,
- FileType type, std::size_t program_index) {
+ FileType type, u64 program_id,
+ std::size_t program_index) {
switch (type) {
// Standard ELF file format.
case FileType::ELF:
@@ -227,7 +228,8 @@ static std::unique_ptr<AppLoader> GetFileLoader(Core::System& system, FileSys::V
// NX XCI (nX Card Image) file format.
case FileType::XCI:
return std::make_unique<AppLoader_XCI>(std::move(file), system.GetFileSystemController(),
- system.GetContentProvider(), program_index);
+ system.GetContentProvider(), program_id,
+ program_index);
// NX NAX (NintendoAesXts) file format.
case FileType::NAX:
@@ -236,7 +238,8 @@ static std::unique_ptr<AppLoader> GetFileLoader(Core::System& system, FileSys::V
// NX NSP (Nintendo Submission Package) file format
case FileType::NSP:
return std::make_unique<AppLoader_NSP>(std::move(file), system.GetFileSystemController(),
- system.GetContentProvider(), program_index);
+ system.GetContentProvider(), program_id,
+ program_index);
// NX KIP (Kernel Internal Process) file format
case FileType::KIP:
@@ -252,7 +255,7 @@ static std::unique_ptr<AppLoader> GetFileLoader(Core::System& system, FileSys::V
}
std::unique_ptr<AppLoader> GetLoader(Core::System& system, FileSys::VirtualFile file,
- std::size_t program_index) {
+ u64 program_id, std::size_t program_index) {
FileType type = IdentifyFile(file);
const FileType filename_type = GuessFromFilename(file->GetName());
@@ -266,7 +269,7 @@ std::unique_ptr<AppLoader> GetLoader(Core::System& system, FileSys::VirtualFile
LOG_DEBUG(Loader, "Loading file {} as {}...", file->GetName(), GetFileTypeString(type));
- return GetFileLoader(system, std::move(file), type, program_index);
+ return GetFileLoader(system, std::move(file), type, program_id, program_index);
}
} // namespace Loader
diff --git a/src/core/loader/loader.h b/src/core/loader/loader.h
index edc8bb257..7b1bac3f7 100644
--- a/src/core/loader/loader.h
+++ b/src/core/loader/loader.h
@@ -227,6 +227,17 @@ public:
}
/**
+ * Get the program ids of the application
+ *
+ * @param[out] out_program_ids Reference to store program ids into
+ *
+ * @return ResultStatus result of function
+ */
+ virtual ResultStatus ReadProgramIds(std::vector<u64>& out_program_ids) {
+ return ResultStatus::ErrorNotImplemented;
+ }
+
+ /**
* Get the RomFS of the application
* Since the RomFS can be huge, we return a file reference instead of copying to a buffer
*
@@ -324,6 +335,6 @@ protected:
* @return the best loader for this file.
*/
std::unique_ptr<AppLoader> GetLoader(Core::System& system, FileSys::VirtualFile file,
- std::size_t program_index = 0);
+ u64 program_id = 0, std::size_t program_index = 0);
} // namespace Loader
diff --git a/src/core/loader/nro.cpp b/src/core/loader/nro.cpp
index 618555202..951ea966e 100644
--- a/src/core/loader/nro.cpp
+++ b/src/core/loader/nro.cpp
@@ -155,8 +155,8 @@ static bool LoadNroImpl(Kernel::KProcess& process, const std::vector<u8>& data)
codeset.segments[i].size = PageAlignSize(nro_header.segments[i].size);
}
- if (!Settings::values.program_args.empty()) {
- const auto arg_data = Settings::values.program_args;
+ if (!Settings::values.program_args.GetValue().empty()) {
+ const auto arg_data = Settings::values.program_args.GetValue();
codeset.DataSegment().size += NSO_ARGUMENT_DATA_ALLOCATION_SIZE;
NSOArgumentHeader args_header{
NSO_ARGUMENT_DATA_ALLOCATION_SIZE, static_cast<u32_le>(arg_data.size()), {}};
diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp
index 0f5cfda68..4a2224c02 100644
--- a/src/core/loader/nso.cpp
+++ b/src/core/loader/nso.cpp
@@ -104,8 +104,8 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core::
codeset.segments[i].size = nso_header.segments[i].size;
}
- if (should_pass_arguments && !Settings::values.program_args.empty()) {
- const auto arg_data{Settings::values.program_args};
+ if (should_pass_arguments && !Settings::values.program_args.GetValue().empty()) {
+ const auto arg_data{Settings::values.program_args.GetValue()};
codeset.DataSegment().size += NSO_ARGUMENT_DATA_ALLOCATION_SIZE;
NSOArgumentHeader args_header{
diff --git a/src/core/loader/nsp.cpp b/src/core/loader/nsp.cpp
index d815a7cd3..8b167ad3c 100644
--- a/src/core/loader/nsp.cpp
+++ b/src/core/loader/nsp.cpp
@@ -23,10 +23,9 @@ namespace Loader {
AppLoader_NSP::AppLoader_NSP(FileSys::VirtualFile file_,
const Service::FileSystem::FileSystemController& fsc,
- const FileSys::ContentProvider& content_provider,
+ const FileSys::ContentProvider& content_provider, u64 program_id,
std::size_t program_index)
- : AppLoader(file_), nsp(std::make_unique<FileSys::NSP>(file_, program_index)),
- title_id(nsp->GetProgramTitleID()) {
+ : AppLoader(file_), nsp(std::make_unique<FileSys::NSP>(file_, program_id, program_index)) {
if (nsp->GetStatus() != ResultStatus::Success) {
return;
@@ -46,12 +45,8 @@ AppLoader_NSP::AppLoader_NSP(FileSys::VirtualFile file_,
return pm.ParseControlNCA(*control_nca);
}();
- if (title_id == 0) {
- return;
- }
-
secondary_loader = std::make_unique<AppLoader_NCA>(
- nsp->GetNCAFile(title_id, FileSys::ContentRecordType::Program));
+ nsp->GetNCAFile(nsp->GetProgramTitleID(), FileSys::ContentRecordType::Program));
}
}
@@ -68,10 +63,11 @@ FileType AppLoader_NSP::IdentifyType(const FileSys::VirtualFile& nsp_file) {
}
// Non-Extracted Type case
+ const auto program_id = nsp.GetProgramTitleID();
if (!nsp.IsExtractedType() &&
- nsp.GetNCA(nsp.GetFirstTitleID(), FileSys::ContentRecordType::Program) != nullptr &&
- AppLoader_NCA::IdentifyType(nsp.GetNCAFile(
- nsp.GetFirstTitleID(), FileSys::ContentRecordType::Program)) == FileType::NCA) {
+ nsp.GetNCA(program_id, FileSys::ContentRecordType::Program) != nullptr &&
+ AppLoader_NCA::IdentifyType(
+ nsp.GetNCAFile(program_id, FileSys::ContentRecordType::Program)) == FileType::NCA) {
return FileType::NSP;
}
}
@@ -84,6 +80,8 @@ AppLoader_NSP::LoadResult AppLoader_NSP::Load(Kernel::KProcess& process, Core::S
return {ResultStatus::ErrorAlreadyLoaded, {}};
}
+ const auto title_id = nsp->GetProgramTitleID();
+
if (!nsp->IsExtractedType() && title_id == 0) {
return {ResultStatus::ErrorNSPMissingProgramNCA, {}};
}
@@ -93,7 +91,7 @@ AppLoader_NSP::LoadResult AppLoader_NSP::Load(Kernel::KProcess& process, Core::S
return {nsp_status, {}};
}
- const auto nsp_program_status = nsp->GetProgramStatus(title_id);
+ const auto nsp_program_status = nsp->GetProgramStatus();
if (nsp_program_status != ResultStatus::Success) {
return {nsp_program_status, {}};
}
@@ -134,8 +132,8 @@ ResultStatus AppLoader_NSP::ReadUpdateRaw(FileSys::VirtualFile& out_file) {
return ResultStatus::ErrorNoPackedUpdate;
}
- const auto read =
- nsp->GetNCAFile(FileSys::GetUpdateTitleID(title_id), FileSys::ContentRecordType::Program);
+ const auto read = nsp->GetNCAFile(FileSys::GetUpdateTitleID(nsp->GetProgramTitleID()),
+ FileSys::ContentRecordType::Program);
if (read == nullptr) {
return ResultStatus::ErrorNoPackedUpdate;
@@ -151,11 +149,15 @@ ResultStatus AppLoader_NSP::ReadUpdateRaw(FileSys::VirtualFile& out_file) {
}
ResultStatus AppLoader_NSP::ReadProgramId(u64& out_program_id) {
- if (title_id == 0) {
+ out_program_id = nsp->GetProgramTitleID();
+ if (out_program_id == 0) {
return ResultStatus::ErrorNotInitialized;
}
+ return ResultStatus::Success;
+}
- out_program_id = title_id;
+ResultStatus AppLoader_NSP::ReadProgramIds(std::vector<u64>& out_program_ids) {
+ out_program_ids = nsp->GetProgramTitleIDs();
return ResultStatus::Success;
}
diff --git a/src/core/loader/nsp.h b/src/core/loader/nsp.h
index 644c0ff58..50406a92e 100644
--- a/src/core/loader/nsp.h
+++ b/src/core/loader/nsp.h
@@ -28,7 +28,7 @@ class AppLoader_NSP final : public AppLoader {
public:
explicit AppLoader_NSP(FileSys::VirtualFile file_,
const Service::FileSystem::FileSystemController& fsc,
- const FileSys::ContentProvider& content_provider,
+ const FileSys::ContentProvider& content_provider, u64 program_id,
std::size_t program_index);
~AppLoader_NSP() override;
@@ -51,6 +51,7 @@ public:
u64 ReadRomFSIVFCOffset() const override;
ResultStatus ReadUpdateRaw(FileSys::VirtualFile& out_file) override;
ResultStatus ReadProgramId(u64& out_program_id) override;
+ ResultStatus ReadProgramIds(std::vector<u64>& out_program_ids) override;
ResultStatus ReadIcon(std::vector<u8>& buffer) override;
ResultStatus ReadTitle(std::string& title) override;
ResultStatus ReadControlData(FileSys::NACP& nacp) override;
@@ -67,7 +68,6 @@ private:
FileSys::VirtualFile icon_file;
std::unique_ptr<FileSys::NACP> nacp_file;
- u64 title_id;
};
} // namespace Loader
diff --git a/src/core/loader/xci.cpp b/src/core/loader/xci.cpp
index 635d6ae15..269603eef 100644
--- a/src/core/loader/xci.cpp
+++ b/src/core/loader/xci.cpp
@@ -22,9 +22,9 @@ namespace Loader {
AppLoader_XCI::AppLoader_XCI(FileSys::VirtualFile file_,
const Service::FileSystem::FileSystemController& fsc,
- const FileSys::ContentProvider& content_provider,
+ const FileSys::ContentProvider& content_provider, u64 program_id,
std::size_t program_index)
- : AppLoader(file_), xci(std::make_unique<FileSys::XCI>(file_, program_index)),
+ : AppLoader(file_), xci(std::make_unique<FileSys::XCI>(file_, program_id, program_index)),
nca_loader(std::make_unique<AppLoader_NCA>(xci->GetProgramNCAFile())) {
if (xci->GetStatus() != ResultStatus::Success) {
return;
@@ -121,6 +121,11 @@ ResultStatus AppLoader_XCI::ReadProgramId(u64& out_program_id) {
return nca_loader->ReadProgramId(out_program_id);
}
+ResultStatus AppLoader_XCI::ReadProgramIds(std::vector<u64>& out_program_ids) {
+ out_program_ids = xci->GetProgramTitleIDs();
+ return ResultStatus::Success;
+}
+
ResultStatus AppLoader_XCI::ReadIcon(std::vector<u8>& buffer) {
if (icon_file == nullptr) {
return ResultStatus::ErrorNoControl;
@@ -149,8 +154,9 @@ ResultStatus AppLoader_XCI::ReadControlData(FileSys::NACP& control) {
}
ResultStatus AppLoader_XCI::ReadManualRomFS(FileSys::VirtualFile& out_file) {
- const auto nca = xci->GetSecurePartitionNSP()->GetNCA(xci->GetProgramTitleID(),
- FileSys::ContentRecordType::HtmlDocument);
+ const auto nca =
+ xci->GetSecurePartitionNSP()->GetNCA(xci->GetSecurePartitionNSP()->GetProgramTitleID(),
+ FileSys::ContentRecordType::HtmlDocument);
if (xci->GetStatus() != ResultStatus::Success || nca == nullptr) {
return ResultStatus::ErrorXCIMissingPartition;
}
diff --git a/src/core/loader/xci.h b/src/core/loader/xci.h
index 708155c30..30caaf90e 100644
--- a/src/core/loader/xci.h
+++ b/src/core/loader/xci.h
@@ -28,7 +28,7 @@ class AppLoader_XCI final : public AppLoader {
public:
explicit AppLoader_XCI(FileSys::VirtualFile file_,
const Service::FileSystem::FileSystemController& fsc,
- const FileSys::ContentProvider& content_provider,
+ const FileSys::ContentProvider& content_provider, u64 program_id,
std::size_t program_index);
~AppLoader_XCI() override;
@@ -51,6 +51,7 @@ public:
u64 ReadRomFSIVFCOffset() const override;
ResultStatus ReadUpdateRaw(FileSys::VirtualFile& out_file) override;
ResultStatus ReadProgramId(u64& out_program_id) override;
+ ResultStatus ReadProgramIds(std::vector<u64>& out_program_ids) override;
ResultStatus ReadIcon(std::vector<u8>& buffer) override;
ResultStatus ReadTitle(std::string& title) override;
ResultStatus ReadControlData(FileSys::NACP& control) override;
diff --git a/src/core/perf_stats.cpp b/src/core/perf_stats.cpp
index 6635a1339..c9ded49d0 100644
--- a/src/core/perf_stats.cpp
+++ b/src/core/perf_stats.cpp
@@ -127,15 +127,15 @@ double PerfStats::GetLastFrameTimeScale() const {
return duration_cast<DoubleSecs>(previous_frame_length).count() / FRAME_LENGTH;
}
-void FrameLimiter::DoFrameLimiting(microseconds current_system_time_us) {
- if (!Settings::values.use_frame_limit.GetValue() ||
+void SpeedLimiter::DoSpeedLimiting(microseconds current_system_time_us) {
+ if (!Settings::values.use_speed_limit.GetValue() ||
Settings::values.use_multi_core.GetValue()) {
return;
}
auto now = Clock::now();
- const double sleep_scale = Settings::values.frame_limit.GetValue() / 100.0;
+ const double sleep_scale = Settings::values.speed_limit.GetValue() / 100.0;
// Max lag caused by slow frames. Shouldn't be more than the length of a frame at the current
// speed percent or it will clamp too much and prevent this from properly limiting to that
@@ -143,17 +143,17 @@ void FrameLimiter::DoFrameLimiting(microseconds current_system_time_us) {
// limiting
const microseconds max_lag_time_us = duration_cast<microseconds>(
std::chrono::duration<double, std::chrono::microseconds::period>(25ms / sleep_scale));
- frame_limiting_delta_err += duration_cast<microseconds>(
+ speed_limiting_delta_err += duration_cast<microseconds>(
std::chrono::duration<double, std::chrono::microseconds::period>(
(current_system_time_us - previous_system_time_us) / sleep_scale));
- frame_limiting_delta_err -= duration_cast<microseconds>(now - previous_walltime);
- frame_limiting_delta_err =
- std::clamp(frame_limiting_delta_err, -max_lag_time_us, max_lag_time_us);
+ speed_limiting_delta_err -= duration_cast<microseconds>(now - previous_walltime);
+ speed_limiting_delta_err =
+ std::clamp(speed_limiting_delta_err, -max_lag_time_us, max_lag_time_us);
- if (frame_limiting_delta_err > microseconds::zero()) {
- std::this_thread::sleep_for(frame_limiting_delta_err);
+ if (speed_limiting_delta_err > microseconds::zero()) {
+ std::this_thread::sleep_for(speed_limiting_delta_err);
auto now_after_sleep = Clock::now();
- frame_limiting_delta_err -= duration_cast<microseconds>(now_after_sleep - now);
+ speed_limiting_delta_err -= duration_cast<microseconds>(now_after_sleep - now);
now = now_after_sleep;
}
diff --git a/src/core/perf_stats.h b/src/core/perf_stats.h
index e5d603717..a2541906f 100644
--- a/src/core/perf_stats.h
+++ b/src/core/perf_stats.h
@@ -85,11 +85,11 @@ private:
double previous_fps = 0;
};
-class FrameLimiter {
+class SpeedLimiter {
public:
using Clock = std::chrono::high_resolution_clock;
- void DoFrameLimiting(std::chrono::microseconds current_system_time_us);
+ void DoSpeedLimiting(std::chrono::microseconds current_system_time_us);
private:
/// Emulated system time (in microseconds) at the last limiter invocation
@@ -98,7 +98,7 @@ private:
Clock::time_point previous_walltime = Clock::now();
/// Accumulated difference between walltime and emulated time
- std::chrono::microseconds frame_limiting_delta_err{0};
+ std::chrono::microseconds speed_limiting_delta_err{0};
};
} // namespace Core
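
The renamed SpeedLimiter keeps the same control loop as the old FrameLimiter: each step adds the emulated time elapsed, stretched by the configured speed percentage, subtracts the wall-clock time elapsed, clamps the accumulated error to roughly 25 ms of scaled lag, and sleeps while the error is positive. A simplified single-step model follows (illustrative sketch, not the project code; it omits the re-measurement after sleeping):

#include <algorithm>
#include <chrono>
#include <ratio>
#include <thread>

using std::chrono::duration;
using std::chrono::duration_cast;
using std::chrono::microseconds;

// Simplified model of one SpeedLimiter::DoSpeedLimiting step.
void LimitStep(microseconds emulated_delta, microseconds wall_delta, double speed_percent,
               microseconds& delta_err) {
    const double scale = speed_percent / 100.0;
    // About 25 ms of slack, widened when running below full speed.
    const auto max_lag =
        duration_cast<microseconds>(duration<double, std::micro>(25000.0 / scale));
    delta_err += duration_cast<microseconds>(
        duration<double, std::micro>(emulated_delta.count() / scale));
    delta_err -= wall_delta;
    delta_err = std::clamp(delta_err, -max_lag, max_lag);
    if (delta_err > microseconds::zero()) {
        std::this_thread::sleep_for(delta_err); // pay back the surplus emulated time
    }
}
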
diff --git a/src/core/reporter.cpp b/src/core/reporter.cpp
index 82b0f535a..365b8f906 100644
--- a/src/core/reporter.cpp
+++ b/src/core/reporter.cpp
@@ -62,7 +62,6 @@ json GetYuzuVersionData() {
{"build_date", std::string(Common::g_build_date)},
{"build_fullname", std::string(Common::g_build_fullname)},
{"build_version", std::string(Common::g_build_version)},
- {"shader_cache_version", std::string(Common::g_shader_cache_version)},
};
}
@@ -397,7 +396,7 @@ void Reporter::ClearFSAccessLog() const {
}
bool Reporter::IsReportingEnabled() const {
- return Settings::values.reporting_services;
+ return Settings::values.reporting_services.GetValue();
}
} // namespace Core
diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp
index d4c23ced2..5a8cfd301 100644
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -135,7 +135,7 @@ u64 RegenerateTelemetryId() {
bool VerifyLogin(const std::string& username, const std::string& token) {
#ifdef ENABLE_WEB_SERVICE
- return WebService::VerifyLogin(Settings::values.web_api_url, username, token);
+ return WebService::VerifyLogin(Settings::values.web_api_url.GetValue(), username, token);
#else
return false;
#endif
@@ -152,7 +152,8 @@ TelemetrySession::~TelemetrySession() {
#ifdef ENABLE_WEB_SERVICE
auto backend = std::make_unique<WebService::TelemetryJson>(
- Settings::values.web_api_url, Settings::values.yuzu_username, Settings::values.yuzu_token);
+ Settings::values.web_api_url.GetValue(), Settings::values.yuzu_username.GetValue(),
+ Settings::values.yuzu_token.GetValue());
#else
auto backend = std::make_unique<Telemetry::NullVisitor>();
#endif
@@ -212,7 +213,7 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader,
// Log user configuration information
constexpr auto field_type = Telemetry::FieldType::UserConfig;
- AddField(field_type, "Audio_SinkId", Settings::values.sink_id);
+ AddField(field_type, "Audio_SinkId", Settings::values.sink_id.GetValue());
AddField(field_type, "Audio_EnableAudioStretching",
Settings::values.enable_audio_stretching.GetValue());
AddField(field_type, "Core_UseMultiCore", Settings::values.use_multi_core.GetValue());
@@ -220,8 +221,8 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader,
TranslateRenderer(Settings::values.renderer_backend.GetValue()));
AddField(field_type, "Renderer_ResolutionFactor",
Settings::values.resolution_factor.GetValue());
- AddField(field_type, "Renderer_UseFrameLimit", Settings::values.use_frame_limit.GetValue());
- AddField(field_type, "Renderer_FrameLimit", Settings::values.frame_limit.GetValue());
+ AddField(field_type, "Renderer_UseSpeedLimit", Settings::values.use_speed_limit.GetValue());
+ AddField(field_type, "Renderer_SpeedLimit", Settings::values.speed_limit.GetValue());
AddField(field_type, "Renderer_UseDiskShaderCache",
Settings::values.use_disk_shader_cache.GetValue());
AddField(field_type, "Renderer_GPUAccuracyLevel",
@@ -232,8 +233,8 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader,
Settings::values.use_nvdec_emulation.GetValue());
AddField(field_type, "Renderer_AccelerateASTC", Settings::values.accelerate_astc.GetValue());
AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync.GetValue());
- AddField(field_type, "Renderer_UseAssemblyShaders",
- Settings::values.use_assembly_shaders.GetValue());
+ AddField(field_type, "Renderer_ShaderBackend",
+ static_cast<u32>(Settings::values.shader_backend.GetValue()));
AddField(field_type, "Renderer_UseAsynchronousShaders",
Settings::values.use_asynchronous_shaders.GetValue());
AddField(field_type, "System_UseDockedMode", Settings::values.use_docked_mode.GetValue());
@@ -242,7 +243,8 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader,
bool TelemetrySession::SubmitTestcase() {
#ifdef ENABLE_WEB_SERVICE
auto backend = std::make_unique<WebService::TelemetryJson>(
- Settings::values.web_api_url, Settings::values.yuzu_username, Settings::values.yuzu_token);
+ Settings::values.web_api_url.GetValue(), Settings::values.yuzu_username.GetValue(),
+ Settings::values.yuzu_token.GetValue());
field_collection.Accept(*backend);
return backend->SubmitTestcase();
#else
diff --git a/src/input_common/CMakeLists.txt b/src/input_common/CMakeLists.txt
index 7c5763f9c..c4283a952 100644
--- a/src/input_common/CMakeLists.txt
+++ b/src/input_common/CMakeLists.txt
@@ -34,28 +34,17 @@ if (MSVC)
/W4
/WX
- # 'expression' : signed/unsigned mismatch
- /we4018
- # 'argument' : conversion from 'type1' to 'type2', possible loss of data (floating-point)
- /we4244
- # 'conversion' : conversion from 'type1' to 'type2', signed/unsigned mismatch
- /we4245
- # 'operator': conversion from 'type1:field_bits' to 'type2:field_bits', possible loss of data
- /we4254
- # 'var' : conversion from 'size_t' to 'type', possible loss of data
- /we4267
- # 'context' : truncation from 'type1' to 'type2'
- /we4305
+ /we4242 # 'identifier': conversion from 'type1' to 'type2', possible loss of data
+ /we4244 # 'conversion': conversion from 'type1' to 'type2', possible loss of data
+ /we4245 # 'conversion': conversion from 'type1' to 'type2', signed/unsigned mismatch
+ /we4254 # 'operator': conversion from 'type1:field_bits' to 'type2:field_bits', possible loss of data
)
else()
target_compile_options(input_common PRIVATE
-Werror
-Werror=conversion
-Werror=ignored-qualifiers
- -Werror=implicit-fallthrough
- -Werror=reorder
-Werror=shadow
- -Werror=sign-compare
$<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-parameter>
$<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-variable>
-Werror=unused-variable
diff --git a/src/input_common/analog_from_button.cpp b/src/input_common/analog_from_button.cpp
index 100138d11..2fafd077f 100755
--- a/src/input_common/analog_from_button.cpp
+++ b/src/input_common/analog_from_button.cpp
@@ -27,6 +27,7 @@ public:
down->SetCallback(callbacks);
left->SetCallback(callbacks);
right->SetCallback(callbacks);
+ modifier->SetCallback(callbacks);
}
bool IsAngleGreater(float old_angle, float new_angle) const {
diff --git a/src/input_common/gcadapter/gc_adapter.cpp b/src/input_common/gcadapter/gc_adapter.cpp
index 320f51ee6..a2f1bb67c 100644
--- a/src/input_common/gcadapter/gc_adapter.cpp
+++ b/src/input_common/gcadapter/gc_adapter.cpp
@@ -5,14 +5,7 @@
#include <chrono>
#include <thread>
-#ifdef _MSC_VER
-#pragma warning(push)
-#pragma warning(disable : 4200) // nonstandard extension used : zero-sized array in struct/union
-#endif
#include <libusb.h>
-#ifdef _MSC_VER
-#pragma warning(pop)
-#endif
#include "common/logging/log.h"
#include "common/param_package.h"
diff --git a/src/input_common/main.cpp b/src/input_common/main.cpp
index 7399c3648..8de3d4520 100644
--- a/src/input_common/main.cpp
+++ b/src/input_common/main.cpp
@@ -294,8 +294,8 @@ void InputSubsystem::ReloadInputDevices() {
impl->udp->ReloadSockets();
}
-std::vector<std::unique_ptr<Polling::DevicePoller>> InputSubsystem::GetPollers(
- Polling::DeviceType type) const {
+std::vector<std::unique_ptr<Polling::DevicePoller>> InputSubsystem::GetPollers([
+ [maybe_unused]] Polling::DeviceType type) const {
#ifdef HAVE_SDL2
return impl->sdl->GetPollers(type);
#else
diff --git a/src/input_common/mouse/mouse_input.cpp b/src/input_common/mouse/mouse_input.cpp
index a335e6da1..3b052ffb2 100644
--- a/src/input_common/mouse/mouse_input.cpp
+++ b/src/input_common/mouse/mouse_input.cpp
@@ -2,25 +2,23 @@
// Licensed under GPLv2+
// Refer to the license.txt file included.
+#include <stop_token>
+#include <thread>
+
#include "common/settings.h"
#include "input_common/mouse/mouse_input.h"
namespace MouseInput {
Mouse::Mouse() {
- update_thread = std::thread(&Mouse::UpdateThread, this);
+ update_thread = std::jthread([this](std::stop_token stop_token) { UpdateThread(stop_token); });
}
-Mouse::~Mouse() {
- update_thread_running = false;
- if (update_thread.joinable()) {
- update_thread.join();
- }
-}
+Mouse::~Mouse() = default;
-void Mouse::UpdateThread() {
+void Mouse::UpdateThread(std::stop_token stop_token) {
constexpr int update_time = 10;
- while (update_thread_running) {
+ while (!stop_token.stop_requested()) {
for (MouseInfo& info : mouse_info) {
const Common::Vec3f angular_direction{
-info.tilt_direction.y,
diff --git a/src/input_common/mouse/mouse_input.h b/src/input_common/mouse/mouse_input.h
index 5a971ad67..c8bae99c1 100644
--- a/src/input_common/mouse/mouse_input.h
+++ b/src/input_common/mouse/mouse_input.h
@@ -6,6 +6,7 @@
#include <array>
#include <mutex>
+#include <stop_token>
#include <thread>
#include "common/common_types.h"
@@ -85,7 +86,7 @@ public:
[[nodiscard]] const MouseData& GetMouseState(std::size_t button) const;
private:
- void UpdateThread();
+ void UpdateThread(std::stop_token stop_token);
void UpdateYuzuSettings();
void StopPanning();
@@ -105,12 +106,11 @@ private:
u16 buttons{};
u16 toggle_buttons{};
u16 lock_buttons{};
- std::thread update_thread;
+ std::jthread update_thread;
MouseButton last_button{MouseButton::Undefined};
std::array<MouseInfo, 7> mouse_info;
Common::SPSCQueue<MouseStatus> mouse_queue;
bool configuring{false};
- bool update_thread_running{true};
int mouse_panning_timout{};
};
} // namespace MouseInput
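
The mouse update thread now uses std::jthread, whose destructor requests stop via the associated std::stop_token and then joins; that is what allows the update_thread_running flag and the explicit join in ~Mouse to be removed. A generic sketch of the pattern (example code, not from the project):

#include <chrono>
#include <stop_token>
#include <thread>

// Generic example of the jthread + stop_token pattern adopted by Mouse.
class Worker {
public:
    Worker() : thread([this](std::stop_token token) { Loop(token); }) {}
    // No explicit destructor: ~jthread() requests stop and joins automatically.

private:
    void Loop(std::stop_token token) {
        while (!token.stop_requested()) {
            // ... do periodic work ...
            std::this_thread::sleep_for(std::chrono::milliseconds(10));
        }
    }

    std::jthread thread;
};
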
diff --git a/src/input_common/mouse/mouse_poller.cpp b/src/input_common/mouse/mouse_poller.cpp
index d96104a4e..efcdd85d2 100644
--- a/src/input_common/mouse/mouse_poller.cpp
+++ b/src/input_common/mouse/mouse_poller.cpp
@@ -2,6 +2,8 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <algorithm>
+#include <memory>
#include <mutex>
#include <utility>
@@ -82,7 +84,8 @@ public:
std::lock_guard lock{mutex};
const auto axis_value =
static_cast<float>(mouse_input->GetMouseState(button).axis.at(axis));
- return axis_value * Settings::values.mouse_panning_sensitivity / (100.0f * range);
+ const float sensitivity = Settings::values.mouse_panning_sensitivity.GetValue() * 0.10f;
+ return axis_value * sensitivity / (100.0f * range);
}
std::pair<float, float> GetAnalog(u32 analog_axis_x, u32 analog_axis_y) const {
diff --git a/src/input_common/sdl/sdl_impl.cpp b/src/input_common/sdl/sdl_impl.cpp
index 68672a92b..70a0ba09c 100644
--- a/src/input_common/sdl/sdl_impl.cpp
+++ b/src/input_common/sdl/sdl_impl.cpp
@@ -115,6 +115,41 @@ public:
return state.buttons.at(button);
}
+ bool ToggleButton(int button) {
+ std::lock_guard lock{mutex};
+
+ if (!state.toggle_buttons.contains(button) || !state.lock_buttons.contains(button)) {
+ state.toggle_buttons.insert_or_assign(button, false);
+ state.lock_buttons.insert_or_assign(button, false);
+ }
+
+ const bool button_state = state.toggle_buttons.at(button);
+ const bool button_lock = state.lock_buttons.at(button);
+
+ if (button_lock) {
+ return button_state;
+ }
+
+ state.lock_buttons.insert_or_assign(button, true);
+
+ if (button_state) {
+ state.toggle_buttons.insert_or_assign(button, false);
+ } else {
+ state.toggle_buttons.insert_or_assign(button, true);
+ }
+
+ return !button_state;
+ }
+
+ bool UnlockButton(int button) {
+ std::lock_guard lock{mutex};
+ if (!state.toggle_buttons.contains(button)) {
+ return false;
+ }
+ state.lock_buttons.insert_or_assign(button, false);
+ return state.toggle_buttons.at(button);
+ }
+
void SetAxis(int axis, Sint16 value) {
std::lock_guard lock{mutex};
state.axes.insert_or_assign(axis, value);
@@ -130,10 +165,10 @@ public:
if (sdl_controller) {
return SDL_GameControllerRumble(sdl_controller.get(), amp_low, amp_high,
- rumble_max_duration_ms) == 0;
+ rumble_max_duration_ms) != -1;
} else if (sdl_joystick) {
return SDL_JoystickRumble(sdl_joystick.get(), amp_low, amp_high,
- rumble_max_duration_ms) == 0;
+ rumble_max_duration_ms) != -1;
}
return false;
@@ -241,6 +276,8 @@ public:
private:
struct State {
std::unordered_map<int, bool> buttons;
+ std::unordered_map<int, bool> toggle_buttons{};
+ std::unordered_map<int, bool> lock_buttons{};
std::unordered_map<int, Sint16> axes;
std::unordered_map<int, Uint8> hats;
} state;
@@ -402,16 +439,25 @@ void SDLState::CloseJoysticks() {
class SDLButton final : public Input::ButtonDevice {
public:
- explicit SDLButton(std::shared_ptr<SDLJoystick> joystick_, int button_)
- : joystick(std::move(joystick_)), button(button_) {}
+ explicit SDLButton(std::shared_ptr<SDLJoystick> joystick_, int button_, bool toggle_)
+ : joystick(std::move(joystick_)), button(button_), toggle(toggle_) {}
bool GetStatus() const override {
- return joystick->GetButton(button);
+ const bool button_state = joystick->GetButton(button);
+ if (!toggle) {
+ return button_state;
+ }
+
+ if (button_state) {
+ return joystick->ToggleButton(button);
+ }
+ return joystick->UnlockButton(button);
}
private:
std::shared_ptr<SDLJoystick> joystick;
int button;
+ bool toggle;
};
class SDLDirectionButton final : public Input::ButtonDevice {
@@ -635,6 +681,7 @@ public:
std::unique_ptr<Input::ButtonDevice> Create(const Common::ParamPackage& params) override {
const std::string guid = params.Get("guid", "0");
const int port = params.Get("port", 0);
+ const auto toggle = params.Get("toggle", false);
auto joystick = state.GetSDLJoystickByGUID(guid, port);
@@ -660,7 +707,8 @@ public:
if (params.Has("axis")) {
const int axis = params.Get("axis", 0);
- const float threshold = params.Get("threshold", 0.5f);
+ // Convert range from (0.0, 1.0) to (-1.0, 1.0)
+ const float threshold = (params.Get("threshold", 0.5f) - 0.5f) * 2.0f;
const std::string direction_name = params.Get("direction", "");
bool trigger_if_greater;
if (direction_name == "+") {
@@ -679,7 +727,7 @@ public:
const int button = params.Get("button", 0);
// This is necessary so accessing GetButton with button won't crash
joystick->SetButton(button, false);
- return std::make_unique<SDLButton>(joystick, button);
+ return std::make_unique<SDLButton>(joystick, button, toggle);
}
private:
@@ -933,12 +981,11 @@ Common::ParamPackage BuildAnalogParamPackageForButton(int port, std::string guid
params.Set("port", port);
params.Set("guid", std::move(guid));
params.Set("axis", axis);
+ params.Set("threshold", "0.5");
if (value > 0) {
params.Set("direction", "+");
- params.Set("threshold", "0.5");
} else {
params.Set("direction", "-");
- params.Set("threshold", "-0.5");
}
return params;
}
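
The ToggleButton/UnlockButton pair added above acts as a latch: while the physical button is held, the reported toggle state flips exactly once and is then locked, and releasing the button re-arms it through UnlockButton. A condensed model of that behaviour (illustrative only, not the SDL-backed implementation):

// Condensed model of the toggle/lock behaviour of SDLJoystick + SDLButton.
struct ToggleLatch {
    bool toggled = false; // state reported to the emulated button
    bool locked = false;  // true while the physical button is held

    bool Update(bool physically_pressed) {
        if (physically_pressed) {
            if (!locked) { // first frame of the press: flip once and lock
                locked = true;
                toggled = !toggled;
            }
        } else {
            locked = false; // release re-arms the latch
        }
        return toggled;
    }
};
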
diff --git a/src/input_common/sdl/sdl_impl.h b/src/input_common/sdl/sdl_impl.h
index b77afcbd8..7a9ad6346 100644
--- a/src/input_common/sdl/sdl_impl.h
+++ b/src/input_common/sdl/sdl_impl.h
@@ -10,15 +10,7 @@
#include <thread>
#include <unordered_map>
-// Ignore -Wimplicit-fallthrough due to https://github.com/libsdl-org/SDL/issues/4307
-#ifdef __GNUC__
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
-#endif
#include <SDL.h>
-#ifdef __GNUC__
-#pragma GCC diagnostic pop
-#endif
#include "common/common_types.h"
#include "common/threadsafe_queue.h"
diff --git a/src/input_common/touch_from_button.cpp b/src/input_common/touch_from_button.cpp
index e94ba197b..7878a56d7 100644
--- a/src/input_common/touch_from_button.cpp
+++ b/src/input_common/touch_from_button.cpp
@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <algorithm>
#include "common/settings.h"
#include "core/frontend/framebuffer_layout.h"
#include "input_common/touch_from_button.h"
@@ -12,7 +13,7 @@ class TouchFromButtonDevice final : public Input::TouchDevice {
public:
TouchFromButtonDevice() {
const auto button_index =
- static_cast<std::size_t>(Settings::values.touch_from_button_map_index);
+ static_cast<u64>(Settings::values.touch_from_button_map_index.GetValue());
const auto& buttons = Settings::values.touch_from_button_maps[button_index].buttons;
for (const auto& config_entry : buttons) {
diff --git a/src/input_common/udp/client.cpp b/src/input_common/udp/client.cpp
index bc1dfab3d..9b0aec797 100644
--- a/src/input_common/udp/client.cpp
+++ b/src/input_common/udp/client.cpp
@@ -201,7 +201,7 @@ bool Client::DeviceConnected(std::size_t pad) const {
void Client::ReloadSockets() {
Reset();
- std::stringstream servers_ss(Settings::values.udp_input_servers);
+ std::stringstream servers_ss(static_cast<std::string>(Settings::values.udp_input_servers));
std::string server_token;
std::size_t client = 0;
while (std::getline(servers_ss, server_token, ',')) {
@@ -370,7 +370,7 @@ std::optional<std::size_t> Client::GetUnusedFingerID() const {
void Client::UpdateTouchInput(Response::TouchPad& touch_pad, std::size_t client, std::size_t id) {
// TODO: Use custom calibration per device
- const Common::ParamPackage touch_param(Settings::values.touch_device);
+ const Common::ParamPackage touch_param(Settings::values.touch_device.GetValue());
const u16 min_x = static_cast<u16>(touch_param.Get("min_x", 100));
const u16 min_y = static_cast<u16>(touch_param.Get("min_y", 50));
const u16 max_x = static_cast<u16>(touch_param.Get("max_x", 1800));
diff --git a/src/input_common/udp/protocol.h b/src/input_common/udp/protocol.h
index a3d276697..1bdc9209e 100644
--- a/src/input_common/udp/protocol.h
+++ b/src/input_common/udp/protocol.h
@@ -8,14 +8,7 @@
#include <optional>
#include <type_traits>
-#ifdef _MSC_VER
-#pragma warning(push)
-#pragma warning(disable : 4701)
-#endif
#include <boost/crc.hpp>
-#ifdef _MSC_VER
-#pragma warning(pop)
-#endif
#include "common/bit_field.h"
#include "common/swap.h"
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
new file mode 100644
index 000000000..b5b7e5e83
--- /dev/null
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -0,0 +1,268 @@
+add_library(shader_recompiler STATIC
+ backend/bindings.h
+ backend/glasm/emit_context.cpp
+ backend/glasm/emit_context.h
+ backend/glasm/emit_glasm.cpp
+ backend/glasm/emit_glasm.h
+ backend/glasm/emit_glasm_barriers.cpp
+ backend/glasm/emit_glasm_bitwise_conversion.cpp
+ backend/glasm/emit_glasm_composite.cpp
+ backend/glasm/emit_glasm_context_get_set.cpp
+ backend/glasm/emit_glasm_control_flow.cpp
+ backend/glasm/emit_glasm_convert.cpp
+ backend/glasm/emit_glasm_floating_point.cpp
+ backend/glasm/emit_glasm_image.cpp
+ backend/glasm/emit_glasm_instructions.h
+ backend/glasm/emit_glasm_integer.cpp
+ backend/glasm/emit_glasm_logical.cpp
+ backend/glasm/emit_glasm_memory.cpp
+ backend/glasm/emit_glasm_not_implemented.cpp
+ backend/glasm/emit_glasm_select.cpp
+ backend/glasm/emit_glasm_shared_memory.cpp
+ backend/glasm/emit_glasm_special.cpp
+ backend/glasm/emit_glasm_undefined.cpp
+ backend/glasm/emit_glasm_warp.cpp
+ backend/glasm/reg_alloc.cpp
+ backend/glasm/reg_alloc.h
+ backend/glsl/emit_context.cpp
+ backend/glsl/emit_context.h
+ backend/glsl/emit_glsl.cpp
+ backend/glsl/emit_glsl.h
+ backend/glsl/emit_glsl_atomic.cpp
+ backend/glsl/emit_glsl_barriers.cpp
+ backend/glsl/emit_glsl_bitwise_conversion.cpp
+ backend/glsl/emit_glsl_composite.cpp
+ backend/glsl/emit_glsl_context_get_set.cpp
+ backend/glsl/emit_glsl_control_flow.cpp
+ backend/glsl/emit_glsl_convert.cpp
+ backend/glsl/emit_glsl_floating_point.cpp
+ backend/glsl/emit_glsl_image.cpp
+ backend/glsl/emit_glsl_instructions.h
+ backend/glsl/emit_glsl_integer.cpp
+ backend/glsl/emit_glsl_logical.cpp
+ backend/glsl/emit_glsl_memory.cpp
+ backend/glsl/emit_glsl_not_implemented.cpp
+ backend/glsl/emit_glsl_select.cpp
+ backend/glsl/emit_glsl_shared_memory.cpp
+ backend/glsl/emit_glsl_special.cpp
+ backend/glsl/emit_glsl_undefined.cpp
+ backend/glsl/emit_glsl_warp.cpp
+ backend/glsl/var_alloc.cpp
+ backend/glsl/var_alloc.h
+ backend/spirv/emit_context.cpp
+ backend/spirv/emit_context.h
+ backend/spirv/emit_spirv.cpp
+ backend/spirv/emit_spirv.h
+ backend/spirv/emit_spirv_atomic.cpp
+ backend/spirv/emit_spirv_barriers.cpp
+ backend/spirv/emit_spirv_bitwise_conversion.cpp
+ backend/spirv/emit_spirv_composite.cpp
+ backend/spirv/emit_spirv_context_get_set.cpp
+ backend/spirv/emit_spirv_control_flow.cpp
+ backend/spirv/emit_spirv_convert.cpp
+ backend/spirv/emit_spirv_floating_point.cpp
+ backend/spirv/emit_spirv_image.cpp
+ backend/spirv/emit_spirv_image_atomic.cpp
+ backend/spirv/emit_spirv_instructions.h
+ backend/spirv/emit_spirv_integer.cpp
+ backend/spirv/emit_spirv_logical.cpp
+ backend/spirv/emit_spirv_memory.cpp
+ backend/spirv/emit_spirv_select.cpp
+ backend/spirv/emit_spirv_shared_memory.cpp
+ backend/spirv/emit_spirv_special.cpp
+ backend/spirv/emit_spirv_undefined.cpp
+ backend/spirv/emit_spirv_warp.cpp
+ environment.h
+ exception.h
+ frontend/ir/abstract_syntax_list.h
+ frontend/ir/attribute.cpp
+ frontend/ir/attribute.h
+ frontend/ir/basic_block.cpp
+ frontend/ir/basic_block.h
+ frontend/ir/breadth_first_search.h
+ frontend/ir/condition.cpp
+ frontend/ir/condition.h
+ frontend/ir/flow_test.cpp
+ frontend/ir/flow_test.h
+ frontend/ir/ir_emitter.cpp
+ frontend/ir/ir_emitter.h
+ frontend/ir/microinstruction.cpp
+ frontend/ir/modifiers.h
+ frontend/ir/opcodes.cpp
+ frontend/ir/opcodes.h
+ frontend/ir/opcodes.inc
+ frontend/ir/patch.cpp
+ frontend/ir/patch.h
+ frontend/ir/post_order.cpp
+ frontend/ir/post_order.h
+ frontend/ir/pred.h
+ frontend/ir/program.cpp
+ frontend/ir/program.h
+ frontend/ir/reg.h
+ frontend/ir/type.cpp
+ frontend/ir/type.h
+ frontend/ir/value.cpp
+ frontend/ir/value.h
+ frontend/maxwell/control_flow.cpp
+ frontend/maxwell/control_flow.h
+ frontend/maxwell/decode.cpp
+ frontend/maxwell/decode.h
+ frontend/maxwell/indirect_branch_table_track.cpp
+ frontend/maxwell/indirect_branch_table_track.h
+ frontend/maxwell/instruction.h
+ frontend/maxwell/location.h
+ frontend/maxwell/maxwell.inc
+ frontend/maxwell/opcodes.cpp
+ frontend/maxwell/opcodes.h
+ frontend/maxwell/structured_control_flow.cpp
+ frontend/maxwell/structured_control_flow.h
+ frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp
+ frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp
+ frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp
+ frontend/maxwell/translate/impl/barrier_operations.cpp
+ frontend/maxwell/translate/impl/bitfield_extract.cpp
+ frontend/maxwell/translate/impl/bitfield_insert.cpp
+ frontend/maxwell/translate/impl/branch_indirect.cpp
+ frontend/maxwell/translate/impl/common_encoding.h
+ frontend/maxwell/translate/impl/common_funcs.cpp
+ frontend/maxwell/translate/impl/common_funcs.h
+ frontend/maxwell/translate/impl/condition_code_set.cpp
+ frontend/maxwell/translate/impl/double_add.cpp
+ frontend/maxwell/translate/impl/double_compare_and_set.cpp
+ frontend/maxwell/translate/impl/double_fused_multiply_add.cpp
+ frontend/maxwell/translate/impl/double_min_max.cpp
+ frontend/maxwell/translate/impl/double_multiply.cpp
+ frontend/maxwell/translate/impl/double_set_predicate.cpp
+ frontend/maxwell/translate/impl/exit_program.cpp
+ frontend/maxwell/translate/impl/find_leading_one.cpp
+ frontend/maxwell/translate/impl/floating_point_add.cpp
+ frontend/maxwell/translate/impl/floating_point_compare.cpp
+ frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp
+ frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp
+ frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
+ frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
+ frontend/maxwell/translate/impl/floating_point_min_max.cpp
+ frontend/maxwell/translate/impl/floating_point_multi_function.cpp
+ frontend/maxwell/translate/impl/floating_point_multiply.cpp
+ frontend/maxwell/translate/impl/floating_point_range_reduction.cpp
+ frontend/maxwell/translate/impl/floating_point_set_predicate.cpp
+ frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp
+ frontend/maxwell/translate/impl/half_floating_point_add.cpp
+ frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp
+ frontend/maxwell/translate/impl/half_floating_point_helper.cpp
+ frontend/maxwell/translate/impl/half_floating_point_helper.h
+ frontend/maxwell/translate/impl/half_floating_point_multiply.cpp
+ frontend/maxwell/translate/impl/half_floating_point_set.cpp
+ frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp
+ frontend/maxwell/translate/impl/impl.cpp
+ frontend/maxwell/translate/impl/impl.h
+ frontend/maxwell/translate/impl/integer_add.cpp
+ frontend/maxwell/translate/impl/integer_add_three_input.cpp
+ frontend/maxwell/translate/impl/integer_compare.cpp
+ frontend/maxwell/translate/impl/integer_compare_and_set.cpp
+ frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp
+ frontend/maxwell/translate/impl/integer_funnel_shift.cpp
+ frontend/maxwell/translate/impl/integer_minimum_maximum.cpp
+ frontend/maxwell/translate/impl/integer_popcount.cpp
+ frontend/maxwell/translate/impl/integer_scaled_add.cpp
+ frontend/maxwell/translate/impl/integer_set_predicate.cpp
+ frontend/maxwell/translate/impl/integer_shift_left.cpp
+ frontend/maxwell/translate/impl/integer_shift_right.cpp
+ frontend/maxwell/translate/impl/integer_short_multiply_add.cpp
+ frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp
+ frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp
+ frontend/maxwell/translate/impl/load_constant.cpp
+ frontend/maxwell/translate/impl/load_constant.h
+ frontend/maxwell/translate/impl/load_effective_address.cpp
+ frontend/maxwell/translate/impl/load_store_attribute.cpp
+ frontend/maxwell/translate/impl/load_store_local_shared.cpp
+ frontend/maxwell/translate/impl/load_store_memory.cpp
+ frontend/maxwell/translate/impl/logic_operation.cpp
+ frontend/maxwell/translate/impl/logic_operation_three_input.cpp
+ frontend/maxwell/translate/impl/move_predicate_to_register.cpp
+ frontend/maxwell/translate/impl/move_register.cpp
+ frontend/maxwell/translate/impl/move_register_to_predicate.cpp
+ frontend/maxwell/translate/impl/move_special_register.cpp
+ frontend/maxwell/translate/impl/not_implemented.cpp
+ frontend/maxwell/translate/impl/output_geometry.cpp
+ frontend/maxwell/translate/impl/pixel_load.cpp
+ frontend/maxwell/translate/impl/predicate_set_predicate.cpp
+ frontend/maxwell/translate/impl/predicate_set_register.cpp
+ frontend/maxwell/translate/impl/select_source_with_predicate.cpp
+ frontend/maxwell/translate/impl/surface_atomic_operations.cpp
+ frontend/maxwell/translate/impl/surface_load_store.cpp
+ frontend/maxwell/translate/impl/texture_fetch.cpp
+ frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
+ frontend/maxwell/translate/impl/texture_gather.cpp
+ frontend/maxwell/translate/impl/texture_gather_swizzled.cpp
+ frontend/maxwell/translate/impl/texture_gradient.cpp
+ frontend/maxwell/translate/impl/texture_load.cpp
+ frontend/maxwell/translate/impl/texture_load_swizzled.cpp
+ frontend/maxwell/translate/impl/texture_mipmap_level.cpp
+ frontend/maxwell/translate/impl/texture_query.cpp
+ frontend/maxwell/translate/impl/video_helper.cpp
+ frontend/maxwell/translate/impl/video_helper.h
+ frontend/maxwell/translate/impl/video_minimum_maximum.cpp
+ frontend/maxwell/translate/impl/video_multiply_add.cpp
+ frontend/maxwell/translate/impl/video_set_predicate.cpp
+ frontend/maxwell/translate/impl/vote.cpp
+ frontend/maxwell/translate/impl/warp_shuffle.cpp
+ frontend/maxwell/translate/translate.cpp
+ frontend/maxwell/translate/translate.h
+ frontend/maxwell/translate_program.cpp
+ frontend/maxwell/translate_program.h
+ host_translate_info.h
+ ir_opt/collect_shader_info_pass.cpp
+ ir_opt/constant_propagation_pass.cpp
+ ir_opt/dead_code_elimination_pass.cpp
+ ir_opt/dual_vertex_pass.cpp
+ ir_opt/global_memory_to_storage_buffer_pass.cpp
+ ir_opt/identity_removal_pass.cpp
+ ir_opt/lower_fp16_to_fp32.cpp
+ ir_opt/lower_int64_to_int32.cpp
+ ir_opt/passes.h
+ ir_opt/ssa_rewrite_pass.cpp
+ ir_opt/texture_pass.cpp
+ ir_opt/verification_pass.cpp
+ object_pool.h
+ profile.h
+ program_header.h
+ runtime_info.h
+ shader_info.h
+ varying_state.h
+)
+
+target_link_libraries(shader_recompiler PUBLIC common fmt::fmt sirit)
+
+if (MSVC)
+ target_compile_options(shader_recompiler PRIVATE
+ /W4
+ /WX
+ /we4018 # 'expression' : signed/unsigned mismatch
+ /we4244 # 'argument' : conversion from 'type1' to 'type2', possible loss of data (floating-point)
+ /we4245 # 'conversion' : conversion from 'type1' to 'type2', signed/unsigned mismatch
+ /we4254 # 'operator': conversion from 'type1:field_bits' to 'type2:field_bits', possible loss of data
+ /we4267 # 'var' : conversion from 'size_t' to 'type', possible loss of data
+ /we4305 # 'context' : truncation from 'type1' to 'type2'
+ /we4800 # Implicit conversion from 'type' to bool. Possible information loss
+ /we4826 # Conversion from 'type1' to 'type2' is sign-extended. This may cause unexpected runtime behavior.
+ )
+else()
+ target_compile_options(shader_recompiler PRIVATE
+ -Werror
+ -Werror=conversion
+ -Werror=ignored-qualifiers
+ -Werror=implicit-fallthrough
+ -Werror=shadow
+ -Werror=sign-compare
+ $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-parameter>
+ $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-variable>
+ -Werror=unused-variable
+
+ # Bracket depth determines the maximum size of a fold expression in Clang since 9c9974c3ccb6.
+ # This in turn limits the size of a std::array.
+ $<$<CXX_COMPILER_ID:Clang>:-fbracket-depth=1024>
+ )
+endif()
+
+create_target_directory_groups(shader_recompiler)
diff --git a/src/shader_recompiler/backend/bindings.h b/src/shader_recompiler/backend/bindings.h
new file mode 100644
index 000000000..35503000c
--- /dev/null
+++ b/src/shader_recompiler/backend/bindings.h
@@ -0,0 +1,19 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+
+namespace Shader::Backend {
+
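+// Running counters holding the next free binding index for each resource type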
+struct Bindings {
+ u32 unified{};
+ u32 uniform_buffer{};
+ u32 storage_buffer{};
+ u32 texture{};
+ u32 image{};
+};
+
+} // namespace Shader::Backend
diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp
new file mode 100644
index 000000000..069c019ad
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_context.cpp
@@ -0,0 +1,154 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/bindings.h"
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/profile.h"
+#include "shader_recompiler/runtime_info.h"
+
+namespace Shader::Backend::GLASM {
+namespace {
+std::string_view InterpDecorator(Interpolation interp) {
+ switch (interp) {
+ case Interpolation::Smooth:
+ return "";
+ case Interpolation::Flat:
+ return "FLAT ";
+ case Interpolation::NoPerspective:
+ return "NOPERSPECTIVE ";
+ }
+ throw InvalidArgument("Invalid interpolation {}", interp);
+}
+
+bool IsInputArray(Stage stage) {
+ return stage == Stage::Geometry || stage == Stage::TessellationControl ||
+ stage == Stage::TessellationEval;
+}
+} // Anonymous namespace
+
+EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_,
+ const RuntimeInfo& runtime_info_)
+ : info{program.info}, profile{profile_}, runtime_info{runtime_info_} {
+ // FIXME: Temporary partial implementation
+ u32 cbuf_index{};
+ for (const auto& desc : info.constant_buffer_descriptors) {
+ if (desc.count != 1) {
+ throw NotImplementedException("Constant buffer descriptor array");
+ }
+ Add("CBUFFER c{}[]={{program.buffer[{}]}};", desc.index, cbuf_index);
+ ++cbuf_index;
+ }
+ u32 ssbo_index{};
+ for (const auto& desc : info.storage_buffers_descriptors) {
+ if (desc.count != 1) {
+ throw NotImplementedException("Storage buffer descriptor array");
+ }
+ if (runtime_info.glasm_use_storage_buffers) {
+ Add("STORAGE ssbo{}[]={{program.storage[{}]}};", ssbo_index, bindings.storage_buffer);
+ ++bindings.storage_buffer;
+ ++ssbo_index;
+ }
+ }
+ if (!runtime_info.glasm_use_storage_buffers) {
+ if (const size_t num = info.storage_buffers_descriptors.size(); num > 0) {
+ Add("PARAM c[{}]={{program.local[0..{}]}};", num, num - 1);
+ }
+ }
+ stage = program.stage;
+ switch (program.stage) {
+ case Stage::VertexA:
+ case Stage::VertexB:
+ stage_name = "vertex";
+ attrib_name = "vertex";
+ break;
+ case Stage::TessellationControl:
+ case Stage::TessellationEval:
+ stage_name = "primitive";
+ attrib_name = "primitive";
+ break;
+ case Stage::Geometry:
+ stage_name = "primitive";
+ attrib_name = "vertex";
+ break;
+ case Stage::Fragment:
+ stage_name = "fragment";
+ attrib_name = "fragment";
+ break;
+ case Stage::Compute:
+ stage_name = "invocation";
+ break;
+ }
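+ // Generic inputs are read from fragment.attrib in fragment shaders and from vertex.attrib elsewhere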
+ const std::string_view attr_stage{stage == Stage::Fragment ? "fragment" : "vertex"};
+ const VaryingState loads{info.loads.mask | info.passthrough.mask};
+ for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
+ if (loads.Generic(index)) {
+ Add("{}ATTRIB in_attr{}[]={{{}.attrib[{}..{}]}};",
+ InterpDecorator(info.interpolation[index]), index, attr_stage, index, index);
+ }
+ }
+ if (IsInputArray(stage) && loads.AnyComponent(IR::Attribute::PositionX)) {
+ Add("ATTRIB vertex_position=vertex.position;");
+ }
+ if (info.uses_invocation_id) {
+ Add("ATTRIB primitive_invocation=primitive.invocation;");
+ }
+ if (info.stores_tess_level_outer) {
+ Add("OUTPUT result_patch_tessouter[]={{result.patch.tessouter[0..3]}};");
+ }
+ if (info.stores_tess_level_inner) {
+ Add("OUTPUT result_patch_tessinner[]={{result.patch.tessinner[0..1]}};");
+ }
+ if (info.stores.ClipDistances()) {
+ Add("OUTPUT result_clip[]={{result.clip[0..7]}};");
+ }
+ for (size_t index = 0; index < info.uses_patches.size(); ++index) {
+ if (!info.uses_patches[index]) {
+ continue;
+ }
+ if (stage == Stage::TessellationControl) {
+ Add("OUTPUT result_patch_attrib{}[]={{result.patch.attrib[{}..{}]}};"
+ "ATTRIB primitive_out_patch_attrib{}[]={{primitive.out.patch.attrib[{}..{}]}};",
+ index, index, index, index, index, index);
+ } else {
+ Add("ATTRIB primitive_patch_attrib{}[]={{primitive.patch.attrib[{}..{}]}};", index,
+ index, index);
+ }
+ }
+ if (stage == Stage::Fragment) {
+ Add("OUTPUT frag_color0=result.color;");
+ for (size_t index = 1; index < info.stores_frag_color.size(); ++index) {
+ Add("OUTPUT frag_color{}=result.color[{}];", index, index);
+ }
+ }
+ for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
+ if (info.stores.Generic(index)) {
+ Add("OUTPUT out_attr{}[]={{result.attrib[{}..{}]}};", index, index, index);
+ }
+ }
+ image_buffer_bindings.reserve(info.image_buffer_descriptors.size());
+ for (const auto& desc : info.image_buffer_descriptors) {
+ image_buffer_bindings.push_back(bindings.image);
+ bindings.image += desc.count;
+ }
+ image_bindings.reserve(info.image_descriptors.size());
+ for (const auto& desc : info.image_descriptors) {
+ image_bindings.push_back(bindings.image);
+ bindings.image += desc.count;
+ }
+ texture_buffer_bindings.reserve(info.texture_buffer_descriptors.size());
+ for (const auto& desc : info.texture_buffer_descriptors) {
+ texture_buffer_bindings.push_back(bindings.texture);
+ bindings.texture += desc.count;
+ }
+ texture_bindings.reserve(info.texture_descriptors.size());
+ for (const auto& desc : info.texture_descriptors) {
+ texture_bindings.push_back(bindings.texture);
+ bindings.texture += desc.count;
+ }
+}
+
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_context.h b/src/shader_recompiler/backend/glasm/emit_context.h
new file mode 100644
index 000000000..8433e5c00
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_context.h
@@ -0,0 +1,80 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <string>
+#include <utility>
+#include <vector>
+
+#include <fmt/format.h>
+
+#include "shader_recompiler/backend/glasm/reg_alloc.h"
+#include "shader_recompiler/stage.h"
+
+namespace Shader {
+struct Info;
+struct Profile;
+struct RuntimeInfo;
+} // namespace Shader
+
+namespace Shader::Backend {
+struct Bindings;
+}
+
+namespace Shader::IR {
+class Inst;
+struct Program;
+} // namespace Shader::IR
+
+namespace Shader::Backend::GLASM {
+
+class EmitContext {
+public:
+ explicit EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_,
+ const RuntimeInfo& runtime_info_);
+
+ template <typename... Args>
+ void Add(const char* format_str, IR::Inst& inst, Args&&... args) {
+ code += fmt::format(fmt::runtime(format_str), reg_alloc.Define(inst),
+ std::forward<Args>(args)...);
+ // TODO: Remove this
+ code += '\n';
+ }
+
+ template <typename... Args>
+ void LongAdd(const char* format_str, IR::Inst& inst, Args&&... args) {
+ code += fmt::format(fmt::runtime(format_str), reg_alloc.LongDefine(inst),
+ std::forward<Args>(args)...);
+ // TODO: Remove this
+ code += '\n';
+ }
+
+ template <typename... Args>
+ void Add(const char* format_str, Args&&... args) {
+ code += fmt::format(fmt::runtime(format_str), std::forward<Args>(args)...);
+ // TODO: Remove this
+ code += '\n';
+ }
+
+ std::string code;
+ RegAlloc reg_alloc{};
+ const Info& info;
+ const Profile& profile;
+ const RuntimeInfo& runtime_info;
+
+ std::vector<u32> texture_buffer_bindings;
+ std::vector<u32> image_buffer_bindings;
+ std::vector<u32> texture_bindings;
+ std::vector<u32> image_bindings;
+
+ Stage stage{};
+ std::string_view stage_name = "invalid";
+ std::string_view attrib_name = "invalid";
+
+ u32 num_safety_loop_vars{};
+ bool uses_y_direction{};
+};
+
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
new file mode 100644
index 000000000..4ce1c4f54
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
@@ -0,0 +1,492 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <string>
+#include <tuple>
+
+#include "common/div_ceil.h"
+#include "common/settings.h"
+#include "shader_recompiler/backend/bindings.h"
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/emit_glasm.h"
+#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/profile.h"
+#include "shader_recompiler/runtime_info.h"
+
+namespace Shader::Backend::GLASM {
+namespace {
+template <class Func>
+struct FuncTraits {};
+
+template <class ReturnType_, class... Args>
+struct FuncTraits<ReturnType_ (*)(Args...)> {
+ using ReturnType = ReturnType_;
+
+ static constexpr size_t NUM_ARGS = sizeof...(Args);
+
+ template <size_t I>
+ using ArgType = std::tuple_element_t<I, std::tuple<Args...>>;
+};
+
+template <typename T>
+struct Identity {
+ Identity(T data_) : data{data_} {}
+
+ T Extract() {
+ return data;
+ }
+
+ T data;
+};
+
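+// Wraps an IR value as a register operand, copying immediate values into a freshly allocated register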
+template <bool scalar>
+class RegWrapper {
+public:
+ RegWrapper(EmitContext& ctx, const IR::Value& ir_value) : reg_alloc{ctx.reg_alloc} {
+ const Value value{reg_alloc.Peek(ir_value)};
+ if (value.type == Type::Register) {
+ inst = ir_value.InstRecursive();
+ reg = Register{value};
+ } else {
+ reg = value.type == Type::U64 ? reg_alloc.AllocLongReg() : reg_alloc.AllocReg();
+ }
+ switch (value.type) {
+ case Type::Register:
+ case Type::Void:
+ break;
+ case Type::U32:
+ ctx.Add("MOV.U {}.x,{};", reg, value.imm_u32);
+ break;
+ case Type::U64:
+ ctx.Add("MOV.U64 {}.x,{};", reg, value.imm_u64);
+ break;
+ }
+ }
+
+ auto Extract() {
+ if (inst) {
+ reg_alloc.Unref(*inst);
+ } else {
+ reg_alloc.FreeReg(reg);
+ }
+ return std::conditional_t<scalar, ScalarRegister, Register>{Value{reg}};
+ }
+
+private:
+ RegAlloc& reg_alloc;
+ IR::Inst* inst{};
+ Register reg{};
+};
+
+template <typename ArgType>
+class ValueWrapper {
+public:
+ ValueWrapper(EmitContext& ctx, const IR::Value& ir_value_)
+ : reg_alloc{ctx.reg_alloc}, ir_value{ir_value_}, value{reg_alloc.Peek(ir_value)} {}
+
+ ArgType Extract() {
+ if (!ir_value.IsImmediate()) {
+ reg_alloc.Unref(*ir_value.InstRecursive());
+ }
+ return value;
+ }
+
+private:
+ RegAlloc& reg_alloc;
+ const IR::Value& ir_value;
+ ArgType value;
+};
+
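+// Wraps an IR value in the helper that yields the argument type expected by the Emit* function,
+// materializing immediates into registers when a register operand is required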
+template <typename ArgType>
+auto Arg(EmitContext& ctx, const IR::Value& arg) {
+ if constexpr (std::is_same_v<ArgType, Register>) {
+ return RegWrapper<false>{ctx, arg};
+ } else if constexpr (std::is_same_v<ArgType, ScalarRegister>) {
+ return RegWrapper<true>{ctx, arg};
+ } else if constexpr (std::is_base_of_v<Value, ArgType>) {
+ return ValueWrapper<ArgType>{ctx, arg};
+ } else if constexpr (std::is_same_v<ArgType, const IR::Value&>) {
+ return Identity<const IR::Value&>{arg};
+ } else if constexpr (std::is_same_v<ArgType, u32>) {
+ return Identity{arg.U32()};
+ } else if constexpr (std::is_same_v<ArgType, IR::Attribute>) {
+ return Identity{arg.Attribute()};
+ } else if constexpr (std::is_same_v<ArgType, IR::Patch>) {
+ return Identity{arg.Patch()};
+ } else if constexpr (std::is_same_v<ArgType, IR::Reg>) {
+ return Identity{arg.Reg()};
+ }
+}
+
+template <auto func, bool is_first_arg_inst>
+struct InvokeCall {
+ template <typename... Args>
+ InvokeCall(EmitContext& ctx, IR::Inst* inst, Args&&... args) {
+ if constexpr (is_first_arg_inst) {
+ func(ctx, *inst, args.Extract()...);
+ } else {
+ func(ctx, args.Extract()...);
+ }
+ }
+};
+
+template <auto func, bool is_first_arg_inst, size_t... I>
+void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) {
+ using Traits = FuncTraits<decltype(func)>;
+ if constexpr (is_first_arg_inst) {
+ InvokeCall<func, is_first_arg_inst>{
+ ctx, inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...};
+ } else {
+ InvokeCall<func, is_first_arg_inst>{
+ ctx, inst, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...};
+ }
+}
+
+template <auto func>
+void Invoke(EmitContext& ctx, IR::Inst* inst) {
+ using Traits = FuncTraits<decltype(func)>;
+ static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments");
+ if constexpr (Traits::NUM_ARGS == 1) {
+ Invoke<func, false>(ctx, inst, std::make_index_sequence<0>{});
+ } else {
+ using FirstArgType = typename Traits::template ArgType<1>;
+ static constexpr bool is_first_arg_inst = std::is_same_v<FirstArgType, IR::Inst&>;
+ using Indices = std::make_index_sequence<Traits::NUM_ARGS - (is_first_arg_inst ? 2 : 1)>;
+ Invoke<func, is_first_arg_inst>(ctx, inst, Indices{});
+ }
+}
+
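+// Dispatches the instruction to the Emit* function matching its opcode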
+void EmitInst(EmitContext& ctx, IR::Inst* inst) {
+ switch (inst->GetOpcode()) {
+#define OPCODE(name, result_type, ...) \
+ case IR::Opcode::name: \
+ return Invoke<&Emit##name>(ctx, inst);
+#include "shader_recompiler/frontend/ir/opcodes.inc"
+#undef OPCODE
+ }
+ throw LogicError("Invalid opcode {}", inst->GetOpcode());
+}
+
+bool IsReference(IR::Inst& inst) {
+ return inst.GetOpcode() == IR::Opcode::Reference;
+}
+
+void PrecolorInst(IR::Inst& phi) {
+ // Insert phi moves before references to avoid overwriting other phis
+ const size_t num_args{phi.NumArgs()};
+ for (size_t i = 0; i < num_args; ++i) {
+ IR::Block& phi_block{*phi.PhiBlock(i)};
+ auto it{std::find_if_not(phi_block.rbegin(), phi_block.rend(), IsReference).base()};
+ IR::IREmitter ir{phi_block, it};
+ const IR::Value arg{phi.Arg(i)};
+ if (arg.IsImmediate()) {
+ ir.PhiMove(phi, arg);
+ } else {
+ ir.PhiMove(phi, IR::Value{&RegAlloc::AliasInst(*arg.Inst())});
+ }
+ }
+ for (size_t i = 0; i < num_args; ++i) {
+ IR::IREmitter{*phi.PhiBlock(i)}.Reference(IR::Value{&phi});
+ }
+}
+
+void Precolor(const IR::Program& program) {
+ for (IR::Block* const block : program.blocks) {
+ for (IR::Inst& phi : block->Instructions()) {
+ if (!IR::IsPhi(phi)) {
+ break;
+ }
+ PrecolorInst(phi);
+ }
+ }
+}
+
+void EmitCode(EmitContext& ctx, const IR::Program& program) {
+ const auto eval{
+ [&](const IR::U1& cond) { return ScalarS32{ctx.reg_alloc.Consume(IR::Value{cond})}; }};
+ for (const IR::AbstractSyntaxNode& node : program.syntax_list) {
+ switch (node.type) {
+ case IR::AbstractSyntaxNode::Type::Block:
+ for (IR::Inst& inst : node.data.block->Instructions()) {
+ EmitInst(ctx, &inst);
+ }
+ break;
+ case IR::AbstractSyntaxNode::Type::If:
+ ctx.Add("MOV.S.CC RC,{};"
+ "IF NE.x;",
+ eval(node.data.if_node.cond));
+ break;
+ case IR::AbstractSyntaxNode::Type::EndIf:
+ ctx.Add("ENDIF;");
+ break;
+ case IR::AbstractSyntaxNode::Type::Loop:
+ ctx.Add("REP;");
+ break;
+ case IR::AbstractSyntaxNode::Type::Repeat:
+ if (!Settings::values.disable_shader_loop_safety_checks) {
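+ // Decrement this loop's safety counter and break once it goes negative, guarding
+ // against infinite loops; the counters are initialized to 0x2000 in the header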
+ const u32 loop_index{ctx.num_safety_loop_vars++};
+ const u32 vector_index{loop_index / 4};
+ const char component{"xyzw"[loop_index % 4]};
+ ctx.Add("SUB.S.CC loop{}.{},loop{}.{},1;"
+ "BRK(LT.{});",
+ vector_index, component, vector_index, component, component);
+ }
+ if (node.data.repeat.cond.IsImmediate()) {
+ if (node.data.repeat.cond.U1()) {
+ ctx.Add("ENDREP;");
+ } else {
+ ctx.Add("BRK;"
+ "ENDREP;");
+ }
+ } else {
+ ctx.Add("MOV.S.CC RC,{};"
+ "BRK(EQ.x);"
+ "ENDREP;",
+ eval(node.data.repeat.cond));
+ }
+ break;
+ case IR::AbstractSyntaxNode::Type::Break:
+ if (node.data.break_node.cond.IsImmediate()) {
+ if (node.data.break_node.cond.U1()) {
+ ctx.Add("BRK;");
+ }
+ } else {
+ ctx.Add("MOV.S.CC RC,{};"
+ "BRK (NE.x);",
+ eval(node.data.break_node.cond));
+ }
+ break;
+ case IR::AbstractSyntaxNode::Type::Return:
+ case IR::AbstractSyntaxNode::Type::Unreachable:
+ ctx.Add("RET;");
+ break;
+ }
+ }
+ if (!ctx.reg_alloc.IsEmpty()) {
+ LOG_WARNING(Shader_GLASM, "Register leak after generating code");
+ }
+}
+
+void SetupOptions(const IR::Program& program, const Profile& profile,
+ const RuntimeInfo& runtime_info, std::string& header) {
+ const Info& info{program.info};
+ const Stage stage{program.stage};
+
+ // TODO: Track the shared atomic ops
+ header += "OPTION NV_internal;"
+ "OPTION NV_shader_storage_buffer;"
+ "OPTION NV_gpu_program_fp64;";
+ if (info.uses_int64_bit_atomics) {
+ header += "OPTION NV_shader_atomic_int64;";
+ }
+ if (info.uses_atomic_f32_add) {
+ header += "OPTION NV_shader_atomic_float;";
+ }
+ if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) {
+ header += "OPTION NV_shader_atomic_fp16_vector;";
+ }
+ if (info.uses_subgroup_invocation_id || info.uses_subgroup_mask || info.uses_subgroup_vote ||
+ info.uses_fswzadd) {
+ header += "OPTION NV_shader_thread_group;";
+ }
+ if (info.uses_subgroup_shuffles) {
+ header += "OPTION NV_shader_thread_shuffle;";
+ }
+ if (info.uses_sparse_residency) {
+ header += "OPTION EXT_sparse_texture2;";
+ }
+ const bool stores_viewport_layer{info.stores[IR::Attribute::ViewportIndex] ||
+ info.stores[IR::Attribute::Layer]};
+ if ((stage != Stage::Geometry && stores_viewport_layer) ||
+ info.stores[IR::Attribute::ViewportMask]) {
+ if (profile.support_viewport_index_layer_non_geometry) {
+ header += "OPTION NV_viewport_array2;";
+ }
+ }
+ if (program.is_geometry_passthrough && profile.support_geometry_shader_passthrough) {
+ header += "OPTION NV_geometry_shader_passthrough;";
+ }
+ if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) {
+ header += "OPTION EXT_shader_image_load_formatted;";
+ }
+ if (profile.support_derivative_control) {
+ header += "OPTION ARB_derivative_control;";
+ }
+ if (stage == Stage::Fragment && runtime_info.force_early_z != 0) {
+ header += "OPTION NV_early_fragment_tests;";
+ }
+ if (stage == Stage::Fragment) {
+ header += "OPTION ARB_draw_buffers;";
+ }
+}
+
+std::string_view StageHeader(Stage stage) {
+ switch (stage) {
+ case Stage::VertexA:
+ case Stage::VertexB:
+ return "!!NVvp5.0\n";
+ case Stage::TessellationControl:
+ return "!!NVtcp5.0\n";
+ case Stage::TessellationEval:
+ return "!!NVtep5.0\n";
+ case Stage::Geometry:
+ return "!!NVgp5.0\n";
+ case Stage::Fragment:
+ return "!!NVfp5.0\n";
+ case Stage::Compute:
+ return "!!NVcp5.0\n";
+ }
+ throw InvalidArgument("Invalid stage {}", stage);
+}
+
+std::string_view InputPrimitive(InputTopology topology) {
+ switch (topology) {
+ case InputTopology::Points:
+ return "POINTS";
+ case InputTopology::Lines:
+ return "LINES";
+ case InputTopology::LinesAdjacency:
+ return "LINES_ADJACENCY";
+ case InputTopology::Triangles:
+ return "TRIANGLES";
+ case InputTopology::TrianglesAdjacency:
+ return "TRIANGLES_ADJACENCY";
+ }
+ throw InvalidArgument("Invalid input topology {}", topology);
+}
+
+std::string_view OutputPrimitive(OutputTopology topology) {
+ switch (topology) {
+ case OutputTopology::PointList:
+ return "POINTS";
+ case OutputTopology::LineStrip:
+ return "LINE_STRIP";
+ case OutputTopology::TriangleStrip:
+ return "TRIANGLE_STRIP";
+ }
+ throw InvalidArgument("Invalid output topology {}", topology);
+}
+
+std::string_view GetTessMode(TessPrimitive primitive) {
+ switch (primitive) {
+ case TessPrimitive::Triangles:
+ return "TRIANGLES";
+ case TessPrimitive::Quads:
+ return "QUADS";
+ case TessPrimitive::Isolines:
+ return "ISOLINES";
+ }
+ throw InvalidArgument("Invalid tessellation primitive {}", primitive);
+}
+
+std::string_view GetTessSpacing(TessSpacing spacing) {
+ switch (spacing) {
+ case TessSpacing::Equal:
+ return "EQUAL";
+ case TessSpacing::FractionalOdd:
+ return "FRACTIONAL_ODD";
+ case TessSpacing::FractionalEven:
+ return "FRACTIONAL_EVEN";
+ }
+ throw InvalidArgument("Invalid tessellation spacing {}", spacing);
+}
+} // Anonymous namespace
+
+std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, IR::Program& program,
+ Bindings& bindings) {
+ EmitContext ctx{program, bindings, profile, runtime_info};
+ Precolor(program);
+ EmitCode(ctx, program);
+ std::string header{StageHeader(program.stage)};
+ SetupOptions(program, profile, runtime_info, header);
+ switch (program.stage) {
+ case Stage::TessellationControl:
+ header += fmt::format("VERTICES_OUT {};", program.invocations);
+ break;
+ case Stage::TessellationEval:
+ header += fmt::format("TESS_MODE {};"
+ "TESS_SPACING {};"
+ "TESS_VERTEX_ORDER {};",
+ GetTessMode(runtime_info.tess_primitive),
+ GetTessSpacing(runtime_info.tess_spacing),
+ runtime_info.tess_clockwise ? "CW" : "CCW");
+ break;
+ case Stage::Geometry:
+ header += fmt::format("PRIMITIVE_IN {};", InputPrimitive(runtime_info.input_topology));
+ if (program.is_geometry_passthrough) {
+ if (profile.support_geometry_shader_passthrough) {
+ for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
+ if (program.info.passthrough.Generic(index)) {
+ header += fmt::format("PASSTHROUGH result.attrib[{}];", index);
+ }
+ }
+ if (program.info.passthrough.AnyComponent(IR::Attribute::PositionX)) {
+ header += "PASSTHROUGH result.position;";
+ }
+ } else {
+ LOG_WARNING(Shader_GLASM, "Passthrough geometry program used but not supported");
+ }
+ } else {
+ header +=
+ fmt::format("VERTICES_OUT {};"
+ "PRIMITIVE_OUT {};",
+ program.output_vertices, OutputPrimitive(program.output_topology));
+ }
+ break;
+ case Stage::Compute:
+ header += fmt::format("GROUP_SIZE {} {} {};", program.workgroup_size[0],
+ program.workgroup_size[1], program.workgroup_size[2]);
+ break;
+ default:
+ break;
+ }
+ if (program.shared_memory_size > 0) {
+ header += fmt::format("SHARED_MEMORY {};", program.shared_memory_size);
+ header += fmt::format("SHARED shared_mem[]={{program.sharedmem}};");
+ }
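+ // Declare the temporary registers, local memory, and helper variables used by the generated code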
+ header += "TEMP ";
+ for (size_t index = 0; index < ctx.reg_alloc.NumUsedRegisters(); ++index) {
+ header += fmt::format("R{},", index);
+ }
+ if (program.local_memory_size > 0) {
+ header += fmt::format("lmem[{}],", program.local_memory_size);
+ }
+ if (program.info.uses_fswzadd) {
+ header += "FSWZA[4],FSWZB[4],";
+ }
+ const u32 num_safety_loop_vectors{Common::DivCeil(ctx.num_safety_loop_vars, 4u)};
+ for (u32 index = 0; index < num_safety_loop_vectors; ++index) {
+ header += fmt::format("loop{},", index);
+ }
+ header += "RC;"
+ "LONG TEMP ";
+ for (size_t index = 0; index < ctx.reg_alloc.NumUsedLongRegisters(); ++index) {
+ header += fmt::format("D{},", index);
+ }
+ header += "DC;";
+ if (program.info.uses_fswzadd) {
+ header += "MOV.F FSWZA[0],-1;"
+ "MOV.F FSWZA[1],1;"
+ "MOV.F FSWZA[2],-1;"
+ "MOV.F FSWZA[3],0;"
+ "MOV.F FSWZB[0],-1;"
+ "MOV.F FSWZB[1],-1;"
+ "MOV.F FSWZB[2],1;"
+ "MOV.F FSWZB[3],-1;";
+ }
+ for (u32 index = 0; index < num_safety_loop_vectors; ++index) {
+ header += fmt::format("MOV.S loop{},{{0x2000,0x2000,0x2000,0x2000}};", index);
+ }
+ if (ctx.uses_y_direction) {
+ header += "PARAM y_direction[1]={state.material.front.ambient};";
+ }
+ ctx.code.insert(0, header);
+ ctx.code += "END";
+ return ctx.code;
+}
+
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.h b/src/shader_recompiler/backend/glasm/emit_glasm.h
new file mode 100644
index 000000000..bcb55f062
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm.h
@@ -0,0 +1,25 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <string>
+
+#include "shader_recompiler/backend/bindings.h"
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/profile.h"
+#include "shader_recompiler/runtime_info.h"
+
+namespace Shader::Backend::GLASM {
+
+[[nodiscard]] std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info,
+ IR::Program& program, Bindings& bindings);
+
+[[nodiscard]] inline std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info,
+ IR::Program& program) {
+ Bindings binding;
+ return EmitGLASM(profile, runtime_info, program, binding);
+}
+
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_barriers.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_barriers.cpp
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_barriers.cpp
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp
new file mode 100644
index 000000000..9201ccd39
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp
@@ -0,0 +1,91 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLASM {
+
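+// Reuses the register of the aliased value for this instruction instead of emitting a copy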
+static void Alias(IR::Inst& inst, const IR::Value& value) {
+ if (value.IsImmediate()) {
+ return;
+ }
+ IR::Inst& value_inst{RegAlloc::AliasInst(*value.Inst())};
+ value_inst.DestructiveAddUsage(inst.UseCount());
+ value_inst.DestructiveRemoveUsage();
+ inst.SetDefinition(value_inst.Definition<Id>());
+}
+
+void EmitIdentity(EmitContext&, IR::Inst& inst, const IR::Value& value) {
+ Alias(inst, value);
+}
+
+void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) {
+ // Fake one usage to get a real register out of the condition
+ inst.DestructiveAddUsage(1);
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ const ScalarS32 input{ctx.reg_alloc.Consume(value)};
+ if (ret != input) {
+ ctx.Add("MOV.S {},{};", ret, input);
+ }
+}
+
+void EmitBitCastU16F16(EmitContext&, IR::Inst& inst, const IR::Value& value) {
+ Alias(inst, value);
+}
+
+void EmitBitCastU32F32(EmitContext&, IR::Inst& inst, const IR::Value& value) {
+ Alias(inst, value);
+}
+
+void EmitBitCastU64F64(EmitContext&, IR::Inst& inst, const IR::Value& value) {
+ Alias(inst, value);
+}
+
+void EmitBitCastF16U16(EmitContext&, IR::Inst& inst, const IR::Value& value) {
+ Alias(inst, value);
+}
+
+void EmitBitCastF32U32(EmitContext&, IR::Inst& inst, const IR::Value& value) {
+ Alias(inst, value);
+}
+
+void EmitBitCastF64U64(EmitContext&, IR::Inst& inst, const IR::Value& value) {
+ Alias(inst, value);
+}
+
+void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value) {
+ ctx.LongAdd("PK64.U {}.x,{};", inst, value);
+}
+
+void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value) {
+ ctx.Add("UP64.U {}.xy,{}.x;", inst, value);
+}
+
+void EmitPackFloat2x16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitUnpackFloat2x16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value) {
+ ctx.Add("PK2H {}.x,{};", inst, value);
+}
+
+void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value) {
+ ctx.Add("UP2H {}.xy,{}.x;", inst, value);
+}
+
+void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value) {
+ ctx.LongAdd("PK64 {}.x,{};", inst, value);
+}
+
+void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value) {
+ ctx.Add("UP64 {}.xy,{}.x;", inst, value);
+}
+
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp
new file mode 100644
index 000000000..bff0b7c1c
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp
@@ -0,0 +1,244 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLASM {
+namespace {
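+// Constructs a vector value: immediate elements are packed into a single vector MOV, then any
+// register elements are moved into their components one by one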
+template <auto read_imm, char type, typename... Values>
+void CompositeConstruct(EmitContext& ctx, IR::Inst& inst, Values&&... elements) {
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ if (std::ranges::any_of(std::array{elements...},
+ [](const IR::Value& value) { return value.IsImmediate(); })) {
+ using Type = std::invoke_result_t<decltype(read_imm), IR::Value>;
+ const std::array<Type, 4> values{(elements.IsImmediate() ? (elements.*read_imm)() : 0)...};
+ ctx.Add("MOV.{} {},{{{},{},{},{}}};", type, ret, fmt::to_string(values[0]),
+ fmt::to_string(values[1]), fmt::to_string(values[2]), fmt::to_string(values[3]));
+ }
+ size_t index{};
+ for (const IR::Value& element : {elements...}) {
+ if (!element.IsImmediate()) {
+ const ScalarU32 value{ctx.reg_alloc.Consume(element)};
+ ctx.Add("MOV.{} {}.{},{};", type, ret, "xyzw"[index], value);
+ }
+ ++index;
+ }
+}
+
+void CompositeExtract(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index, char type) {
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ if (ret == composite && index == 0) {
+ // No need to do anything here; the source and destination are the same register
+ return;
+ }
+ ctx.Add("MOV.{} {}.x,{}.{};", type, ret, composite, "xyzw"[index]);
+}
+
+template <typename ObjectType>
+void CompositeInsert(EmitContext& ctx, IR::Inst& inst, Register composite, ObjectType object,
+ u32 index, char type) {
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ const char swizzle{"xyzw"[index]};
+ if (ret != composite && ret == object) {
+ // The object is aliased with the return value, so we have to insert it through a temporary
+ ctx.Add("MOV.{} RC,{};"
+ "MOV.{} RC.{},{};"
+ "MOV.{} {},RC;",
+ type, composite, type, swizzle, object, type, ret);
+ } else if (ret != composite) {
+ // The input composite is not aliased with the return value, so it has to be copied
+ // beforehand. The insert object is not aliased with the return value either, so there is
+ // nothing else to worry about
+ ctx.Add("MOV.{} {},{};"
+ "MOV.{} {}.{},{};",
+ type, ret, composite, type, ret, swizzle, object);
+ } else {
+ // The return value is aliased with the composite, so we can just insert the object; it
+ // doesn't matter whether the object itself is aliased
+ ctx.Add("MOV.{} {}.{},{};", type, ret, swizzle, object);
+ }
+}
+} // Anonymous namespace
+
+void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
+ const IR::Value& e2) {
+ CompositeConstruct<&IR::Value::U32, 'U'>(ctx, inst, e1, e2);
+}
+
+void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
+ const IR::Value& e2, const IR::Value& e3) {
+ CompositeConstruct<&IR::Value::U32, 'U'>(ctx, inst, e1, e2, e3);
+}
+
+void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
+ const IR::Value& e2, const IR::Value& e3, const IR::Value& e4) {
+ CompositeConstruct<&IR::Value::U32, 'U'>(ctx, inst, e1, e2, e3, e4);
+}
+
+void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
+ CompositeExtract(ctx, inst, composite, index, 'U');
+}
+
+void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
+ CompositeExtract(ctx, inst, composite, index, 'U');
+}
+
+void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
+ CompositeExtract(ctx, inst, composite, index, 'U');
+}
+
+void EmitCompositeInsertU32x2([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] Register composite,
+ [[maybe_unused]] ScalarU32 object, [[maybe_unused]] u32 index) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeInsertU32x3([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] Register composite,
+ [[maybe_unused]] ScalarU32 object, [[maybe_unused]] u32 index) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeInsertU32x4([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] Register composite,
+ [[maybe_unused]] ScalarU32 object, [[maybe_unused]] u32 index) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeConstructF16x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register e1,
+ [[maybe_unused]] Register e2) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeConstructF16x3([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register e1,
+ [[maybe_unused]] Register e2, [[maybe_unused]] Register e3) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeConstructF16x4([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register e1,
+ [[maybe_unused]] Register e2, [[maybe_unused]] Register e3,
+ [[maybe_unused]] Register e4) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeExtractF16x2([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] Register composite, [[maybe_unused]] u32 index) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeExtractF16x3([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] Register composite, [[maybe_unused]] u32 index) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeExtractF16x4([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] Register composite, [[maybe_unused]] u32 index) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeInsertF16x2([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] Register composite, [[maybe_unused]] Register object,
+ [[maybe_unused]] u32 index) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeInsertF16x3([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] Register composite, [[maybe_unused]] Register object,
+ [[maybe_unused]] u32 index) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeInsertF16x4([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] Register composite, [[maybe_unused]] Register object,
+ [[maybe_unused]] u32 index) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
+ const IR::Value& e2) {
+ CompositeConstruct<&IR::Value::F32, 'F'>(ctx, inst, e1, e2);
+}
+
+void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
+ const IR::Value& e2, const IR::Value& e3) {
+ CompositeConstruct<&IR::Value::F32, 'F'>(ctx, inst, e1, e2, e3);
+}
+
+void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
+ const IR::Value& e2, const IR::Value& e3, const IR::Value& e4) {
+ CompositeConstruct<&IR::Value::F32, 'F'>(ctx, inst, e1, e2, e3, e4);
+}
+
+void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
+ CompositeExtract(ctx, inst, composite, index, 'F');
+}
+
+void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
+ CompositeExtract(ctx, inst, composite, index, 'F');
+}
+
+void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
+ CompositeExtract(ctx, inst, composite, index, 'F');
+}
+
+void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, Register composite,
+ ScalarF32 object, u32 index) {
+ CompositeInsert(ctx, inst, composite, object, index, 'F');
+}
+
+void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, Register composite,
+ ScalarF32 object, u32 index) {
+ CompositeInsert(ctx, inst, composite, object, index, 'F');
+}
+
+void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, Register composite,
+ ScalarF32 object, u32 index) {
+ CompositeInsert(ctx, inst, composite, object, index, 'F');
+}
+
+void EmitCompositeConstructF64x2([[maybe_unused]] EmitContext& ctx) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeConstructF64x3([[maybe_unused]] EmitContext& ctx) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeConstructF64x4([[maybe_unused]] EmitContext& ctx) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeExtractF64x2([[maybe_unused]] EmitContext& ctx) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeExtractF64x3([[maybe_unused]] EmitContext& ctx) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeExtractF64x4([[maybe_unused]] EmitContext& ctx) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeInsertF64x2([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] Register composite, [[maybe_unused]] Register object,
+ [[maybe_unused]] u32 index) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeInsertF64x3([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] Register composite, [[maybe_unused]] Register object,
+ [[maybe_unused]] u32 index) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitCompositeInsertF64x4([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] Register composite, [[maybe_unused]] Register object,
+ [[maybe_unused]] u32 index) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp
new file mode 100644
index 000000000..02c9dc6d7
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp
@@ -0,0 +1,346 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/profile.h"
+#include "shader_recompiler/shader_info.h"
+
+namespace Shader::Backend::GLASM {
+namespace {
+void GetCbuf(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset,
+ std::string_view size) {
+ if (!binding.IsImmediate()) {
+ throw NotImplementedException("Indirect constant buffer loading");
+ }
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ if (offset.type == Type::U32) {
+ // Avoid reading arrays out of bounds, matching hardware's behavior
+ if (offset.imm_u32 >= 0x10'000) {
+ ctx.Add("MOV.S {},0;", ret);
+ return;
+ }
+ }
+ ctx.Add("LDC.{} {},c{}[{}];", size, ret, binding.U32(), offset);
+}
+
+bool IsInputArray(Stage stage) {
+ return stage == Stage::Geometry || stage == Stage::TessellationControl ||
+ stage == Stage::TessellationEval;
+}
+
+std::string VertexIndex(EmitContext& ctx, ScalarU32 vertex) {
+ return IsInputArray(ctx.stage) ? fmt::format("[{}]", vertex) : "";
+}
+
+u32 TexCoordIndex(IR::Attribute attr) {
+ return (static_cast<u32>(attr) - static_cast<u32>(IR::Attribute::FixedFncTexture0S)) / 4;
+}
+} // Anonymous namespace
+
+void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) {
+ GetCbuf(ctx, inst, binding, offset, "U8");
+}
+
+void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) {
+ GetCbuf(ctx, inst, binding, offset, "S8");
+}
+
+void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) {
+ GetCbuf(ctx, inst, binding, offset, "U16");
+}
+
+void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) {
+ GetCbuf(ctx, inst, binding, offset, "S16");
+}
+
+void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) {
+ GetCbuf(ctx, inst, binding, offset, "U32");
+}
+
+void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) {
+ GetCbuf(ctx, inst, binding, offset, "F32");
+}
+
+void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset) {
+ GetCbuf(ctx, inst, binding, offset, "U32X2");
+}
+
+void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32 vertex) {
+ const u32 element{static_cast<u32>(attr) % 4};
+ const char swizzle{"xyzw"[element]};
+ if (IR::IsGeneric(attr)) {
+ const u32 index{IR::GenericAttributeIndex(attr)};
+ ctx.Add("MOV.F {}.x,in_attr{}{}[0].{};", inst, index, VertexIndex(ctx, vertex), swizzle);
+ return;
+ }
+ if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9Q) {
+ const u32 index{TexCoordIndex(attr)};
+ ctx.Add("MOV.F {}.x,{}.texcoord[{}].{};", inst, ctx.attrib_name, index, swizzle);
+ return;
+ }
+ switch (attr) {
+ case IR::Attribute::PrimitiveId:
+ ctx.Add("MOV.S {}.x,primitive.id;", inst);
+ break;
+ case IR::Attribute::PositionX:
+ case IR::Attribute::PositionY:
+ case IR::Attribute::PositionZ:
+ case IR::Attribute::PositionW:
+ if (IsInputArray(ctx.stage)) {
+ ctx.Add("MOV.F {}.x,vertex_position{}.{};", inst, VertexIndex(ctx, vertex), swizzle);
+ } else {
+ ctx.Add("MOV.F {}.x,{}.position.{};", inst, ctx.attrib_name, swizzle);
+ }
+ break;
+ case IR::Attribute::ColorFrontDiffuseR:
+ case IR::Attribute::ColorFrontDiffuseG:
+ case IR::Attribute::ColorFrontDiffuseB:
+ case IR::Attribute::ColorFrontDiffuseA:
+ ctx.Add("MOV.F {}.x,{}.color.{};", inst, ctx.attrib_name, swizzle);
+ break;
+ case IR::Attribute::PointSpriteS:
+ case IR::Attribute::PointSpriteT:
+ ctx.Add("MOV.F {}.x,{}.pointcoord.{};", inst, ctx.attrib_name, swizzle);
+ break;
+ case IR::Attribute::TessellationEvaluationPointU:
+ case IR::Attribute::TessellationEvaluationPointV:
+ ctx.Add("MOV.F {}.x,vertex.tesscoord.{};", inst, swizzle);
+ break;
+ case IR::Attribute::InstanceId:
+ ctx.Add("MOV.S {}.x,{}.instance;", inst, ctx.attrib_name);
+ break;
+ case IR::Attribute::VertexId:
+ ctx.Add("MOV.S {}.x,{}.id;", inst, ctx.attrib_name);
+ break;
+ case IR::Attribute::FrontFace:
+ ctx.Add("CMP.S {}.x,{}.facing.x,0,-1;", inst, ctx.attrib_name);
+ break;
+ default:
+ throw NotImplementedException("Get attribute {}", attr);
+ }
+}
+
+void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value,
+ [[maybe_unused]] ScalarU32 vertex) {
+ const u32 element{static_cast<u32>(attr) % 4};
+ const char swizzle{"xyzw"[element]};
+ if (IR::IsGeneric(attr)) {
+ const u32 index{IR::GenericAttributeIndex(attr)};
+ ctx.Add("MOV.F out_attr{}[0].{},{};", index, swizzle, value);
+ return;
+ }
+ if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9R) {
+ const u32 index{TexCoordIndex(attr)};
+ ctx.Add("MOV.F result.texcoord[{}].{},{};", index, swizzle, value);
+ return;
+ }
+ switch (attr) {
+ case IR::Attribute::Layer:
+ if (ctx.stage == Stage::Geometry || ctx.profile.support_viewport_index_layer_non_geometry) {
+ ctx.Add("MOV.F result.layer.x,{};", value);
+ } else {
+ LOG_WARNING(Shader_GLASM,
+ "Layer stored outside of geometry shader not supported by device");
+ }
+ break;
+ case IR::Attribute::ViewportIndex:
+ if (ctx.stage == Stage::Geometry || ctx.profile.support_viewport_index_layer_non_geometry) {
+ ctx.Add("MOV.F result.viewport.x,{};", value);
+ } else {
+ LOG_WARNING(Shader_GLASM,
+ "Viewport stored outside of geometry shader not supported by device");
+ }
+ break;
+ case IR::Attribute::ViewportMask:
+ // NV_viewport_array2 is required to access result.viewportmask, regardless of shader stage.
+ if (ctx.profile.support_viewport_index_layer_non_geometry) {
+ ctx.Add("MOV.F result.viewportmask[0].x,{};", value);
+ } else {
+ LOG_WARNING(Shader_GLASM, "Device does not support storing to ViewportMask");
+ }
+ break;
+ case IR::Attribute::PointSize:
+ ctx.Add("MOV.F result.pointsize.x,{};", value);
+ break;
+ case IR::Attribute::PositionX:
+ case IR::Attribute::PositionY:
+ case IR::Attribute::PositionZ:
+ case IR::Attribute::PositionW:
+ ctx.Add("MOV.F result.position.{},{};", swizzle, value);
+ break;
+ case IR::Attribute::ColorFrontDiffuseR:
+ case IR::Attribute::ColorFrontDiffuseG:
+ case IR::Attribute::ColorFrontDiffuseB:
+ case IR::Attribute::ColorFrontDiffuseA:
+ ctx.Add("MOV.F result.color.{},{};", swizzle, value);
+ break;
+ case IR::Attribute::ColorFrontSpecularR:
+ case IR::Attribute::ColorFrontSpecularG:
+ case IR::Attribute::ColorFrontSpecularB:
+ case IR::Attribute::ColorFrontSpecularA:
+ ctx.Add("MOV.F result.color.secondary.{},{};", swizzle, value);
+ break;
+ case IR::Attribute::ColorBackDiffuseR:
+ case IR::Attribute::ColorBackDiffuseG:
+ case IR::Attribute::ColorBackDiffuseB:
+ case IR::Attribute::ColorBackDiffuseA:
+ ctx.Add("MOV.F result.color.back.{},{};", swizzle, value);
+ break;
+ case IR::Attribute::ColorBackSpecularR:
+ case IR::Attribute::ColorBackSpecularG:
+ case IR::Attribute::ColorBackSpecularB:
+ case IR::Attribute::ColorBackSpecularA:
+ ctx.Add("MOV.F result.color.back.secondary.{},{};", swizzle, value);
+ break;
+ case IR::Attribute::FogCoordinate:
+ ctx.Add("MOV.F result.fogcoord.x,{};", value);
+ break;
+ case IR::Attribute::ClipDistance0:
+ case IR::Attribute::ClipDistance1:
+ case IR::Attribute::ClipDistance2:
+ case IR::Attribute::ClipDistance3:
+ case IR::Attribute::ClipDistance4:
+ case IR::Attribute::ClipDistance5:
+ case IR::Attribute::ClipDistance6:
+ case IR::Attribute::ClipDistance7: {
+ const u32 index{static_cast<u32>(attr) - static_cast<u32>(IR::Attribute::ClipDistance0)};
+ ctx.Add("MOV.F result.clip[{}].x,{};", index, value);
+ break;
+ }
+ default:
+ throw NotImplementedException("Set attribute {}", attr);
+ }
+}
+
+void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, ScalarS32 offset, ScalarU32 vertex) {
+ // RC.x = base_index
+ // RC.y = masked_index
+ // RC.z = compare_index
+ ctx.Add("SHR.S RC.x,{},2;"
+ "AND.S RC.y,RC.x,3;"
+ "SHR.S RC.z,{},4;",
+ offset, offset);
+
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ u32 num_endifs{};
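+    // Each read emits a nested IF/ELSE chain: the outer level matches the attribute against
+    // the compare index (RC.z) and the inner level selects the component from RC.y.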
+ const auto read{[&](u32 compare_index, const std::array<std::string, 4>& values) {
+ ++num_endifs;
+ ctx.Add("SEQ.S.CC RC.w,RC.z,{};" // compare_index
+ "IF NE.w;"
+ // X
+ "SEQ.S.CC RC.w,RC.y,0;"
+ "IF NE.w;"
+ "MOV {}.x,{};"
+ "ELSE;"
+ // Y
+ "SEQ.S.CC RC.w,RC.y,1;"
+ "IF NE.w;"
+ "MOV {}.x,{};"
+ "ELSE;"
+ // Z
+ "SEQ.S.CC RC.w,RC.y,2;"
+ "IF NE.w;"
+ "MOV {}.x,{};"
+ "ELSE;"
+ // W
+ "MOV {}.x,{};"
+ "ENDIF;"
+ "ENDIF;"
+ "ENDIF;"
+ "ELSE;",
+ compare_index, ret, values[0], ret, values[1], ret, values[2], ret, values[3]);
+ }};
+ const auto read_swizzled{[&](u32 compare_index, std::string_view value) {
+ const std::array values{fmt::format("{}.x", value), fmt::format("{}.y", value),
+ fmt::format("{}.z", value), fmt::format("{}.w", value)};
+ read(compare_index, values);
+ }};
+ if (ctx.info.loads.AnyComponent(IR::Attribute::PositionX)) {
+ const u32 index{static_cast<u32>(IR::Attribute::PositionX)};
+ if (IsInputArray(ctx.stage)) {
+ read_swizzled(index, fmt::format("vertex_position{}", VertexIndex(ctx, vertex)));
+ } else {
+ read_swizzled(index, fmt::format("{}.position", ctx.attrib_name));
+ }
+ }
+ for (u32 index = 0; index < static_cast<u32>(IR::NUM_GENERICS); ++index) {
+ if (!ctx.info.loads.Generic(index)) {
+ continue;
+ }
+ read_swizzled(index, fmt::format("in_attr{}{}[0]", index, VertexIndex(ctx, vertex)));
+ }
+ for (u32 i = 0; i < num_endifs; ++i) {
+ ctx.Add("ENDIF;");
+ }
+}
+
+void EmitSetAttributeIndexed([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 offset,
+ [[maybe_unused]] ScalarF32 value, [[maybe_unused]] ScalarU32 vertex) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch) {
+ if (!IR::IsGeneric(patch)) {
+ throw NotImplementedException("Non-generic patch load");
+ }
+ const u32 index{IR::GenericPatchIndex(patch)};
+ const u32 element{IR::GenericPatchElement(patch)};
+ const char swizzle{"xyzw"[element]};
+ const std::string_view out{ctx.stage == Stage::TessellationControl ? ".out" : ""};
+ ctx.Add("MOV.F {},primitive{}.patch.attrib[{}].{};", inst, out, index, swizzle);
+}
+
+void EmitSetPatch(EmitContext& ctx, IR::Patch patch, ScalarF32 value) {
+ if (IR::IsGeneric(patch)) {
+ const u32 index{IR::GenericPatchIndex(patch)};
+ const u32 element{IR::GenericPatchElement(patch)};
+ ctx.Add("MOV.F result.patch.attrib[{}].{},{};", index, "xyzw"[element], value);
+ return;
+ }
+ switch (patch) {
+ case IR::Patch::TessellationLodLeft:
+ case IR::Patch::TessellationLodRight:
+ case IR::Patch::TessellationLodTop:
+ case IR::Patch::TessellationLodBottom: {
+ const u32 index{static_cast<u32>(patch) - u32(IR::Patch::TessellationLodLeft)};
+ ctx.Add("MOV.F result.patch.tessouter[{}].x,{};", index, value);
+ break;
+ }
+ case IR::Patch::TessellationLodInteriorU:
+ ctx.Add("MOV.F result.patch.tessinner[0].x,{};", value);
+ break;
+ case IR::Patch::TessellationLodInteriorV:
+ ctx.Add("MOV.F result.patch.tessinner[1].x,{};", value);
+ break;
+ default:
+ throw NotImplementedException("Patch {}", patch);
+ }
+}
+
+void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, ScalarF32 value) {
+ ctx.Add("MOV.F frag_color{}.{},{};", index, "xyzw"[component], value);
+}
+
+void EmitSetSampleMask(EmitContext& ctx, ScalarS32 value) {
+ ctx.Add("MOV.S result.samplemask.x,{};", value);
+}
+
+void EmitSetFragDepth(EmitContext& ctx, ScalarF32 value) {
+ ctx.Add("MOV.F result.depth.z,{};", value);
+}
+
+void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, ScalarU32 word_offset) {
+ ctx.Add("MOV.U {},lmem[{}].x;", inst, word_offset);
+}
+
+void EmitWriteLocal(EmitContext& ctx, ScalarU32 word_offset, ScalarU32 value) {
+ ctx.Add("MOV.U lmem[{}].x,{};", word_offset, value);
+}
+
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_control_flow.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_control_flow.cpp
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_control_flow.cpp
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_convert.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_convert.cpp
new file mode 100644
index 000000000..ccdf1cbc8
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_convert.cpp
@@ -0,0 +1,231 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLASM {
+namespace {
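+// Maps the IR rounding mode onto the corresponding CVT rounding modifier suffix.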
+std::string_view FpRounding(IR::FpRounding fp_rounding) {
+ switch (fp_rounding) {
+ case IR::FpRounding::DontCare:
+ return "";
+ case IR::FpRounding::RN:
+ return ".ROUND";
+ case IR::FpRounding::RZ:
+ return ".TRUNC";
+ case IR::FpRounding::RM:
+ return ".FLR";
+ case IR::FpRounding::RP:
+ return ".CEIL";
+ }
+ throw InvalidArgument("Invalid floating-point rounding {}", fp_rounding);
+}
+
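+// Emits a single CVT.<dest>.<src> with the rounding modifier taken from the instruction's FP
+// control flags; is_long_result selects LongDefine so 64-bit results land in a long register.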
+template <typename InputType>
+void Convert(EmitContext& ctx, IR::Inst& inst, InputType value, std::string_view dest,
+ std::string_view src, bool is_long_result) {
+ const std::string_view fp_rounding{FpRounding(inst.Flags<IR::FpControl>().rounding)};
+ const auto ret{is_long_result ? ctx.reg_alloc.LongDefine(inst) : ctx.reg_alloc.Define(inst)};
+ ctx.Add("CVT.{}.{}{} {}.x,{};", dest, src, fp_rounding, ret, value);
+}
+} // Anonymous namespace
+
+void EmitConvertS16F16(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "S16", "F16", false);
+}
+
+void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ Convert(ctx, inst, value, "S16", "F32", false);
+}
+
+void EmitConvertS16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
+ Convert(ctx, inst, value, "S16", "F64", false);
+}
+
+void EmitConvertS32F16(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "S32", "F16", false);
+}
+
+void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ Convert(ctx, inst, value, "S32", "F32", false);
+}
+
+void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
+ Convert(ctx, inst, value, "S32", "F64", false);
+}
+
+void EmitConvertS64F16(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "S64", "F16", true);
+}
+
+void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ Convert(ctx, inst, value, "S64", "F32", true);
+}
+
+void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
+ Convert(ctx, inst, value, "S64", "F64", true);
+}
+
+void EmitConvertU16F16(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "U16", "F16", false);
+}
+
+void EmitConvertU16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ Convert(ctx, inst, value, "U16", "F32", false);
+}
+
+void EmitConvertU16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
+ Convert(ctx, inst, value, "U16", "F64", false);
+}
+
+void EmitConvertU32F16(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "U32", "F16", false);
+}
+
+void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ Convert(ctx, inst, value, "U32", "F32", false);
+}
+
+void EmitConvertU32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
+ Convert(ctx, inst, value, "U32", "F64", false);
+}
+
+void EmitConvertU64F16(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "U64", "F16", true);
+}
+
+void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ Convert(ctx, inst, value, "U64", "F32", true);
+}
+
+void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
+ Convert(ctx, inst, value, "U64", "F64", true);
+}
+
+void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) {
+ Convert(ctx, inst, value, "U64", "U32", true);
+}
+
+void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "U32", "U64", false);
+}
+
+void EmitConvertF16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ Convert(ctx, inst, value, "F16", "F32", false);
+}
+
+void EmitConvertF32F16(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F32", "F16", false);
+}
+
+void EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
+ Convert(ctx, inst, value, "F32", "F64", false);
+}
+
+void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ Convert(ctx, inst, value, "F64", "F32", true);
+}
+
+void EmitConvertF16S8(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F16", "S8", false);
+}
+
+void EmitConvertF16S16(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F16", "S16", false);
+}
+
+void EmitConvertF16S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
+ Convert(ctx, inst, value, "F16", "S32", false);
+}
+
+void EmitConvertF16S64(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F16", "S64", false);
+}
+
+void EmitConvertF16U8(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F16", "U8", false);
+}
+
+void EmitConvertF16U16(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F16", "U16", false);
+}
+
+void EmitConvertF16U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) {
+ Convert(ctx, inst, value, "F16", "U32", false);
+}
+
+void EmitConvertF16U64(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F16", "U64", false);
+}
+
+void EmitConvertF32S8(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F32", "S8", false);
+}
+
+void EmitConvertF32S16(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F32", "S16", false);
+}
+
+void EmitConvertF32S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
+ Convert(ctx, inst, value, "F32", "S32", false);
+}
+
+void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F32", "S64", false);
+}
+
+void EmitConvertF32U8(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F32", "U8", false);
+}
+
+void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F32", "U16", false);
+}
+
+void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) {
+ Convert(ctx, inst, value, "F32", "U32", false);
+}
+
+void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F32", "U64", false);
+}
+
+void EmitConvertF64S8(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F64", "S8", true);
+}
+
+void EmitConvertF64S16(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F64", "S16", true);
+}
+
+void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
+ Convert(ctx, inst, value, "F64", "S32", true);
+}
+
+void EmitConvertF64S64(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F64", "S64", true);
+}
+
+void EmitConvertF64U8(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F64", "U8", true);
+}
+
+void EmitConvertF64U16(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F64", "U16", true);
+}
+
+void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) {
+ Convert(ctx, inst, value, "F64", "U32", true);
+}
+
+void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, Register value) {
+ Convert(ctx, inst, value, "F64", "U64", true);
+}
+
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp
new file mode 100644
index 000000000..4ed58619d
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp
@@ -0,0 +1,414 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLASM {
+namespace {
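+// Emits an ordered or unordered floating-point comparison. Ordered comparisons must fail and
+// unordered comparisons must hold when an operand is NaN; the extra SEQ x,x and SNE x,x checks
+// implement this for the ordered not-equal (inequality) and unordered cases respectively.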
+template <typename InputType>
+void Compare(EmitContext& ctx, IR::Inst& inst, InputType lhs, InputType rhs, std::string_view op,
+ std::string_view type, bool ordered, bool inequality = false) {
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ ctx.Add("{}.{} RC.x,{},{};", op, type, lhs, rhs);
+ if (ordered && inequality) {
+ ctx.Add("SEQ.{} RC.y,{},{};"
+ "SEQ.{} RC.z,{},{};"
+ "AND.U RC.x,RC.x,RC.y;"
+ "AND.U RC.x,RC.x,RC.z;"
+ "SNE.S {}.x,RC.x,0;",
+ type, lhs, lhs, type, rhs, rhs, ret);
+ } else if (ordered) {
+ ctx.Add("SNE.S {}.x,RC.x,0;", ret);
+ } else {
+ ctx.Add("SNE.{} RC.y,{},{};"
+ "SNE.{} RC.z,{},{};"
+ "OR.U RC.x,RC.x,RC.y;"
+ "OR.U RC.x,RC.x,RC.z;"
+ "SNE.S {}.x,RC.x,0;",
+ type, lhs, lhs, type, rhs, rhs, ret);
+ }
+}
+
+template <typename InputType>
+void Clamp(EmitContext& ctx, Register ret, InputType value, InputType min_value,
+ InputType max_value, std::string_view type) {
+    // Call MAX first so a NaN value clamps to min_value instead of propagating
+ ctx.Add("MAX.{} RC.x,{},{};"
+ "MIN.{} {}.x,RC.x,{};",
+ type, min_value, value, type, ret, max_value);
+}
+
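+// Returns the .PREC modifier when the instruction forbids floating-point contraction.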
+std::string_view Precise(IR::Inst& inst) {
+ const bool precise{inst.Flags<IR::FpControl>().no_contraction};
+ return precise ? ".PREC" : "";
+}
+} // Anonymous namespace
+
+void EmitFPAbs16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] Register value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ ctx.Add("MOV.F {}.x,|{}|;", inst, value);
+}
+
+void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
+ ctx.LongAdd("MOV.F64 {}.x,|{}|;", inst, value);
+}
+
+void EmitFPAdd16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] Register a, [[maybe_unused]] Register b) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) {
+ ctx.Add("ADD.F{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.Define(inst), a, b);
+}
+
+void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) {
+ ctx.Add("ADD.F64{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.LongDefine(inst), a, b);
+}
+
+void EmitFPFma16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] Register a, [[maybe_unused]] Register b,
+ [[maybe_unused]] Register c) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b, ScalarF32 c) {
+ ctx.Add("MAD.F{} {}.x,{},{},{};", Precise(inst), ctx.reg_alloc.Define(inst), a, b, c);
+}
+
+void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b, ScalarF64 c) {
+ ctx.Add("MAD.F64{} {}.x,{},{},{};", Precise(inst), ctx.reg_alloc.LongDefine(inst), a, b, c);
+}
+
+void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) {
+ ctx.Add("MAX.F {}.x,{},{};", inst, a, b);
+}
+
+void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) {
+ ctx.LongAdd("MAX.F64 {}.x,{},{};", inst, a, b);
+}
+
+void EmitFPMin32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) {
+ ctx.Add("MIN.F {}.x,{},{};", inst, a, b);
+}
+
+void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) {
+ ctx.LongAdd("MIN.F64 {}.x,{},{};", inst, a, b);
+}
+
+void EmitFPMul16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] Register a, [[maybe_unused]] Register b) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) {
+ ctx.Add("MUL.F{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.Define(inst), a, b);
+}
+
+void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) {
+ ctx.Add("MUL.F64{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.LongDefine(inst), a, b);
+}
+
+void EmitFPNeg16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, ScalarRegister value) {
+ ctx.Add("MOV.F {}.x,-{};", inst, value);
+}
+
+void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, Register value) {
+ ctx.LongAdd("MOV.F64 {}.x,-{};", inst, value);
+}
+
+void EmitFPSin(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ ctx.Add("SIN {}.x,{};", inst, value);
+}
+
+void EmitFPCos(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ ctx.Add("COS {}.x,{};", inst, value);
+}
+
+void EmitFPExp2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ ctx.Add("EX2 {}.x,{};", inst, value);
+}
+
+void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ ctx.Add("LG2 {}.x,{};", inst, value);
+}
+
+void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ ctx.Add("RCP {}.x,{};", inst, value);
+}
+
+void EmitFPRecip64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPRecipSqrt32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ ctx.Add("RSQ {}.x,{};", inst, value);
+}
+
+void EmitFPRecipSqrt64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ ctx.Add("RSQ RC.x,{};RCP {}.x,RC.x;", value, ret);
+}
+
+void EmitFPSaturate16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ ctx.Add("MOV.F.SAT {}.x,{};", inst, value);
+}
+
+void EmitFPSaturate64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPClamp16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value,
+ [[maybe_unused]] Register min_value, [[maybe_unused]] Register max_value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value, ScalarF32 min_value,
+ ScalarF32 max_value) {
+ Clamp(ctx, ctx.reg_alloc.Define(inst), value, min_value, max_value, "F");
+}
+
+void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value, ScalarF64 min_value,
+ ScalarF64 max_value) {
+ Clamp(ctx, ctx.reg_alloc.LongDefine(inst), value, min_value, max_value, "F64");
+}
+
+void EmitFPRoundEven16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ ctx.Add("ROUND.F {}.x,{};", inst, value);
+}
+
+void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
+ ctx.LongAdd("ROUND.F64 {}.x,{};", inst, value);
+}
+
+void EmitFPFloor16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ ctx.Add("FLR.F {}.x,{};", inst, value);
+}
+
+void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
+ ctx.LongAdd("FLR.F64 {}.x,{};", inst, value);
+}
+
+void EmitFPCeil16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ ctx.Add("CEIL.F {}.x,{};", inst, value);
+}
+
+void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
+ ctx.LongAdd("CEIL.F64 {}.x,{};", inst, value);
+}
+
+void EmitFPTrunc16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ ctx.Add("TRUNC.F {}.x,{};", inst, value);
+}
+
+void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
+ ctx.LongAdd("TRUNC.F64 {}.x,{};", inst, value);
+}
+
+void EmitFPOrdEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
+ [[maybe_unused]] Register rhs) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SEQ", "F", true);
+}
+
+void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SEQ", "F64", true);
+}
+
+void EmitFPUnordEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
+ [[maybe_unused]] Register rhs) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SEQ", "F", false);
+}
+
+void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SEQ", "F64", false);
+}
+
+void EmitFPOrdNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
+ [[maybe_unused]] Register rhs) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SNE", "F", true, true);
+}
+
+void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SNE", "F64", true, true);
+}
+
+void EmitFPUnordNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
+ [[maybe_unused]] Register rhs) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SNE", "F", false, true);
+}
+
+void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SNE", "F64", false, true);
+}
+
+void EmitFPOrdLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
+ [[maybe_unused]] Register rhs) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SLT", "F", true);
+}
+
+void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SLT", "F64", true);
+}
+
+void EmitFPUnordLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
+ [[maybe_unused]] Register rhs) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SLT", "F", false);
+}
+
+void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SLT", "F64", false);
+}
+
+void EmitFPOrdGreaterThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
+ [[maybe_unused]] Register rhs) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SGT", "F", true);
+}
+
+void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SGT", "F64", true);
+}
+
+void EmitFPUnordGreaterThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
+ [[maybe_unused]] Register rhs) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SGT", "F", false);
+}
+
+void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SGT", "F64", false);
+}
+
+void EmitFPOrdLessThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
+ [[maybe_unused]] Register rhs) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SLE", "F", true);
+}
+
+void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SLE", "F64", true);
+}
+
+void EmitFPUnordLessThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
+ [[maybe_unused]] Register rhs) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SLE", "F", false);
+}
+
+void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SLE", "F64", false);
+}
+
+void EmitFPOrdGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
+ [[maybe_unused]] Register rhs) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SGE", "F", true);
+}
+
+void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SGE", "F64", true);
+}
+
+void EmitFPUnordGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs,
+ [[maybe_unused]] Register rhs) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SGE", "F", false);
+}
+
+void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) {
+ Compare(ctx, inst, lhs, rhs, "SGE", "F64", false);
+}
+
+void EmitFPIsNan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+ Compare(ctx, inst, value, value, "SNE", "F", true, false);
+}
+
+void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
+ Compare(ctx, inst, value, value, "SNE", "F64", true, false);
+}
+
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
new file mode 100644
index 000000000..09e3a9b82
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
@@ -0,0 +1,850 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <utility>
+
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLASM {
+namespace {
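+// RAII holder for a scratch register: allocates from the register allocator on construction
+// and frees the register when it goes out of scope. Movable, but not copyable.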
+struct ScopedRegister {
+ ScopedRegister() = default;
+ ScopedRegister(RegAlloc& reg_alloc_) : reg_alloc{&reg_alloc_}, reg{reg_alloc->AllocReg()} {}
+
+ ~ScopedRegister() {
+ if (reg_alloc) {
+ reg_alloc->FreeReg(reg);
+ }
+ }
+
+ ScopedRegister& operator=(ScopedRegister&& rhs) noexcept {
+ if (reg_alloc) {
+ reg_alloc->FreeReg(reg);
+ }
+ reg_alloc = std::exchange(rhs.reg_alloc, nullptr);
+ reg = rhs.reg;
+ return *this;
+ }
+
+ ScopedRegister(ScopedRegister&& rhs) noexcept
+ : reg_alloc{std::exchange(rhs.reg_alloc, nullptr)}, reg{rhs.reg} {}
+
+ ScopedRegister& operator=(const ScopedRegister&) = delete;
+ ScopedRegister(const ScopedRegister&) = delete;
+
+ RegAlloc* reg_alloc{};
+ Register reg;
+};
+
+std::string Texture(EmitContext& ctx, IR::TextureInstInfo info,
+ [[maybe_unused]] const IR::Value& index) {
+ // FIXME: indexed reads
+ if (info.type == TextureType::Buffer) {
+ return fmt::format("texture[{}]", ctx.texture_buffer_bindings.at(info.descriptor_index));
+ } else {
+ return fmt::format("texture[{}]", ctx.texture_bindings.at(info.descriptor_index));
+ }
+}
+
+std::string Image(EmitContext& ctx, IR::TextureInstInfo info,
+ [[maybe_unused]] const IR::Value& index) {
+ // FIXME: indexed reads
+ if (info.type == TextureType::Buffer) {
+ return fmt::format("image[{}]", ctx.image_buffer_bindings.at(info.descriptor_index));
+ } else {
+ return fmt::format("image[{}]", ctx.image_bindings.at(info.descriptor_index));
+ }
+}
+
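+// Maps the IR texture type, together with the depth-compare flag, to the GLASM texture target.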
+std::string_view TextureType(IR::TextureInstInfo info) {
+ if (info.is_depth) {
+ switch (info.type) {
+ case TextureType::Color1D:
+ return "SHADOW1D";
+ case TextureType::ColorArray1D:
+ return "SHADOWARRAY1D";
+ case TextureType::Color2D:
+ return "SHADOW2D";
+ case TextureType::ColorArray2D:
+ return "SHADOWARRAY2D";
+ case TextureType::Color3D:
+ return "SHADOW3D";
+ case TextureType::ColorCube:
+ return "SHADOWCUBE";
+ case TextureType::ColorArrayCube:
+ return "SHADOWARRAYCUBE";
+ case TextureType::Buffer:
+ return "SHADOWBUFFER";
+ }
+ } else {
+ switch (info.type) {
+ case TextureType::Color1D:
+ return "1D";
+ case TextureType::ColorArray1D:
+ return "ARRAY1D";
+ case TextureType::Color2D:
+ return "2D";
+ case TextureType::ColorArray2D:
+ return "ARRAY2D";
+ case TextureType::Color3D:
+ return "3D";
+ case TextureType::ColorCube:
+ return "CUBE";
+ case TextureType::ColorArrayCube:
+ return "ARRAYCUBE";
+ case TextureType::Buffer:
+ return "BUFFER";
+ }
+ }
+ throw InvalidArgument("Invalid texture type {}", info.type.Value());
+}
+
+std::string Offset(EmitContext& ctx, const IR::Value& offset) {
+ if (offset.IsEmpty()) {
+ return "";
+ }
+ return fmt::format(",offset({})", Register{ctx.reg_alloc.Consume(offset)});
+}
+
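+// Allocates the two scratch registers used for swizzled gather offsets, but only when a second
+// offset is present; otherwise empty holders are returned.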
+std::pair<ScopedRegister, ScopedRegister> AllocOffsetsRegs(EmitContext& ctx,
+ const IR::Value& offset2) {
+ if (offset2.IsEmpty()) {
+ return {};
+ } else {
+ return {ctx.reg_alloc, ctx.reg_alloc};
+ }
+}
+
+void SwizzleOffsets(EmitContext& ctx, Register off_x, Register off_y, const IR::Value& offset1,
+ const IR::Value& offset2) {
+ const Register offsets_a{ctx.reg_alloc.Consume(offset1)};
+ const Register offsets_b{ctx.reg_alloc.Consume(offset2)};
+ // Input swizzle: [XYXY] [XYXY]
+ // Output swizzle: [XXXX] [YYYY]
+ ctx.Add("MOV {}.x,{}.x;"
+ "MOV {}.y,{}.z;"
+ "MOV {}.z,{}.x;"
+ "MOV {}.w,{}.z;"
+ "MOV {}.x,{}.y;"
+ "MOV {}.y,{}.w;"
+ "MOV {}.z,{}.y;"
+ "MOV {}.w,{}.w;",
+ off_x, offsets_a, off_x, offsets_a, off_x, offsets_b, off_x, offsets_b, off_y,
+ offsets_a, off_y, offsets_a, off_y, offsets_b, off_y, offsets_b);
+}
+
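+// Formats the gradient offset operand; only a vector of compile-time constants can be encoded,
+// anything else is dropped with a warning.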
+std::string GradOffset(const IR::Value& offset) {
+ if (offset.IsImmediate()) {
+ LOG_WARNING(Shader_GLASM, "Gradient offset is a scalar immediate");
+ return "";
+ }
+ IR::Inst* const vector{offset.InstRecursive()};
+ if (!vector->AreAllArgsImmediates()) {
+ LOG_WARNING(Shader_GLASM, "Gradient offset vector is not immediate");
+ return "";
+ }
+ switch (vector->NumArgs()) {
+ case 1:
+ return fmt::format(",({})", static_cast<s32>(vector->Arg(0).U32()));
+ case 2:
+ return fmt::format(",({},{})", static_cast<s32>(vector->Arg(0).U32()),
+ static_cast<s32>(vector->Arg(1).U32()));
+ default:
+ throw LogicError("Invalid number of gradient offsets {}", vector->NumArgs());
+ }
+}
+
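+// Returns the coordinate operand as a register name. Immediate coordinates are copied into a
+// scratch register; otherwise the consumed vector is used directly, copied to RC only when the
+// coordinate instruction still has other uses.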
+std::pair<std::string, ScopedRegister> Coord(EmitContext& ctx, const IR::Value& coord) {
+ if (coord.IsImmediate()) {
+ ScopedRegister scoped_reg(ctx.reg_alloc);
+ ctx.Add("MOV.U {}.x,{};", scoped_reg.reg, ScalarU32{ctx.reg_alloc.Consume(coord)});
+ return {fmt::to_string(scoped_reg.reg), std::move(scoped_reg)};
+ }
+ std::string coord_vec{fmt::to_string(Register{ctx.reg_alloc.Consume(coord)})};
+ if (coord.InstRecursive()->HasUses()) {
+ // Move non-dead coords to a separate register, although this should never happen because
+ // vectors are only assembled for immediate texture instructions
+ ctx.Add("MOV.F RC,{};", coord_vec);
+ coord_vec = "RC";
+ }
+ return {std::move(coord_vec), ScopedRegister{}};
+}
+
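+// Writes the sparse residency result: the register defaults to -1 (resident) and is overwritten
+// with 0 when the preceding texture operation reported the fetch as non-resident.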
+void StoreSparse(EmitContext& ctx, IR::Inst* sparse_inst) {
+ if (!sparse_inst) {
+ return;
+ }
+ const Register sparse_ret{ctx.reg_alloc.Define(*sparse_inst)};
+ ctx.Add("MOV.S {},-1;"
+ "MOV.S {}(NONRESIDENT),0;",
+ sparse_ret, sparse_ret);
+}
+
+std::string_view FormatStorage(ImageFormat format) {
+ switch (format) {
+ case ImageFormat::Typeless:
+ return "U";
+ case ImageFormat::R8_UINT:
+ return "U8";
+ case ImageFormat::R8_SINT:
+ return "S8";
+ case ImageFormat::R16_UINT:
+ return "U16";
+ case ImageFormat::R16_SINT:
+ return "S16";
+ case ImageFormat::R32_UINT:
+ return "U32";
+ case ImageFormat::R32G32_UINT:
+ return "U32X2";
+ case ImageFormat::R32G32B32A32_UINT:
+ return "U32X4";
+ }
+ throw InvalidArgument("Invalid image format {}", format);
+}
+
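+// Shared helper for image atomics: emits a single ATOMIM.<op> on the bound image.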
+template <typename T>
+void ImageAtomic(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, T value,
+ std::string_view op) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const std::string_view type{TextureType(info)};
+ const std::string image{Image(ctx, info, index)};
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ ctx.Add("ATOMIM.{} {},{},{},{},{};", op, ret, value, coord, image, type);
+}
+
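+// Grabs the GetSparseFromOp pseudo-instruction attached to a texture operation and invalidates
+// it so it is not emitted separately; StoreSparse writes its value after the fetch instead.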
+IR::Inst* PrepareSparse(IR::Inst& inst) {
+ const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+ if (sparse_inst) {
+ sparse_inst->Invalidate();
+ }
+ return sparse_inst;
+}
+} // Anonymous namespace
+
+void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, Register bias_lc, const IR::Value& offset) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto sparse_inst{PrepareSparse(inst)};
+ const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
+ const std::string_view lod_clamp_mod{info.has_lod_clamp ? ".LODCLAMP" : ""};
+ const std::string_view type{TextureType(info)};
+ const std::string texture{Texture(ctx, info, index)};
+ const std::string offset_vec{Offset(ctx, offset)};
+ const auto [coord_vec, coord_alloc]{Coord(ctx, coord)};
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ if (info.has_bias) {
+ if (info.type == TextureType::ColorArrayCube) {
+ ctx.Add("TXB.F{}{} {},{},{},{},ARRAYCUBE{};", lod_clamp_mod, sparse_mod, ret, coord_vec,
+ bias_lc, texture, offset_vec);
+ } else {
+ if (info.has_lod_clamp) {
+ ctx.Add("MOV.F {}.w,{}.x;"
+ "TXB.F.LODCLAMP{} {},{},{}.y,{},{}{};",
+ coord_vec, bias_lc, sparse_mod, ret, coord_vec, bias_lc, texture, type,
+ offset_vec);
+ } else {
+ ctx.Add("MOV.F {}.w,{}.x;"
+ "TXB.F{} {},{},{},{}{};",
+ coord_vec, bias_lc, sparse_mod, ret, coord_vec, texture, type, offset_vec);
+ }
+ }
+ } else {
+ if (info.has_lod_clamp && info.type == TextureType::ColorArrayCube) {
+ ctx.Add("TEX.F.LODCLAMP{} {},{},{},{},ARRAYCUBE{};", sparse_mod, ret, coord_vec,
+ bias_lc, texture, offset_vec);
+ } else {
+ ctx.Add("TEX.F{}{} {},{},{},{}{};", lod_clamp_mod, sparse_mod, ret, coord_vec, texture,
+ type, offset_vec);
+ }
+ }
+ StoreSparse(ctx, sparse_inst);
+}
+
+void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, ScalarF32 lod, const IR::Value& offset) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto sparse_inst{PrepareSparse(inst)};
+ const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
+ const std::string_view type{TextureType(info)};
+ const std::string texture{Texture(ctx, info, index)};
+ const std::string offset_vec{Offset(ctx, offset)};
+ const auto [coord_vec, coord_alloc]{Coord(ctx, coord)};
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ if (info.type == TextureType::ColorArrayCube) {
+ ctx.Add("TXL.F{} {},{},{},{},ARRAYCUBE{};", sparse_mod, ret, coord_vec, lod, texture,
+ offset_vec);
+ } else {
+ ctx.Add("MOV.F {}.w,{};"
+ "TXL.F{} {},{},{},{}{};",
+ coord_vec, lod, sparse_mod, ret, coord_vec, texture, type, offset_vec);
+ }
+ StoreSparse(ctx, sparse_inst);
+}
+
+void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, const IR::Value& dref,
+ const IR::Value& bias_lc, const IR::Value& offset) {
+    // Allocate the staging register early to avoid aliasing registers consumed later
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ ScopedRegister staging;
+ if (info.type == TextureType::ColorArrayCube) {
+ staging = ScopedRegister{ctx.reg_alloc};
+ }
+ const ScalarF32 dref_val{ctx.reg_alloc.Consume(dref)};
+ const Register bias_lc_vec{ctx.reg_alloc.Consume(bias_lc)};
+ const auto sparse_inst{PrepareSparse(inst)};
+ const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
+ const std::string_view type{TextureType(info)};
+ const std::string texture{Texture(ctx, info, index)};
+ const std::string offset_vec{Offset(ctx, offset)};
+ const auto [coord_vec, coord_alloc]{Coord(ctx, coord)};
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ if (info.has_bias) {
+ if (info.has_lod_clamp) {
+ switch (info.type) {
+ case TextureType::Color1D:
+ case TextureType::ColorArray1D:
+ case TextureType::Color2D:
+ ctx.Add("MOV.F {}.z,{};"
+ "MOV.F {}.w,{}.x;"
+ "TXB.F.LODCLAMP{} {},{},{}.y,{},{}{};",
+ coord_vec, dref_val, coord_vec, bias_lc_vec, sparse_mod, ret, coord_vec,
+ bias_lc_vec, texture, type, offset_vec);
+ break;
+ case TextureType::ColorArray2D:
+ case TextureType::ColorCube:
+ ctx.Add("MOV.F {}.w,{};"
+ "TXB.F.LODCLAMP{} {},{},{},{},{}{};",
+ coord_vec, dref_val, sparse_mod, ret, coord_vec, bias_lc_vec, texture, type,
+ offset_vec);
+ break;
+ default:
+ throw NotImplementedException("Invalid type {} with bias and lod clamp",
+ info.type.Value());
+ }
+ } else {
+ switch (info.type) {
+ case TextureType::Color1D:
+ case TextureType::ColorArray1D:
+ case TextureType::Color2D:
+ ctx.Add("MOV.F {}.z,{};"
+ "MOV.F {}.w,{}.x;"
+ "TXB.F{} {},{},{},{}{};",
+ coord_vec, dref_val, coord_vec, bias_lc_vec, sparse_mod, ret, coord_vec,
+ texture, type, offset_vec);
+ break;
+ case TextureType::ColorArray2D:
+ case TextureType::ColorCube:
+ ctx.Add("MOV.F {}.w,{};"
+ "TXB.F{} {},{},{},{},{}{};",
+ coord_vec, dref_val, sparse_mod, ret, coord_vec, bias_lc_vec, texture, type,
+ offset_vec);
+ break;
+ case TextureType::ColorArrayCube:
+ ctx.Add("MOV.F {}.x,{};"
+ "MOV.F {}.y,{}.x;"
+ "TXB.F{} {},{},{},{},{}{};",
+ staging.reg, dref_val, staging.reg, bias_lc_vec, sparse_mod, ret, coord_vec,
+ staging.reg, texture, type, offset_vec);
+ break;
+ default:
+ throw NotImplementedException("Invalid type {}", info.type.Value());
+ }
+ }
+ } else {
+ if (info.has_lod_clamp) {
+ if (info.type != TextureType::ColorArrayCube) {
+ const bool w_swizzle{info.type == TextureType::ColorArray2D ||
+ info.type == TextureType::ColorCube};
+ const char dref_swizzle{w_swizzle ? 'w' : 'z'};
+ ctx.Add("MOV.F {}.{},{};"
+ "TEX.F.LODCLAMP{} {},{},{},{},{}{};",
+ coord_vec, dref_swizzle, dref_val, sparse_mod, ret, coord_vec, bias_lc_vec,
+ texture, type, offset_vec);
+ } else {
+ ctx.Add("MOV.F {}.x,{};"
+ "MOV.F {}.y,{};"
+ "TEX.F.LODCLAMP{} {},{},{},{},{}{};",
+ staging.reg, dref_val, staging.reg, bias_lc_vec, sparse_mod, ret, coord_vec,
+ staging.reg, texture, type, offset_vec);
+ }
+ } else {
+ if (info.type != TextureType::ColorArrayCube) {
+ const bool w_swizzle{info.type == TextureType::ColorArray2D ||
+ info.type == TextureType::ColorCube};
+ const char dref_swizzle{w_swizzle ? 'w' : 'z'};
+ ctx.Add("MOV.F {}.{},{};"
+ "TEX.F{} {},{},{},{}{};",
+ coord_vec, dref_swizzle, dref_val, sparse_mod, ret, coord_vec, texture,
+ type, offset_vec);
+ } else {
+ ctx.Add("TEX.F{} {},{},{},{},{}{};", sparse_mod, ret, coord_vec, dref_val, texture,
+ type, offset_vec);
+ }
+ }
+ }
+ StoreSparse(ctx, sparse_inst);
+}
+
+void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, const IR::Value& dref,
+ const IR::Value& lod, const IR::Value& offset) {
+    // Allocate the staging register early to avoid aliasing registers consumed later
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ ScopedRegister staging;
+ if (info.type == TextureType::ColorArrayCube) {
+ staging = ScopedRegister{ctx.reg_alloc};
+ }
+ const ScalarF32 dref_val{ctx.reg_alloc.Consume(dref)};
+ const ScalarF32 lod_val{ctx.reg_alloc.Consume(lod)};
+ const auto sparse_inst{PrepareSparse(inst)};
+ const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
+ const std::string_view type{TextureType(info)};
+ const std::string texture{Texture(ctx, info, index)};
+ const std::string offset_vec{Offset(ctx, offset)};
+ const auto [coord_vec, coord_alloc]{Coord(ctx, coord)};
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ switch (info.type) {
+ case TextureType::Color1D:
+ case TextureType::ColorArray1D:
+ case TextureType::Color2D:
+ ctx.Add("MOV.F {}.z,{};"
+ "MOV.F {}.w,{};"
+ "TXL.F{} {},{},{},{}{};",
+ coord_vec, dref_val, coord_vec, lod_val, sparse_mod, ret, coord_vec, texture, type,
+ offset_vec);
+ break;
+ case TextureType::ColorArray2D:
+ case TextureType::ColorCube:
+ ctx.Add("MOV.F {}.w,{};"
+ "TXL.F{} {},{},{},{},{}{};",
+ coord_vec, dref_val, sparse_mod, ret, coord_vec, lod_val, texture, type,
+ offset_vec);
+ break;
+ case TextureType::ColorArrayCube:
+ ctx.Add("MOV.F {}.x,{};"
+ "MOV.F {}.y,{};"
+ "TXL.F{} {},{},{},{},{}{};",
+ staging.reg, dref_val, staging.reg, lod_val, sparse_mod, ret, coord_vec,
+ staging.reg, texture, type, offset_vec);
+ break;
+ default:
+ throw NotImplementedException("Invalid type {}", info.type.Value());
+ }
+ StoreSparse(ctx, sparse_inst);
+}
+
+void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2) {
+ // Allocate offsets early so they don't overwrite any consumed register
+ const auto [off_x, off_y]{AllocOffsetsRegs(ctx, offset2)};
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const char comp{"xyzw"[info.gather_component]};
+ const auto sparse_inst{PrepareSparse(inst)};
+ const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
+ const std::string_view type{TextureType(info)};
+ const std::string texture{Texture(ctx, info, index)};
+ const Register coord_vec{ctx.reg_alloc.Consume(coord)};
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ if (offset2.IsEmpty()) {
+ const std::string offset_vec{Offset(ctx, offset)};
+ ctx.Add("TXG.F{} {},{},{}.{},{}{};", sparse_mod, ret, coord_vec, texture, comp, type,
+ offset_vec);
+ } else {
+ SwizzleOffsets(ctx, off_x.reg, off_y.reg, offset, offset2);
+ ctx.Add("TXGO.F{} {},{},{},{},{}.{},{};", sparse_mod, ret, coord_vec, off_x.reg, off_y.reg,
+ texture, comp, type);
+ }
+ StoreSparse(ctx, sparse_inst);
+}
+
+void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2,
+ const IR::Value& dref) {
+ // FIXME: This instruction is not working as expected
+
+ // Allocate offsets early so they don't overwrite any consumed register
+ const auto [off_x, off_y]{AllocOffsetsRegs(ctx, offset2)};
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto sparse_inst{PrepareSparse(inst)};
+ const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
+ const std::string_view type{TextureType(info)};
+ const std::string texture{Texture(ctx, info, index)};
+ const Register coord_vec{ctx.reg_alloc.Consume(coord)};
+ const ScalarF32 dref_value{ctx.reg_alloc.Consume(dref)};
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ std::string args;
+ switch (info.type) {
+ case TextureType::Color2D:
+ ctx.Add("MOV.F {}.z,{};", coord_vec, dref_value);
+ args = fmt::to_string(coord_vec);
+ break;
+ case TextureType::ColorArray2D:
+ case TextureType::ColorCube:
+ ctx.Add("MOV.F {}.w,{};", coord_vec, dref_value);
+ args = fmt::to_string(coord_vec);
+ break;
+ case TextureType::ColorArrayCube:
+ args = fmt::format("{},{}", coord_vec, dref_value);
+ break;
+ default:
+ throw NotImplementedException("Invalid type {}", info.type.Value());
+ }
+ if (offset2.IsEmpty()) {
+ const std::string offset_vec{Offset(ctx, offset)};
+ ctx.Add("TXG.F{} {},{},{},{}{};", sparse_mod, ret, args, texture, type, offset_vec);
+ } else {
+ SwizzleOffsets(ctx, off_x.reg, off_y.reg, offset, offset2);
+ ctx.Add("TXGO.F{} {},{},{},{},{},{};", sparse_mod, ret, args, off_x.reg, off_y.reg, texture,
+ type);
+ }
+ StoreSparse(ctx, sparse_inst);
+}
+
+void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, const IR::Value& offset, ScalarS32 lod, ScalarS32 ms) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto sparse_inst{PrepareSparse(inst)};
+ const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
+ const std::string_view type{TextureType(info)};
+ const std::string texture{Texture(ctx, info, index)};
+ const std::string offset_vec{Offset(ctx, offset)};
+ const auto [coord_vec, coord_alloc]{Coord(ctx, coord)};
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ if (info.type == TextureType::Buffer) {
+ ctx.Add("TXF.F{} {},{},{},{}{};", sparse_mod, ret, coord_vec, texture, type, offset_vec);
+ } else if (ms.type != Type::Void) {
+ ctx.Add("MOV.S {}.w,{};"
+ "TXFMS.F{} {},{},{},{}{};",
+ coord_vec, ms, sparse_mod, ret, coord_vec, texture, type, offset_vec);
+ } else {
+ ctx.Add("MOV.S {}.w,{};"
+ "TXF.F{} {},{},{},{}{};",
+ coord_vec, lod, sparse_mod, ret, coord_vec, texture, type, offset_vec);
+ }
+ StoreSparse(ctx, sparse_inst);
+}
+
+void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ ScalarS32 lod) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const std::string texture{Texture(ctx, info, index)};
+ const std::string_view type{TextureType(info)};
+ ctx.Add("TXQ {},{},{},{};", inst, lod, texture, type);
+}
+
+void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const std::string texture{Texture(ctx, info, index)};
+ const std::string_view type{TextureType(info)};
+ ctx.Add("LOD.F {},{},{},{};", inst, coord, texture, type);
+}
+
+void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, const IR::Value& derivatives,
+ const IR::Value& offset, const IR::Value& lod_clamp) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ ScopedRegister dpdx, dpdy;
+ const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp};
+ if (multi_component) {
+ // Allocate this early to avoid aliasing other registers
+ dpdx = ScopedRegister{ctx.reg_alloc};
+ dpdy = ScopedRegister{ctx.reg_alloc};
+ }
+ const auto sparse_inst{PrepareSparse(inst)};
+ const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
+ const std::string_view type{TextureType(info)};
+ const std::string texture{Texture(ctx, info, index)};
+ const std::string offset_vec{GradOffset(offset)};
+ const Register coord_vec{ctx.reg_alloc.Consume(coord)};
+ const Register derivatives_vec{ctx.reg_alloc.Consume(derivatives)};
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ if (multi_component) {
+ ctx.Add("MOV.F {}.x,{}.x;"
+ "MOV.F {}.y,{}.z;"
+ "MOV.F {}.x,{}.y;"
+ "MOV.F {}.y,{}.w;",
+ dpdx.reg, derivatives_vec, dpdx.reg, derivatives_vec, dpdy.reg, derivatives_vec,
+ dpdy.reg, derivatives_vec);
+ if (info.has_lod_clamp) {
+ const ScalarF32 lod_clamp_value{ctx.reg_alloc.Consume(lod_clamp)};
+ ctx.Add("MOV.F {}.w,{};"
+ "TXD.F.LODCLAMP{} {},{},{},{},{},{}{};",
+ dpdy.reg, lod_clamp_value, sparse_mod, ret, coord_vec, dpdx.reg, dpdy.reg,
+ texture, type, offset_vec);
+ } else {
+ ctx.Add("TXD.F{} {},{},{},{},{},{}{};", sparse_mod, ret, coord_vec, dpdx.reg, dpdy.reg,
+ texture, type, offset_vec);
+ }
+ } else {
+ ctx.Add("TXD.F{} {},{},{}.x,{}.y,{},{}{};", sparse_mod, ret, coord_vec, derivatives_vec,
+ derivatives_vec, texture, type, offset_vec);
+ }
+ StoreSparse(ctx, sparse_inst);
+}
+
+void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto sparse_inst{PrepareSparse(inst)};
+ const std::string_view format{FormatStorage(info.image_format)};
+ const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
+ const std::string_view type{TextureType(info)};
+ const std::string image{Image(ctx, info, index)};
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ ctx.Add("LOADIM.{}{} {},{},{},{};", format, sparse_mod, ret, coord, image, type);
+ StoreSparse(ctx, sparse_inst);
+}
+
+void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ Register color) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const std::string_view format{FormatStorage(info.image_format)};
+ const std::string_view type{TextureType(info)};
+ const std::string image{Image(ctx, info, index)};
+ ctx.Add("STOREIM.{} {},{},{},{};", format, image, color, coord, type);
+}
+
+void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value) {
+ ImageAtomic(ctx, inst, index, coord, value, "ADD.U32");
+}
+
+void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarS32 value) {
+ ImageAtomic(ctx, inst, index, coord, value, "MIN.S32");
+}
+
+void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value) {
+ ImageAtomic(ctx, inst, index, coord, value, "MIN.U32");
+}
+
+void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarS32 value) {
+ ImageAtomic(ctx, inst, index, coord, value, "MAX.S32");
+}
+
+void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value) {
+ ImageAtomic(ctx, inst, index, coord, value, "MAX.U32");
+}
+
+void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value) {
+ ImageAtomic(ctx, inst, index, coord, value, "IWRAP.U32");
+}
+
+void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value) {
+ ImageAtomic(ctx, inst, index, coord, value, "DWRAP.U32");
+}
+
+void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value) {
+ ImageAtomic(ctx, inst, index, coord, value, "AND.U32");
+}
+
+void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value) {
+ ImageAtomic(ctx, inst, index, coord, value, "OR.U32");
+}
+
+void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value) {
+ ImageAtomic(ctx, inst, index, coord, value, "XOR.U32");
+}
+
+void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ Register coord, ScalarU32 value) {
+ ImageAtomic(ctx, inst, index, coord, value, "EXCH.U32");
+}
+
+void EmitBindlessImageSampleImplicitLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageSampleExplicitLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageSampleDrefImplicitLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageSampleDrefExplicitLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageGather(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageGatherDref(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageFetch(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageQueryDimensions(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageQueryLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageGradient(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageRead(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageWrite(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageSampleImplicitLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageSampleExplicitLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageSampleDrefImplicitLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageSampleDrefExplicitLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageGather(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageGatherDref(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageFetch(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageQueryDimensions(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageQueryLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageGradient(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageRead(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageWrite(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageAtomicIAdd32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageAtomicSMin32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageAtomicUMin32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageAtomicSMax32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageAtomicUMax32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageAtomicInc32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageAtomicDec32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageAtomicAnd32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageAtomicOr32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageAtomicXor32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageAtomicExchange32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageAtomicIAdd32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageAtomicSMin32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageAtomicUMin32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageAtomicSMax32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageAtomicUMax32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageAtomicInc32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageAtomicDec32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageAtomicAnd32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageAtomicOr32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageAtomicXor32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageAtomicExchange32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
new file mode 100644
index 000000000..12afda43b
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
@@ -0,0 +1,625 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+#include "shader_recompiler/backend/glasm/reg_alloc.h"
+
+namespace Shader::IR {
+enum class Attribute : u64;
+enum class Patch : u64;
+class Inst;
+class Value;
+} // namespace Shader::IR
+
+namespace Shader::Backend::GLASM {
+
+class EmitContext;
+
+// Microinstruction emitters
+void EmitPhi(EmitContext& ctx, IR::Inst& inst);
+void EmitVoid(EmitContext& ctx);
+void EmitIdentity(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
+void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
+void EmitReference(EmitContext&, const IR::Value& value);
+void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value);
+void EmitJoin(EmitContext& ctx);
+void EmitDemoteToHelperInvocation(EmitContext& ctx);
+void EmitBarrier(EmitContext& ctx);
+void EmitWorkgroupMemoryBarrier(EmitContext& ctx);
+void EmitDeviceMemoryBarrier(EmitContext& ctx);
+void EmitPrologue(EmitContext& ctx);
+void EmitEpilogue(EmitContext& ctx);
+void EmitEmitVertex(EmitContext& ctx, ScalarS32 stream);
+void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream);
+void EmitGetRegister(EmitContext& ctx);
+void EmitSetRegister(EmitContext& ctx);
+void EmitGetPred(EmitContext& ctx);
+void EmitSetPred(EmitContext& ctx);
+void EmitSetGotoVariable(EmitContext& ctx);
+void EmitGetGotoVariable(EmitContext& ctx);
+void EmitSetIndirectBranchVariable(EmitContext& ctx);
+void EmitGetIndirectBranchVariable(EmitContext& ctx);
+void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
+void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
+void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
+void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
+void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
+void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
+void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
+void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32 vertex);
+void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value, ScalarU32 vertex);
+void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, ScalarS32 offset, ScalarU32 vertex);
+void EmitSetAttributeIndexed(EmitContext& ctx, ScalarU32 offset, ScalarF32 value, ScalarU32 vertex);
+void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch);
+void EmitSetPatch(EmitContext& ctx, IR::Patch patch, ScalarF32 value);
+void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, ScalarF32 value);
+void EmitSetSampleMask(EmitContext& ctx, ScalarS32 value);
+void EmitSetFragDepth(EmitContext& ctx, ScalarF32 value);
+void EmitGetZFlag(EmitContext& ctx);
+void EmitGetSFlag(EmitContext& ctx);
+void EmitGetCFlag(EmitContext& ctx);
+void EmitGetOFlag(EmitContext& ctx);
+void EmitSetZFlag(EmitContext& ctx);
+void EmitSetSFlag(EmitContext& ctx);
+void EmitSetCFlag(EmitContext& ctx);
+void EmitSetOFlag(EmitContext& ctx);
+void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst);
+void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst);
+void EmitInvocationId(EmitContext& ctx, IR::Inst& inst);
+void EmitSampleId(EmitContext& ctx, IR::Inst& inst);
+void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst);
+void EmitYDirection(EmitContext& ctx, IR::Inst& inst);
+void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, ScalarU32 word_offset);
+void EmitWriteLocal(EmitContext& ctx, ScalarU32 word_offset, ScalarU32 value);
+void EmitUndefU1(EmitContext& ctx, IR::Inst& inst);
+void EmitUndefU8(EmitContext& ctx, IR::Inst& inst);
+void EmitUndefU16(EmitContext& ctx, IR::Inst& inst);
+void EmitUndefU32(EmitContext& ctx, IR::Inst& inst);
+void EmitUndefU64(EmitContext& ctx, IR::Inst& inst);
+void EmitLoadGlobalU8(EmitContext& ctx, IR::Inst& inst, Register address);
+void EmitLoadGlobalS8(EmitContext& ctx, IR::Inst& inst, Register address);
+void EmitLoadGlobalU16(EmitContext& ctx, IR::Inst& inst, Register address);
+void EmitLoadGlobalS16(EmitContext& ctx, IR::Inst& inst, Register address);
+void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, Register address);
+void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, Register address);
+void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, Register address);
+void EmitWriteGlobalU8(EmitContext& ctx, Register address, Register value);
+void EmitWriteGlobalS8(EmitContext& ctx, Register address, Register value);
+void EmitWriteGlobalU16(EmitContext& ctx, Register address, Register value);
+void EmitWriteGlobalS16(EmitContext& ctx, Register address, Register value);
+void EmitWriteGlobal32(EmitContext& ctx, Register address, ScalarU32 value);
+void EmitWriteGlobal64(EmitContext& ctx, Register address, Register value);
+void EmitWriteGlobal128(EmitContext& ctx, Register address, Register value);
+void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset);
+void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset);
+void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset);
+void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset);
+void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset);
+void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset);
+void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset);
+void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
+ ScalarU32 value);
+void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
+ ScalarS32 value);
+void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
+ ScalarU32 value);
+void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
+ ScalarS32 value);
+void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
+ ScalarU32 value);
+void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
+ Register value);
+void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
+ Register value);
+void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset);
+void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset);
+void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset);
+void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset);
+void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset);
+void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset);
+void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset);
+void EmitWriteSharedU8(EmitContext& ctx, ScalarU32 offset, ScalarU32 value);
+void EmitWriteSharedU16(EmitContext& ctx, ScalarU32 offset, ScalarU32 value);
+void EmitWriteSharedU32(EmitContext& ctx, ScalarU32 offset, ScalarU32 value);
+void EmitWriteSharedU64(EmitContext& ctx, ScalarU32 offset, Register value);
+void EmitWriteSharedU128(EmitContext& ctx, ScalarU32 offset, Register value);
+void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
+ const IR::Value& e2);
+void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
+ const IR::Value& e2, const IR::Value& e3);
+void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
+ const IR::Value& e2, const IR::Value& e3, const IR::Value& e4);
+void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index);
+void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index);
+void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index);
+void EmitCompositeInsertU32x2(EmitContext& ctx, Register composite, ScalarU32 object, u32 index);
+void EmitCompositeInsertU32x3(EmitContext& ctx, Register composite, ScalarU32 object, u32 index);
+void EmitCompositeInsertU32x4(EmitContext& ctx, Register composite, ScalarU32 object, u32 index);
+void EmitCompositeConstructF16x2(EmitContext& ctx, Register e1, Register e2);
+void EmitCompositeConstructF16x3(EmitContext& ctx, Register e1, Register e2, Register e3);
+void EmitCompositeConstructF16x4(EmitContext& ctx, Register e1, Register e2, Register e3,
+ Register e4);
+void EmitCompositeExtractF16x2(EmitContext& ctx, Register composite, u32 index);
+void EmitCompositeExtractF16x3(EmitContext& ctx, Register composite, u32 index);
+void EmitCompositeExtractF16x4(EmitContext& ctx, Register composite, u32 index);
+void EmitCompositeInsertF16x2(EmitContext& ctx, Register composite, Register object, u32 index);
+void EmitCompositeInsertF16x3(EmitContext& ctx, Register composite, Register object, u32 index);
+void EmitCompositeInsertF16x4(EmitContext& ctx, Register composite, Register object, u32 index);
+void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
+ const IR::Value& e2);
+void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
+ const IR::Value& e2, const IR::Value& e3);
+void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
+ const IR::Value& e2, const IR::Value& e3, const IR::Value& e4);
+void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index);
+void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index);
+void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index);
+void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, Register composite,
+ ScalarF32 object, u32 index);
+void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, Register composite,
+ ScalarF32 object, u32 index);
+void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, Register composite,
+ ScalarF32 object, u32 index);
+void EmitCompositeConstructF64x2(EmitContext& ctx);
+void EmitCompositeConstructF64x3(EmitContext& ctx);
+void EmitCompositeConstructF64x4(EmitContext& ctx);
+void EmitCompositeExtractF64x2(EmitContext& ctx);
+void EmitCompositeExtractF64x3(EmitContext& ctx);
+void EmitCompositeExtractF64x4(EmitContext& ctx);
+void EmitCompositeInsertF64x2(EmitContext& ctx, Register composite, Register object, u32 index);
+void EmitCompositeInsertF64x3(EmitContext& ctx, Register composite, Register object, u32 index);
+void EmitCompositeInsertF64x4(EmitContext& ctx, Register composite, Register object, u32 index);
+void EmitSelectU1(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value,
+ ScalarS32 false_value);
+void EmitSelectU8(EmitContext& ctx, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value);
+void EmitSelectU16(EmitContext& ctx, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value);
+void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value,
+ ScalarS32 false_value);
+void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, Register true_value,
+ Register false_value);
+void EmitSelectF16(EmitContext& ctx, ScalarS32 cond, Register true_value, Register false_value);
+void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value,
+ ScalarS32 false_value);
+void EmitSelectF64(EmitContext& ctx, ScalarS32 cond, Register true_value, Register false_value);
+void EmitBitCastU16F16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
+void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
+void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
+void EmitBitCastF16U16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
+void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
+void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
+void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitPackFloat2x16(EmitContext& ctx, Register value);
+void EmitUnpackFloat2x16(EmitContext& ctx, Register value);
+void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitGetZeroFromOp(EmitContext& ctx);
+void EmitGetSignFromOp(EmitContext& ctx);
+void EmitGetCarryFromOp(EmitContext& ctx);
+void EmitGetOverflowFromOp(EmitContext& ctx);
+void EmitGetSparseFromOp(EmitContext& ctx);
+void EmitGetInBoundsFromOp(EmitContext& ctx);
+void EmitFPAbs16(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
+void EmitFPAdd16(EmitContext& ctx, IR::Inst& inst, Register a, Register b);
+void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b);
+void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b);
+void EmitFPFma16(EmitContext& ctx, IR::Inst& inst, Register a, Register b, Register c);
+void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b, ScalarF32 c);
+void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b, ScalarF64 c);
+void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b);
+void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b);
+void EmitFPMin32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b);
+void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b);
+void EmitFPMul16(EmitContext& ctx, IR::Inst& inst, Register a, Register b);
+void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b);
+void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b);
+void EmitFPNeg16(EmitContext& ctx, Register value);
+void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, ScalarRegister value);
+void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitFPSin(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitFPCos(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitFPExp2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitFPRecip64(EmitContext& ctx, Register value);
+void EmitFPRecipSqrt32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitFPRecipSqrt64(EmitContext& ctx, Register value);
+void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitFPSaturate16(EmitContext& ctx, Register value);
+void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitFPSaturate64(EmitContext& ctx, Register value);
+void EmitFPClamp16(EmitContext& ctx, Register value, Register min_value, Register max_value);
+void EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value, ScalarF32 min_value,
+ ScalarF32 max_value);
+void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value, ScalarF64 min_value,
+ ScalarF64 max_value);
+void EmitFPRoundEven16(EmitContext& ctx, Register value);
+void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
+void EmitFPFloor16(EmitContext& ctx, Register value);
+void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
+void EmitFPCeil16(EmitContext& ctx, Register value);
+void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
+void EmitFPTrunc16(EmitContext& ctx, Register value);
+void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
+void EmitFPOrdEqual16(EmitContext& ctx, Register lhs, Register rhs);
+void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
+void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
+void EmitFPUnordEqual16(EmitContext& ctx, Register lhs, Register rhs);
+void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
+void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
+void EmitFPOrdNotEqual16(EmitContext& ctx, Register lhs, Register rhs);
+void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
+void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
+void EmitFPUnordNotEqual16(EmitContext& ctx, Register lhs, Register rhs);
+void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
+void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
+void EmitFPOrdLessThan16(EmitContext& ctx, Register lhs, Register rhs);
+void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
+void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
+void EmitFPUnordLessThan16(EmitContext& ctx, Register lhs, Register rhs);
+void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
+void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
+void EmitFPOrdGreaterThan16(EmitContext& ctx, Register lhs, Register rhs);
+void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
+void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
+void EmitFPUnordGreaterThan16(EmitContext& ctx, Register lhs, Register rhs);
+void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
+void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
+void EmitFPOrdLessThanEqual16(EmitContext& ctx, Register lhs, Register rhs);
+void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
+void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
+void EmitFPUnordLessThanEqual16(EmitContext& ctx, Register lhs, Register rhs);
+void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
+void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
+void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Register lhs, Register rhs);
+void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
+void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
+void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Register lhs, Register rhs);
+void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
+void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
+void EmitFPIsNan16(EmitContext& ctx, Register value);
+void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
+void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
+void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, Register a, Register b);
+void EmitISub32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
+void EmitISub64(EmitContext& ctx, IR::Inst& inst, Register a, Register b);
+void EmitIMul32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
+void EmitINeg32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
+void EmitINeg64(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
+void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift);
+void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, ScalarU32 shift);
+void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift);
+void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base,
+ ScalarU32 shift);
+void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 shift);
+void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base,
+ ScalarS32 shift);
+void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
+void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
+void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
+void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 insert,
+ ScalarS32 offset, ScalarS32 count);
+void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 offset,
+ ScalarS32 count);
+void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 offset,
+ ScalarU32 count);
+void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
+void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
+void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
+void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
+void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value);
+void EmitSMin32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
+void EmitUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b);
+void EmitSMax32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
+void EmitUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b);
+void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value, ScalarS32 min, ScalarS32 max);
+void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 min, ScalarU32 max);
+void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs);
+void EmitULessThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs);
+void EmitIEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs);
+void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs);
+void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs);
+void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs);
+void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs);
+void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs);
+void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs);
+void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs);
+void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value);
+void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarS32 value);
+void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value);
+void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarS32 value);
+void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value);
+void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value);
+void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value);
+void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value);
+void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value);
+void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value);
+void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value);
+void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ Register value);
+void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value);
+void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarS32 value);
+void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value);
+void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarS32 value);
+void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value);
+void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value);
+void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value);
+void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value);
+void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value);
+void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value);
+void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value);
+void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value);
+void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value);
+void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value);
+void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value);
+void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value);
+void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value);
+void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value);
+void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value);
+void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value);
+void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarF32 value);
+void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value);
+void EmitStorageAtomicAddF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value);
+void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value);
+void EmitStorageAtomicMinF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value);
+void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value);
+void EmitStorageAtomicMaxF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value);
+void EmitGlobalAtomicIAdd32(EmitContext& ctx);
+void EmitGlobalAtomicSMin32(EmitContext& ctx);
+void EmitGlobalAtomicUMin32(EmitContext& ctx);
+void EmitGlobalAtomicSMax32(EmitContext& ctx);
+void EmitGlobalAtomicUMax32(EmitContext& ctx);
+void EmitGlobalAtomicInc32(EmitContext& ctx);
+void EmitGlobalAtomicDec32(EmitContext& ctx);
+void EmitGlobalAtomicAnd32(EmitContext& ctx);
+void EmitGlobalAtomicOr32(EmitContext& ctx);
+void EmitGlobalAtomicXor32(EmitContext& ctx);
+void EmitGlobalAtomicExchange32(EmitContext& ctx);
+void EmitGlobalAtomicIAdd64(EmitContext& ctx);
+void EmitGlobalAtomicSMin64(EmitContext& ctx);
+void EmitGlobalAtomicUMin64(EmitContext& ctx);
+void EmitGlobalAtomicSMax64(EmitContext& ctx);
+void EmitGlobalAtomicUMax64(EmitContext& ctx);
+void EmitGlobalAtomicInc64(EmitContext& ctx);
+void EmitGlobalAtomicDec64(EmitContext& ctx);
+void EmitGlobalAtomicAnd64(EmitContext& ctx);
+void EmitGlobalAtomicOr64(EmitContext& ctx);
+void EmitGlobalAtomicXor64(EmitContext& ctx);
+void EmitGlobalAtomicExchange64(EmitContext& ctx);
+void EmitGlobalAtomicAddF32(EmitContext& ctx);
+void EmitGlobalAtomicAddF16x2(EmitContext& ctx);
+void EmitGlobalAtomicAddF32x2(EmitContext& ctx);
+void EmitGlobalAtomicMinF16x2(EmitContext& ctx);
+void EmitGlobalAtomicMinF32x2(EmitContext& ctx);
+void EmitGlobalAtomicMaxF16x2(EmitContext& ctx);
+void EmitGlobalAtomicMaxF32x2(EmitContext& ctx);
+void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
+void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
+void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
+void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
+void EmitConvertS16F16(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitConvertS16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
+void EmitConvertS32F16(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
+void EmitConvertS64F16(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
+void EmitConvertU16F16(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertU16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitConvertU16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
+void EmitConvertU32F16(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitConvertU32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
+void EmitConvertU64F16(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
+void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value);
+void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitConvertF32F16(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
+void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitConvertF16S8(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF16S16(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF16S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
+void EmitConvertF16S64(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF16U8(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF16U16(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF16U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value);
+void EmitConvertF16U64(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF32S8(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF32S16(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF32S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
+void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF32U8(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value);
+void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF64S8(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF64S16(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
+void EmitConvertF64S64(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF64U8(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF64U16(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value);
+void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, Register value);
+void EmitBindlessImageSampleImplicitLod(EmitContext&);
+void EmitBindlessImageSampleExplicitLod(EmitContext&);
+void EmitBindlessImageSampleDrefImplicitLod(EmitContext&);
+void EmitBindlessImageSampleDrefExplicitLod(EmitContext&);
+void EmitBindlessImageGather(EmitContext&);
+void EmitBindlessImageGatherDref(EmitContext&);
+void EmitBindlessImageFetch(EmitContext&);
+void EmitBindlessImageQueryDimensions(EmitContext&);
+void EmitBindlessImageQueryLod(EmitContext&);
+void EmitBindlessImageGradient(EmitContext&);
+void EmitBindlessImageRead(EmitContext&);
+void EmitBindlessImageWrite(EmitContext&);
+void EmitBoundImageSampleImplicitLod(EmitContext&);
+void EmitBoundImageSampleExplicitLod(EmitContext&);
+void EmitBoundImageSampleDrefImplicitLod(EmitContext&);
+void EmitBoundImageSampleDrefExplicitLod(EmitContext&);
+void EmitBoundImageGather(EmitContext&);
+void EmitBoundImageGatherDref(EmitContext&);
+void EmitBoundImageFetch(EmitContext&);
+void EmitBoundImageQueryDimensions(EmitContext&);
+void EmitBoundImageQueryLod(EmitContext&);
+void EmitBoundImageGradient(EmitContext&);
+void EmitBoundImageRead(EmitContext&);
+void EmitBoundImageWrite(EmitContext&);
+void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, Register bias_lc, const IR::Value& offset);
+void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, ScalarF32 lod, const IR::Value& offset);
+void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, const IR::Value& dref,
+ const IR::Value& bias_lc, const IR::Value& offset);
+void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, const IR::Value& dref,
+ const IR::Value& lod, const IR::Value& offset);
+void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2);
+void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2,
+ const IR::Value& dref);
+void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, const IR::Value& offset, ScalarS32 lod, ScalarS32 ms);
+void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ ScalarS32 lod);
+void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord);
+void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, const IR::Value& derivatives,
+ const IR::Value& offset, const IR::Value& lod_clamp);
+void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord);
+void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ Register color);
+void EmitBindlessImageAtomicIAdd32(EmitContext&);
+void EmitBindlessImageAtomicSMin32(EmitContext&);
+void EmitBindlessImageAtomicUMin32(EmitContext&);
+void EmitBindlessImageAtomicSMax32(EmitContext&);
+void EmitBindlessImageAtomicUMax32(EmitContext&);
+void EmitBindlessImageAtomicInc32(EmitContext&);
+void EmitBindlessImageAtomicDec32(EmitContext&);
+void EmitBindlessImageAtomicAnd32(EmitContext&);
+void EmitBindlessImageAtomicOr32(EmitContext&);
+void EmitBindlessImageAtomicXor32(EmitContext&);
+void EmitBindlessImageAtomicExchange32(EmitContext&);
+void EmitBoundImageAtomicIAdd32(EmitContext&);
+void EmitBoundImageAtomicSMin32(EmitContext&);
+void EmitBoundImageAtomicUMin32(EmitContext&);
+void EmitBoundImageAtomicSMax32(EmitContext&);
+void EmitBoundImageAtomicUMax32(EmitContext&);
+void EmitBoundImageAtomicInc32(EmitContext&);
+void EmitBoundImageAtomicDec32(EmitContext&);
+void EmitBoundImageAtomicAnd32(EmitContext&);
+void EmitBoundImageAtomicOr32(EmitContext&);
+void EmitBoundImageAtomicXor32(EmitContext&);
+void EmitBoundImageAtomicExchange32(EmitContext&);
+void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value);
+void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarS32 value);
+void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value);
+void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarS32 value);
+void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value);
+void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value);
+void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value);
+void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value);
+void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value);
+void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value);
+void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ Register coord, ScalarU32 value);
+void EmitLaneId(EmitContext& ctx, IR::Inst& inst);
+void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred);
+void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred);
+void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred);
+void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred);
+void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst);
+void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst);
+void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst);
+void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst);
+void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst);
+void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
+ const IR::Value& clamp, const IR::Value& segmentation_mask);
+void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
+ const IR::Value& clamp, const IR::Value& segmentation_mask);
+void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
+ const IR::Value& clamp, const IR::Value& segmentation_mask);
+void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
+ const IR::Value& clamp, const IR::Value& segmentation_mask);
+void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a, ScalarF32 op_b,
+ ScalarU32 swizzle);
+void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a);
+void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a);
+void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a);
+void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a);
+
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp
new file mode 100644
index 000000000..f55c26b76
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp
@@ -0,0 +1,294 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLASM {
+namespace {
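+// Shared helper for AND/OR/XOR: emits the requested GLASM logical opcode on two S32 operands and
+// materializes any GetZeroFromOp/GetSignFromOp pseudo-ops by comparing the result against zero.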
+void BitwiseLogicalOp(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b,
+ std::string_view lop) {
+ const auto zero = inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp);
+ const auto sign = inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp);
+ if (zero) {
+ zero->Invalidate();
+ }
+ if (sign) {
+ sign->Invalidate();
+ }
+ if (zero || sign) {
+ ctx.reg_alloc.InvalidateConditionCodes();
+ }
+ const auto ret{ctx.reg_alloc.Define(inst)};
+ ctx.Add("{}.S {}.x,{},{};", lop, ret, a, b);
+ if (zero) {
+ ctx.Add("SEQ.S {},{},0;", *zero, ret);
+ }
+ if (sign) {
+ ctx.Add("SLT.S {},{},0;", *sign, ret);
+ }
+}
+} // Anonymous namespace
+
+void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
+ const std::array flags{
+ inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp),
+ inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp),
+ inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp),
+ inst.GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp),
+ };
+ for (IR::Inst* const flag_inst : flags) {
+ if (flag_inst) {
+ flag_inst->Invalidate();
+ }
+ }
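+ // Any consumed flag requires the .CC modifier so the SF/CF/OF condition codes can be read below.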
+ const bool cc{inst.HasAssociatedPseudoOperation()};
+ const std::string_view cc_mod{cc ? ".CC" : ""};
+ if (cc) {
+ ctx.reg_alloc.InvalidateConditionCodes();
+ }
+ const auto ret{ctx.reg_alloc.Define(inst)};
+ ctx.Add("ADD.S{} {}.x,{},{};", cc_mod, ret, a, b);
+ if (!cc) {
+ return;
+ }
+ static constexpr std::array<std::string_view, 4> masks{"", "SF", "CF", "OF"};
+ for (size_t flag_index = 0; flag_index < flags.size(); ++flag_index) {
+ if (!flags[flag_index]) {
+ continue;
+ }
+ const auto flag_ret{ctx.reg_alloc.Define(*flags[flag_index])};
+ if (flag_index == 0) {
+ ctx.Add("SEQ.S {}.x,{}.x,0;", flag_ret, ret);
+ } else {
+ // We could use conditional execution here, but it's broken on Nvidia's compiler
+ ctx.Add("IF {}.x;"
+ "MOV.S {}.x,-1;"
+ "ELSE;"
+ "MOV.S {}.x,0;"
+ "ENDIF;",
+ masks[flag_index], flag_ret, flag_ret);
+ }
+ }
+}
+
+void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, Register a, Register b) {
+ ctx.LongAdd("ADD.S64 {}.x,{}.x,{}.x;", inst, a, b);
+}
+
+void EmitISub32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
+ ctx.Add("SUB.S {}.x,{},{};", inst, a, b);
+}
+
+void EmitISub64(EmitContext& ctx, IR::Inst& inst, Register a, Register b) {
+ ctx.LongAdd("SUB.S64 {}.x,{}.x,{}.x;", inst, a, b);
+}
+
+void EmitIMul32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
+ ctx.Add("MUL.S {}.x,{},{};", inst, a, b);
+}
+
+void EmitINeg32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
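+ // Negative immediates are negated at compile time; everything else gets an explicit minus in the MOV.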
+ if (value.type != Type::Register && static_cast<s32>(value.imm_u32) < 0) {
+ ctx.Add("MOV.S {},{};", inst, -static_cast<s32>(value.imm_u32));
+ } else {
+ ctx.Add("MOV.S {},-{};", inst, value);
+ }
+}
+
+void EmitINeg64(EmitContext& ctx, IR::Inst& inst, Register value) {
+ ctx.LongAdd("MOV.S64 {},-{};", inst, value);
+}
+
+void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
+ ctx.Add("ABS.S {},{};", inst, value);
+}
+
+void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift) {
+ ctx.Add("SHL.U {}.x,{},{};", inst, base, shift);
+}
+
+void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base,
+ ScalarU32 shift) {
+ ctx.LongAdd("SHL.U64 {}.x,{},{};", inst, base, shift);
+}
+
+void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift) {
+ ctx.Add("SHR.U {}.x,{},{};", inst, base, shift);
+}
+
+void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base,
+ ScalarU32 shift) {
+ ctx.LongAdd("SHR.U64 {}.x,{},{};", inst, base, shift);
+}
+
+void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 shift) {
+ ctx.Add("SHR.S {}.x,{},{};", inst, base, shift);
+}
+
+void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base,
+ ScalarS32 shift) {
+ ctx.LongAdd("SHR.S64 {}.x,{},{};", inst, base, shift);
+}
+
+void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
+ BitwiseLogicalOp(ctx, inst, a, b, "AND");
+}
+
+void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
+ BitwiseLogicalOp(ctx, inst, a, b, "OR");
+}
+
+void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
+ BitwiseLogicalOp(ctx, inst, a, b, "XOR");
+}
+
+void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 insert,
+ ScalarS32 offset, ScalarS32 count) {
+ const Register ret{ctx.reg_alloc.Define(inst)};
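+ // BFI reads the field size and offset from its first operand: pack them as a literal vector when
+ // both are immediates, otherwise stage them through the RC scratch register first.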
+ if (count.type != Type::Register && offset.type != Type::Register) {
+ ctx.Add("BFI.S {},{{{},{},0,0}},{},{};", ret, count, offset, insert, base);
+ } else {
+ ctx.Add("MOV.S RC.x,{};"
+ "MOV.S RC.y,{};"
+ "BFI.S {},RC,{},{};",
+ count, offset, ret, insert, base);
+ }
+}
+
+void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 offset,
+ ScalarS32 count) {
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ if (count.type != Type::Register && offset.type != Type::Register) {
+ ctx.Add("BFE.S {},{{{},{},0,0}},{};", ret, count, offset, base);
+ } else {
+ ctx.Add("MOV.S RC.x,{};"
+ "MOV.S RC.y,{};"
+ "BFE.S {},RC,{};",
+ count, offset, ret, base);
+ }
+}
+
+void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 offset,
+ ScalarU32 count) {
+ const auto zero = inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp);
+ const auto sign = inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp);
+ if (zero) {
+ zero->Invalidate();
+ }
+ if (sign) {
+ sign->Invalidate();
+ }
+ if (zero || sign) {
+ ctx.reg_alloc.InvalidateConditionCodes();
+ }
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ if (count.type != Type::Register && offset.type != Type::Register) {
+ ctx.Add("BFE.U {},{{{},{},0,0}},{};", ret, count, offset, base);
+ } else {
+ ctx.Add("MOV.U RC.x,{};"
+ "MOV.U RC.y,{};"
+ "BFE.U {},RC,{};",
+ count, offset, ret, base);
+ }
+ if (zero) {
+ ctx.Add("SEQ.S {},{},0;", *zero, ret);
+ }
+ if (sign) {
+ ctx.Add("SLT.S {},{},0;", *sign, ret);
+ }
+}
+
+void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
+ ctx.Add("BFR {},{};", inst, value);
+}
+
+void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
+ ctx.Add("BTC {},{};", inst, value);
+}
+
+void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
+ ctx.Add("NOT.S {},{};", inst, value);
+}
+
+void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
+ ctx.Add("BTFM.S {},{};", inst, value);
+}
+
+void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) {
+ ctx.Add("BTFM.U {},{};", inst, value);
+}
+
+void EmitSMin32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
+ ctx.Add("MIN.S {},{},{};", inst, a, b);
+}
+
+void EmitUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b) {
+ ctx.Add("MIN.U {},{},{};", inst, a, b);
+}
+
+void EmitSMax32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
+ ctx.Add("MAX.S {},{},{};", inst, a, b);
+}
+
+void EmitUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b) {
+ ctx.Add("MAX.U {},{},{};", inst, a, b);
+}
+
+void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value, ScalarS32 min, ScalarS32 max) {
+ const Register ret{ctx.reg_alloc.Define(inst)};
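+ // clamp(value, min, max) == max(min(value, max), min); RC.x holds the intermediate minimum.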
+ ctx.Add("MIN.S RC.x,{},{};"
+ "MAX.S {}.x,RC.x,{};",
+ max, value, ret, min);
+}
+
+void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 min, ScalarU32 max) {
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ ctx.Add("MIN.U RC.x,{},{};"
+ "MAX.U {}.x,RC.x,{};",
+ max, value, ret, min);
+}
+
+void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) {
+ ctx.Add("SLT.S {}.x,{},{};", inst, lhs, rhs);
+}
+
+void EmitULessThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) {
+ ctx.Add("SLT.U {}.x,{},{};", inst, lhs, rhs);
+}
+
+void EmitIEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) {
+ ctx.Add("SEQ.S {}.x,{},{};", inst, lhs, rhs);
+}
+
+void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) {
+ ctx.Add("SLE.S {}.x,{},{};", inst, lhs, rhs);
+}
+
+void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) {
+ ctx.Add("SLE.U {}.x,{},{};", inst, lhs, rhs);
+}
+
+void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) {
+ ctx.Add("SGT.S {}.x,{},{};", inst, lhs, rhs);
+}
+
+void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) {
+ ctx.Add("SGT.U {}.x,{},{};", inst, lhs, rhs);
+}
+
+void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) {
+ ctx.Add("SNE.U {}.x,{},{};", inst, lhs, rhs);
+}
+
+void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) {
+ ctx.Add("SGE.S {}.x,{},{};", inst, lhs, rhs);
+}
+
+void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) {
+ ctx.Add("SGE.U {}.x,{},{};", inst, lhs, rhs);
+}
+
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_logical.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_logical.cpp
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_logical.cpp
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp
new file mode 100644
index 000000000..af9fac7c1
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp
@@ -0,0 +1,568 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/runtime_info.h"
+
+namespace Shader::Backend::GLASM {
+namespace {
+void StorageOp(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
+ std::string_view then_expr, std::string_view else_expr = {}) {
+ // Operate on a bindless SSBO and call the expression with bounds checking
+ // address = c[binding].xy
+ // length = c[binding].z
+ const u32 sb_binding{binding.U32()};
+ ctx.Add("PK64.U DC,c[{}];" // pointer = address
+ "CVT.U64.U32 DC.z,{};" // offset = uint64_t(offset)
+ "ADD.U64 DC.x,DC.x,DC.z;" // pointer += offset
+ "SLT.U.CC RC.x,{},c[{}].z;", // cc = offset < length
+ sb_binding, offset, offset, sb_binding);
+ if (else_expr.empty()) {
+ ctx.Add("IF NE.x;{}ENDIF;", then_expr);
+ } else {
+ ctx.Add("IF NE.x;{}ELSE;{}ENDIF;", then_expr, else_expr);
+ }
+}
+
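+// Compares a raw 64-bit address against every tracked SSBO range and runs the expression on the
+// matching buffer; one IF per used buffer is opened here and closed again at the end.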
+void GlobalStorageOp(EmitContext& ctx, Register address, bool pointer_based, std::string_view expr,
+ std::string_view else_expr = {}) {
+ const size_t num_buffers{ctx.info.storage_buffers_descriptors.size()};
+ for (size_t index = 0; index < num_buffers; ++index) {
+ if (!ctx.info.nvn_buffer_used[index]) {
+ continue;
+ }
+ const auto& ssbo{ctx.info.storage_buffers_descriptors[index]};
+ ctx.Add("LDC.U64 DC.x,c{}[{}];" // ssbo_addr
+ "LDC.U32 RC.x,c{}[{}];" // ssbo_size_u32
+ "CVT.U64.U32 DC.y,RC.x;" // ssbo_size = ssbo_size_u32
+ "ADD.U64 DC.y,DC.y,DC.x;" // ssbo_end = ssbo_addr + ssbo_size
+ "SGE.U64 RC.x,{}.x,DC.x;" // a = input_addr >= ssbo_addr ? -1 : 0
+ "SLT.U64 RC.y,{}.x,DC.y;" // b = input_addr < ssbo_end ? -1 : 0
+ "AND.U.CC RC.x,RC.x,RC.y;" // cond = a && b
+ "IF NE.x;" // if cond
+ "SUB.U64 DC.x,{}.x,DC.x;", // offset = input_addr - ssbo_addr
+ ssbo.cbuf_index, ssbo.cbuf_offset, ssbo.cbuf_index, ssbo.cbuf_offset + 8, address,
+ address, address);
+ if (pointer_based) {
+ ctx.Add("PK64.U DC.y,c[{}];" // host_ssbo = cbuf
+ "ADD.U64 DC.x,DC.x,DC.y;" // host_addr = host_ssbo + offset
+ "{}"
+ "ELSE;",
+ index, expr);
+ } else {
+ ctx.Add("CVT.U32.U64 RC.x,DC.x;"
+ "{},ssbo{}[RC.x];"
+ "ELSE;",
+ expr, index);
+ }
+ }
+ if (!else_expr.empty()) {
+ ctx.Add("{}", else_expr);
+ }
+ const size_t num_used_buffers{ctx.info.nvn_buffer_used.count()};
+ for (size_t index = 0; index < num_used_buffers; ++index) {
+ ctx.Add("ENDIF;");
+ }
+}
+
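+// Storage accesses use native STB/LDB instructions when glasm_use_storage_buffers is set; otherwise
+// they fall back to bounds-checked STORE/LOAD through the bindless pointer set up by StorageOp.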
+template <typename ValueType>
+void Write(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, ValueType value,
+ std::string_view size) {
+ if (ctx.runtime_info.glasm_use_storage_buffers) {
+ ctx.Add("STB.{} {},ssbo{}[{}];", size, value, binding.U32(), offset);
+ } else {
+ StorageOp(ctx, binding, offset, fmt::format("STORE.{} {},DC.x;", size, value));
+ }
+}
+
+void Load(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset,
+ std::string_view size) {
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ if (ctx.runtime_info.glasm_use_storage_buffers) {
+ ctx.Add("LDB.{} {},ssbo{}[{}];", size, ret, binding.U32(), offset);
+ } else {
+ StorageOp(ctx, binding, offset, fmt::format("LOAD.{} {},DC.x;", size, ret),
+ fmt::format("MOV.U {},{{0,0,0,0}};", ret));
+ }
+}
+
+template <typename ValueType>
+void GlobalWrite(EmitContext& ctx, Register address, ValueType value, std::string_view size) {
+ if (ctx.runtime_info.glasm_use_storage_buffers) {
+ GlobalStorageOp(ctx, address, false, fmt::format("STB.{} {}", size, value));
+ } else {
+ GlobalStorageOp(ctx, address, true, fmt::format("STORE.{} {},DC.x;", size, value));
+ }
+}
+
+void GlobalLoad(EmitContext& ctx, IR::Inst& inst, Register address, std::string_view size) {
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ if (ctx.runtime_info.glasm_use_storage_buffers) {
+ GlobalStorageOp(ctx, address, false, fmt::format("LDB.{} {}", size, ret));
+ } else {
+ GlobalStorageOp(ctx, address, true, fmt::format("LOAD.{} {},DC.x;", size, ret),
+ fmt::format("MOV.S {},0;", ret));
+ }
+}
+
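+// Atomics follow the same split: ATOMB on native storage buffers, ATOM on the raw global pointer.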
+template <typename ValueType>
+void Atom(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset,
+ ValueType value, std::string_view operation, std::string_view size) {
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ if (ctx.runtime_info.glasm_use_storage_buffers) {
+ ctx.Add("ATOMB.{}.{} {},{},ssbo{}[{}];", operation, size, ret, value, binding.U32(),
+ offset);
+ } else {
+ StorageOp(ctx, binding, offset,
+ fmt::format("ATOM.{}.{} {},{},DC.x;", operation, size, ret, value));
+ }
+}
+} // Anonymous namespace
+
+void EmitLoadGlobalU8(EmitContext& ctx, IR::Inst& inst, Register address) {
+ GlobalLoad(ctx, inst, address, "U8");
+}
+
+void EmitLoadGlobalS8(EmitContext& ctx, IR::Inst& inst, Register address) {
+ GlobalLoad(ctx, inst, address, "S8");
+}
+
+void EmitLoadGlobalU16(EmitContext& ctx, IR::Inst& inst, Register address) {
+ GlobalLoad(ctx, inst, address, "U16");
+}
+
+void EmitLoadGlobalS16(EmitContext& ctx, IR::Inst& inst, Register address) {
+ GlobalLoad(ctx, inst, address, "S16");
+}
+
+void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, Register address) {
+ GlobalLoad(ctx, inst, address, "U32");
+}
+
+void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, Register address) {
+ GlobalLoad(ctx, inst, address, "U32X2");
+}
+
+void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, Register address) {
+ GlobalLoad(ctx, inst, address, "U32X4");
+}
+
+void EmitWriteGlobalU8(EmitContext& ctx, Register address, Register value) {
+ GlobalWrite(ctx, address, value, "U8");
+}
+
+void EmitWriteGlobalS8(EmitContext& ctx, Register address, Register value) {
+ GlobalWrite(ctx, address, value, "S8");
+}
+
+void EmitWriteGlobalU16(EmitContext& ctx, Register address, Register value) {
+ GlobalWrite(ctx, address, value, "U16");
+}
+
+void EmitWriteGlobalS16(EmitContext& ctx, Register address, Register value) {
+ GlobalWrite(ctx, address, value, "S16");
+}
+
+void EmitWriteGlobal32(EmitContext& ctx, Register address, ScalarU32 value) {
+ GlobalWrite(ctx, address, value, "U32");
+}
+
+void EmitWriteGlobal64(EmitContext& ctx, Register address, Register value) {
+ GlobalWrite(ctx, address, value, "U32X2");
+}
+
+void EmitWriteGlobal128(EmitContext& ctx, Register address, Register value) {
+ GlobalWrite(ctx, address, value, "U32X4");
+}
+
+void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset) {
+ Load(ctx, inst, binding, offset, "U8");
+}
+
+void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset) {
+ Load(ctx, inst, binding, offset, "S8");
+}
+
+void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset) {
+ Load(ctx, inst, binding, offset, "U16");
+}
+
+void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset) {
+ Load(ctx, inst, binding, offset, "S16");
+}
+
+void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset) {
+ Load(ctx, inst, binding, offset, "U32");
+}
+
+void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset) {
+ Load(ctx, inst, binding, offset, "U32X2");
+}
+
+void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset) {
+ Load(ctx, inst, binding, offset, "U32X4");
+}
+
+void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
+ ScalarU32 value) {
+ Write(ctx, binding, offset, value, "U8");
+}
+
+void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
+ ScalarS32 value) {
+ Write(ctx, binding, offset, value, "S8");
+}
+
+void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
+ ScalarU32 value) {
+ Write(ctx, binding, offset, value, "U16");
+}
+
+void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
+ ScalarS32 value) {
+ Write(ctx, binding, offset, value, "S16");
+}
+
+void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
+ ScalarU32 value) {
+ Write(ctx, binding, offset, value, "U32");
+}
+
+void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
+ Register value) {
+ Write(ctx, binding, offset, value, "U32X2");
+}
+
+void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
+ Register value) {
+ Write(ctx, binding, offset, value, "U32X4");
+}
+
+void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value) {
+ ctx.Add("ATOMS.ADD.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarS32 value) {
+ ctx.Add("ATOMS.MIN.S32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value) {
+ ctx.Add("ATOMS.MIN.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarS32 value) {
+ ctx.Add("ATOMS.MAX.S32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value) {
+ ctx.Add("ATOMS.MAX.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value) {
+ ctx.Add("ATOMS.IWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value) {
+ ctx.Add("ATOMS.DWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value) {
+ ctx.Add("ATOMS.AND.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value) {
+ ctx.Add("ATOMS.OR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value) {
+ ctx.Add("ATOMS.XOR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ ScalarU32 value) {
+ ctx.Add("ATOMS.EXCH.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+ Register value) {
+ ctx.LongAdd("ATOMS.EXCH.U64 {}.x,{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value) {
+ Atom(ctx, inst, binding, offset, value, "ADD", "U32");
+}
+
+void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarS32 value) {
+ Atom(ctx, inst, binding, offset, value, "MIN", "S32");
+}
+
+void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value) {
+ Atom(ctx, inst, binding, offset, value, "MIN", "U32");
+}
+
+void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarS32 value) {
+ Atom(ctx, inst, binding, offset, value, "MAX", "S32");
+}
+
+void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value) {
+ Atom(ctx, inst, binding, offset, value, "MAX", "U32");
+}
+
+void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value) {
+ Atom(ctx, inst, binding, offset, value, "IWRAP", "U32");
+}
+
+void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value) {
+ Atom(ctx, inst, binding, offset, value, "DWRAP", "U32");
+}
+
+void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value) {
+ Atom(ctx, inst, binding, offset, value, "AND", "U32");
+}
+
+void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value) {
+ Atom(ctx, inst, binding, offset, value, "OR", "U32");
+}
+
+void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value) {
+ Atom(ctx, inst, binding, offset, value, "XOR", "U32");
+}
+
+void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarU32 value) {
+ Atom(ctx, inst, binding, offset, value, "EXCH", "U32");
+}
+
+void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value) {
+ Atom(ctx, inst, binding, offset, value, "ADD", "U64");
+}
+
+void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value) {
+ Atom(ctx, inst, binding, offset, value, "MIN", "S64");
+}
+
+void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value) {
+ Atom(ctx, inst, binding, offset, value, "MIN", "U64");
+}
+
+void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value) {
+ Atom(ctx, inst, binding, offset, value, "MAX", "S64");
+}
+
+void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value) {
+ Atom(ctx, inst, binding, offset, value, "MAX", "U64");
+}
+
+void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value) {
+ Atom(ctx, inst, binding, offset, value, "AND", "U64");
+}
+
+void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value) {
+ Atom(ctx, inst, binding, offset, value, "OR", "U64");
+}
+
+void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value) {
+ Atom(ctx, inst, binding, offset, value, "XOR", "U64");
+}
+
+void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value) {
+ Atom(ctx, inst, binding, offset, value, "EXCH", "U64");
+}
+
+void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, ScalarF32 value) {
+ Atom(ctx, inst, binding, offset, value, "ADD", "F32");
+}
+
+void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value) {
+ Atom(ctx, inst, binding, offset, value, "ADD", "F16x2");
+}
+
+void EmitStorageAtomicAddF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] const IR::Value& binding,
+ [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value) {
+ Atom(ctx, inst, binding, offset, value, "MIN", "F16x2");
+}
+
+void EmitStorageAtomicMinF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] const IR::Value& binding,
+ [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ ScalarU32 offset, Register value) {
+ Atom(ctx, inst, binding, offset, value, "MAX", "F16x2");
+}
+
+void EmitStorageAtomicMaxF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] const IR::Value& binding,
+ [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicIAdd32(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicSMin32(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicUMin32(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicSMax32(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicUMax32(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicInc32(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicDec32(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicAnd32(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicOr32(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicXor32(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicExchange32(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicIAdd64(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicSMin64(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicUMin64(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicSMax64(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicUMax64(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicInc64(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicDec64(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicAnd64(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicOr64(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicXor64(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicExchange64(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicAddF32(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicAddF16x2(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicAddF32x2(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicMinF16x2(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicMinF32x2(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicMaxF16x2(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicMaxF32x2(EmitContext&) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp
new file mode 100644
index 000000000..ff64c6924
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp
@@ -0,0 +1,273 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+#ifdef _MSC_VER
+#pragma warning(disable : 4100)
+#endif
+
+namespace Shader::Backend::GLASM {
+
+#define NotImplemented() throw NotImplementedException("GLASM instruction {}", __LINE__)
+
+static void DefinePhi(EmitContext& ctx, IR::Inst& phi) {
+ switch (phi.Arg(0).Type()) {
+ case IR::Type::U1:
+ case IR::Type::U32:
+ case IR::Type::F32:
+ ctx.reg_alloc.Define(phi);
+ break;
+ case IR::Type::U64:
+ case IR::Type::F64:
+ ctx.reg_alloc.LongDefine(phi);
+ break;
+ default:
+ throw NotImplementedException("Phi node type {}", phi.Type());
+ }
+}
+
+void EmitPhi(EmitContext& ctx, IR::Inst& phi) {
+ const size_t num_args{phi.NumArgs()};
+ for (size_t i = 0; i < num_args; ++i) {
+ ctx.reg_alloc.Consume(phi.Arg(i));
+ }
+ if (!phi.Definition<Id>().is_valid) {
+ // The phi node wasn't forward defined
+ DefinePhi(ctx, phi);
+ }
+}
+
+void EmitVoid(EmitContext&) {}
+
+void EmitReference(EmitContext& ctx, const IR::Value& value) {
+ ctx.reg_alloc.Consume(value);
+}
+
+void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value& value) {
+ IR::Inst& phi{RegAlloc::AliasInst(*phi_value.Inst())};
+ if (!phi.Definition<Id>().is_valid) {
+ // The phi node wasn't forward defined
+ DefinePhi(ctx, phi);
+ }
+ const Register phi_reg{ctx.reg_alloc.Consume(IR::Value{&phi})};
+ const Value eval_value{ctx.reg_alloc.Consume(value)};
+
+ if (phi_reg == eval_value) {
+ return;
+ }
+ switch (phi.Flags<IR::Type>()) {
+ case IR::Type::U1:
+ case IR::Type::U32:
+ case IR::Type::F32:
+ ctx.Add("MOV.S {}.x,{};", phi_reg, ScalarS32{eval_value});
+ break;
+ case IR::Type::U64:
+ case IR::Type::F64:
+ ctx.Add("MOV.U64 {}.x,{};", phi_reg, ScalarRegister{eval_value});
+ break;
+ default:
+ throw NotImplementedException("Phi node type {}", phi.Type());
+ }
+}
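+// Phi handling sketch: EmitPhi only consumes the incoming values and lazily defines a register
+// for the phi; EmitPhiMove performs the actual copy with a 32-bit or 64-bit MOV depending on the
+// phi's type, and skips it entirely when the value already lives in the phi register.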
+
+void EmitJoin(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitDemoteToHelperInvocation(EmitContext& ctx) {
+ ctx.Add("KIL TR.x;");
+}
+
+void EmitBarrier(EmitContext& ctx) {
+ ctx.Add("BAR;");
+}
+
+void EmitWorkgroupMemoryBarrier(EmitContext& ctx) {
+ ctx.Add("MEMBAR.CTA;");
+}
+
+void EmitDeviceMemoryBarrier(EmitContext& ctx) {
+ ctx.Add("MEMBAR;");
+}
+
+void EmitPrologue(EmitContext& ctx) {
+ // TODO
+}
+
+void EmitEpilogue(EmitContext& ctx) {
+ // TODO
+}
+
+void EmitEmitVertex(EmitContext& ctx, ScalarS32 stream) {
+ if (stream.type == Type::U32 && stream.imm_u32 == 0) {
+ ctx.Add("EMIT;");
+ } else {
+ ctx.Add("EMITS {};", stream);
+ }
+}
+
+void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) {
+ if (!stream.IsImmediate()) {
+ LOG_WARNING(Shader_GLASM, "Stream is not immediate");
+ }
+ ctx.reg_alloc.Consume(stream);
+ ctx.Add("ENDPRIM;");
+}
+
+void EmitGetRegister(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitSetRegister(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetPred(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitSetPred(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitSetGotoVariable(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetGotoVariable(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitSetIndirectBranchVariable(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetIndirectBranchVariable(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetZFlag(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetSFlag(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetCFlag(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetOFlag(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitSetZFlag(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitSetSFlag(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitSetCFlag(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitSetOFlag(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst) {
+ ctx.Add("MOV.S {},invocation.groupid;", inst);
+}
+
+void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst) {
+ ctx.Add("MOV.S {},invocation.localid;", inst);
+}
+
+void EmitInvocationId(EmitContext& ctx, IR::Inst& inst) {
+ ctx.Add("MOV.S {}.x,primitive_invocation.x;", inst);
+}
+
+void EmitSampleId(EmitContext& ctx, IR::Inst& inst) {
+ ctx.Add("MOV.S {}.x,fragment.sampleid.x;", inst);
+}
+
+void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst) {
+ ctx.Add("MOV.S {}.x,fragment.helperthread.x;", inst);
+}
+
+void EmitYDirection(EmitContext& ctx, IR::Inst& inst) {
+ ctx.uses_y_direction = true;
+ ctx.Add("MOV.F {}.x,y_direction[0].w;", inst);
+}
+
+void EmitUndefU1(EmitContext& ctx, IR::Inst& inst) {
+ ctx.Add("MOV.S {}.x,0;", inst);
+}
+
+void EmitUndefU8(EmitContext& ctx, IR::Inst& inst) {
+ ctx.Add("MOV.S {}.x,0;", inst);
+}
+
+void EmitUndefU16(EmitContext& ctx, IR::Inst& inst) {
+ ctx.Add("MOV.S {}.x,0;", inst);
+}
+
+void EmitUndefU32(EmitContext& ctx, IR::Inst& inst) {
+ ctx.Add("MOV.S {}.x,0;", inst);
+}
+
+void EmitUndefU64(EmitContext& ctx, IR::Inst& inst) {
+ ctx.LongAdd("MOV.S64 {}.x,0;", inst);
+}
+
+void EmitGetZeroFromOp(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetSignFromOp(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetCarryFromOp(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetOverflowFromOp(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetSparseFromOp(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetInBoundsFromOp(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
+ ctx.Add("OR.S {},{},{};", inst, a, b);
+}
+
+void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
+ ctx.Add("AND.S {},{},{};", inst, a, b);
+}
+
+void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
+ ctx.Add("XOR.S {},{},{};", inst, a, b);
+}
+
+void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
+ ctx.Add("SEQ.S {},{},0;", inst, value);
+}
+
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp
new file mode 100644
index 000000000..68fff613c
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp
@@ -0,0 +1,67 @@
+
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLASM {
+
+void EmitSelectU1(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value,
+ ScalarS32 false_value) {
+ ctx.Add("CMP.S {},{},{},{};", inst, cond, true_value, false_value);
+}
+
+void EmitSelectU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond,
+ [[maybe_unused]] ScalarS32 true_value, [[maybe_unused]] ScalarS32 false_value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitSelectU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond,
+ [[maybe_unused]] ScalarS32 true_value, [[maybe_unused]] ScalarS32 false_value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value,
+ ScalarS32 false_value) {
+ ctx.Add("CMP.S {},{},{},{};", inst, cond, true_value, false_value);
+}
+
+void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, Register true_value,
+ Register false_value) {
+ ctx.reg_alloc.InvalidateConditionCodes();
+ const Register ret{ctx.reg_alloc.LongDefine(inst)};
+ if (ret == true_value) {
+ ctx.Add("MOV.S.CC RC.x,{};"
+ "MOV.U64 {}.x(EQ.x),{};",
+ cond, ret, false_value);
+ } else if (ret == false_value) {
+ ctx.Add("MOV.S.CC RC.x,{};"
+ "MOV.U64 {}.x(NE.x),{};",
+ cond, ret, true_value);
+ } else {
+ ctx.Add("MOV.S.CC RC.x,{};"
+ "MOV.U64 {}.x,{};"
+ "MOV.U64 {}.x(NE.x),{};",
+ cond, ret, false_value, ret, true_value);
+ }
+}
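+// The 64-bit select avoids a branch: MOV.S.CC sets the condition codes from 'cond', and the
+// (EQ.x)/(NE.x) write masks make the U64 moves conditional. When the destination aliases one of
+// the inputs, only the opposite case needs a move.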
+
+void EmitSelectF16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond,
+ [[maybe_unused]] Register true_value, [[maybe_unused]] Register false_value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value,
+ ScalarS32 false_value) {
+ ctx.Add("CMP.S {},{},{},{};", inst, cond, true_value, false_value);
+}
+
+void EmitSelectF64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond,
+ [[maybe_unused]] Register true_value, [[maybe_unused]] Register false_value) {
+ throw NotImplementedException("GLASM instruction");
+}
+
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp
new file mode 100644
index 000000000..c1498f449
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp
@@ -0,0 +1,58 @@
+
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLASM {
+void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) {
+ ctx.Add("LDS.U8 {},shared_mem[{}];", inst, offset);
+}
+
+void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) {
+ ctx.Add("LDS.S8 {},shared_mem[{}];", inst, offset);
+}
+
+void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) {
+ ctx.Add("LDS.U16 {},shared_mem[{}];", inst, offset);
+}
+
+void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) {
+ ctx.Add("LDS.S16 {},shared_mem[{}];", inst, offset);
+}
+
+void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) {
+ ctx.Add("LDS.U32 {},shared_mem[{}];", inst, offset);
+}
+
+void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) {
+ ctx.Add("LDS.U32X2 {},shared_mem[{}];", inst, offset);
+}
+
+void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) {
+ ctx.Add("LDS.U32X4 {},shared_mem[{}];", inst, offset);
+}
+
+void EmitWriteSharedU8(EmitContext& ctx, ScalarU32 offset, ScalarU32 value) {
+ ctx.Add("STS.U8 {},shared_mem[{}];", value, offset);
+}
+
+void EmitWriteSharedU16(EmitContext& ctx, ScalarU32 offset, ScalarU32 value) {
+ ctx.Add("STS.U16 {},shared_mem[{}];", value, offset);
+}
+
+void EmitWriteSharedU32(EmitContext& ctx, ScalarU32 offset, ScalarU32 value) {
+ ctx.Add("STS.U32 {},shared_mem[{}];", value, offset);
+}
+
+void EmitWriteSharedU64(EmitContext& ctx, ScalarU32 offset, Register value) {
+ ctx.Add("STS.U32X2 {},shared_mem[{}];", value, offset);
+}
+
+void EmitWriteSharedU128(EmitContext& ctx, ScalarU32 offset, Register value) {
+ ctx.Add("STS.U32X4 {},shared_mem[{}];", value, offset);
+}
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_special.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_special.cpp
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_special.cpp
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_undefined.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_undefined.cpp
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_undefined.cpp
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp
new file mode 100644
index 000000000..544d475b4
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp
@@ -0,0 +1,150 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/profile.h"
+
+namespace Shader::Backend::GLASM {
+
+void EmitLaneId(EmitContext& ctx, IR::Inst& inst) {
+ ctx.Add("MOV.S {}.x,{}.threadid;", inst, ctx.stage_name);
+}
+
+void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) {
+ ctx.Add("TGALL.S {}.x,{};", inst, pred);
+}
+
+void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) {
+ ctx.Add("TGANY.S {}.x,{};", inst, pred);
+}
+
+void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) {
+ ctx.Add("TGEQ.S {}.x,{};", inst, pred);
+}
+
+void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) {
+ ctx.Add("TGBALLOT {}.x,{};", inst, pred);
+}
+
+void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) {
+ ctx.Add("MOV.U {},{}.threadeqmask;", inst, ctx.stage_name);
+}
+
+void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst) {
+ ctx.Add("MOV.U {},{}.threadltmask;", inst, ctx.stage_name);
+}
+
+void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst) {
+ ctx.Add("MOV.U {},{}.threadlemask;", inst, ctx.stage_name);
+}
+
+void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst) {
+ ctx.Add("MOV.U {},{}.threadgtmask;", inst, ctx.stage_name);
+}
+
+void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) {
+ ctx.Add("MOV.U {},{}.threadgemask;", inst, ctx.stage_name);
+}
+
+static void Shuffle(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
+ const IR::Value& clamp, const IR::Value& segmentation_mask,
+ std::string_view op) {
+ IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)};
+ if (in_bounds) {
+ in_bounds->Invalidate();
+ }
+ std::string mask;
+ if (clamp.IsImmediate() && segmentation_mask.IsImmediate()) {
+ mask = fmt::to_string(clamp.U32() | (segmentation_mask.U32() << 8));
+ } else {
+ mask = "RC";
+ ctx.Add("BFI.U RC.x,{{5,8,0,0}},{},{};",
+ ScalarU32{ctx.reg_alloc.Consume(segmentation_mask)},
+ ScalarU32{ctx.reg_alloc.Consume(clamp)});
+ }
+ const Register value_ret{ctx.reg_alloc.Define(inst)};
+ if (in_bounds) {
+ const Register bounds_ret{ctx.reg_alloc.Define(*in_bounds)};
+ ctx.Add("SHF{}.U {},{},{},{};"
+ "MOV.U {}.x,{}.y;",
+ op, bounds_ret, value, index, mask, value_ret, bounds_ret);
+ } else {
+ ctx.Add("SHF{}.U {},{},{},{};"
+ "MOV.U {}.x,{}.y;",
+ op, value_ret, value, index, mask, value_ret, value_ret);
+ }
+}
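+// The shuffle mask packs the clamp value in the low bits and the segmentation mask at bit 8;
+// it is folded into an immediate when both are constant and otherwise assembled in RC.x with
+// BFI.U. The shuffled value is read back from the .y component of the SHF destination, which
+// doubles as the result register of the GetInBoundsFromOp pseudo-op when that flag is consumed.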
+
+void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
+ const IR::Value& clamp, const IR::Value& segmentation_mask) {
+ Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "IDX");
+}
+
+void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
+ const IR::Value& clamp, const IR::Value& segmentation_mask) {
+ Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "UP");
+}
+
+void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
+ const IR::Value& clamp, const IR::Value& segmentation_mask) {
+ Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "DOWN");
+}
+
+void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
+ const IR::Value& clamp, const IR::Value& segmentation_mask) {
+ Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "XOR");
+}
+
+void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a, ScalarF32 op_b,
+ ScalarU32 swizzle) {
+ const auto ret{ctx.reg_alloc.Define(inst)};
+ ctx.Add("AND.U RC.z,{}.threadid,3;"
+ "SHL.U RC.z,RC.z,1;"
+ "SHR.U RC.z,{},RC.z;"
+ "AND.U RC.z,RC.z,3;"
+ "MUL.F RC.x,{},FSWZA[RC.z];"
+ "MUL.F RC.y,{},FSWZB[RC.z];"
+ "ADD.F {}.x,RC.x,RC.y;",
+ ctx.stage_name, swizzle, op_a, op_b, ret);
+}
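+// FSWZADD sketch: the low two bits of the thread id select a 2-bit lane field from 'swizzle',
+// which indexes the FSWZA/FSWZB coefficient tables (assumed to be declared elsewhere in the
+// emitted program) to scale op_a and op_b before the final add.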
+
+void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) {
+ if (ctx.profile.support_derivative_control) {
+ ctx.Add("DDX.FINE {}.x,{};", inst, p);
+ } else {
+ LOG_WARNING(Shader_GLASM, "Fine derivatives not supported by device");
+ ctx.Add("DDX {}.x,{};", inst, p);
+ }
+}
+
+void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) {
+ if (ctx.profile.support_derivative_control) {
+ ctx.Add("DDY.FINE {}.x,{};", inst, p);
+ } else {
+ LOG_WARNING(Shader_GLASM, "Fine derivatives not supported by device");
+ ctx.Add("DDY {}.x,{};", inst, p);
+ }
+}
+
+void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) {
+ if (ctx.profile.support_derivative_control) {
+ ctx.Add("DDX.COARSE {}.x,{};", inst, p);
+ } else {
+ LOG_WARNING(Shader_GLASM, "Coarse derivatives not supported by device");
+ ctx.Add("DDX {}.x,{};", inst, p);
+ }
+}
+
+void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) {
+ if (ctx.profile.support_derivative_control) {
+ ctx.Add("DDY.COARSE {}.x,{};", inst, p);
+ } else {
+ LOG_WARNING(Shader_GLASM, "Coarse derivatives not supported by device");
+ ctx.Add("DDY {}.x,{};", inst, p);
+ }
+}
+
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.cpp b/src/shader_recompiler/backend/glasm/reg_alloc.cpp
new file mode 100644
index 000000000..4c046db6e
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/reg_alloc.cpp
@@ -0,0 +1,186 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string>
+
+#include <fmt/format.h>
+
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/reg_alloc.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLASM {
+
+Register RegAlloc::Define(IR::Inst& inst) {
+ return Define(inst, false);
+}
+
+Register RegAlloc::LongDefine(IR::Inst& inst) {
+ return Define(inst, true);
+}
+
+Value RegAlloc::Peek(const IR::Value& value) {
+ if (value.IsImmediate()) {
+ return MakeImm(value);
+ } else {
+ return PeekInst(*value.Inst());
+ }
+}
+
+Value RegAlloc::Consume(const IR::Value& value) {
+ if (value.IsImmediate()) {
+ return MakeImm(value);
+ } else {
+ return ConsumeInst(*value.Inst());
+ }
+}
+
+void RegAlloc::Unref(IR::Inst& inst) {
+ IR::Inst& value_inst{AliasInst(inst)};
+ value_inst.DestructiveRemoveUsage();
+ if (!value_inst.HasUses()) {
+ Free(value_inst.Definition<Id>());
+ }
+}
+
+Register RegAlloc::AllocReg() {
+ Register ret;
+ ret.type = Type::Register;
+ ret.id = Alloc(false);
+ return ret;
+}
+
+Register RegAlloc::AllocLongReg() {
+ Register ret;
+ ret.type = Type::Register;
+ ret.id = Alloc(true);
+ return ret;
+}
+
+void RegAlloc::FreeReg(Register reg) {
+ Free(reg.id);
+}
+
+Value RegAlloc::MakeImm(const IR::Value& value) {
+ Value ret;
+ switch (value.Type()) {
+ case IR::Type::Void:
+ ret.type = Type::Void;
+ break;
+ case IR::Type::U1:
+ ret.type = Type::U32;
+ ret.imm_u32 = value.U1() ? 0xffffffff : 0;
+ break;
+ case IR::Type::U32:
+ ret.type = Type::U32;
+ ret.imm_u32 = value.U32();
+ break;
+ case IR::Type::F32:
+ ret.type = Type::U32;
+ ret.imm_u32 = Common::BitCast<u32>(value.F32());
+ break;
+ case IR::Type::U64:
+ ret.type = Type::U64;
+ ret.imm_u64 = value.U64();
+ break;
+ case IR::Type::F64:
+ ret.type = Type::U64;
+ ret.imm_u64 = Common::BitCast<u64>(value.F64());
+ break;
+ default:
+ throw NotImplementedException("Immediate type {}", value.Type());
+ }
+ return ret;
+}
+
+Register RegAlloc::Define(IR::Inst& inst, bool is_long) {
+ if (inst.HasUses()) {
+ inst.SetDefinition<Id>(Alloc(is_long));
+ } else {
+ Id id{};
+ id.is_long.Assign(is_long ? 1 : 0);
+ id.is_null.Assign(1);
+ inst.SetDefinition<Id>(id);
+ }
+ return Register{PeekInst(inst)};
+}
+
+Value RegAlloc::PeekInst(IR::Inst& inst) {
+ Value ret;
+ ret.type = Type::Register;
+ ret.id = inst.Definition<Id>();
+ return ret;
+}
+
+Value RegAlloc::ConsumeInst(IR::Inst& inst) {
+ Unref(inst);
+ return PeekInst(inst);
+}
+
+Id RegAlloc::Alloc(bool is_long) {
+ size_t& num_regs{is_long ? num_used_long_registers : num_used_registers};
+ std::bitset<NUM_REGS>& use{is_long ? long_register_use : register_use};
+ if (num_used_registers + num_used_long_registers < NUM_REGS) {
+ for (size_t reg = 0; reg < NUM_REGS; ++reg) {
+ if (use[reg]) {
+ continue;
+ }
+ num_regs = std::max(num_regs, reg + 1);
+ use[reg] = true;
+ Id ret{};
+ ret.is_valid.Assign(1);
+ ret.is_long.Assign(is_long ? 1 : 0);
+ ret.is_spill.Assign(0);
+ ret.is_condition_code.Assign(0);
+ ret.is_null.Assign(0);
+ ret.index.Assign(static_cast<u32>(reg));
+ return ret;
+ }
+ }
+ throw NotImplementedException("Register spilling");
+}
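+// Allocation is a first-fit scan over the use bitset; num_used_registers and
+// num_used_long_registers track the high-water mark of each register file. Register spilling is
+// not implemented and raises NotImplementedException once the pool is exhausted.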
+
+void RegAlloc::Free(Id id) {
+ if (id.is_valid == 0) {
+ throw LogicError("Freeing invalid register");
+ }
+ if (id.is_spill != 0) {
+ throw NotImplementedException("Free spill");
+ }
+ if (id.is_long != 0) {
+ long_register_use[id.index] = false;
+ } else {
+ register_use[id.index] = false;
+ }
+}
+
+/*static*/ bool RegAlloc::IsAliased(const IR::Inst& inst) {
+ switch (inst.GetOpcode()) {
+ case IR::Opcode::Identity:
+ case IR::Opcode::BitCastU16F16:
+ case IR::Opcode::BitCastU32F32:
+ case IR::Opcode::BitCastU64F64:
+ case IR::Opcode::BitCastF16U16:
+ case IR::Opcode::BitCastF32U32:
+ case IR::Opcode::BitCastF64U64:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/*static*/ IR::Inst& RegAlloc::AliasInst(IR::Inst& inst) {
+ IR::Inst* it{&inst};
+ while (IsAliased(*it)) {
+ const IR::Value arg{it->Arg(0)};
+ if (arg.IsImmediate()) {
+ break;
+ }
+ it = arg.InstRecursive();
+ }
+ return *it;
+}
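+// Identity and bit-cast instructions reuse their operand's register, so alias chains are walked
+// down to the first non-aliased producer (stopping at immediates); Unref therefore releases the
+// underlying definition rather than the alias itself.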
+
+} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.h b/src/shader_recompiler/backend/glasm/reg_alloc.h
new file mode 100644
index 000000000..82aec66c6
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/reg_alloc.h
@@ -0,0 +1,303 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <bitset>
+
+#include <fmt/format.h>
+
+#include "common/bit_cast.h"
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+
+namespace Shader::IR {
+class Inst;
+class Value;
+} // namespace Shader::IR
+
+namespace Shader::Backend::GLASM {
+
+class EmitContext;
+
+enum class Type : u32 {
+ Void,
+ Register,
+ U32,
+ U64,
+};
+
+struct Id {
+ union {
+ u32 raw;
+ BitField<0, 1, u32> is_valid;
+ BitField<1, 1, u32> is_long;
+ BitField<2, 1, u32> is_spill;
+ BitField<3, 1, u32> is_condition_code;
+ BitField<4, 1, u32> is_null;
+ BitField<5, 27, u32> index;
+ };
+
+ bool operator==(Id rhs) const noexcept {
+ return raw == rhs.raw;
+ }
+ bool operator!=(Id rhs) const noexcept {
+ return !operator==(rhs);
+ }
+};
+static_assert(sizeof(Id) == sizeof(u32));
+
+struct Value {
+ Type type;
+ union {
+ Id id;
+ u32 imm_u32;
+ u64 imm_u64;
+ };
+
+ bool operator==(const Value& rhs) const noexcept {
+ if (type != rhs.type) {
+ return false;
+ }
+ switch (type) {
+ case Type::Void:
+ return true;
+ case Type::Register:
+ return id == rhs.id;
+ case Type::U32:
+ return imm_u32 == rhs.imm_u32;
+ case Type::U64:
+ return imm_u64 == rhs.imm_u64;
+ }
+ return false;
+ }
+ bool operator!=(const Value& rhs) const noexcept {
+ return !operator==(rhs);
+ }
+};
+struct Register : Value {};
+struct ScalarRegister : Value {};
+struct ScalarU32 : Value {};
+struct ScalarS32 : Value {};
+struct ScalarF32 : Value {};
+struct ScalarF64 : Value {};
+
+class RegAlloc {
+public:
+ RegAlloc() = default;
+
+ Register Define(IR::Inst& inst);
+
+ Register LongDefine(IR::Inst& inst);
+
+ [[nodiscard]] Value Peek(const IR::Value& value);
+
+ Value Consume(const IR::Value& value);
+
+ void Unref(IR::Inst& inst);
+
+ [[nodiscard]] Register AllocReg();
+
+ [[nodiscard]] Register AllocLongReg();
+
+ void FreeReg(Register reg);
+
+ void InvalidateConditionCodes() {
+ // This does nothing for now
+ }
+
+ [[nodiscard]] size_t NumUsedRegisters() const noexcept {
+ return num_used_registers;
+ }
+
+ [[nodiscard]] size_t NumUsedLongRegisters() const noexcept {
+ return num_used_long_registers;
+ }
+
+ [[nodiscard]] bool IsEmpty() const noexcept {
+ return register_use.none() && long_register_use.none();
+ }
+
+ /// Returns true if the instruction is expected to be aliased to another
+ static bool IsAliased(const IR::Inst& inst);
+
+    /// Returns the instruction that produces the underlying value of an alias sequence
+ static IR::Inst& AliasInst(IR::Inst& inst);
+
+private:
+ static constexpr size_t NUM_REGS = 4096;
+ static constexpr size_t NUM_ELEMENTS = 4;
+
+ Value MakeImm(const IR::Value& value);
+
+ Register Define(IR::Inst& inst, bool is_long);
+
+ Value PeekInst(IR::Inst& inst);
+
+ Value ConsumeInst(IR::Inst& inst);
+
+ Id Alloc(bool is_long);
+
+ void Free(Id id);
+
+ size_t num_used_registers{};
+ size_t num_used_long_registers{};
+ std::bitset<NUM_REGS> register_use{};
+ std::bitset<NUM_REGS> long_register_use{};
+};
+
+template <bool scalar, typename FormatContext>
+auto FormatTo(FormatContext& ctx, Id id) {
+ if (id.is_condition_code != 0) {
+ throw NotImplementedException("Condition code emission");
+ }
+ if (id.is_spill != 0) {
+ throw NotImplementedException("Spill emission");
+ }
+ if constexpr (scalar) {
+ if (id.is_null != 0) {
+ return fmt::format_to(ctx.out(), "{}", id.is_long != 0 ? "DC.x" : "RC.x");
+ }
+ if (id.is_long != 0) {
+ return fmt::format_to(ctx.out(), "D{}.x", id.index.Value());
+ } else {
+ return fmt::format_to(ctx.out(), "R{}.x", id.index.Value());
+ }
+ } else {
+ if (id.is_null != 0) {
+ return fmt::format_to(ctx.out(), "{}", id.is_long != 0 ? "DC" : "RC");
+ }
+ if (id.is_long != 0) {
+ return fmt::format_to(ctx.out(), "D{}", id.index.Value());
+ } else {
+ return fmt::format_to(ctx.out(), "R{}", id.index.Value());
+ }
+ }
+}
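+// Formatting sketch: scalar formatters append an explicit ".x" component and map null ids to the
+// RC/DC scratch registers, while vector formatters print the bare register name; long (64-bit)
+// ids use the D register file and 32-bit ids the R file.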
+
+} // namespace Shader::Backend::GLASM
+
+template <>
+struct fmt::formatter<Shader::Backend::GLASM::Id> {
+ constexpr auto parse(format_parse_context& ctx) {
+ return ctx.begin();
+ }
+ template <typename FormatContext>
+ auto format(Shader::Backend::GLASM::Id id, FormatContext& ctx) {
+ return Shader::Backend::GLASM::FormatTo<true>(ctx, id);
+ }
+};
+
+template <>
+struct fmt::formatter<Shader::Backend::GLASM::Register> {
+ constexpr auto parse(format_parse_context& ctx) {
+ return ctx.begin();
+ }
+ template <typename FormatContext>
+ auto format(const Shader::Backend::GLASM::Register& value, FormatContext& ctx) {
+ if (value.type != Shader::Backend::GLASM::Type::Register) {
+ throw Shader::InvalidArgument("Register value type is not register");
+ }
+ return Shader::Backend::GLASM::FormatTo<false>(ctx, value.id);
+ }
+};
+
+template <>
+struct fmt::formatter<Shader::Backend::GLASM::ScalarRegister> {
+ constexpr auto parse(format_parse_context& ctx) {
+ return ctx.begin();
+ }
+ template <typename FormatContext>
+ auto format(const Shader::Backend::GLASM::ScalarRegister& value, FormatContext& ctx) {
+ if (value.type != Shader::Backend::GLASM::Type::Register) {
+ throw Shader::InvalidArgument("Register value type is not register");
+ }
+ return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id);
+ }
+};
+
+template <>
+struct fmt::formatter<Shader::Backend::GLASM::ScalarU32> {
+ constexpr auto parse(format_parse_context& ctx) {
+ return ctx.begin();
+ }
+ template <typename FormatContext>
+ auto format(const Shader::Backend::GLASM::ScalarU32& value, FormatContext& ctx) {
+ switch (value.type) {
+ case Shader::Backend::GLASM::Type::Void:
+ break;
+ case Shader::Backend::GLASM::Type::Register:
+ return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id);
+ case Shader::Backend::GLASM::Type::U32:
+ return fmt::format_to(ctx.out(), "{}", value.imm_u32);
+ case Shader::Backend::GLASM::Type::U64:
+ break;
+ }
+ throw Shader::InvalidArgument("Invalid value type {}", value.type);
+ }
+};
+
+template <>
+struct fmt::formatter<Shader::Backend::GLASM::ScalarS32> {
+ constexpr auto parse(format_parse_context& ctx) {
+ return ctx.begin();
+ }
+ template <typename FormatContext>
+ auto format(const Shader::Backend::GLASM::ScalarS32& value, FormatContext& ctx) {
+ switch (value.type) {
+ case Shader::Backend::GLASM::Type::Void:
+ break;
+ case Shader::Backend::GLASM::Type::Register:
+ return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id);
+ case Shader::Backend::GLASM::Type::U32:
+ return fmt::format_to(ctx.out(), "{}", static_cast<s32>(value.imm_u32));
+ case Shader::Backend::GLASM::Type::U64:
+ break;
+ }
+ throw Shader::InvalidArgument("Invalid value type {}", value.type);
+ }
+};
+
+template <>
+struct fmt::formatter<Shader::Backend::GLASM::ScalarF32> {
+ constexpr auto parse(format_parse_context& ctx) {
+ return ctx.begin();
+ }
+ template <typename FormatContext>
+ auto format(const Shader::Backend::GLASM::ScalarF32& value, FormatContext& ctx) {
+ switch (value.type) {
+ case Shader::Backend::GLASM::Type::Void:
+ break;
+ case Shader::Backend::GLASM::Type::Register:
+ return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id);
+ case Shader::Backend::GLASM::Type::U32:
+ return fmt::format_to(ctx.out(), "{}", Common::BitCast<f32>(value.imm_u32));
+ case Shader::Backend::GLASM::Type::U64:
+ break;
+ }
+ throw Shader::InvalidArgument("Invalid value type {}", value.type);
+ }
+};
+
+template <>
+struct fmt::formatter<Shader::Backend::GLASM::ScalarF64> {
+ constexpr auto parse(format_parse_context& ctx) {
+ return ctx.begin();
+ }
+ template <typename FormatContext>
+ auto format(const Shader::Backend::GLASM::ScalarF64& value, FormatContext& ctx) {
+ switch (value.type) {
+ case Shader::Backend::GLASM::Type::Void:
+ break;
+ case Shader::Backend::GLASM::Type::Register:
+ return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id);
+ case Shader::Backend::GLASM::Type::U32:
+ break;
+ case Shader::Backend::GLASM::Type::U64:
+ return fmt::format_to(ctx.out(), "{}", Common::BitCast<f64>(value.imm_u64));
+ }
+ throw Shader::InvalidArgument("Invalid value type {}", value.type);
+ }
+};
diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp
new file mode 100644
index 000000000..4e6f2c0fe
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_context.cpp
@@ -0,0 +1,715 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/bindings.h"
+#include "shader_recompiler/backend/glsl/emit_context.h"
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/profile.h"
+#include "shader_recompiler/runtime_info.h"
+
+namespace Shader::Backend::GLSL {
+namespace {
+u32 CbufIndex(size_t offset) {
+ return (offset / 4) % 4;
+}
+
+char Swizzle(size_t offset) {
+ return "xyzw"[CbufIndex(offset)];
+}
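+// e.g. a byte offset of 28 selects component 'w' of its vec4 element: (28 / 4) % 4 == 3.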
+
+std::string_view InterpDecorator(Interpolation interp) {
+ switch (interp) {
+ case Interpolation::Smooth:
+ return "";
+ case Interpolation::Flat:
+ return "flat ";
+ case Interpolation::NoPerspective:
+ return "noperspective ";
+ }
+ throw InvalidArgument("Invalid interpolation {}", interp);
+}
+
+std::string_view InputArrayDecorator(Stage stage) {
+ switch (stage) {
+ case Stage::Geometry:
+ case Stage::TessellationControl:
+ case Stage::TessellationEval:
+ return "[]";
+ default:
+ return "";
+ }
+}
+
+bool StoresPerVertexAttributes(Stage stage) {
+ switch (stage) {
+ case Stage::VertexA:
+ case Stage::VertexB:
+ case Stage::Geometry:
+ case Stage::TessellationEval:
+ return true;
+ default:
+ return false;
+ }
+}
+
+std::string OutputDecorator(Stage stage, u32 size) {
+ switch (stage) {
+ case Stage::TessellationControl:
+ return fmt::format("[{}]", size);
+ default:
+ return "";
+ }
+}
+
+std::string_view SamplerType(TextureType type, bool is_depth) {
+ if (is_depth) {
+ switch (type) {
+ case TextureType::Color1D:
+ return "sampler1DShadow";
+ case TextureType::ColorArray1D:
+ return "sampler1DArrayShadow";
+ case TextureType::Color2D:
+ return "sampler2DShadow";
+ case TextureType::ColorArray2D:
+ return "sampler2DArrayShadow";
+ case TextureType::ColorCube:
+ return "samplerCubeShadow";
+ case TextureType::ColorArrayCube:
+ return "samplerCubeArrayShadow";
+ default:
+ throw NotImplementedException("Texture type: {}", type);
+ }
+ }
+ switch (type) {
+ case TextureType::Color1D:
+ return "sampler1D";
+ case TextureType::ColorArray1D:
+ return "sampler1DArray";
+ case TextureType::Color2D:
+ return "sampler2D";
+ case TextureType::ColorArray2D:
+ return "sampler2DArray";
+ case TextureType::Color3D:
+ return "sampler3D";
+ case TextureType::ColorCube:
+ return "samplerCube";
+ case TextureType::ColorArrayCube:
+ return "samplerCubeArray";
+ case TextureType::Buffer:
+ return "samplerBuffer";
+ default:
+ throw NotImplementedException("Texture type: {}", type);
+ }
+}
+
+std::string_view ImageType(TextureType type) {
+ switch (type) {
+ case TextureType::Color1D:
+ return "uimage1D";
+ case TextureType::ColorArray1D:
+ return "uimage1DArray";
+ case TextureType::Color2D:
+ return "uimage2D";
+ case TextureType::ColorArray2D:
+ return "uimage2DArray";
+ case TextureType::Color3D:
+ return "uimage3D";
+ case TextureType::ColorCube:
+ return "uimageCube";
+ case TextureType::ColorArrayCube:
+ return "uimageCubeArray";
+ case TextureType::Buffer:
+ return "uimageBuffer";
+ default:
+ throw NotImplementedException("Image type: {}", type);
+ }
+}
+
+std::string_view ImageFormatString(ImageFormat format) {
+ switch (format) {
+ case ImageFormat::Typeless:
+ return "";
+ case ImageFormat::R8_UINT:
+ return ",r8ui";
+ case ImageFormat::R8_SINT:
+ return ",r8i";
+ case ImageFormat::R16_UINT:
+ return ",r16ui";
+ case ImageFormat::R16_SINT:
+ return ",r16i";
+ case ImageFormat::R32_UINT:
+ return ",r32ui";
+ case ImageFormat::R32G32_UINT:
+ return ",rg32ui";
+ case ImageFormat::R32G32B32A32_UINT:
+ return ",rgba32ui";
+ default:
+ throw NotImplementedException("Image format: {}", format);
+ }
+}
+
+std::string_view ImageAccessQualifier(bool is_written, bool is_read) {
+ if (is_written && !is_read) {
+ return "writeonly ";
+ }
+ if (is_read && !is_written) {
+ return "readonly ";
+ }
+ return "";
+}
+
+std::string_view GetTessMode(TessPrimitive primitive) {
+ switch (primitive) {
+ case TessPrimitive::Triangles:
+ return "triangles";
+ case TessPrimitive::Quads:
+ return "quads";
+ case TessPrimitive::Isolines:
+ return "isolines";
+ }
+ throw InvalidArgument("Invalid tessellation primitive {}", primitive);
+}
+
+std::string_view GetTessSpacing(TessSpacing spacing) {
+ switch (spacing) {
+ case TessSpacing::Equal:
+ return "equal_spacing";
+ case TessSpacing::FractionalOdd:
+ return "fractional_odd_spacing";
+ case TessSpacing::FractionalEven:
+ return "fractional_even_spacing";
+ }
+ throw InvalidArgument("Invalid tessellation spacing {}", spacing);
+}
+
+std::string_view InputPrimitive(InputTopology topology) {
+ switch (topology) {
+ case InputTopology::Points:
+ return "points";
+ case InputTopology::Lines:
+ return "lines";
+ case InputTopology::LinesAdjacency:
+ return "lines_adjacency";
+ case InputTopology::Triangles:
+ return "triangles";
+ case InputTopology::TrianglesAdjacency:
+ return "triangles_adjacency";
+ }
+ throw InvalidArgument("Invalid input topology {}", topology);
+}
+
+std::string_view OutputPrimitive(OutputTopology topology) {
+ switch (topology) {
+ case OutputTopology::PointList:
+ return "points";
+ case OutputTopology::LineStrip:
+ return "line_strip";
+ case OutputTopology::TriangleStrip:
+ return "triangle_strip";
+ }
+ throw InvalidArgument("Invalid output topology {}", topology);
+}
+
+void SetupLegacyOutPerVertex(EmitContext& ctx, std::string& header) {
+ if (!ctx.info.stores.Legacy()) {
+ return;
+ }
+ if (ctx.info.stores.FixedFunctionTexture()) {
+ header += "vec4 gl_TexCoord[8];";
+ }
+ if (ctx.info.stores.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) {
+ header += "vec4 gl_FrontColor;";
+ }
+ if (ctx.info.stores.AnyComponent(IR::Attribute::ColorFrontSpecularR)) {
+ header += "vec4 gl_FrontSecondaryColor;";
+ }
+ if (ctx.info.stores.AnyComponent(IR::Attribute::ColorBackDiffuseR)) {
+ header += "vec4 gl_BackColor;";
+ }
+ if (ctx.info.stores.AnyComponent(IR::Attribute::ColorBackSpecularR)) {
+ header += "vec4 gl_BackSecondaryColor;";
+ }
+}
+
+void SetupOutPerVertex(EmitContext& ctx, std::string& header) {
+ if (!StoresPerVertexAttributes(ctx.stage)) {
+ return;
+ }
+ if (ctx.uses_geometry_passthrough) {
+ return;
+ }
+ header += "out gl_PerVertex{vec4 gl_Position;";
+ if (ctx.info.stores[IR::Attribute::PointSize]) {
+ header += "float gl_PointSize;";
+ }
+ if (ctx.info.stores.ClipDistances()) {
+ header += "float gl_ClipDistance[];";
+ }
+ if (ctx.info.stores[IR::Attribute::ViewportIndex] &&
+ ctx.profile.support_viewport_index_layer_non_geometry && ctx.stage != Stage::Geometry) {
+ header += "int gl_ViewportIndex;";
+ }
+ SetupLegacyOutPerVertex(ctx, header);
+ header += "};";
+ if (ctx.info.stores[IR::Attribute::ViewportIndex] && ctx.stage == Stage::Geometry) {
+ header += "out int gl_ViewportIndex;";
+ }
+}
+
+void SetupInPerVertex(EmitContext& ctx, std::string& header) {
+ // Currently only required for TessellationControl to adhere to
+ // ARB_separate_shader_objects requirements
+ if (ctx.stage != Stage::TessellationControl) {
+ return;
+ }
+ const bool loads_position{ctx.info.loads.AnyComponent(IR::Attribute::PositionX)};
+ const bool loads_point_size{ctx.info.loads[IR::Attribute::PointSize]};
+ const bool loads_clip_distance{ctx.info.loads.ClipDistances()};
+ const bool loads_per_vertex{loads_position || loads_point_size || loads_clip_distance};
+ if (!loads_per_vertex) {
+ return;
+ }
+ header += "in gl_PerVertex{";
+ if (loads_position) {
+ header += "vec4 gl_Position;";
+ }
+ if (loads_point_size) {
+ header += "float gl_PointSize;";
+ }
+ if (loads_clip_distance) {
+ header += "float gl_ClipDistance[];";
+ }
+ header += "}gl_in[gl_MaxPatchVertices];";
+}
+
+void SetupLegacyInPerFragment(EmitContext& ctx, std::string& header) {
+ if (!ctx.info.loads.Legacy()) {
+ return;
+ }
+ header += "in gl_PerFragment{";
+ if (ctx.info.loads.FixedFunctionTexture()) {
+ header += "vec4 gl_TexCoord[8];";
+ }
+ if (ctx.info.loads.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) {
+ header += "vec4 gl_Color;";
+ }
+ header += "};";
+}
+
+} // Anonymous namespace
+
+EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_,
+ const RuntimeInfo& runtime_info_)
+ : info{program.info}, profile{profile_}, runtime_info{runtime_info_}, stage{program.stage},
+ uses_geometry_passthrough{program.is_geometry_passthrough &&
+ profile.support_geometry_shader_passthrough} {
+ if (profile.need_fastmath_off) {
+ header += "#pragma optionNV(fastmath off)\n";
+ }
+ SetupExtensions();
+ switch (program.stage) {
+ case Stage::VertexA:
+ case Stage::VertexB:
+ stage_name = "vs";
+ break;
+ case Stage::TessellationControl:
+ stage_name = "tcs";
+ header += fmt::format("layout(vertices={})out;", program.invocations);
+ break;
+ case Stage::TessellationEval:
+ stage_name = "tes";
+ header += fmt::format("layout({},{},{})in;", GetTessMode(runtime_info.tess_primitive),
+ GetTessSpacing(runtime_info.tess_spacing),
+ runtime_info.tess_clockwise ? "cw" : "ccw");
+ break;
+ case Stage::Geometry:
+ stage_name = "gs";
+ header += fmt::format("layout({})in;", InputPrimitive(runtime_info.input_topology));
+ if (uses_geometry_passthrough) {
+ header += "layout(passthrough)in gl_PerVertex{vec4 gl_Position;};";
+ break;
+ } else if (program.is_geometry_passthrough &&
+ !profile.support_geometry_shader_passthrough) {
+ LOG_WARNING(Shader_GLSL, "Passthrough geometry program used but not supported");
+ }
+ header += fmt::format(
+ "layout({},max_vertices={})out;in gl_PerVertex{{vec4 gl_Position;}}gl_in[];",
+ OutputPrimitive(program.output_topology), program.output_vertices);
+ break;
+ case Stage::Fragment:
+ stage_name = "fs";
+ position_name = "gl_FragCoord";
+ if (runtime_info.force_early_z) {
+ header += "layout(early_fragment_tests)in;";
+ }
+ if (info.uses_sample_id) {
+ header += "in int gl_SampleID;";
+ }
+ if (info.stores_sample_mask) {
+ header += "out int gl_SampleMask[];";
+ }
+ break;
+ case Stage::Compute:
+ stage_name = "cs";
+ const u32 local_x{std::max(program.workgroup_size[0], 1u)};
+ const u32 local_y{std::max(program.workgroup_size[1], 1u)};
+ const u32 local_z{std::max(program.workgroup_size[2], 1u)};
+ header += fmt::format("layout(local_size_x={},local_size_y={},local_size_z={}) in;",
+ local_x, local_y, local_z);
+ break;
+ }
+ SetupOutPerVertex(*this, header);
+ SetupInPerVertex(*this, header);
+ SetupLegacyInPerFragment(*this, header);
+
+ for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
+ if (!info.loads.Generic(index) || !runtime_info.previous_stage_stores.Generic(index)) {
+ continue;
+ }
+ const auto qualifier{uses_geometry_passthrough ? "passthrough"
+ : fmt::format("location={}", index)};
+ header += fmt::format("layout({}){}in vec4 in_attr{}{};", qualifier,
+ InterpDecorator(info.interpolation[index]), index,
+ InputArrayDecorator(stage));
+ }
+ for (size_t index = 0; index < info.uses_patches.size(); ++index) {
+ if (!info.uses_patches[index]) {
+ continue;
+ }
+ const auto qualifier{stage == Stage::TessellationControl ? "out" : "in"};
+ header += fmt::format("layout(location={})patch {} vec4 patch{};", index, qualifier, index);
+ }
+ if (stage == Stage::Fragment) {
+ for (size_t index = 0; index < info.stores_frag_color.size(); ++index) {
+ if (!info.stores_frag_color[index] && !profile.need_declared_frag_colors) {
+ continue;
+ }
+ header += fmt::format("layout(location={})out vec4 frag_color{};", index, index);
+ }
+ }
+ for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
+ if (info.stores.Generic(index)) {
+ DefineGenericOutput(index, program.invocations);
+ }
+ }
+ DefineConstantBuffers(bindings);
+ DefineStorageBuffers(bindings);
+ SetupImages(bindings);
+ SetupTextures(bindings);
+ DefineHelperFunctions();
+ DefineConstants();
+}
+
+void EmitContext::SetupExtensions() {
+ header += "#extension GL_ARB_separate_shader_objects : enable\n";
+ if (info.uses_shadow_lod && profile.support_gl_texture_shadow_lod) {
+ header += "#extension GL_EXT_texture_shadow_lod : enable\n";
+ }
+ if (info.uses_int64 && profile.support_int64) {
+ header += "#extension GL_ARB_gpu_shader_int64 : enable\n";
+ }
+ if (info.uses_int64_bit_atomics) {
+ header += "#extension GL_NV_shader_atomic_int64 : enable\n";
+ }
+ if (info.uses_atomic_f32_add) {
+ header += "#extension GL_NV_shader_atomic_float : enable\n";
+ }
+ if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) {
+ header += "#extension GL_NV_shader_atomic_fp16_vector : enable\n";
+ }
+ if (info.uses_fp16) {
+ if (profile.support_gl_nv_gpu_shader_5) {
+ header += "#extension GL_NV_gpu_shader5 : enable\n";
+ }
+ if (profile.support_gl_amd_gpu_shader_half_float) {
+ header += "#extension GL_AMD_gpu_shader_half_float : enable\n";
+ }
+ }
+ if (info.uses_subgroup_invocation_id || info.uses_subgroup_mask || info.uses_subgroup_vote ||
+ info.uses_subgroup_shuffles || info.uses_fswzadd) {
+ header += "#extension GL_ARB_shader_ballot : enable\n"
+ "#extension GL_ARB_shader_group_vote : enable\n";
+ if (!info.uses_int64 && profile.support_int64) {
+ header += "#extension GL_ARB_gpu_shader_int64 : enable\n";
+ }
+ if (profile.support_gl_warp_intrinsics) {
+ header += "#extension GL_NV_shader_thread_shuffle : enable\n";
+ }
+ }
+ if ((info.stores[IR::Attribute::ViewportIndex] || info.stores[IR::Attribute::Layer]) &&
+ profile.support_viewport_index_layer_non_geometry && stage != Stage::Geometry) {
+ header += "#extension GL_ARB_shader_viewport_layer_array : enable\n";
+ }
+ if (info.uses_sparse_residency && profile.support_gl_sparse_textures) {
+ header += "#extension GL_ARB_sparse_texture2 : enable\n";
+ }
+ if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) {
+ header += "#extension GL_NV_viewport_array2 : enable\n";
+ }
+ if (info.uses_typeless_image_reads) {
+ header += "#extension GL_EXT_shader_image_load_formatted : enable\n";
+ }
+ if (info.uses_derivatives && profile.support_gl_derivative_control) {
+ header += "#extension GL_ARB_derivative_control : enable\n";
+ }
+ if (uses_geometry_passthrough) {
+ header += "#extension GL_NV_geometry_shader_passthrough : enable\n";
+ }
+}
+
+void EmitContext::DefineConstantBuffers(Bindings& bindings) {
+ if (info.constant_buffer_descriptors.empty()) {
+ return;
+ }
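+ // Each constant buffer is declared as an array of 4096 vec4s (64 KiB), which matches the
+ // 0x10000-byte bound later checked in GetCbuf.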
+ for (const auto& desc : info.constant_buffer_descriptors) {
+ header += fmt::format(
+ "layout(std140,binding={}) uniform {}_cbuf_{}{{vec4 {}_cbuf{}[{}];}};",
+ bindings.uniform_buffer, stage_name, desc.index, stage_name, desc.index, 4 * 1024);
+ bindings.uniform_buffer += desc.count;
+ }
+}
+
+void EmitContext::DefineStorageBuffers(Bindings& bindings) {
+ if (info.storage_buffers_descriptors.empty()) {
+ return;
+ }
+ u32 index{};
+ for (const auto& desc : info.storage_buffers_descriptors) {
+ header += fmt::format("layout(std430,binding={}) buffer {}_ssbo_{}{{uint {}_ssbo{}[];}};",
+ bindings.storage_buffer, stage_name, bindings.storage_buffer,
+ stage_name, index);
+ bindings.storage_buffer += desc.count;
+ index += desc.count;
+ }
+}
+
+void EmitContext::DefineGenericOutput(size_t index, u32 invocations) {
+ static constexpr std::string_view swizzle{"xyzw"};
+ const size_t base_index{static_cast<size_t>(IR::Attribute::Generic0X) + index * 4};
+ u32 element{0};
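+ // Declare the output in runs of components: a run covers either the remaining components or
+ // exactly the components of a transform feedback varying, and each run's name and span are
+ // recorded in output_generics for later stores.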
+ while (element < 4) {
+ std::string definition{fmt::format("layout(location={}", index)};
+ const u32 remainder{4 - element};
+ const TransformFeedbackVarying* xfb_varying{};
+ if (!runtime_info.xfb_varyings.empty()) {
+ xfb_varying = &runtime_info.xfb_varyings[base_index + element];
+ xfb_varying = xfb_varying && xfb_varying->components > 0 ? xfb_varying : nullptr;
+ }
+ const u32 num_components{xfb_varying ? xfb_varying->components : remainder};
+ if (element > 0) {
+ definition += fmt::format(",component={}", element);
+ }
+ if (xfb_varying) {
+ definition +=
+ fmt::format(",xfb_buffer={},xfb_stride={},xfb_offset={}", xfb_varying->buffer,
+ xfb_varying->stride, xfb_varying->offset);
+ }
+ std::string name{fmt::format("out_attr{}", index)};
+ if (num_components < 4 || element > 0) {
+ name += fmt::format("_{}", swizzle.substr(element, num_components));
+ }
+ const auto type{num_components == 1 ? "float" : fmt::format("vec{}", num_components)};
+ definition += fmt::format(")out {} {}{};", type, name, OutputDecorator(stage, invocations));
+ header += definition;
+
+ const GenericElementInfo element_info{
+ .name = name,
+ .first_element = element,
+ .num_components = num_components,
+ };
+ std::fill_n(output_generics[index].begin() + element, num_components, element_info);
+ element += num_components;
+ }
+}
+
+void EmitContext::DefineHelperFunctions() {
+ header += "\n#define ftoi floatBitsToInt\n#define ftou floatBitsToUint\n"
+ "#define itof intBitsToFloat\n#define utof uintBitsToFloat\n";
+ if (info.uses_global_increment || info.uses_shared_increment) {
+ header += "uint CasIncrement(uint op_a,uint op_b){return op_a>=op_b?0u:(op_a+1u);}";
+ }
+ if (info.uses_global_decrement || info.uses_shared_decrement) {
+ header += "uint CasDecrement(uint op_a,uint op_b){"
+ "return op_a==0||op_a>op_b?op_b:(op_a-1u);}";
+ }
+ if (info.uses_atomic_f32_add) {
+ header += "uint CasFloatAdd(uint op_a,float op_b){"
+ "return ftou(utof(op_a)+op_b);}";
+ }
+ if (info.uses_atomic_f32x2_add) {
+ header += "uint CasFloatAdd32x2(uint op_a,vec2 op_b){"
+ "return packHalf2x16(unpackHalf2x16(op_a)+op_b);}";
+ }
+ if (info.uses_atomic_f32x2_min) {
+ header += "uint CasFloatMin32x2(uint op_a,vec2 op_b){return "
+ "packHalf2x16(min(unpackHalf2x16(op_a),op_b));}";
+ }
+ if (info.uses_atomic_f32x2_max) {
+ header += "uint CasFloatMax32x2(uint op_a,vec2 op_b){return "
+ "packHalf2x16(max(unpackHalf2x16(op_a),op_b));}";
+ }
+ if (info.uses_atomic_f16x2_add) {
+ header += "uint CasFloatAdd16x2(uint op_a,f16vec2 op_b){return "
+ "packFloat2x16(unpackFloat2x16(op_a)+op_b);}";
+ }
+ if (info.uses_atomic_f16x2_min) {
+ header += "uint CasFloatMin16x2(uint op_a,f16vec2 op_b){return "
+ "packFloat2x16(min(unpackFloat2x16(op_a),op_b));}";
+ }
+ if (info.uses_atomic_f16x2_max) {
+ header += "uint CasFloatMax16x2(uint op_a,f16vec2 op_b){return "
+ "packFloat2x16(max(unpackFloat2x16(op_a),op_b));}";
+ }
+ if (info.uses_atomic_s32_min) {
+ header += "uint CasMinS32(uint op_a,uint op_b){return uint(min(int(op_a),int(op_b)));}";
+ }
+ if (info.uses_atomic_s32_max) {
+ header += "uint CasMaxS32(uint op_a,uint op_b){return uint(max(int(op_a),int(op_b)));}";
+ }
+ if (info.uses_global_memory && profile.support_int64) {
+ header += DefineGlobalMemoryFunctions();
+ }
+ if (info.loads_indexed_attributes) {
+ const bool is_array{stage == Stage::Geometry};
+ const auto vertex_arg{is_array ? ",uint vertex" : ""};
+ std::string func{
+ fmt::format("float IndexedAttrLoad(int offset{}){{int base_index=offset>>2;uint "
+ "masked_index=uint(base_index)&3u;switch(base_index>>2){{",
+ vertex_arg)};
+ if (info.loads.AnyComponent(IR::Attribute::PositionX)) {
+ const auto position_idx{is_array ? "gl_in[vertex]." : ""};
+ func += fmt::format("case {}:return {}{}[masked_index];",
+ static_cast<u32>(IR::Attribute::PositionX) >> 2, position_idx,
+ position_name);
+ }
+ const u32 base_attribute_value = static_cast<u32>(IR::Attribute::Generic0X) >> 2;
+ for (u32 index = 0; index < IR::NUM_GENERICS; ++index) {
+ if (!info.loads.Generic(index)) {
+ continue;
+ }
+ const auto vertex_idx{is_array ? "[vertex]" : ""};
+ func += fmt::format("case {}:return in_attr{}{}[masked_index];",
+ base_attribute_value + index, index, vertex_idx);
+ }
+ func += "default: return 0.0;}}";
+ header += func;
+ }
+ if (info.stores_indexed_attributes) {
+ // TODO
+ }
+}
+
+std::string EmitContext::DefineGlobalMemoryFunctions() {
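+ // Each NVN storage buffer used (info.nvn_buffer_used) exposes its base GPU address and size
+ // through the constant buffer. The generated Load/WriteGlobal* functions compare the 64-bit
+ // address against every [base, base+size) range and fall back to returning zero (or doing
+ // nothing) when no buffer matches.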
+ const auto define_body{[&](std::string& func, size_t index, std::string_view return_statement) {
+ const auto& ssbo{info.storage_buffers_descriptors[index]};
+ const u32 size_cbuf_offset{ssbo.cbuf_offset + 8};
+ const auto ssbo_addr{fmt::format("ssbo_addr{}", index)};
+ const auto cbuf{fmt::format("{}_cbuf{}", stage_name, ssbo.cbuf_index)};
+ std::array<std::string, 2> addr_xy;
+ std::array<std::string, 2> size_xy;
+ for (size_t i = 0; i < addr_xy.size(); ++i) {
+ const auto addr_loc{ssbo.cbuf_offset + 4 * i};
+ const auto size_loc{size_cbuf_offset + 4 * i};
+ addr_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, addr_loc / 16, Swizzle(addr_loc));
+ size_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, size_loc / 16, Swizzle(size_loc));
+ }
+ const auto addr_pack{fmt::format("packUint2x32(uvec2({},{}))", addr_xy[0], addr_xy[1])};
+ const auto addr_statement{fmt::format("uint64_t {}={};", ssbo_addr, addr_pack)};
+ func += addr_statement;
+
+ const auto size_vec{fmt::format("uvec2({},{})", size_xy[0], size_xy[1])};
+ const auto comp_lhs{fmt::format("(addr>={})", ssbo_addr)};
+ const auto comp_rhs{fmt::format("(addr<({}+uint64_t({})))", ssbo_addr, size_vec)};
+ const auto comparison{fmt::format("if({}&&{}){{", comp_lhs, comp_rhs)};
+ func += comparison;
+
+ const auto ssbo_name{fmt::format("{}_ssbo{}", stage_name, index)};
+ func += fmt::format(fmt::runtime(return_statement), ssbo_name, ssbo_addr);
+ }};
+ std::string write_func{"void WriteGlobal32(uint64_t addr,uint data){"};
+ std::string write_func_64{"void WriteGlobal64(uint64_t addr,uvec2 data){"};
+ std::string write_func_128{"void WriteGlobal128(uint64_t addr,uvec4 data){"};
+ std::string load_func{"uint LoadGlobal32(uint64_t addr){"};
+ std::string load_func_64{"uvec2 LoadGlobal64(uint64_t addr){"};
+ std::string load_func_128{"uvec4 LoadGlobal128(uint64_t addr){"};
+ const size_t num_buffers{info.storage_buffers_descriptors.size()};
+ for (size_t index = 0; index < num_buffers; ++index) {
+ if (!info.nvn_buffer_used[index]) {
+ continue;
+ }
+ define_body(write_func, index, "{0}[uint(addr-{1})>>2]=data;return;}}");
+ define_body(write_func_64, index,
+ "{0}[uint(addr-{1})>>2]=data.x;{0}[uint(addr-{1}+4)>>2]=data.y;return;}}");
+ define_body(write_func_128, index,
+ "{0}[uint(addr-{1})>>2]=data.x;{0}[uint(addr-{1}+4)>>2]=data.y;{0}[uint("
+ "addr-{1}+8)>>2]=data.z;{0}[uint(addr-{1}+12)>>2]=data.w;return;}}");
+ define_body(load_func, index, "return {0}[uint(addr-{1})>>2];}}");
+ define_body(load_func_64, index,
+ "return uvec2({0}[uint(addr-{1})>>2],{0}[uint(addr-{1}+4)>>2]);}}");
+ define_body(load_func_128, index,
+ "return uvec4({0}[uint(addr-{1})>>2],{0}[uint(addr-{1}+4)>>2],{0}["
+ "uint(addr-{1}+8)>>2],{0}[uint(addr-{1}+12)>>2]);}}");
+ }
+ write_func += '}';
+ write_func_64 += '}';
+ write_func_128 += '}';
+ load_func += "return 0u;}";
+ load_func_64 += "return uvec2(0);}";
+ load_func_128 += "return uvec4(0);}";
+ return write_func + write_func_64 + write_func_128 + load_func + load_func_64 + load_func_128;
+}
+
+void EmitContext::SetupImages(Bindings& bindings) {
+ image_buffers.reserve(info.image_buffer_descriptors.size());
+ for (const auto& desc : info.image_buffer_descriptors) {
+ image_buffers.push_back({bindings.image, desc.count});
+ const auto format{ImageFormatString(desc.format)};
+ const auto qualifier{ImageAccessQualifier(desc.is_written, desc.is_read)};
+ const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""};
+ header += fmt::format("layout(binding={}{}) uniform {}uimageBuffer img{}{};",
+ bindings.image, format, qualifier, bindings.image, array_decorator);
+ bindings.image += desc.count;
+ }
+ images.reserve(info.image_descriptors.size());
+ for (const auto& desc : info.image_descriptors) {
+ images.push_back({bindings.image, desc.count});
+ const auto format{ImageFormatString(desc.format)};
+ const auto image_type{ImageType(desc.type)};
+ const auto qualifier{ImageAccessQualifier(desc.is_written, desc.is_read)};
+ const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""};
+ header += fmt::format("layout(binding={}{})uniform {}{} img{}{};", bindings.image, format,
+ qualifier, image_type, bindings.image, array_decorator);
+ bindings.image += desc.count;
+ }
+}
+
+void EmitContext::SetupTextures(Bindings& bindings) {
+ texture_buffers.reserve(info.texture_buffer_descriptors.size());
+ for (const auto& desc : info.texture_buffer_descriptors) {
+ texture_buffers.push_back({bindings.texture, desc.count});
+ const auto sampler_type{SamplerType(TextureType::Buffer, false)};
+ const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""};
+ header += fmt::format("layout(binding={}) uniform {} tex{}{};", bindings.texture,
+ sampler_type, bindings.texture, array_decorator);
+ bindings.texture += desc.count;
+ }
+ textures.reserve(info.texture_descriptors.size());
+ for (const auto& desc : info.texture_descriptors) {
+ textures.push_back({bindings.texture, desc.count});
+ const auto sampler_type{SamplerType(desc.type, desc.is_depth)};
+ const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""};
+ header += fmt::format("layout(binding={}) uniform {} tex{}{};", bindings.texture,
+ sampler_type, bindings.texture, array_decorator);
+ bindings.texture += desc.count;
+ }
+}
+
+void EmitContext::DefineConstants() {
+ if (info.uses_fswzadd) {
+ header += "const float FSWZ_A[]=float[4](-1.f,1.f,-1.f,0.f);"
+ "const float FSWZ_B[]=float[4](-1.f,-1.f,1.f,-1.f);";
+ }
+}
+
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h
new file mode 100644
index 000000000..d9b639d29
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_context.h
@@ -0,0 +1,174 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <string>
+#include <utility>
+#include <vector>
+
+#include <fmt/format.h>
+
+#include "shader_recompiler/backend/glsl/var_alloc.h"
+#include "shader_recompiler/stage.h"
+
+namespace Shader {
+struct Info;
+struct Profile;
+struct RuntimeInfo;
+} // namespace Shader
+
+namespace Shader::Backend {
+struct Bindings;
+}
+
+namespace Shader::IR {
+class Inst;
+struct Program;
+} // namespace Shader::IR
+
+namespace Shader::Backend::GLSL {
+
+struct GenericElementInfo {
+ std::string name;
+ u32 first_element{};
+ u32 num_components{};
+};
+
+struct TextureImageDefinition {
+ u32 binding;
+ u32 count;
+};
+
+class EmitContext {
+public:
+ explicit EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_,
+ const RuntimeInfo& runtime_info_);
+
+ template <GlslVarType type, typename... Args>
+ void Add(const char* format_str, IR::Inst& inst, Args&&... args) {
+ const auto var_def{var_alloc.AddDefine(inst, type)};
+ if (var_def.empty()) {
+ // Skip the "{}=" assignment prefix when the result is unused.
+ code += fmt::format(fmt::runtime(format_str + 3), std::forward<Args>(args)...);
+ } else {
+ code += fmt::format(fmt::runtime(format_str), var_def, std::forward<Args>(args)...);
+ }
+ // TODO: Remove this
+ code += '\n';
+ }
+
+ template <typename... Args>
+ void AddU1(const char* format_str, IR::Inst& inst, Args&&... args) {
+ Add<GlslVarType::U1>(format_str, inst, args...);
+ }
+
+ template <typename... Args>
+ void AddF16x2(const char* format_str, IR::Inst& inst, Args&&... args) {
+ Add<GlslVarType::F16x2>(format_str, inst, args...);
+ }
+
+ template <typename... Args>
+ void AddU32(const char* format_str, IR::Inst& inst, Args&&... args) {
+ Add<GlslVarType::U32>(format_str, inst, args...);
+ }
+
+ template <typename... Args>
+ void AddF32(const char* format_str, IR::Inst& inst, Args&&... args) {
+ Add<GlslVarType::F32>(format_str, inst, args...);
+ }
+
+ template <typename... Args>
+ void AddU64(const char* format_str, IR::Inst& inst, Args&&... args) {
+ Add<GlslVarType::U64>(format_str, inst, args...);
+ }
+
+ template <typename... Args>
+ void AddF64(const char* format_str, IR::Inst& inst, Args&&... args) {
+ Add<GlslVarType::F64>(format_str, inst, args...);
+ }
+
+ template <typename... Args>
+ void AddU32x2(const char* format_str, IR::Inst& inst, Args&&... args) {
+ Add<GlslVarType::U32x2>(format_str, inst, args...);
+ }
+
+ template <typename... Args>
+ void AddF32x2(const char* format_str, IR::Inst& inst, Args&&... args) {
+ Add<GlslVarType::F32x2>(format_str, inst, args...);
+ }
+
+ template <typename... Args>
+ void AddU32x3(const char* format_str, IR::Inst& inst, Args&&... args) {
+ Add<GlslVarType::U32x3>(format_str, inst, args...);
+ }
+
+ template <typename... Args>
+ void AddF32x3(const char* format_str, IR::Inst& inst, Args&&... args) {
+ Add<GlslVarType::F32x3>(format_str, inst, args...);
+ }
+
+ template <typename... Args>
+ void AddU32x4(const char* format_str, IR::Inst& inst, Args&&... args) {
+ Add<GlslVarType::U32x4>(format_str, inst, args...);
+ }
+
+ template <typename... Args>
+ void AddF32x4(const char* format_str, IR::Inst& inst, Args&&... args) {
+ Add<GlslVarType::F32x4>(format_str, inst, args...);
+ }
+
+ template <typename... Args>
+ void AddPrecF32(const char* format_str, IR::Inst& inst, Args&&... args) {
+ Add<GlslVarType::PrecF32>(format_str, inst, args...);
+ }
+
+ template <typename... Args>
+ void AddPrecF64(const char* format_str, IR::Inst& inst, Args&&... args) {
+ Add<GlslVarType::PrecF64>(format_str, inst, args...);
+ }
+
+ template <typename... Args>
+ void Add(const char* format_str, Args&&... args) {
+ code += fmt::format(fmt::runtime(format_str), std::forward<Args>(args)...);
+ // TODO: Remove this
+ code += '\n';
+ }
+
+ std::string header;
+ std::string code;
+ VarAlloc var_alloc;
+ const Info& info;
+ const Profile& profile;
+ const RuntimeInfo& runtime_info;
+
+ Stage stage{};
+ std::string_view stage_name = "invalid";
+ std::string_view position_name = "gl_Position";
+
+ std::vector<TextureImageDefinition> texture_buffers;
+ std::vector<TextureImageDefinition> image_buffers;
+ std::vector<TextureImageDefinition> textures;
+ std::vector<TextureImageDefinition> images;
+ std::array<std::array<GenericElementInfo, 4>, 32> output_generics{};
+
+ u32 num_safety_loop_vars{};
+
+ bool uses_y_direction{};
+ bool uses_cc_carry{};
+ bool uses_geometry_passthrough{};
+
+private:
+ void SetupExtensions();
+ void DefineConstantBuffers(Bindings& bindings);
+ void DefineStorageBuffers(Bindings& bindings);
+ void DefineGenericOutput(size_t index, u32 invocations);
+ void DefineHelperFunctions();
+ void DefineConstants();
+ std::string DefineGlobalMemoryFunctions();
+ void SetupImages(Bindings& bindings);
+ void SetupTextures(Bindings& bindings);
+};
+
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp
new file mode 100644
index 000000000..8a430d573
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp
@@ -0,0 +1,252 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <string>
+#include <tuple>
+#include <type_traits>
+
+#include "common/div_ceil.h"
+#include "common/settings.h"
+#include "shader_recompiler/backend/glsl/emit_context.h"
+#include "shader_recompiler/backend/glsl/emit_glsl.h"
+#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+
+namespace Shader::Backend::GLSL {
+namespace {
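+// FuncTraits and Invoke dispatch every IR instruction to its Emit* overload, deducing each
+// parameter type so IR values can be converted first (allocated variable name, immediate,
+// attribute, patch or register) before the call is made.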
+template <class Func>
+struct FuncTraits {};
+
+template <class ReturnType_, class... Args>
+struct FuncTraits<ReturnType_ (*)(Args...)> {
+ using ReturnType = ReturnType_;
+
+ static constexpr size_t NUM_ARGS = sizeof...(Args);
+
+ template <size_t I>
+ using ArgType = std::tuple_element_t<I, std::tuple<Args...>>;
+};
+
+template <auto func, typename... Args>
+void SetDefinition(EmitContext& ctx, IR::Inst* inst, Args... args) {
+ inst->SetDefinition<Id>(func(ctx, std::forward<Args>(args)...));
+}
+
+template <typename ArgType>
+auto Arg(EmitContext& ctx, const IR::Value& arg) {
+ if constexpr (std::is_same_v<ArgType, std::string_view>) {
+ return ctx.var_alloc.Consume(arg);
+ } else if constexpr (std::is_same_v<ArgType, const IR::Value&>) {
+ return arg;
+ } else if constexpr (std::is_same_v<ArgType, u32>) {
+ return arg.U32();
+ } else if constexpr (std::is_same_v<ArgType, IR::Attribute>) {
+ return arg.Attribute();
+ } else if constexpr (std::is_same_v<ArgType, IR::Patch>) {
+ return arg.Patch();
+ } else if constexpr (std::is_same_v<ArgType, IR::Reg>) {
+ return arg.Reg();
+ }
+}
+
+template <auto func, bool is_first_arg_inst, size_t... I>
+void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) {
+ using Traits = FuncTraits<decltype(func)>;
+ if constexpr (std::is_same_v<typename Traits::ReturnType, Id>) {
+ if constexpr (is_first_arg_inst) {
+ SetDefinition<func>(
+ ctx, inst, *inst,
+ Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...);
+ } else {
+ SetDefinition<func>(
+ ctx, inst, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...);
+ }
+ } else {
+ if constexpr (is_first_arg_inst) {
+ func(ctx, *inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...);
+ } else {
+ func(ctx, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...);
+ }
+ }
+}
+
+template <auto func>
+void Invoke(EmitContext& ctx, IR::Inst* inst) {
+ using Traits = FuncTraits<decltype(func)>;
+ static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments");
+ if constexpr (Traits::NUM_ARGS == 1) {
+ Invoke<func, false>(ctx, inst, std::make_index_sequence<0>{});
+ } else {
+ using FirstArgType = typename Traits::template ArgType<1>;
+ static constexpr bool is_first_arg_inst = std::is_same_v<FirstArgType, IR::Inst&>;
+ using Indices = std::make_index_sequence<Traits::NUM_ARGS - (is_first_arg_inst ? 2 : 1)>;
+ Invoke<func, is_first_arg_inst>(ctx, inst, Indices{});
+ }
+}
+
+void EmitInst(EmitContext& ctx, IR::Inst* inst) {
+ switch (inst->GetOpcode()) {
+#define OPCODE(name, result_type, ...) \
+ case IR::Opcode::name: \
+ return Invoke<&Emit##name>(ctx, inst);
+#include "shader_recompiler/frontend/ir/opcodes.inc"
+#undef OPCODE
+ }
+ throw LogicError("Invalid opcode {}", inst->GetOpcode());
+}
+
+bool IsReference(IR::Inst& inst) {
+ return inst.GetOpcode() == IR::Opcode::Reference;
+}
+
+void PrecolorInst(IR::Inst& phi) {
+ // Insert phi moves before references to avoid overwriting other phis
+ const size_t num_args{phi.NumArgs()};
+ for (size_t i = 0; i < num_args; ++i) {
+ IR::Block& phi_block{*phi.PhiBlock(i)};
+ auto it{std::find_if_not(phi_block.rbegin(), phi_block.rend(), IsReference).base()};
+ IR::IREmitter ir{phi_block, it};
+ const IR::Value arg{phi.Arg(i)};
+ if (arg.IsImmediate()) {
+ ir.PhiMove(phi, arg);
+ } else {
+ ir.PhiMove(phi, IR::Value{arg.InstRecursive()});
+ }
+ }
+ for (size_t i = 0; i < num_args; ++i) {
+ IR::IREmitter{*phi.PhiBlock(i)}.Reference(IR::Value{&phi});
+ }
+}
+
+void Precolor(const IR::Program& program) {
+ for (IR::Block* const block : program.blocks) {
+ for (IR::Inst& phi : block->Instructions()) {
+ if (!IR::IsPhi(phi)) {
+ break;
+ }
+ PrecolorInst(phi);
+ }
+ }
+}
+
+void EmitCode(EmitContext& ctx, const IR::Program& program) {
+ for (const IR::AbstractSyntaxNode& node : program.syntax_list) {
+ switch (node.type) {
+ case IR::AbstractSyntaxNode::Type::Block:
+ for (IR::Inst& inst : node.data.block->Instructions()) {
+ EmitInst(ctx, &inst);
+ }
+ break;
+ case IR::AbstractSyntaxNode::Type::If:
+ ctx.Add("if({}){{", ctx.var_alloc.Consume(node.data.if_node.cond));
+ break;
+ case IR::AbstractSyntaxNode::Type::EndIf:
+ ctx.Add("}}");
+ break;
+ case IR::AbstractSyntaxNode::Type::Break:
+ if (node.data.break_node.cond.IsImmediate()) {
+ if (node.data.break_node.cond.U1()) {
+ ctx.Add("break;");
+ }
+ } else {
+ ctx.Add("if({}){{break;}}", ctx.var_alloc.Consume(node.data.break_node.cond));
+ }
+ break;
+ case IR::AbstractSyntaxNode::Type::Return:
+ case IR::AbstractSyntaxNode::Type::Unreachable:
+ ctx.Add("return;");
+ break;
+ case IR::AbstractSyntaxNode::Type::Loop:
+ ctx.Add("for(;;){{");
+ break;
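+ // Unless safety checks are disabled, every loop back-edge decrements a per-loop counter
+ // (declared in DefineVariables) and breaks out once it underflows.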
+ case IR::AbstractSyntaxNode::Type::Repeat:
+ if (Settings::values.disable_shader_loop_safety_checks) {
+ ctx.Add("if(!{}){{break;}}}}", ctx.var_alloc.Consume(node.data.repeat.cond));
+ } else {
+ ctx.Add("if(--loop{}<0 || !{}){{break;}}}}", ctx.num_safety_loop_vars++,
+ ctx.var_alloc.Consume(node.data.repeat.cond));
+ }
+ break;
+ default:
+ throw NotImplementedException("AbstractSyntaxNode Type {}", node.type);
+ }
+ }
+}
+
+std::string GlslVersionSpecifier(const EmitContext& ctx) {
+ if (ctx.uses_y_direction || ctx.info.stores.Legacy() || ctx.info.loads.Legacy()) {
+ return " compatibility";
+ }
+ return "";
+}
+
+bool IsPreciseType(GlslVarType type) {
+ switch (type) {
+ case GlslVarType::PrecF32:
+ case GlslVarType::PrecF64:
+ return true;
+ default:
+ return false;
+ }
+}
+
+void DefineVariables(const EmitContext& ctx, std::string& header) {
+ for (u32 i = 0; i < static_cast<u32>(GlslVarType::Void); ++i) {
+ const auto type{static_cast<GlslVarType>(i)};
+ const auto& tracker{ctx.var_alloc.GetUseTracker(type)};
+ const auto type_name{ctx.var_alloc.GetGlslType(type)};
+ const bool has_precise_bug{ctx.stage == Stage::Fragment && ctx.profile.has_gl_precise_bug};
+ const auto precise{!has_precise_bug && IsPreciseType(type) ? "precise " : ""};
+ // Temps/return types that are never used are stored at index 0
+ if (tracker.uses_temp) {
+ header += fmt::format("{}{} t{}={}(0);", precise, type_name,
+ ctx.var_alloc.Representation(0, type), type_name);
+ }
+ for (u32 index = 0; index < tracker.num_used; ++index) {
+ header += fmt::format("{}{} {}={}(0);", precise, type_name,
+ ctx.var_alloc.Representation(index, type), type_name);
+ }
+ }
+ for (u32 i = 0; i < ctx.num_safety_loop_vars; ++i) {
+ header += fmt::format("int loop{}=0x2000;", i);
+ }
+}
+} // Anonymous namespace
+
+std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, IR::Program& program,
+ Bindings& bindings) {
+ EmitContext ctx{program, bindings, profile, runtime_info};
+ Precolor(program);
+ EmitCode(ctx, program);
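+ // The header is finalized only after code emission, since emitting instructions can set
+ // flags such as num_safety_loop_vars, uses_cc_carry and uses_y_direction that the
+ // declarations below depend on.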
+ const std::string version{fmt::format("#version 450{}\n", GlslVersionSpecifier(ctx))};
+ ctx.header.insert(0, version);
+ if (program.shared_memory_size > 0) {
+ const auto requested_size{program.shared_memory_size};
+ const auto max_size{profile.gl_max_compute_smem_size};
+ const bool needs_clamp{requested_size > max_size};
+ if (needs_clamp) {
+ LOG_WARNING(Shader_GLSL, "Requested shared memory size ({}) exceeds device limit ({})",
+ requested_size, max_size);
+ }
+ const auto smem_size{needs_clamp ? max_size : requested_size};
+ ctx.header += fmt::format("shared uint smem[{}];", Common::DivCeil(smem_size, 4U));
+ }
+ ctx.header += "void main(){\n";
+ if (program.local_memory_size > 0) {
+ ctx.header += fmt::format("uint lmem[{}];", Common::DivCeil(program.local_memory_size, 4U));
+ }
+ DefineVariables(ctx, ctx.header);
+ if (ctx.uses_cc_carry) {
+ ctx.header += "uint carry;";
+ }
+ if (program.info.uses_subgroup_shuffles) {
+ ctx.header += "bool shfl_in_bounds;";
+ }
+ ctx.code.insert(0, ctx.header);
+ ctx.code += '}';
+ return ctx.code;
+}
+
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.h b/src/shader_recompiler/backend/glsl/emit_glsl.h
new file mode 100644
index 000000000..20e5719e6
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl.h
@@ -0,0 +1,24 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <string>
+
+#include "shader_recompiler/backend/bindings.h"
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/profile.h"
+#include "shader_recompiler/runtime_info.h"
+
+namespace Shader::Backend::GLSL {
+
+[[nodiscard]] std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info,
+ IR::Program& program, Bindings& bindings);
+
+[[nodiscard]] inline std::string EmitGLSL(const Profile& profile, IR::Program& program) {
+ Bindings binding;
+ return EmitGLSL(profile, {}, program, binding);
+}
+
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp
new file mode 100644
index 000000000..772acc5a4
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp
@@ -0,0 +1,418 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glsl/emit_context.h"
+#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLSL {
+namespace {
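+// Generic compare-and-swap retry loop: read the current value, apply one of the Cas* helpers
+// from DefineHelperFunctions, and retry atomicCompSwap until the stored value is unchanged.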
+constexpr char cas_loop[]{
+ "for (;;){{uint old={};{}=atomicCompSwap({},old,{}({},{}));if({}==old){{break;}}}}"};
+
+void SharedCasFunction(EmitContext& ctx, IR::Inst& inst, std::string_view offset,
+ std::string_view value, std::string_view function) {
+ const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)};
+ const std::string smem{fmt::format("smem[{}>>2]", offset)};
+ ctx.Add(cas_loop, smem, ret, smem, function, smem, value, ret);
+}
+
+void SsboCasFunction(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value, std::string_view function) {
+ const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)};
+ const std::string ssbo{fmt::format("{}_ssbo{}[{}>>2]", ctx.stage_name, binding.U32(),
+ ctx.var_alloc.Consume(offset))};
+ ctx.Add(cas_loop, ssbo, ret, ssbo, function, ssbo, value, ret);
+}
+
+void SsboCasFunctionF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value,
+ std::string_view function) {
+ const std::string ssbo{fmt::format("{}_ssbo{}[{}>>2]", ctx.stage_name, binding.U32(),
+ ctx.var_alloc.Consume(offset))};
+ const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)};
+ ctx.Add(cas_loop, ssbo, ret, ssbo, function, ssbo, value, ret);
+ ctx.AddF32("{}=utof({});", inst, ret);
+}
+} // Anonymous namespace
+
+void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+ std::string_view value) {
+ ctx.AddU32("{}=atomicAdd(smem[{}>>2],{});", inst, pointer_offset, value);
+}
+
+void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+ std::string_view value) {
+ const std::string u32_value{fmt::format("uint({})", value)};
+ SharedCasFunction(ctx, inst, pointer_offset, u32_value, "CasMinS32");
+}
+
+void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+ std::string_view value) {
+ ctx.AddU32("{}=atomicMin(smem[{}>>2],{});", inst, pointer_offset, value);
+}
+
+void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+ std::string_view value) {
+ const std::string u32_value{fmt::format("uint({})", value)};
+ SharedCasFunction(ctx, inst, pointer_offset, u32_value, "CasMaxS32");
+}
+
+void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+ std::string_view value) {
+ ctx.AddU32("{}=atomicMax(smem[{}>>2],{});", inst, pointer_offset, value);
+}
+
+void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+ std::string_view value) {
+ SharedCasFunction(ctx, inst, pointer_offset, value, "CasIncrement");
+}
+
+void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+ std::string_view value) {
+ SharedCasFunction(ctx, inst, pointer_offset, value, "CasDecrement");
+}
+
+void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+ std::string_view value) {
+ ctx.AddU32("{}=atomicAnd(smem[{}>>2],{});", inst, pointer_offset, value);
+}
+
+void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+ std::string_view value) {
+ ctx.AddU32("{}=atomicOr(smem[{}>>2],{});", inst, pointer_offset, value);
+}
+
+void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+ std::string_view value) {
+ ctx.AddU32("{}=atomicXor(smem[{}>>2],{});", inst, pointer_offset, value);
+}
+
+void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+ std::string_view value) {
+ ctx.AddU32("{}=atomicExchange(smem[{}>>2],{});", inst, pointer_offset, value);
+}
+
+void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+ std::string_view value) {
+ LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic");
+ ctx.AddU64("{}=packUint2x32(uvec2(smem[{}>>2],smem[({}+4)>>2]));", inst, pointer_offset,
+ pointer_offset);
+ ctx.Add("smem[{}>>2]=unpackUint2x32({}).x;smem[({}+4)>>2]=unpackUint2x32({}).y;",
+ pointer_offset, value, pointer_offset, value);
+}
+
+void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ ctx.AddU32("{}=atomicAdd({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(),
+ ctx.var_alloc.Consume(offset), value);
+}
+
+void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ const std::string u32_value{fmt::format("uint({})", value)};
+ SsboCasFunction(ctx, inst, binding, offset, u32_value, "CasMinS32");
+}
+
+void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ ctx.AddU32("{}=atomicMin({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(),
+ ctx.var_alloc.Consume(offset), value);
+}
+
+void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ const std::string u32_value{fmt::format("uint({})", value)};
+ SsboCasFunction(ctx, inst, binding, offset, u32_value, "CasMaxS32");
+}
+
+void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ ctx.AddU32("{}=atomicMax({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(),
+ ctx.var_alloc.Consume(offset), value);
+}
+
+void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ SsboCasFunction(ctx, inst, binding, offset, value, "CasIncrement");
+}
+
+void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ SsboCasFunction(ctx, inst, binding, offset, value, "CasDecrement");
+}
+
+void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ ctx.AddU32("{}=atomicAnd({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(),
+ ctx.var_alloc.Consume(offset), value);
+}
+
+void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ ctx.AddU32("{}=atomicOr({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(),
+ ctx.var_alloc.Consume(offset), value);
+}
+
+void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ ctx.AddU32("{}=atomicXor({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(),
+ ctx.var_alloc.Consume(offset), value);
+}
+
+void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ ctx.AddU32("{}=atomicExchange({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(),
+ ctx.var_alloc.Consume(offset), value);
+}
+
+void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic");
+ ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst,
+ ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
+ binding.U32(), ctx.var_alloc.Consume(offset));
+ ctx.Add("{}_ssbo{}[{}>>2]+=unpackUint2x32({}).x;{}_ssbo{}[({}>>2)+1]+=unpackUint2x32({}).y;",
+ ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, ctx.stage_name,
+ binding.U32(), ctx.var_alloc.Consume(offset), value);
+}
+
+void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic");
+ ctx.AddU64("{}=packInt2x32(ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst,
+ ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
+ binding.U32(), ctx.var_alloc.Consume(offset));
+ ctx.Add("for(int i=0;i<2;++i){{ "
+ "{}_ssbo{}[({}>>2)+i]=uint(min(int({}_ssbo{}[({}>>2)+i]),unpackInt2x32(int64_t({}))[i])"
+ ");}}",
+ ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
+ binding.U32(), ctx.var_alloc.Consume(offset), value);
+}
+
+void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic");
+ ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst,
+ ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
+ binding.U32(), ctx.var_alloc.Consume(offset));
+ ctx.Add("for(int i=0;i<2;++i){{ "
+ "{}_ssbo{}[({}>>2)+i]=min({}_ssbo{}[({}>>2)+i],unpackUint2x32(uint64_t({}))[i]);}}",
+ ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
+ binding.U32(), ctx.var_alloc.Consume(offset), value);
+}
+
+void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic");
+ ctx.AddU64("{}=packInt2x32(ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst,
+ ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
+ binding.U32(), ctx.var_alloc.Consume(offset));
+ ctx.Add("for(int i=0;i<2;++i){{ "
+ "{}_ssbo{}[({}>>2)+i]=uint(max(int({}_ssbo{}[({}>>2)+i]),unpackInt2x32(int64_t({}))[i])"
+ ");}}",
+ ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
+ binding.U32(), ctx.var_alloc.Consume(offset), value);
+}
+
+void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic");
+ ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst,
+ ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
+ binding.U32(), ctx.var_alloc.Consume(offset));
+ ctx.Add("for(int "
+ "i=0;i<2;++i){{{}_ssbo{}[({}>>2)+i]=max({}_ssbo{}[({}>>2)+i],unpackUint2x32(uint64_t({}"
+ "))[i]);}}",
+ ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
+ binding.U32(), ctx.var_alloc.Consume(offset), value);
+}
+
+void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ ctx.AddU64(
+ "{}=packUint2x32(uvec2(atomicAnd({}_ssbo{}[{}>>2],unpackUint2x32({}).x),atomicAnd({}_"
+ "ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));",
+ inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, ctx.stage_name,
+ binding.U32(), ctx.var_alloc.Consume(offset), value);
+}
+
+void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ ctx.AddU64("{}=packUint2x32(uvec2(atomicOr({}_ssbo{}[{}>>2],unpackUint2x32({}).x),atomicOr({}_"
+ "ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));",
+ inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value,
+ ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value);
+}
+
+void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ ctx.AddU64(
+ "{}=packUint2x32(uvec2(atomicXor({}_ssbo{}[{}>>2],unpackUint2x32({}).x),atomicXor({}_"
+ "ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));",
+ inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, ctx.stage_name,
+ binding.U32(), ctx.var_alloc.Consume(offset), value);
+}
+
+void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ ctx.AddU64("{}=packUint2x32(uvec2(atomicExchange({}_ssbo{}[{}>>2],unpackUint2x32({}).x),"
+ "atomicExchange({}_ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));",
+ inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value,
+ ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value);
+}
+
+void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ SsboCasFunctionF32(ctx, inst, binding, offset, value, "CasFloatAdd");
+}
+
+void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatAdd16x2");
+}
+
+void EmitStorageAtomicAddF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatAdd32x2");
+}
+
+void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatMin16x2");
+}
+
+void EmitStorageAtomicMinF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatMin32x2");
+}
+
+void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatMax16x2");
+}
+
+void EmitStorageAtomicMaxF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value) {
+ SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatMax32x2");
+}
+
+void EmitGlobalAtomicIAdd32(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicSMin32(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicUMin32(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicSMax32(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicUMax32(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicInc32(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicDec32(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicAnd32(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicOr32(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicXor32(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicExchange32(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicIAdd64(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicSMin64(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicUMin64(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicSMax64(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicUMax64(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicInc64(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicDec64(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicAnd64(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicOr64(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicXor64(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicExchange64(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicAddF32(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicAddF16x2(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicAddF32x2(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicMinF16x2(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicMinF32x2(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicMaxF16x2(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicMaxF32x2(EmitContext&) {
+ throw NotImplementedException("GLSL Instruction");
+}
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_barriers.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_barriers.cpp
new file mode 100644
index 000000000..e1d1b558e
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_barriers.cpp
@@ -0,0 +1,21 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/glsl/emit_context.h"
+#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLSL {
+void EmitBarrier(EmitContext& ctx) {
+ ctx.Add("barrier();");
+}
+
+void EmitWorkgroupMemoryBarrier(EmitContext& ctx) {
+ ctx.Add("groupMemoryBarrier();");
+}
+
+void EmitDeviceMemoryBarrier(EmitContext& ctx) {
+ ctx.Add("memoryBarrier();");
+}
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp
new file mode 100644
index 000000000..3c1714e89
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp
@@ -0,0 +1,94 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glsl/emit_context.h"
+#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLSL {
+namespace {
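+// Identity values alias the operand's definition instead of emitting a copy; the identity's
+// usages are transferred onto the producing instruction.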
+void Alias(IR::Inst& inst, const IR::Value& value) {
+ if (value.IsImmediate()) {
+ return;
+ }
+ IR::Inst& value_inst{*value.InstRecursive()};
+ value_inst.DestructiveAddUsage(inst.UseCount());
+ value_inst.DestructiveRemoveUsage();
+ inst.SetDefinition(value_inst.Definition<Id>());
+}
+} // Anonymous namespace
+
+void EmitIdentity(EmitContext&, IR::Inst& inst, const IR::Value& value) {
+ Alias(inst, value);
+}
+
+void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) {
+ // Fake one usage to get a real variable out of the condition
+ inst.DestructiveAddUsage(1);
+ const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U1)};
+ const auto input{ctx.var_alloc.Consume(value)};
+ if (ret != input) {
+ ctx.Add("{}={};", ret, input);
+ }
+}
+
+void EmitBitCastU16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst) {
+ NotImplemented();
+}
+
+void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU32("{}=ftou({});", inst, value);
+}
+
+void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU64("{}=doubleBitsToUint64({});", inst, value);
+}
+
+void EmitBitCastF16U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst) {
+ NotImplemented();
+}
+
+void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF32("{}=utof({});", inst, value);
+}
+
+void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF64("{}=uint64BitsToDouble({});", inst, value);
+}
+
+void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU64("{}=packUint2x32({});", inst, value);
+}
+
+void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU32x2("{}=unpackUint2x32({});", inst, value);
+}
+
+void EmitPackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU32("{}=packFloat2x16({});", inst, value);
+}
+
+void EmitUnpackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF16x2("{}=unpackFloat2x16({});", inst, value);
+}
+
+void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU32("{}=packHalf2x16({});", inst, value);
+}
+
+void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF32x2("{}=unpackHalf2x16({});", inst, value);
+}
+
+void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF64("{}=packDouble2x32({});", inst, value);
+}
+
+void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU32x2("{}=unpackDouble2x32({});", inst, value);
+}
+
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp
new file mode 100644
index 000000000..49a66e3ec
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp
@@ -0,0 +1,219 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glsl/emit_context.h"
+#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLSL {
+namespace {
+constexpr std::string_view SWIZZLE{"xyzw"};
+void CompositeInsert(EmitContext& ctx, std::string_view result, std::string_view composite,
+ std::string_view object, u32 index) {
+ if (result == composite) {
+ // The result is aliased with the composite
+ ctx.Add("{}.{}={};", composite, SWIZZLE[index], object);
+ } else {
+ ctx.Add("{}={};{}.{}={};", result, composite, result, SWIZZLE[index], object);
+ }
+}
+} // Anonymous namespace
+
+void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
+ std::string_view e2) {
+ ctx.AddU32x2("{}=uvec2({},{});", inst, e1, e2);
+}
+
+void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
+ std::string_view e2, std::string_view e3) {
+ ctx.AddU32x3("{}=uvec3({},{},{});", inst, e1, e2, e3);
+}
+
+void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
+ std::string_view e2, std::string_view e3, std::string_view e4) {
+ ctx.AddU32x4("{}=uvec4({},{},{},{});", inst, e1, e2, e3, e4);
+}
+
+void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
+ u32 index) {
+ ctx.AddU32("{}={}.{};", inst, composite, SWIZZLE[index]);
+}
+
+void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
+ u32 index) {
+ ctx.AddU32("{}={}.{};", inst, composite, SWIZZLE[index]);
+}
+
+void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
+ u32 index) {
+ ctx.AddU32("{}={}.{};", inst, composite, SWIZZLE[index]);
+}
+
+void EmitCompositeInsertU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
+ std::string_view object, u32 index) {
+ const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x2)};
+ CompositeInsert(ctx, ret, composite, object, index);
+}
+
+void EmitCompositeInsertU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
+ std::string_view object, u32 index) {
+ const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x3)};
+ CompositeInsert(ctx, ret, composite, object, index);
+}
+
+void EmitCompositeInsertU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
+ std::string_view object, u32 index) {
+ const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x4)};
+ CompositeInsert(ctx, ret, composite, object, index);
+}
+
+void EmitCompositeConstructF16x2([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] std::string_view e1,
+ [[maybe_unused]] std::string_view e2) {
+ NotImplemented();
+}
+
+void EmitCompositeConstructF16x3([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] std::string_view e1,
+ [[maybe_unused]] std::string_view e2,
+ [[maybe_unused]] std::string_view e3) {
+ NotImplemented();
+}
+
+void EmitCompositeConstructF16x4([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] std::string_view e1,
+ [[maybe_unused]] std::string_view e2,
+ [[maybe_unused]] std::string_view e3,
+ [[maybe_unused]] std::string_view e4) {
+ NotImplemented();
+}
+
+void EmitCompositeExtractF16x2([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] std::string_view composite,
+ [[maybe_unused]] u32 index) {
+ NotImplemented();
+}
+
+void EmitCompositeExtractF16x3([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] std::string_view composite,
+ [[maybe_unused]] u32 index) {
+ NotImplemented();
+}
+
+void EmitCompositeExtractF16x4([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] std::string_view composite,
+ [[maybe_unused]] u32 index) {
+ NotImplemented();
+}
+
+void EmitCompositeInsertF16x2([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] std::string_view composite,
+ [[maybe_unused]] std::string_view object,
+ [[maybe_unused]] u32 index) {
+ NotImplemented();
+}
+
+void EmitCompositeInsertF16x3([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] std::string_view composite,
+ [[maybe_unused]] std::string_view object,
+ [[maybe_unused]] u32 index) {
+ NotImplemented();
+}
+
+void EmitCompositeInsertF16x4([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] std::string_view composite,
+ [[maybe_unused]] std::string_view object,
+ [[maybe_unused]] u32 index) {
+ NotImplemented();
+}
+
+void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
+ std::string_view e2) {
+ ctx.AddF32x2("{}=vec2({},{});", inst, e1, e2);
+}
+
+void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
+ std::string_view e2, std::string_view e3) {
+ ctx.AddF32x3("{}=vec3({},{},{});", inst, e1, e2, e3);
+}
+
+void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
+ std::string_view e2, std::string_view e3, std::string_view e4) {
+ ctx.AddF32x4("{}=vec4({},{},{},{});", inst, e1, e2, e3, e4);
+}
+
+void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
+ u32 index) {
+ ctx.AddF32("{}={}.{};", inst, composite, SWIZZLE[index]);
+}
+
+void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
+ u32 index) {
+ ctx.AddF32("{}={}.{};", inst, composite, SWIZZLE[index]);
+}
+
+void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
+ u32 index) {
+ ctx.AddF32("{}={}.{};", inst, composite, SWIZZLE[index]);
+}
+
+void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
+ std::string_view object, u32 index) {
+ const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32x2)};
+ CompositeInsert(ctx, ret, composite, object, index);
+}
+
+void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
+ std::string_view object, u32 index) {
+ const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32x3)};
+ CompositeInsert(ctx, ret, composite, object, index);
+}
+
+void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
+ std::string_view object, u32 index) {
+ const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32x4)};
+ CompositeInsert(ctx, ret, composite, object, index);
+}
+
+void EmitCompositeConstructF64x2([[maybe_unused]] EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitCompositeConstructF64x3([[maybe_unused]] EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitCompositeConstructF64x4([[maybe_unused]] EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitCompositeExtractF64x2([[maybe_unused]] EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitCompositeExtractF64x3([[maybe_unused]] EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitCompositeExtractF64x4([[maybe_unused]] EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitCompositeInsertF64x2(EmitContext& ctx, std::string_view composite, std::string_view object,
+ u32 index) {
+ ctx.Add("{}.{}={};", composite, SWIZZLE[index], object);
+}
+
+void EmitCompositeInsertF64x3(EmitContext& ctx, std::string_view composite, std::string_view object,
+ u32 index) {
+ ctx.Add("{}.{}={};", composite, SWIZZLE[index], object);
+}
+
+void EmitCompositeInsertF64x4(EmitContext& ctx, std::string_view composite, std::string_view object,
+ u32 index) {
+ ctx.Add("{}.{}={};", composite, SWIZZLE[index], object);
+}
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
new file mode 100644
index 000000000..580063fa9
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
@@ -0,0 +1,456 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glsl/emit_context.h"
+#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/profile.h"
+#include "shader_recompiler/runtime_info.h"
+
+namespace Shader::Backend::GLSL {
+namespace {
+constexpr char SWIZZLE[]{"xyzw"};
+
+u32 CbufIndex(u32 offset) {
+ return (offset / 4) % 4;
+}
+
+char OffsetSwizzle(u32 offset) {
+ return SWIZZLE[CbufIndex(offset)];
+}
+
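+// Geometry and tessellation shaders receive their inputs as per-vertex arrays, so input
+// accesses in those stages need an extra vertex subscript.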
+bool IsInputArray(Stage stage) {
+ return stage == Stage::Geometry || stage == Stage::TessellationControl ||
+ stage == Stage::TessellationEval;
+}
+
+std::string InputVertexIndex(EmitContext& ctx, std::string_view vertex) {
+ return IsInputArray(ctx.stage) ? fmt::format("[{}]", vertex) : "";
+}
+
+std::string_view OutputVertexIndex(EmitContext& ctx) {
+ return ctx.stage == Stage::TessellationControl ? "[gl_InvocationID]" : "";
+}
+
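+// Reads a constant buffer element into 'ret', optionally bitfield-extracting 'num_bits' at
+// 'bit_offset'. Out-of-bounds immediate offsets are stubbed to zero.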
+void GetCbuf(EmitContext& ctx, std::string_view ret, const IR::Value& binding,
+ const IR::Value& offset, u32 num_bits, std::string_view cast = {},
+ std::string_view bit_offset = {}) {
+ const bool is_immediate{offset.IsImmediate()};
+ const bool component_indexing_bug{!is_immediate && ctx.profile.has_gl_component_indexing_bug};
+ if (is_immediate) {
+ const s32 signed_offset{static_cast<s32>(offset.U32())};
+ static constexpr u32 cbuf_size{0x10000};
+ if (signed_offset < 0 || offset.U32() > cbuf_size) {
+ LOG_WARNING(Shader_GLSL, "Immediate constant buffer offset is out of bounds");
+ ctx.Add("{}=0u;", ret);
+ return;
+ }
+ }
+ const auto offset_var{ctx.var_alloc.Consume(offset)};
+ const auto index{is_immediate ? fmt::format("{}", offset.U32() / 16)
+ : fmt::format("{}>>4", offset_var)};
+ const auto swizzle{is_immediate ? fmt::format(".{}", OffsetSwizzle(offset.U32()))
+ : fmt::format("[({}>>2)%4]", offset_var)};
+
+ const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())};
+ const auto cbuf_cast{fmt::format("{}({}[{}]{{}})", cast, cbuf, index)};
+ const auto extraction{num_bits == 32 ? cbuf_cast
+                           : fmt::format("bitfieldExtract({},int({}),{})", cbuf_cast,
+ bit_offset, num_bits)};
+ if (!component_indexing_bug) {
+ const auto result{fmt::format(fmt::runtime(extraction), swizzle)};
+ ctx.Add("{}={};", ret, result);
+ return;
+ }
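+    // Some drivers cannot index vector components with a non-constant expression; emit one
+    // guarded assignment per component instead.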
+ const auto cbuf_offset{fmt::format("{}>>2", offset_var)};
+ for (u32 i = 0; i < 4; ++i) {
+ const auto swizzle_string{fmt::format(".{}", "xyzw"[i])};
+ const auto result{fmt::format(fmt::runtime(extraction), swizzle_string)};
+ ctx.Add("if(({}&3)=={}){}={};", cbuf_offset, i, ret, result);
+ }
+}
+
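+// Loads an 8-bit constant buffer value by reading the containing 32-bit word and extracting the
+// byte with bitfieldExtract; GetCbuf16 below does the same for 16-bit halves.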
+void GetCbuf8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset,
+ std::string_view cast) {
+ const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)};
+ if (offset.IsImmediate()) {
+ const auto bit_offset{fmt::format("{}", (offset.U32() % 4) * 8)};
+ GetCbuf(ctx, ret, binding, offset, 8, cast, bit_offset);
+ } else {
+ const auto offset_var{ctx.var_alloc.Consume(offset)};
+ const auto bit_offset{fmt::format("({}%4)*8", offset_var)};
+ GetCbuf(ctx, ret, binding, offset, 8, cast, bit_offset);
+ }
+}
+
+void GetCbuf16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset,
+ std::string_view cast) {
+ const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)};
+ if (offset.IsImmediate()) {
+ const auto bit_offset{fmt::format("{}", ((offset.U32() / 2) % 2) * 16)};
+ GetCbuf(ctx, ret, binding, offset, 16, cast, bit_offset);
+ } else {
+ const auto offset_var{ctx.var_alloc.Consume(offset)};
+ const auto bit_offset{fmt::format("(({}>>1)%2)*16", offset_var)};
+ GetCbuf(ctx, ret, binding, offset, 16, cast, bit_offset);
+ }
+}
+
+u32 TexCoordIndex(IR::Attribute attr) {
+ return (static_cast<u32>(attr) - static_cast<u32>(IR::Attribute::FixedFncTexture0S)) / 4;
+}
+} // Anonymous namespace
+
+void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset) {
+ GetCbuf8(ctx, inst, binding, offset, "ftou");
+}
+
+void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset) {
+ GetCbuf8(ctx, inst, binding, offset, "ftoi");
+}
+
+void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset) {
+ GetCbuf16(ctx, inst, binding, offset, "ftou");
+}
+
+void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset) {
+ GetCbuf16(ctx, inst, binding, offset, "ftoi");
+}
+
+void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset) {
+ const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)};
+ GetCbuf(ctx, ret, binding, offset, 32, "ftou");
+}
+
+void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset) {
+ const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32)};
+ GetCbuf(ctx, ret, binding, offset, 32);
+}
+
+void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset) {
+ const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())};
+ if (offset.IsImmediate()) {
+ static constexpr u32 cbuf_size{0x10000};
+ const u32 u32_offset{offset.U32()};
+ const s32 signed_offset{static_cast<s32>(offset.U32())};
+ if (signed_offset < 0 || u32_offset > cbuf_size) {
+ LOG_WARNING(Shader_GLSL, "Immediate constant buffer offset is out of bounds");
+ ctx.AddU32x2("{}=uvec2(0u);", inst);
+ return;
+ }
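+        // Read the two words either through a single swizzled access or as two separate loads.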
+ if (u32_offset % 2 == 0) {
+ ctx.AddU32x2("{}=ftou({}[{}].{}{});", inst, cbuf, u32_offset / 16,
+ OffsetSwizzle(u32_offset), OffsetSwizzle(u32_offset + 4));
+ } else {
+ ctx.AddU32x2("{}=uvec2(ftou({}[{}].{}),ftou({}[{}].{}));", inst, cbuf, u32_offset / 16,
+ OffsetSwizzle(u32_offset), cbuf, (u32_offset + 4) / 16,
+ OffsetSwizzle(u32_offset + 4));
+ }
+ return;
+ }
+ const auto offset_var{ctx.var_alloc.Consume(offset)};
+ if (!ctx.profile.has_gl_component_indexing_bug) {
+ ctx.AddU32x2("{}=uvec2(ftou({}[{}>>4][({}>>2)%4]),ftou({}[({}+4)>>4][(({}+4)>>2)%4]));",
+ inst, cbuf, offset_var, offset_var, cbuf, offset_var, offset_var);
+ return;
+ }
+ const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x2)};
+ const auto cbuf_offset{fmt::format("{}>>2", offset_var)};
+ for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
+ ctx.Add("if(({}&3)=={}){}=uvec2(ftou({}[{}>>4].{}),ftou({}[({}+4)>>4].{}));", cbuf_offset,
+ swizzle, ret, cbuf, offset_var, "xyzw"[swizzle], cbuf, offset_var,
+ "xyzw"[(swizzle + 1) % 4]);
+ }
+}
+
+void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr,
+ std::string_view vertex) {
+ const u32 element{static_cast<u32>(attr) % 4};
+ const char swizzle{"xyzw"[element]};
+ if (IR::IsGeneric(attr)) {
+ const u32 index{IR::GenericAttributeIndex(attr)};
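+        // Elements not written by the previous stage read as the default attribute value
+        // (0, 0, 0, 1).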
+ if (!ctx.runtime_info.previous_stage_stores.Generic(index, element)) {
+            if (element == 3) {
+                ctx.AddF32("{}=1.f;", inst);
+            } else {
+                ctx.AddF32("{}=0.f;", inst);
+            }
+ return;
+ }
+ ctx.AddF32("{}=in_attr{}{}.{};", inst, index, InputVertexIndex(ctx, vertex), swizzle);
+ return;
+ }
+ // GLSL only exposes 8 legacy texcoords
+ if (attr >= IR::Attribute::FixedFncTexture8S && attr <= IR::Attribute::FixedFncTexture9Q) {
+ LOG_WARNING(Shader_GLSL, "GLSL does not allow access to gl_TexCoord[{}]",
+ TexCoordIndex(attr));
+ ctx.AddF32("{}=0.f;", inst);
+ return;
+ }
+ if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture7Q) {
+ const u32 index{TexCoordIndex(attr)};
+ ctx.AddF32("{}=gl_TexCoord[{}].{};", inst, index, swizzle);
+ return;
+ }
+ switch (attr) {
+ case IR::Attribute::PrimitiveId:
+ ctx.AddF32("{}=itof(gl_PrimitiveID);", inst);
+ break;
+ case IR::Attribute::PositionX:
+ case IR::Attribute::PositionY:
+ case IR::Attribute::PositionZ:
+ case IR::Attribute::PositionW: {
+ const bool is_array{IsInputArray(ctx.stage)};
+ const auto input_decorator{is_array ? fmt::format("gl_in[{}].", vertex) : ""};
+ ctx.AddF32("{}={}{}.{};", inst, input_decorator, ctx.position_name, swizzle);
+ break;
+ }
+ case IR::Attribute::ColorFrontDiffuseR:
+ case IR::Attribute::ColorFrontDiffuseG:
+ case IR::Attribute::ColorFrontDiffuseB:
+ case IR::Attribute::ColorFrontDiffuseA:
+ if (ctx.stage == Stage::Fragment) {
+ ctx.AddF32("{}=gl_Color.{};", inst, swizzle);
+ } else {
+ ctx.AddF32("{}=gl_FrontColor.{};", inst, swizzle);
+ }
+ break;
+ case IR::Attribute::PointSpriteS:
+ case IR::Attribute::PointSpriteT:
+ ctx.AddF32("{}=gl_PointCoord.{};", inst, swizzle);
+ break;
+ case IR::Attribute::TessellationEvaluationPointU:
+ case IR::Attribute::TessellationEvaluationPointV:
+ ctx.AddF32("{}=gl_TessCoord.{};", inst, swizzle);
+ break;
+ case IR::Attribute::InstanceId:
+ ctx.AddF32("{}=itof(gl_InstanceID);", inst);
+ break;
+ case IR::Attribute::VertexId:
+ ctx.AddF32("{}=itof(gl_VertexID);", inst);
+ break;
+ case IR::Attribute::FrontFace:
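+        // Front-facing reads as all bits set (-1 reinterpreted as float), back-facing as zero.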
+ ctx.AddF32("{}=itof(gl_FrontFacing?-1:0);", inst);
+ break;
+ default:
+ throw NotImplementedException("Get attribute {}", attr);
+ }
+}
+
+void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value,
+ [[maybe_unused]] std::string_view vertex) {
+ if (IR::IsGeneric(attr)) {
+ const u32 index{IR::GenericAttributeIndex(attr)};
+ const u32 attr_element{IR::GenericAttributeElement(attr)};
+ const GenericElementInfo& info{ctx.output_generics.at(index).at(attr_element)};
+ const auto output_decorator{OutputVertexIndex(ctx)};
+ if (info.num_components == 1) {
+ ctx.Add("{}{}={};", info.name, output_decorator, value);
+ } else {
+ const u32 index_element{attr_element - info.first_element};
+ ctx.Add("{}{}.{}={};", info.name, output_decorator, "xyzw"[index_element], value);
+ }
+ return;
+ }
+ const u32 element{static_cast<u32>(attr) % 4};
+ const char swizzle{"xyzw"[element]};
+ // GLSL only exposes 8 legacy texcoords
+ if (attr >= IR::Attribute::FixedFncTexture8S && attr <= IR::Attribute::FixedFncTexture9Q) {
+ LOG_WARNING(Shader_GLSL, "GLSL does not allow access to gl_TexCoord[{}]",
+ TexCoordIndex(attr));
+ return;
+ }
+ if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture7Q) {
+ const u32 index{TexCoordIndex(attr)};
+ ctx.Add("gl_TexCoord[{}].{}={};", index, swizzle, value);
+ return;
+ }
+ switch (attr) {
+ case IR::Attribute::Layer:
+ if (ctx.stage != Stage::Geometry &&
+ !ctx.profile.support_viewport_index_layer_non_geometry) {
+ LOG_WARNING(Shader_GLSL, "Shader stores viewport layer but device does not support "
+ "viewport layer extension");
+ break;
+ }
+ ctx.Add("gl_Layer=ftoi({});", value);
+ break;
+ case IR::Attribute::ViewportIndex:
+ if (ctx.stage != Stage::Geometry &&
+ !ctx.profile.support_viewport_index_layer_non_geometry) {
+ LOG_WARNING(Shader_GLSL, "Shader stores viewport index but device does not support "
+ "viewport layer extension");
+ break;
+ }
+ ctx.Add("gl_ViewportIndex=ftoi({});", value);
+ break;
+ case IR::Attribute::ViewportMask:
+ if (ctx.stage != Stage::Geometry && !ctx.profile.support_viewport_mask) {
+ LOG_WARNING(
+ Shader_GLSL,
+ "Shader stores viewport mask but device does not support viewport mask extension");
+ break;
+ }
+ ctx.Add("gl_ViewportMask[0]=ftoi({});", value);
+ break;
+ case IR::Attribute::PointSize:
+ ctx.Add("gl_PointSize={};", value);
+ break;
+ case IR::Attribute::PositionX:
+ case IR::Attribute::PositionY:
+ case IR::Attribute::PositionZ:
+ case IR::Attribute::PositionW:
+ ctx.Add("gl_Position.{}={};", swizzle, value);
+ break;
+ case IR::Attribute::ColorFrontDiffuseR:
+ case IR::Attribute::ColorFrontDiffuseG:
+ case IR::Attribute::ColorFrontDiffuseB:
+ case IR::Attribute::ColorFrontDiffuseA:
+ ctx.Add("gl_FrontColor.{}={};", swizzle, value);
+ break;
+ case IR::Attribute::ColorFrontSpecularR:
+ case IR::Attribute::ColorFrontSpecularG:
+ case IR::Attribute::ColorFrontSpecularB:
+ case IR::Attribute::ColorFrontSpecularA:
+ ctx.Add("gl_FrontSecondaryColor.{}={};", swizzle, value);
+ break;
+ case IR::Attribute::ColorBackDiffuseR:
+ case IR::Attribute::ColorBackDiffuseG:
+ case IR::Attribute::ColorBackDiffuseB:
+ case IR::Attribute::ColorBackDiffuseA:
+ ctx.Add("gl_BackColor.{}={};", swizzle, value);
+ break;
+ case IR::Attribute::ColorBackSpecularR:
+ case IR::Attribute::ColorBackSpecularG:
+ case IR::Attribute::ColorBackSpecularB:
+ case IR::Attribute::ColorBackSpecularA:
+ ctx.Add("gl_BackSecondaryColor.{}={};", swizzle, value);
+ break;
+ case IR::Attribute::FogCoordinate:
+ ctx.Add("gl_FogFragCoord={};", value);
+ break;
+ case IR::Attribute::ClipDistance0:
+ case IR::Attribute::ClipDistance1:
+ case IR::Attribute::ClipDistance2:
+ case IR::Attribute::ClipDistance3:
+ case IR::Attribute::ClipDistance4:
+ case IR::Attribute::ClipDistance5:
+ case IR::Attribute::ClipDistance6:
+ case IR::Attribute::ClipDistance7: {
+ const u32 index{static_cast<u32>(attr) - static_cast<u32>(IR::Attribute::ClipDistance0)};
+ ctx.Add("gl_ClipDistance[{}]={};", index, value);
+ break;
+ }
+ default:
+ throw NotImplementedException("Set attribute {}", attr);
+ }
+}
+
+void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, std::string_view offset,
+ std::string_view vertex) {
+ const bool is_array{ctx.stage == Stage::Geometry};
+ const auto vertex_arg{is_array ? fmt::format(",{}", vertex) : ""};
+ ctx.AddF32("{}=IndexedAttrLoad(int({}){});", inst, offset, vertex_arg);
+}
+
+void EmitSetAttributeIndexed([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] std::string_view offset,
+ [[maybe_unused]] std::string_view value,
+ [[maybe_unused]] std::string_view vertex) {
+ NotImplemented();
+}
+
+void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch) {
+ if (!IR::IsGeneric(patch)) {
+ throw NotImplementedException("Non-generic patch load");
+ }
+ const u32 index{IR::GenericPatchIndex(patch)};
+ const u32 element{IR::GenericPatchElement(patch)};
+ const char swizzle{"xyzw"[element]};
+ ctx.AddF32("{}=patch{}.{};", inst, index, swizzle);
+}
+
+void EmitSetPatch(EmitContext& ctx, IR::Patch patch, std::string_view value) {
+ if (IR::IsGeneric(patch)) {
+ const u32 index{IR::GenericPatchIndex(patch)};
+ const u32 element{IR::GenericPatchElement(patch)};
+ ctx.Add("patch{}.{}={};", index, "xyzw"[element], value);
+ return;
+ }
+ switch (patch) {
+ case IR::Patch::TessellationLodLeft:
+ case IR::Patch::TessellationLodRight:
+ case IR::Patch::TessellationLodTop:
+ case IR::Patch::TessellationLodBottom: {
+ const u32 index{static_cast<u32>(patch) - u32(IR::Patch::TessellationLodLeft)};
+ ctx.Add("gl_TessLevelOuter[{}]={};", index, value);
+ break;
+ }
+ case IR::Patch::TessellationLodInteriorU:
+ ctx.Add("gl_TessLevelInner[0]={};", value);
+ break;
+ case IR::Patch::TessellationLodInteriorV:
+ ctx.Add("gl_TessLevelInner[1]={};", value);
+ break;
+ default:
+ throw NotImplementedException("Patch {}", patch);
+ }
+}
+
+void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string_view value) {
+ const char swizzle{"xyzw"[component]};
+ ctx.Add("frag_color{}.{}={};", index, swizzle, value);
+}
+
+void EmitSetSampleMask(EmitContext& ctx, std::string_view value) {
+ ctx.Add("gl_SampleMask[0]=int({});", value);
+}
+
+void EmitSetFragDepth(EmitContext& ctx, std::string_view value) {
+ ctx.Add("gl_FragDepth={};", value);
+}
+
+void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst) {
+ ctx.AddU32x3("{}=gl_LocalInvocationID;", inst);
+}
+
+void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst) {
+ ctx.AddU32x3("{}=gl_WorkGroupID;", inst);
+}
+
+void EmitInvocationId(EmitContext& ctx, IR::Inst& inst) {
+ ctx.AddU32("{}=uint(gl_InvocationID);", inst);
+}
+
+void EmitSampleId(EmitContext& ctx, IR::Inst& inst) {
+ ctx.AddU32("{}=uint(gl_SampleID);", inst);
+}
+
+void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst) {
+ ctx.AddU1("{}=gl_HelperInvocation;", inst);
+}
+
+void EmitYDirection(EmitContext& ctx, IR::Inst& inst) {
+ ctx.uses_y_direction = true;
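+    // The Y-direction factor is read from the legacy gl_FrontMaterial.ambient.a slot; setting
+    // uses_y_direction lets the host know it has to supply it.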
+ ctx.AddF32("{}=gl_FrontMaterial.ambient.a;", inst);
+}
+
+void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, std::string_view word_offset) {
+ ctx.AddU32("{}=lmem[{}];", inst, word_offset);
+}
+
+void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value) {
+ ctx.Add("lmem[{}]={};", word_offset, value);
+}
+
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_control_flow.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_control_flow.cpp
new file mode 100644
index 000000000..53f8896be
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_control_flow.cpp
@@ -0,0 +1,21 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glsl/emit_context.h"
+#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
+#include "shader_recompiler/exception.h"
+
+namespace Shader::Backend::GLSL {
+
+void EmitJoin(EmitContext&) {
+ throw NotImplementedException("Join shouldn't be emitted");
+}
+
+void EmitDemoteToHelperInvocation(EmitContext& ctx) {
+ ctx.Add("discard;");
+}
+
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp
new file mode 100644
index 000000000..eeae6562c
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp
@@ -0,0 +1,230 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glsl/emit_context.h"
+#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLSL {
+void EmitConvertS16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
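+// Produces a signed 16-bit result by keeping the low 16 bits of the integer conversion and
+// folding the sign bit into bit 15.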
+void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU32("{}=(int({})&0xffff)|(bitfieldExtract(int({}),31,1)<<15);", inst, value, value);
+}
+
+void EmitConvertS16F64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertS32F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU32("{}=int({});", inst, value);
+}
+
+void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU32("{}=int({});", inst, value);
+}
+
+void EmitConvertS64F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU64("{}=int64_t({});", inst, value);
+}
+
+void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU64("{}=int64_t({});", inst, value);
+}
+
+void EmitConvertU16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertU16F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertU16F64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertU32F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU32("{}=uint({});", inst, value);
+}
+
+void EmitConvertU32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU32("{}=uint({});", inst, value);
+}
+
+void EmitConvertU64F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU64("{}=uint64_t({});", inst, value);
+}
+
+void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU64("{}=uint64_t({});", inst, value);
+}
+
+void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU64("{}=uint64_t({});", inst, value);
+}
+
+void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU32("{}=uint({});", inst, value);
+}
+
+void EmitConvertF16F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertF32F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF32("{}=float({});", inst, value);
+}
+
+void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF64("{}=double({});", inst, value);
+}
+
+void EmitConvertF16S8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertF16S16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertF16S32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertF16S64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertF16U8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertF16U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertF16U32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertF16U64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertF32S8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertF32S16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertF32S32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF32("{}=float(int({}));", inst, value);
+}
+
+void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF32("{}=float(int64_t({}));", inst, value);
+}
+
+void EmitConvertF32U8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF32("{}=float({}&0xffff);", inst, value);
+}
+
+void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF32("{}=float({});", inst, value);
+}
+
+void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF32("{}=float({});", inst, value);
+}
+
+void EmitConvertF64S8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertF64S16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF64("{}=double(int({}));", inst, value);
+}
+
+void EmitConvertF64S64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF64("{}=double(int64_t({}));", inst, value);
+}
+
+void EmitConvertF64U8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertF64U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF64("{}=double({});", inst, value);
+}
+
+void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF64("{}=double({});", inst, value);
+}
+
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp
new file mode 100644
index 000000000..d423bfb1b
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp
@@ -0,0 +1,456 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glsl/emit_context.h"
+#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLSL {
+namespace {
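+// Ordered comparisons require both operands to be non-NaN; unordered comparisons also hold when
+// either operand is NaN.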
+void Compare(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs,
+ std::string_view op, bool ordered) {
+ const auto nan_op{ordered ? "&&!" : "||"};
+ ctx.AddU1("{}={}{}{}"
+ "{}isnan({}){}isnan({});",
+ inst, lhs, op, rhs, nan_op, lhs, nan_op, rhs);
+}
+
+bool IsPrecise(const IR::Inst& inst) {
+ return inst.Flags<IR::FpControl>().no_contraction;
+}
+} // Anonymous namespace
+
+void EmitFPAbs16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF32("{}=abs({});", inst, value);
+}
+
+void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF64("{}=abs({});", inst, value);
+}
+
+void EmitFPAdd16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) {
+ NotImplemented();
+}
+
+void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
+ if (IsPrecise(inst)) {
+ ctx.AddPrecF32("{}={}+{};", inst, a, b);
+ } else {
+ ctx.AddF32("{}={}+{};", inst, a, b);
+ }
+}
+
+void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
+ if (IsPrecise(inst)) {
+ ctx.AddPrecF64("{}={}+{};", inst, a, b);
+ } else {
+ ctx.AddF64("{}={}+{};", inst, a, b);
+ }
+}
+
+void EmitFPFma16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b,
+ [[maybe_unused]] std::string_view c) {
+ NotImplemented();
+}
+
+void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b,
+ std::string_view c) {
+ if (IsPrecise(inst)) {
+ ctx.AddPrecF32("{}=fma({},{},{});", inst, a, b, c);
+ } else {
+ ctx.AddF32("{}=fma({},{},{});", inst, a, b, c);
+ }
+}
+
+void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b,
+ std::string_view c) {
+ if (IsPrecise(inst)) {
+ ctx.AddPrecF64("{}=fma({},{},{});", inst, a, b, c);
+ } else {
+ ctx.AddF64("{}=fma({},{},{});", inst, a, b, c);
+ }
+}
+
+void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
+ ctx.AddF32("{}=max({},{});", inst, a, b);
+}
+
+void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
+ ctx.AddF64("{}=max({},{});", inst, a, b);
+}
+
+void EmitFPMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
+ ctx.AddF32("{}=min({},{});", inst, a, b);
+}
+
+void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
+ ctx.AddF64("{}=min({},{});", inst, a, b);
+}
+
+void EmitFPMul16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) {
+ NotImplemented();
+}
+
+void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
+ if (IsPrecise(inst)) {
+ ctx.AddPrecF32("{}={}*{};", inst, a, b);
+ } else {
+ ctx.AddF32("{}={}*{};", inst, a, b);
+ }
+}
+
+void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
+ if (IsPrecise(inst)) {
+ ctx.AddPrecF64("{}={}*{};", inst, a, b);
+ } else {
+ ctx.AddF64("{}={}*{};", inst, a, b);
+ }
+}
+
+void EmitFPNeg16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF32("{}=-({});", inst, value);
+}
+
+void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF64("{}=-({});", inst, value);
+}
+
+void EmitFPSin(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF32("{}=sin({});", inst, value);
+}
+
+void EmitFPCos(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF32("{}=cos({});", inst, value);
+}
+
+void EmitFPExp2(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF32("{}=exp2({});", inst, value);
+}
+
+void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF32("{}=log2({});", inst, value);
+}
+
+void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF32("{}=(1.0f)/{};", inst, value);
+}
+
+void EmitFPRecip64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF64("{}=1.0/{};", inst, value);
+}
+
+void EmitFPRecipSqrt32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF32("{}=inversesqrt({});", inst, value);
+}
+
+void EmitFPRecipSqrt64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF32("{}=sqrt({});", inst, value);
+}
+
+void EmitFPSaturate16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF32("{}=min(max({},0.0),1.0);", inst, value);
+}
+
+void EmitFPSaturate64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF64("{}=min(max({},0.0),1.0);", inst, value);
+}
+
+void EmitFPClamp16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value,
+ [[maybe_unused]] std::string_view min_value,
+ [[maybe_unused]] std::string_view max_value) {
+ NotImplemented();
+}
+
+void EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value,
+ std::string_view min_value, std::string_view max_value) {
+ // GLSL's clamp does not produce desirable results
+ ctx.AddF32("{}=min(max({},float({})),float({}));", inst, value, min_value, max_value);
+}
+
+void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, std::string_view value,
+ std::string_view min_value, std::string_view max_value) {
+ // GLSL's clamp does not produce desirable results
+ ctx.AddF64("{}=min(max({},double({})),double({}));", inst, value, min_value, max_value);
+}
+
+void EmitFPRoundEven16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF32("{}=roundEven({});", inst, value);
+}
+
+void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF64("{}=roundEven({});", inst, value);
+}
+
+void EmitFPFloor16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF32("{}=floor({});", inst, value);
+}
+
+void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF64("{}=floor({});", inst, value);
+}
+
+void EmitFPCeil16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF32("{}=ceil({});", inst, value);
+}
+
+void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF64("{}=ceil({});", inst, value);
+}
+
+void EmitFPTrunc16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF32("{}=trunc({});", inst, value);
+}
+
+void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddF64("{}=trunc({});", inst, value);
+}
+
+void EmitFPOrdEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs,
+ [[maybe_unused]] std::string_view rhs) {
+ NotImplemented();
+}
+
+void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ Compare(ctx, inst, lhs, rhs, "==", true);
+}
+
+void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ Compare(ctx, inst, lhs, rhs, "==", true);
+}
+
+void EmitFPUnordEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs,
+ [[maybe_unused]] std::string_view rhs) {
+ NotImplemented();
+}
+
+void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ Compare(ctx, inst, lhs, rhs, "==", false);
+}
+
+void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ Compare(ctx, inst, lhs, rhs, "==", false);
+}
+
+void EmitFPOrdNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs,
+ [[maybe_unused]] std::string_view rhs) {
+ NotImplemented();
+}
+
+void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ Compare(ctx, inst, lhs, rhs, "!=", true);
+}
+
+void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ Compare(ctx, inst, lhs, rhs, "!=", true);
+}
+
+void EmitFPUnordNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs,
+ [[maybe_unused]] std::string_view rhs) {
+ NotImplemented();
+}
+
+void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ Compare(ctx, inst, lhs, rhs, "!=", false);
+}
+
+void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ Compare(ctx, inst, lhs, rhs, "!=", false);
+}
+
+void EmitFPOrdLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs,
+ [[maybe_unused]] std::string_view rhs) {
+ NotImplemented();
+}
+
+void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ Compare(ctx, inst, lhs, rhs, "<", true);
+}
+
+void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ Compare(ctx, inst, lhs, rhs, "<", true);
+}
+
+void EmitFPUnordLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs,
+ [[maybe_unused]] std::string_view rhs) {
+ NotImplemented();
+}
+
+void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ Compare(ctx, inst, lhs, rhs, "<", false);
+}
+
+void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ Compare(ctx, inst, lhs, rhs, "<", false);
+}
+
+void EmitFPOrdGreaterThan16([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] std::string_view lhs,
+ [[maybe_unused]] std::string_view rhs) {
+ NotImplemented();
+}
+
+void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ Compare(ctx, inst, lhs, rhs, ">", true);
+}
+
+void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ Compare(ctx, inst, lhs, rhs, ">", true);
+}
+
+void EmitFPUnordGreaterThan16([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] std::string_view lhs,
+ [[maybe_unused]] std::string_view rhs) {
+ NotImplemented();
+}
+
+void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ Compare(ctx, inst, lhs, rhs, ">", false);
+}
+
+void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ Compare(ctx, inst, lhs, rhs, ">", false);
+}
+
+void EmitFPOrdLessThanEqual16([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] std::string_view lhs,
+ [[maybe_unused]] std::string_view rhs) {
+ NotImplemented();
+}
+
+void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ Compare(ctx, inst, lhs, rhs, "<=", true);
+}
+
+void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ Compare(ctx, inst, lhs, rhs, "<=", true);
+}
+
+void EmitFPUnordLessThanEqual16([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] std::string_view lhs,
+ [[maybe_unused]] std::string_view rhs) {
+ NotImplemented();
+}
+
+void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ Compare(ctx, inst, lhs, rhs, "<=", false);
+}
+
+void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ Compare(ctx, inst, lhs, rhs, "<=", false);
+}
+
+void EmitFPOrdGreaterThanEqual16([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] std::string_view lhs,
+ [[maybe_unused]] std::string_view rhs) {
+ NotImplemented();
+}
+
+void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ Compare(ctx, inst, lhs, rhs, ">=", true);
+}
+
+void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ Compare(ctx, inst, lhs, rhs, ">=", true);
+}
+
+void EmitFPUnordGreaterThanEqual16([[maybe_unused]] EmitContext& ctx,
+ [[maybe_unused]] std::string_view lhs,
+ [[maybe_unused]] std::string_view rhs) {
+ NotImplemented();
+}
+
+void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ Compare(ctx, inst, lhs, rhs, ">=", false);
+}
+
+void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ Compare(ctx, inst, lhs, rhs, ">=", false);
+}
+
+void EmitFPIsNan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+ [[maybe_unused]] std::string_view value) {
+ NotImplemented();
+}
+
+void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU1("{}=isnan({});", inst, value);
+}
+
+void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU1("{}=isnan({});", inst, value);
+}
+
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
new file mode 100644
index 000000000..447eb8e0a
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
@@ -0,0 +1,799 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glsl/emit_context.h"
+#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/profile.h"
+
+namespace Shader::Backend::GLSL {
+namespace {
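+// Returns the GLSL identifier of the texture (or texture buffer) for this descriptor, appending
+// an array subscript when the descriptor covers more than one element. Image() below does the
+// same for image bindings.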
+std::string Texture(EmitContext& ctx, const IR::TextureInstInfo& info, const IR::Value& index) {
+ const auto def{info.type == TextureType::Buffer ? ctx.texture_buffers.at(info.descriptor_index)
+ : ctx.textures.at(info.descriptor_index)};
+ const auto index_offset{def.count > 1 ? fmt::format("[{}]", ctx.var_alloc.Consume(index)) : ""};
+ return fmt::format("tex{}{}", def.binding, index_offset);
+}
+
+std::string Image(EmitContext& ctx, const IR::TextureInstInfo& info, const IR::Value& index) {
+ const auto def{info.type == TextureType::Buffer ? ctx.image_buffers.at(info.descriptor_index)
+ : ctx.images.at(info.descriptor_index)};
+ const auto index_offset{def.count > 1 ? fmt::format("[{}]", ctx.var_alloc.Consume(index)) : ""};
+ return fmt::format("img{}{}", def.binding, index_offset);
+}
+
+std::string CastToIntVec(std::string_view value, const IR::TextureInstInfo& info) {
+ switch (info.type) {
+ case TextureType::Color1D:
+ case TextureType::Buffer:
+ return fmt::format("int({})", value);
+ case TextureType::ColorArray1D:
+ case TextureType::Color2D:
+ case TextureType::ColorArray2D:
+ return fmt::format("ivec2({})", value);
+ case TextureType::Color3D:
+ case TextureType::ColorCube:
+ return fmt::format("ivec3({})", value);
+ case TextureType::ColorArrayCube:
+ return fmt::format("ivec4({})", value);
+ default:
+ throw NotImplementedException("Integer cast for TextureType {}", info.type.Value());
+ }
+}
+
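+// Variant of CastToIntVec used for texelFetch-style coordinates, where 2D array coordinates keep
+// the layer as a third component.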
+std::string CoordsCastToInt(std::string_view value, const IR::TextureInstInfo& info) {
+ switch (info.type) {
+ case TextureType::Color1D:
+ case TextureType::Buffer:
+ return fmt::format("int({})", value);
+ case TextureType::ColorArray1D:
+ case TextureType::Color2D:
+ return fmt::format("ivec2({})", value);
+ case TextureType::ColorArray2D:
+ case TextureType::Color3D:
+ case TextureType::ColorCube:
+ return fmt::format("ivec3({})", value);
+ case TextureType::ColorArrayCube:
+ return fmt::format("ivec4({})", value);
+ default:
+ throw NotImplementedException("TexelFetchCast type {}", info.type.Value());
+ }
+}
+
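+// These shadow sampler types only gain textureLod/textureLodOffset support through
+// GL_EXT_texture_shadow_lod.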
+bool NeedsShadowLodExt(TextureType type) {
+ switch (type) {
+ case TextureType::ColorArray2D:
+ case TextureType::ColorCube:
+ case TextureType::ColorArrayCube:
+ return true;
+ default:
+ return false;
+ }
+}
+
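+// Converts a texture offset operand into an integer vector of matching width. Constant composites
+// become literal vectors; when the driver lacks variable-offset support the offset is stubbed to
+// zero.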
+std::string GetOffsetVec(EmitContext& ctx, const IR::Value& offset) {
+ if (offset.IsImmediate()) {
+ return fmt::format("int({})", offset.U32());
+ }
+ IR::Inst* const inst{offset.InstRecursive()};
+ if (inst->AreAllArgsImmediates()) {
+ switch (inst->GetOpcode()) {
+ case IR::Opcode::CompositeConstructU32x2:
+ return fmt::format("ivec2({},{})", inst->Arg(0).U32(), inst->Arg(1).U32());
+ case IR::Opcode::CompositeConstructU32x3:
+ return fmt::format("ivec3({},{},{})", inst->Arg(0).U32(), inst->Arg(1).U32(),
+ inst->Arg(2).U32());
+ case IR::Opcode::CompositeConstructU32x4:
+ return fmt::format("ivec4({},{},{},{})", inst->Arg(0).U32(), inst->Arg(1).U32(),
+ inst->Arg(2).U32(), inst->Arg(3).U32());
+ default:
+ break;
+ }
+ }
+ const bool has_var_aoffi{ctx.profile.support_gl_variable_aoffi};
+ if (!has_var_aoffi) {
+ LOG_WARNING(Shader_GLSL, "Device does not support variable texture offsets, STUBBING");
+ }
+ const auto offset_str{has_var_aoffi ? ctx.var_alloc.Consume(offset) : "0"};
+ switch (offset.Type()) {
+ case IR::Type::U32:
+ return fmt::format("int({})", offset_str);
+ case IR::Type::U32x2:
+ return fmt::format("ivec2({})", offset_str);
+ case IR::Type::U32x3:
+ return fmt::format("ivec3({})", offset_str);
+ case IR::Type::U32x4:
+ return fmt::format("ivec4({})", offset_str);
+ default:
+ throw NotImplementedException("Offset type {}", offset.Type());
+ }
+}
+
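+// Builds the constant ivec2[4] offset array for textureGatherOffsets; GLSL requires these offsets
+// to be compile-time constants, so non-immediate arguments are stubbed.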
+std::string PtpOffsets(const IR::Value& offset, const IR::Value& offset2) {
+ const std::array values{offset.InstRecursive(), offset2.InstRecursive()};
+ if (!values[0]->AreAllArgsImmediates() || !values[1]->AreAllArgsImmediates()) {
+ LOG_WARNING(Shader_GLSL, "Not all arguments in PTP are immediate, STUBBING");
+ return "ivec2[](ivec2(0), ivec2(1), ivec2(2), ivec2(3))";
+ }
+ const IR::Opcode opcode{values[0]->GetOpcode()};
+ if (opcode != values[1]->GetOpcode() || opcode != IR::Opcode::CompositeConstructU32x4) {
+ throw LogicError("Invalid PTP arguments");
+ }
+ auto read{[&](unsigned int a, unsigned int b) { return values[a]->Arg(b).U32(); }};
+
+ return fmt::format("ivec2[](ivec2({},{}),ivec2({},{}),ivec2({},{}),ivec2({},{}))", read(0, 0),
+ read(0, 1), read(0, 2), read(0, 3), read(1, 0), read(1, 1), read(1, 2),
+ read(1, 3));
+}
+
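+// Detaches the associated sparse-residency pseudo instruction, if any, so the caller can emit the
+// residency result itself.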
+IR::Inst* PrepareSparse(IR::Inst& inst) {
+ const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+ if (sparse_inst) {
+ sparse_inst->Invalidate();
+ }
+ return sparse_inst;
+}
+} // Anonymous namespace
+
+void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view bias_lc,
+ const IR::Value& offset) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ if (info.has_lod_clamp) {
+ throw NotImplementedException("EmitImageSampleImplicitLod Lod clamp samples");
+ }
+ const auto texture{Texture(ctx, info, index)};
+ const auto bias{info.has_bias ? fmt::format(",{}", bias_lc) : ""};
+ const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)};
+ const auto sparse_inst{PrepareSparse(inst)};
+ const bool supports_sparse{ctx.profile.support_gl_sparse_textures};
+ if (sparse_inst && !supports_sparse) {
+ LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING");
+ ctx.AddU1("{}=true;", *sparse_inst);
+ }
+ if (!sparse_inst || !supports_sparse) {
+ if (!offset.IsEmpty()) {
+ const auto offset_str{GetOffsetVec(ctx, offset)};
+ if (ctx.stage == Stage::Fragment) {
+ ctx.Add("{}=textureOffset({},{},{}{});", texel, texture, coords, offset_str, bias);
+ } else {
+ ctx.Add("{}=textureLodOffset({},{},0.0,{});", texel, texture, coords, offset_str);
+ }
+ } else {
+ if (ctx.stage == Stage::Fragment) {
+ ctx.Add("{}=texture({},{}{});", texel, texture, coords, bias);
+ } else {
+ ctx.Add("{}=textureLod({},{},0.0);", texel, texture, coords);
+ }
+ }
+ return;
+ }
+ if (!offset.IsEmpty()) {
+ ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureOffsetARB({},{},{},{}{}));",
+ *sparse_inst, texture, coords, GetOffsetVec(ctx, offset), texel, bias);
+ } else {
+ ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureARB({},{},{}{}));", *sparse_inst,
+ texture, coords, texel, bias);
+ }
+}
+
+void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view lod_lc,
+ const IR::Value& offset) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ if (info.has_bias) {
+ throw NotImplementedException("EmitImageSampleExplicitLod Bias texture samples");
+ }
+ if (info.has_lod_clamp) {
+ throw NotImplementedException("EmitImageSampleExplicitLod Lod clamp samples");
+ }
+ const auto texture{Texture(ctx, info, index)};
+ const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)};
+ const auto sparse_inst{PrepareSparse(inst)};
+ const bool supports_sparse{ctx.profile.support_gl_sparse_textures};
+ if (sparse_inst && !supports_sparse) {
+ LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING");
+ ctx.AddU1("{}=true;", *sparse_inst);
+ }
+ if (!sparse_inst || !supports_sparse) {
+ if (!offset.IsEmpty()) {
+ ctx.Add("{}=textureLodOffset({},{},{},{});", texel, texture, coords, lod_lc,
+ GetOffsetVec(ctx, offset));
+ } else {
+ ctx.Add("{}=textureLod({},{},{});", texel, texture, coords, lod_lc);
+ }
+ return;
+ }
+ if (!offset.IsEmpty()) {
+ ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchOffsetARB({},{},int({}),{},{}));",
+ *sparse_inst, texture, CastToIntVec(coords, info), lod_lc,
+ GetOffsetVec(ctx, offset), texel);
+ } else {
+ ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureLodARB({},{},{},{}));", *sparse_inst,
+ texture, coords, lod_lc, texel);
+ }
+}
+
+void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view dref,
+ std::string_view bias_lc, const IR::Value& offset) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto sparse_inst{PrepareSparse(inst)};
+ if (sparse_inst) {
+ throw NotImplementedException("EmitImageSampleDrefImplicitLod Sparse texture samples");
+ }
+ if (info.has_bias) {
+ throw NotImplementedException("EmitImageSampleDrefImplicitLod Bias texture samples");
+ }
+ if (info.has_lod_clamp) {
+ throw NotImplementedException("EmitImageSampleDrefImplicitLod Lod clamp samples");
+ }
+ const auto texture{Texture(ctx, info, index)};
+ const auto bias{info.has_bias ? fmt::format(",{}", bias_lc) : ""};
+ const bool needs_shadow_ext{NeedsShadowLodExt(info.type)};
+ const auto cast{needs_shadow_ext ? "vec4" : "vec3"};
+ const bool use_grad{!ctx.profile.support_gl_texture_shadow_lod &&
+ ctx.stage != Stage::Fragment && needs_shadow_ext};
+ if (use_grad) {
+ LOG_WARNING(Shader_GLSL,
+ "Device lacks GL_EXT_texture_shadow_lod. Using textureGrad fallback");
+ if (info.type == TextureType::ColorArrayCube) {
+ LOG_WARNING(Shader_GLSL, "textureGrad does not support ColorArrayCube. Stubbing");
+ ctx.AddF32("{}=0.0f;", inst);
+ return;
+ }
+ const auto d_cast{info.type == TextureType::ColorArray2D ? "vec2" : "vec3"};
+ ctx.AddF32("{}=textureGrad({},{}({},{}),{}(0),{}(0));", inst, texture, cast, coords, dref,
+ d_cast, d_cast);
+ return;
+ }
+ if (!offset.IsEmpty()) {
+ const auto offset_str{GetOffsetVec(ctx, offset)};
+ if (ctx.stage == Stage::Fragment) {
+ ctx.AddF32("{}=textureOffset({},{}({},{}),{}{});", inst, texture, cast, coords, dref,
+ offset_str, bias);
+ } else {
+ ctx.AddF32("{}=textureLodOffset({},{}({},{}),0.0,{});", inst, texture, cast, coords,
+ dref, offset_str);
+ }
+ } else {
+ if (ctx.stage == Stage::Fragment) {
+ if (info.type == TextureType::ColorArrayCube) {
+ ctx.AddF32("{}=texture({},vec4({}),{});", inst, texture, coords, dref);
+ } else {
+ ctx.AddF32("{}=texture({},{}({},{}){});", inst, texture, cast, coords, dref, bias);
+ }
+ } else {
+ ctx.AddF32("{}=textureLod({},{}({},{}),0.0);", inst, texture, cast, coords, dref);
+ }
+ }
+}
+
+void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view dref,
+ std::string_view lod_lc, const IR::Value& offset) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto sparse_inst{PrepareSparse(inst)};
+ if (sparse_inst) {
+ throw NotImplementedException("EmitImageSampleDrefExplicitLod Sparse texture samples");
+ }
+ if (info.has_bias) {
+ throw NotImplementedException("EmitImageSampleDrefExplicitLod Bias texture samples");
+ }
+ if (info.has_lod_clamp) {
+ throw NotImplementedException("EmitImageSampleDrefExplicitLod Lod clamp samples");
+ }
+ const auto texture{Texture(ctx, info, index)};
+ const bool needs_shadow_ext{NeedsShadowLodExt(info.type)};
+ const bool use_grad{!ctx.profile.support_gl_texture_shadow_lod && needs_shadow_ext};
+ const auto cast{needs_shadow_ext ? "vec4" : "vec3"};
+ if (use_grad) {
+ LOG_WARNING(Shader_GLSL,
+ "Device lacks GL_EXT_texture_shadow_lod. Using textureGrad fallback");
+ if (info.type == TextureType::ColorArrayCube) {
+ LOG_WARNING(Shader_GLSL, "textureGrad does not support ColorArrayCube. Stubbing");
+ ctx.AddF32("{}=0.0f;", inst);
+ return;
+ }
+ const auto d_cast{info.type == TextureType::ColorArray2D ? "vec2" : "vec3"};
+ ctx.AddF32("{}=textureGrad({},{}({},{}),{}(0),{}(0));", inst, texture, cast, coords, dref,
+ d_cast, d_cast);
+ return;
+ }
+ if (!offset.IsEmpty()) {
+ const auto offset_str{GetOffsetVec(ctx, offset)};
+ if (info.type == TextureType::ColorArrayCube) {
+ ctx.AddF32("{}=textureLodOffset({},{},{},{},{});", inst, texture, coords, dref, lod_lc,
+ offset_str);
+ } else {
+ ctx.AddF32("{}=textureLodOffset({},{}({},{}),{},{});", inst, texture, cast, coords,
+ dref, lod_lc, offset_str);
+ }
+ } else {
+ if (info.type == TextureType::ColorArrayCube) {
+ ctx.AddF32("{}=textureLod({},{},{},{});", inst, texture, coords, dref, lod_lc);
+ } else {
+ ctx.AddF32("{}=textureLod({},{}({},{}),{});", inst, texture, cast, coords, dref,
+ lod_lc);
+ }
+ }
+}
+
+void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, const IR::Value& offset, const IR::Value& offset2) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto texture{Texture(ctx, info, index)};
+ const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)};
+ const auto sparse_inst{PrepareSparse(inst)};
+ const bool supports_sparse{ctx.profile.support_gl_sparse_textures};
+ if (sparse_inst && !supports_sparse) {
+ LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING");
+ ctx.AddU1("{}=true;", *sparse_inst);
+ }
+ if (!sparse_inst || !supports_sparse) {
+ if (offset.IsEmpty()) {
+ ctx.Add("{}=textureGather({},{},int({}));", texel, texture, coords,
+ info.gather_component);
+ return;
+ }
+ if (offset2.IsEmpty()) {
+ ctx.Add("{}=textureGatherOffset({},{},{},int({}));", texel, texture, coords,
+ GetOffsetVec(ctx, offset), info.gather_component);
+ return;
+ }
+ // PTP
+ const auto offsets{PtpOffsets(offset, offset2)};
+ ctx.Add("{}=textureGatherOffsets({},{},{},int({}));", texel, texture, coords, offsets,
+ info.gather_component);
+ return;
+ }
+ if (offset.IsEmpty()) {
+ ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherARB({},{},{},int({})));",
+ *sparse_inst, texture, coords, texel, info.gather_component);
+ return;
+ }
+ if (offset2.IsEmpty()) {
+ ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetARB({},{},{},{},int({})));",
+ *sparse_inst, texture, CastToIntVec(coords, info), GetOffsetVec(ctx, offset),
+ texel, info.gather_component);
+ return;
+ }
+ // PTP
+ const auto offsets{PtpOffsets(offset, offset2)};
+ ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetARB({},{},{},{},int({})));",
+ *sparse_inst, texture, CastToIntVec(coords, info), offsets, texel,
+ info.gather_component);
+}
+
+void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, const IR::Value& offset, const IR::Value& offset2,
+ std::string_view dref) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto texture{Texture(ctx, info, index)};
+ const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)};
+ const auto sparse_inst{PrepareSparse(inst)};
+ const bool supports_sparse{ctx.profile.support_gl_sparse_textures};
+ if (sparse_inst && !supports_sparse) {
+ LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING");
+ ctx.AddU1("{}=true;", *sparse_inst);
+ }
+ if (!sparse_inst || !supports_sparse) {
+ if (offset.IsEmpty()) {
+ ctx.Add("{}=textureGather({},{},{});", texel, texture, coords, dref);
+ return;
+ }
+ if (offset2.IsEmpty()) {
+ ctx.Add("{}=textureGatherOffset({},{},{},{});", texel, texture, coords, dref,
+ GetOffsetVec(ctx, offset));
+ return;
+ }
+ // PTP
+ const auto offsets{PtpOffsets(offset, offset2)};
+ ctx.Add("{}=textureGatherOffsets({},{},{},{});", texel, texture, coords, dref, offsets);
+ return;
+ }
+ if (offset.IsEmpty()) {
+ ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherARB({},{},{},{}));", *sparse_inst,
+ texture, coords, dref, texel);
+ return;
+ }
+ if (offset2.IsEmpty()) {
+        ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetARB({},{},{},{},{}));",
+ *sparse_inst, texture, CastToIntVec(coords, info), dref,
+ GetOffsetVec(ctx, offset), texel);
+ return;
+ }
+ // PTP
+ const auto offsets{PtpOffsets(offset, offset2)};
+    ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetsARB({},{},{},{},{}));",
+ *sparse_inst, texture, CastToIntVec(coords, info), dref, offsets, texel);
+}
+
+void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view offset, std::string_view lod,
+ [[maybe_unused]] std::string_view ms) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ if (info.has_bias) {
+ throw NotImplementedException("EmitImageFetch Bias texture samples");
+ }
+ if (info.has_lod_clamp) {
+ throw NotImplementedException("EmitImageFetch Lod clamp samples");
+ }
+ const auto texture{Texture(ctx, info, index)};
+ const auto sparse_inst{PrepareSparse(inst)};
+ const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)};
+ const bool supports_sparse{ctx.profile.support_gl_sparse_textures};
+ if (sparse_inst && !supports_sparse) {
+ LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING");
+ ctx.AddU1("{}=true;", *sparse_inst);
+ }
+ if (!sparse_inst || !supports_sparse) {
+ if (!offset.empty()) {
+ ctx.Add("{}=texelFetchOffset({},{},int({}),{});", texel, texture,
+ CoordsCastToInt(coords, info), lod, CoordsCastToInt(offset, info));
+ } else {
+ if (info.type == TextureType::Buffer) {
+ ctx.Add("{}=texelFetch({},int({}));", texel, texture, coords);
+ } else {
+ ctx.Add("{}=texelFetch({},{},int({}));", texel, texture,
+ CoordsCastToInt(coords, info), lod);
+ }
+ }
+ return;
+ }
+ if (!offset.empty()) {
+ ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchOffsetARB({},{},int({}),{},{}));",
+ *sparse_inst, texture, CastToIntVec(coords, info), lod,
+ CastToIntVec(offset, info), texel);
+ } else {
+ ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchARB({},{},int({}),{}));",
+ *sparse_inst, texture, CastToIntVec(coords, info), lod, texel);
+ }
+}
+
+void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view lod) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto texture{Texture(ctx, info, index)};
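+    // Pack the size into the low components, zero-pad the rest, and put the mip count in .w.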
+ switch (info.type) {
+ case TextureType::Color1D:
+ return ctx.AddU32x4(
+ "{}=uvec4(uint(textureSize({},int({}))),0u,0u,uint(textureQueryLevels({})));", inst,
+ texture, lod, texture);
+ case TextureType::ColorArray1D:
+ case TextureType::Color2D:
+ case TextureType::ColorCube:
+ return ctx.AddU32x4(
+ "{}=uvec4(uvec2(textureSize({},int({}))),0u,uint(textureQueryLevels({})));", inst,
+ texture, lod, texture);
+ case TextureType::ColorArray2D:
+ case TextureType::Color3D:
+ case TextureType::ColorArrayCube:
+ return ctx.AddU32x4(
+ "{}=uvec4(uvec3(textureSize({},int({}))),uint(textureQueryLevels({})));", inst, texture,
+ lod, texture);
+ case TextureType::Buffer:
+ throw NotImplementedException("EmitImageQueryDimensions Texture buffers");
+ }
+ throw LogicError("Unspecified image type {}", info.type.Value());
+}
+
+void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto texture{Texture(ctx, info, index)};
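+    // textureQueryLod returns a vec2; zero-pad it to the vec4 the IR expects.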
+ return ctx.AddF32x4("{}=vec4(textureQueryLod({},{}),0.0,0.0);", inst, texture, coords);
+}
+
+void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, const IR::Value& derivatives,
+ const IR::Value& offset, [[maybe_unused]] const IR::Value& lod_clamp) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ if (info.has_lod_clamp) {
+ throw NotImplementedException("EmitImageGradient Lod clamp samples");
+ }
+ const auto sparse_inst{PrepareSparse(inst)};
+ if (sparse_inst) {
+ throw NotImplementedException("EmitImageGradient Sparse");
+ }
+ if (!offset.IsEmpty()) {
+ throw NotImplementedException("EmitImageGradient offset");
+ }
+ const auto texture{Texture(ctx, info, index)};
+ const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)};
+ const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp};
+ const auto derivatives_vec{ctx.var_alloc.Consume(derivatives)};
+ if (multi_component) {
+ ctx.Add("{}=textureGrad({},{},vec2({}.xz),vec2({}.yz));", texel, texture, coords,
+ derivatives_vec, derivatives_vec);
+ } else {
+ ctx.Add("{}=textureGrad({},{},float({}.x),float({}.y));", texel, texture, coords,
+ derivatives_vec, derivatives_vec);
+ }
+}
+
+void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto sparse_inst{PrepareSparse(inst)};
+ if (sparse_inst) {
+ throw NotImplementedException("EmitImageRead Sparse");
+ }
+ const auto image{Image(ctx, info, index)};
+ ctx.AddU32x4("{}=uvec4(imageLoad({},{}));", inst, image, CoordsCastToInt(coords, info));
+}
+
+void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view color) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto image{Image(ctx, info, index)};
+ ctx.Add("imageStore({},{},{});", image, CoordsCastToInt(coords, info), color);
+}
+
+void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view value) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto image{Image(ctx, info, index)};
+ ctx.AddU32("{}=imageAtomicAdd({},{},{});", inst, image, CoordsCastToInt(coords, info), value);
+}
+
+void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view value) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto image{Image(ctx, info, index)};
+ ctx.AddU32("{}=imageAtomicMin({},{},int({}));", inst, image, CoordsCastToInt(coords, info),
+ value);
+}
+
+void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view value) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto image{Image(ctx, info, index)};
+ ctx.AddU32("{}=imageAtomicMin({},{},uint({}));", inst, image, CoordsCastToInt(coords, info),
+ value);
+}
+
+void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view value) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto image{Image(ctx, info, index)};
+ ctx.AddU32("{}=imageAtomicMax({},{},int({}));", inst, image, CoordsCastToInt(coords, info),
+ value);
+}
+
+void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view value) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto image{Image(ctx, info, index)};
+ ctx.AddU32("{}=imageAtomicMax({},{},uint({}));", inst, image, CoordsCastToInt(coords, info),
+ value);
+}
+
+void EmitImageAtomicInc32(EmitContext&, IR::Inst&, const IR::Value&, std::string_view,
+ std::string_view) {
+ NotImplemented();
+}
+
+void EmitImageAtomicDec32(EmitContext&, IR::Inst&, const IR::Value&, std::string_view,
+ std::string_view) {
+ NotImplemented();
+}
+
+void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view value) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto image{Image(ctx, info, index)};
+ ctx.AddU32("{}=imageAtomicAnd({},{},{});", inst, image, CoordsCastToInt(coords, info), value);
+}
+
+void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view value) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto image{Image(ctx, info, index)};
+ ctx.AddU32("{}=imageAtomicOr({},{},{});", inst, image, CoordsCastToInt(coords, info), value);
+}
+
+void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view value) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto image{Image(ctx, info, index)};
+ ctx.AddU32("{}=imageAtomicXor({},{},{});", inst, image, CoordsCastToInt(coords, info), value);
+}
+
+void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view value) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto image{Image(ctx, info, index)};
+ ctx.AddU32("{}=imageAtomicExchange({},{},{});", inst, image, CoordsCastToInt(coords, info),
+ value);
+}
+
+void EmitBindlessImageSampleImplicitLod(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBindlessImageSampleExplicitLod(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBindlessImageSampleDrefImplicitLod(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBindlessImageSampleDrefExplicitLod(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBindlessImageGather(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBindlessImageGatherDref(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBindlessImageFetch(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBindlessImageQueryDimensions(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBindlessImageQueryLod(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBindlessImageGradient(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBindlessImageRead(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBindlessImageWrite(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBoundImageSampleImplicitLod(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBoundImageSampleExplicitLod(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBoundImageSampleDrefImplicitLod(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBoundImageSampleDrefExplicitLod(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBoundImageGather(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBoundImageGatherDref(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBoundImageFetch(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBoundImageQueryDimensions(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBoundImageQueryLod(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBoundImageGradient(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBoundImageRead(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBoundImageWrite(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBindlessImageAtomicIAdd32(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBindlessImageAtomicSMin32(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBindlessImageAtomicUMin32(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBindlessImageAtomicSMax32(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBindlessImageAtomicUMax32(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBindlessImageAtomicInc32(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBindlessImageAtomicDec32(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBindlessImageAtomicAnd32(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBindlessImageAtomicOr32(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBindlessImageAtomicXor32(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBindlessImageAtomicExchange32(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBoundImageAtomicIAdd32(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBoundImageAtomicSMin32(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBoundImageAtomicUMin32(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBoundImageAtomicSMax32(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBoundImageAtomicUMax32(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBoundImageAtomicInc32(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBoundImageAtomicDec32(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBoundImageAtomicAnd32(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBoundImageAtomicOr32(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBoundImageAtomicXor32(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitBoundImageAtomicExchange32(EmitContext&) {
+ NotImplemented();
+}
+
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
new file mode 100644
index 000000000..5936d086f
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
@@ -0,0 +1,702 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <string_view>
+
+#include "common/common_types.h"
+
+namespace Shader::IR {
+enum class Attribute : u64;
+enum class Patch : u64;
+class Inst;
+class Value;
+} // namespace Shader::IR
+
+namespace Shader::Backend::GLSL {
+class EmitContext;
+
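+// Reports the enclosing emitter function as an unimplemented GLSL instruction.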
+#define NotImplemented() throw NotImplementedException("GLSL instruction {}", __func__)
+
+// Microinstruction emitters
+void EmitPhi(EmitContext& ctx, IR::Inst& inst);
+void EmitVoid(EmitContext& ctx);
+void EmitIdentity(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
+void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
+void EmitReference(EmitContext& ctx, const IR::Value& value);
+void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value);
+void EmitJoin(EmitContext& ctx);
+void EmitDemoteToHelperInvocation(EmitContext& ctx);
+void EmitBarrier(EmitContext& ctx);
+void EmitWorkgroupMemoryBarrier(EmitContext& ctx);
+void EmitDeviceMemoryBarrier(EmitContext& ctx);
+void EmitPrologue(EmitContext& ctx);
+void EmitEpilogue(EmitContext& ctx);
+void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream);
+void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream);
+void EmitGetRegister(EmitContext& ctx);
+void EmitSetRegister(EmitContext& ctx);
+void EmitGetPred(EmitContext& ctx);
+void EmitSetPred(EmitContext& ctx);
+void EmitSetGotoVariable(EmitContext& ctx);
+void EmitGetGotoVariable(EmitContext& ctx);
+void EmitSetIndirectBranchVariable(EmitContext& ctx);
+void EmitGetIndirectBranchVariable(EmitContext& ctx);
+void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset);
+void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset);
+void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset);
+void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset);
+void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset);
+void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset);
+void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset);
+void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr,
+ std::string_view vertex);
+void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value,
+ std::string_view vertex);
+void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, std::string_view offset,
+ std::string_view vertex);
+void EmitSetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view value,
+ std::string_view vertex);
+void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch);
+void EmitSetPatch(EmitContext& ctx, IR::Patch patch, std::string_view value);
+void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string_view value);
+void EmitSetSampleMask(EmitContext& ctx, std::string_view value);
+void EmitSetFragDepth(EmitContext& ctx, std::string_view value);
+void EmitGetZFlag(EmitContext& ctx);
+void EmitGetSFlag(EmitContext& ctx);
+void EmitGetCFlag(EmitContext& ctx);
+void EmitGetOFlag(EmitContext& ctx);
+void EmitSetZFlag(EmitContext& ctx);
+void EmitSetSFlag(EmitContext& ctx);
+void EmitSetCFlag(EmitContext& ctx);
+void EmitSetOFlag(EmitContext& ctx);
+void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst);
+void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst);
+void EmitInvocationId(EmitContext& ctx, IR::Inst& inst);
+void EmitSampleId(EmitContext& ctx, IR::Inst& inst);
+void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst);
+void EmitYDirection(EmitContext& ctx, IR::Inst& inst);
+void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, std::string_view word_offset);
+void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value);
+void EmitUndefU1(EmitContext& ctx, IR::Inst& inst);
+void EmitUndefU8(EmitContext& ctx, IR::Inst& inst);
+void EmitUndefU16(EmitContext& ctx, IR::Inst& inst);
+void EmitUndefU32(EmitContext& ctx, IR::Inst& inst);
+void EmitUndefU64(EmitContext& ctx, IR::Inst& inst);
+void EmitLoadGlobalU8(EmitContext& ctx);
+void EmitLoadGlobalS8(EmitContext& ctx);
+void EmitLoadGlobalU16(EmitContext& ctx);
+void EmitLoadGlobalS16(EmitContext& ctx);
+void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, std::string_view address);
+void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, std::string_view address);
+void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, std::string_view address);
+void EmitWriteGlobalU8(EmitContext& ctx);
+void EmitWriteGlobalS8(EmitContext& ctx);
+void EmitWriteGlobalU16(EmitContext& ctx);
+void EmitWriteGlobalS16(EmitContext& ctx);
+void EmitWriteGlobal32(EmitContext& ctx, std::string_view address, std::string_view value);
+void EmitWriteGlobal64(EmitContext& ctx, std::string_view address, std::string_view value);
+void EmitWriteGlobal128(EmitContext& ctx, std::string_view address, std::string_view value);
+void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset);
+void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset);
+void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset);
+void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset);
+void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset);
+void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset);
+void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset);
+void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ std::string_view value);
+void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ std::string_view value);
+void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ std::string_view value);
+void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ std::string_view value);
+void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ std::string_view value);
+void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ std::string_view value);
+void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ std::string_view value);
+void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, std::string_view offset);
+void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, std::string_view offset);
+void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, std::string_view offset);
+void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, std::string_view offset);
+void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, std::string_view offset);
+void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, std::string_view offset);
+void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, std::string_view offset);
+void EmitWriteSharedU8(EmitContext& ctx, std::string_view offset, std::string_view value);
+void EmitWriteSharedU16(EmitContext& ctx, std::string_view offset, std::string_view value);
+void EmitWriteSharedU32(EmitContext& ctx, std::string_view offset, std::string_view value);
+void EmitWriteSharedU64(EmitContext& ctx, std::string_view offset, std::string_view value);
+void EmitWriteSharedU128(EmitContext& ctx, std::string_view offset, std::string_view value);
+void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
+ std::string_view e2);
+void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
+ std::string_view e2, std::string_view e3);
+void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
+ std::string_view e2, std::string_view e3, std::string_view e4);
+void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
+ u32 index);
+void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
+ u32 index);
+void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
+ u32 index);
+void EmitCompositeInsertU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
+ std::string_view object, u32 index);
+void EmitCompositeInsertU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
+ std::string_view object, u32 index);
+void EmitCompositeInsertU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
+ std::string_view object, u32 index);
+void EmitCompositeConstructF16x2(EmitContext& ctx, std::string_view e1, std::string_view e2);
+void EmitCompositeConstructF16x3(EmitContext& ctx, std::string_view e1, std::string_view e2,
+ std::string_view e3);
+void EmitCompositeConstructF16x4(EmitContext& ctx, std::string_view e1, std::string_view e2,
+ std::string_view e3, std::string_view e4);
+void EmitCompositeExtractF16x2(EmitContext& ctx, std::string_view composite, u32 index);
+void EmitCompositeExtractF16x3(EmitContext& ctx, std::string_view composite, u32 index);
+void EmitCompositeExtractF16x4(EmitContext& ctx, std::string_view composite, u32 index);
+void EmitCompositeInsertF16x2(EmitContext& ctx, std::string_view composite, std::string_view object,
+ u32 index);
+void EmitCompositeInsertF16x3(EmitContext& ctx, std::string_view composite, std::string_view object,
+ u32 index);
+void EmitCompositeInsertF16x4(EmitContext& ctx, std::string_view composite, std::string_view object,
+ u32 index);
+void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
+ std::string_view e2);
+void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
+ std::string_view e2, std::string_view e3);
+void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1,
+ std::string_view e2, std::string_view e3, std::string_view e4);
+void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
+ u32 index);
+void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
+ u32 index);
+void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
+ u32 index);
+void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
+ std::string_view object, u32 index);
+void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
+ std::string_view object, u32 index);
+void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite,
+ std::string_view object, u32 index);
+void EmitCompositeConstructF64x2(EmitContext& ctx);
+void EmitCompositeConstructF64x3(EmitContext& ctx);
+void EmitCompositeConstructF64x4(EmitContext& ctx);
+void EmitCompositeExtractF64x2(EmitContext& ctx);
+void EmitCompositeExtractF64x3(EmitContext& ctx);
+void EmitCompositeExtractF64x4(EmitContext& ctx);
+void EmitCompositeInsertF64x2(EmitContext& ctx, std::string_view composite, std::string_view object,
+ u32 index);
+void EmitCompositeInsertF64x3(EmitContext& ctx, std::string_view composite, std::string_view object,
+ u32 index);
+void EmitCompositeInsertF64x4(EmitContext& ctx, std::string_view composite, std::string_view object,
+ u32 index);
+void EmitSelectU1(EmitContext& ctx, IR::Inst& inst, std::string_view cond,
+ std::string_view true_value, std::string_view false_value);
+void EmitSelectU8(EmitContext& ctx, std::string_view cond, std::string_view true_value,
+ std::string_view false_value);
+void EmitSelectU16(EmitContext& ctx, std::string_view cond, std::string_view true_value,
+ std::string_view false_value);
+void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, std::string_view cond,
+ std::string_view true_value, std::string_view false_value);
+void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, std::string_view cond,
+ std::string_view true_value, std::string_view false_value);
+void EmitSelectF16(EmitContext& ctx, std::string_view cond, std::string_view true_value,
+ std::string_view false_value);
+void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, std::string_view cond,
+ std::string_view true_value, std::string_view false_value);
+void EmitSelectF64(EmitContext& ctx, IR::Inst& inst, std::string_view cond,
+ std::string_view true_value, std::string_view false_value);
+void EmitBitCastU16F16(EmitContext& ctx, IR::Inst& inst);
+void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitBitCastF16U16(EmitContext& ctx, IR::Inst& inst);
+void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitPackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitUnpackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitGetZeroFromOp(EmitContext& ctx);
+void EmitGetSignFromOp(EmitContext& ctx);
+void EmitGetCarryFromOp(EmitContext& ctx);
+void EmitGetOverflowFromOp(EmitContext& ctx);
+void EmitGetSparseFromOp(EmitContext& ctx);
+void EmitGetInBoundsFromOp(EmitContext& ctx);
+void EmitFPAbs16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPAdd16(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitFPFma16(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b,
+ std::string_view c);
+void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b,
+ std::string_view c);
+void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b,
+ std::string_view c);
+void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitFPMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitFPMul16(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitFPNeg16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPSin(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPCos(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPExp2(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPRecip64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPRecipSqrt32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPRecipSqrt64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPSaturate16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPSaturate64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPClamp16(EmitContext& ctx, IR::Inst& inst, std::string_view value,
+ std::string_view min_value, std::string_view max_value);
+void EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value,
+ std::string_view min_value, std::string_view max_value);
+void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, std::string_view value,
+ std::string_view min_value, std::string_view max_value);
+void EmitFPRoundEven16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPFloor16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPCeil16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPTrunc16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPOrdEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs);
+void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs);
+void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs);
+void EmitFPUnordEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs);
+void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitFPOrdNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs);
+void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitFPUnordNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs);
+void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitFPOrdLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs);
+void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitFPUnordLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs);
+void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitFPOrdGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs);
+void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitFPUnordGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs);
+void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitFPOrdLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs);
+void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitFPUnordLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs);
+void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs);
+void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs);
+void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitFPIsNan16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitISub32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitISub64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitIMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitINeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, std::string_view base,
+ std::string_view shift);
+void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, std::string_view base,
+ std::string_view shift);
+void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, std::string_view base,
+ std::string_view shift);
+void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, std::string_view base,
+ std::string_view shift);
+void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, std::string_view base,
+ std::string_view shift);
+void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, std::string_view base,
+ std::string_view shift);
+void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, std::string_view base,
+ std::string_view insert, std::string_view offset, std::string_view count);
+void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base,
+ std::string_view offset, std::string_view count);
+void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base,
+ std::string_view offset, std::string_view count);
+void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min,
+ std::string_view max);
+void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min,
+ std::string_view max);
+void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs);
+void EmitULessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs);
+void EmitIEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs);
+void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs);
+void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs);
+void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs);
+void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs);
+void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+ std::string_view value);
+void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+ std::string_view value);
+void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+ std::string_view value);
+void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+ std::string_view value);
+void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+ std::string_view value);
+void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+ std::string_view value);
+void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+ std::string_view value);
+void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+ std::string_view value);
+void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+ std::string_view value);
+void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+ std::string_view value);
+void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+ std::string_view value);
+void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+ std::string_view value);
+void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicAddF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicMinF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicMaxF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset, std::string_view value);
+void EmitGlobalAtomicIAdd32(EmitContext& ctx);
+void EmitGlobalAtomicSMin32(EmitContext& ctx);
+void EmitGlobalAtomicUMin32(EmitContext& ctx);
+void EmitGlobalAtomicSMax32(EmitContext& ctx);
+void EmitGlobalAtomicUMax32(EmitContext& ctx);
+void EmitGlobalAtomicInc32(EmitContext& ctx);
+void EmitGlobalAtomicDec32(EmitContext& ctx);
+void EmitGlobalAtomicAnd32(EmitContext& ctx);
+void EmitGlobalAtomicOr32(EmitContext& ctx);
+void EmitGlobalAtomicXor32(EmitContext& ctx);
+void EmitGlobalAtomicExchange32(EmitContext& ctx);
+void EmitGlobalAtomicIAdd64(EmitContext& ctx);
+void EmitGlobalAtomicSMin64(EmitContext& ctx);
+void EmitGlobalAtomicUMin64(EmitContext& ctx);
+void EmitGlobalAtomicSMax64(EmitContext& ctx);
+void EmitGlobalAtomicUMax64(EmitContext& ctx);
+void EmitGlobalAtomicInc64(EmitContext& ctx);
+void EmitGlobalAtomicDec64(EmitContext& ctx);
+void EmitGlobalAtomicAnd64(EmitContext& ctx);
+void EmitGlobalAtomicOr64(EmitContext& ctx);
+void EmitGlobalAtomicXor64(EmitContext& ctx);
+void EmitGlobalAtomicExchange64(EmitContext& ctx);
+void EmitGlobalAtomicAddF32(EmitContext& ctx);
+void EmitGlobalAtomicAddF16x2(EmitContext& ctx);
+void EmitGlobalAtomicAddF32x2(EmitContext& ctx);
+void EmitGlobalAtomicMinF16x2(EmitContext& ctx);
+void EmitGlobalAtomicMinF32x2(EmitContext& ctx);
+void EmitGlobalAtomicMaxF16x2(EmitContext& ctx);
+void EmitGlobalAtomicMaxF32x2(EmitContext& ctx);
+void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b);
+void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertS16F16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertS16F64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertS32F16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertS64F16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertU16F16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertU16F32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertU16F64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertU32F16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertU32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertU64F16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF16F32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF32F16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF16S8(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF16S16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF16S32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF16S64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF16U8(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF16U16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF16U32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF16U64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF32S8(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF32S16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF32S32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF32U8(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF64S8(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF64S16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF64S64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF64U8(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF64U16(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
+void EmitBindlessImageSampleImplicitLod(EmitContext&);
+void EmitBindlessImageSampleExplicitLod(EmitContext&);
+void EmitBindlessImageSampleDrefImplicitLod(EmitContext&);
+void EmitBindlessImageSampleDrefExplicitLod(EmitContext&);
+void EmitBindlessImageGather(EmitContext&);
+void EmitBindlessImageGatherDref(EmitContext&);
+void EmitBindlessImageFetch(EmitContext&);
+void EmitBindlessImageQueryDimensions(EmitContext&);
+void EmitBindlessImageQueryLod(EmitContext&);
+void EmitBindlessImageGradient(EmitContext&);
+void EmitBindlessImageRead(EmitContext&);
+void EmitBindlessImageWrite(EmitContext&);
+void EmitBoundImageSampleImplicitLod(EmitContext&);
+void EmitBoundImageSampleExplicitLod(EmitContext&);
+void EmitBoundImageSampleDrefImplicitLod(EmitContext&);
+void EmitBoundImageSampleDrefExplicitLod(EmitContext&);
+void EmitBoundImageGather(EmitContext&);
+void EmitBoundImageGatherDref(EmitContext&);
+void EmitBoundImageFetch(EmitContext&);
+void EmitBoundImageQueryDimensions(EmitContext&);
+void EmitBoundImageQueryLod(EmitContext&);
+void EmitBoundImageGradient(EmitContext&);
+void EmitBoundImageRead(EmitContext&);
+void EmitBoundImageWrite(EmitContext&);
+void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view bias_lc,
+ const IR::Value& offset);
+void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view lod_lc,
+ const IR::Value& offset);
+void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view dref,
+ std::string_view bias_lc, const IR::Value& offset);
+void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view dref,
+ std::string_view lod_lc, const IR::Value& offset);
+void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, const IR::Value& offset, const IR::Value& offset2);
+void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, const IR::Value& offset, const IR::Value& offset2,
+ std::string_view dref);
+void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view offset, std::string_view lod,
+ std::string_view ms);
+void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view lod);
+void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords);
+void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, const IR::Value& derivatives,
+ const IR::Value& offset, const IR::Value& lod_clamp);
+void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords);
+void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view color);
+void EmitBindlessImageAtomicIAdd32(EmitContext&);
+void EmitBindlessImageAtomicSMin32(EmitContext&);
+void EmitBindlessImageAtomicUMin32(EmitContext&);
+void EmitBindlessImageAtomicSMax32(EmitContext&);
+void EmitBindlessImageAtomicUMax32(EmitContext&);
+void EmitBindlessImageAtomicInc32(EmitContext&);
+void EmitBindlessImageAtomicDec32(EmitContext&);
+void EmitBindlessImageAtomicAnd32(EmitContext&);
+void EmitBindlessImageAtomicOr32(EmitContext&);
+void EmitBindlessImageAtomicXor32(EmitContext&);
+void EmitBindlessImageAtomicExchange32(EmitContext&);
+void EmitBoundImageAtomicIAdd32(EmitContext&);
+void EmitBoundImageAtomicSMin32(EmitContext&);
+void EmitBoundImageAtomicUMin32(EmitContext&);
+void EmitBoundImageAtomicSMax32(EmitContext&);
+void EmitBoundImageAtomicUMax32(EmitContext&);
+void EmitBoundImageAtomicInc32(EmitContext&);
+void EmitBoundImageAtomicDec32(EmitContext&);
+void EmitBoundImageAtomicAnd32(EmitContext&);
+void EmitBoundImageAtomicOr32(EmitContext&);
+void EmitBoundImageAtomicXor32(EmitContext&);
+void EmitBoundImageAtomicExchange32(EmitContext&);
+void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view value);
+void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view value);
+void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view value);
+void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view value);
+void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view value);
+void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view value);
+void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view value);
+void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view value);
+void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view value);
+void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view value);
+void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ std::string_view coords, std::string_view value);
+void EmitLaneId(EmitContext& ctx, IR::Inst& inst);
+void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred);
+void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred);
+void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred);
+void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred);
+void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst);
+void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst);
+void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst);
+void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst);
+void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst);
+void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value,
+ std::string_view index, std::string_view clamp,
+ std::string_view segmentation_mask);
+void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index,
+ std::string_view clamp, std::string_view segmentation_mask);
+void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value,
+ std::string_view index, std::string_view clamp,
+ std::string_view segmentation_mask);
+void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value,
+ std::string_view index, std::string_view clamp,
+ std::string_view segmentation_mask);
+void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, std::string_view op_a, std::string_view op_b,
+ std::string_view swizzle);
+void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a);
+void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a);
+void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a);
+void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a);
+
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp
new file mode 100644
index 000000000..38419f88f
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp
@@ -0,0 +1,253 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glsl/emit_context.h"
+#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLSL {
+namespace {
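+// Helpers that forward the zero/sign condition codes requested through pseudo-operations:
+// if an instruction has an associated GetZeroFromOp/GetSignFromOp, emit the boolean test on
+// the result and invalidate the pseudo-op.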
+void SetZeroFlag(EmitContext& ctx, IR::Inst& inst, std::string_view result) {
+ IR::Inst* const zero{inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp)};
+ if (!zero) {
+ return;
+ }
+ ctx.AddU1("{}={}==0;", *zero, result);
+ zero->Invalidate();
+}
+
+void SetSignFlag(EmitContext& ctx, IR::Inst& inst, std::string_view result) {
+ IR::Inst* const sign{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp)};
+ if (!sign) {
+ return;
+ }
+ ctx.AddU1("{}=int({})<0;", *sign, result);
+ sign->Invalidate();
+}
+
+void BitwiseLogicalOp(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b,
+ char lop) {
+ const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)};
+ ctx.Add("{}={}{}{};", result, a, lop, b);
+ SetZeroFlag(ctx, inst, result);
+ SetSignFlag(ctx, inst, result);
+}
+} // Anonymous namespace
+
+void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
+ // Compute the overflow CC first as it requires the original operand values,
+ // which may be overwritten by the result of the addition
+ if (IR::Inst * overflow{inst.GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp)}) {
+ // https://stackoverflow.com/questions/55468823/how-to-detect-integer-overflow-in-c
+ constexpr u32 s32_max{static_cast<u32>(std::numeric_limits<s32>::max())};
+ const auto sub_a{fmt::format("{}u-{}", s32_max, a)};
+ const auto positive_result{fmt::format("int({})>int({})", b, sub_a)};
+ const auto negative_result{fmt::format("int({})<int({})", b, sub_a)};
+ ctx.AddU1("{}=int({})>=0?{}:{};", *overflow, a, positive_result, negative_result);
+ overflow->Invalidate();
+ }
+ const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)};
+ if (IR::Inst* const carry{inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp)}) {
+ ctx.uses_cc_carry = true;
+ ctx.Add("{}=uaddCarry({},{},carry);", result, a, b);
+ ctx.AddU1("{}=carry!=0;", *carry);
+ carry->Invalidate();
+ } else {
+ ctx.Add("{}={}+{};", result, a, b);
+ }
+ SetZeroFlag(ctx, inst, result);
+ SetSignFlag(ctx, inst, result);
+}
+
+void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
+ ctx.AddU64("{}={}+{};", inst, a, b);
+}
+
+void EmitISub32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
+ ctx.AddU32("{}={}-{};", inst, a, b);
+}
+
+void EmitISub64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
+ ctx.AddU64("{}={}-{};", inst, a, b);
+}
+
+void EmitIMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
+ ctx.AddU32("{}=uint({}*{});", inst, a, b);
+}
+
+void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU32("{}=uint(-({}));", inst, value);
+}
+
+void EmitINeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU64("{}=-({});", inst, value);
+}
+
+void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU32("{}=abs(int({}));", inst, value);
+}
+
+void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, std::string_view base,
+ std::string_view shift) {
+ ctx.AddU32("{}={}<<{};", inst, base, shift);
+}
+
+void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, std::string_view base,
+ std::string_view shift) {
+ ctx.AddU64("{}={}<<{};", inst, base, shift);
+}
+
+void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, std::string_view base,
+ std::string_view shift) {
+ ctx.AddU32("{}={}>>{};", inst, base, shift);
+}
+
+void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, std::string_view base,
+ std::string_view shift) {
+ ctx.AddU64("{}={}>>{};", inst, base, shift);
+}
+
+void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, std::string_view base,
+ std::string_view shift) {
+ ctx.AddU32("{}=int({})>>{};", inst, base, shift);
+}
+
+void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, std::string_view base,
+ std::string_view shift) {
+ ctx.AddU64("{}=int64_t({})>>{};", inst, base, shift);
+}
+
+void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
+ BitwiseLogicalOp(ctx, inst, a, b, '&');
+}
+
+void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
+ BitwiseLogicalOp(ctx, inst, a, b, '|');
+}
+
+void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
+ BitwiseLogicalOp(ctx, inst, a, b, '^');
+}
+
+void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, std::string_view base,
+ std::string_view insert, std::string_view offset, std::string_view count) {
+ ctx.AddU32("{}=bitfieldInsert({},{},int({}),int({}));", inst, base, insert, offset, count);
+}
+
+void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base,
+ std::string_view offset, std::string_view count) {
+ const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)};
+ ctx.Add("{}=uint(bitfieldExtract(int({}),int({}),int({})));", result, base, offset, count);
+ SetZeroFlag(ctx, inst, result);
+ SetSignFlag(ctx, inst, result);
+}
+
+void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base,
+ std::string_view offset, std::string_view count) {
+ const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)};
+ ctx.Add("{}=uint(bitfieldExtract(uint({}),int({}),int({})));", result, base, offset, count);
+ SetZeroFlag(ctx, inst, result);
+ SetSignFlag(ctx, inst, result);
+}
+
+void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU32("{}=bitfieldReverse({});", inst, value);
+}
+
+void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU32("{}=bitCount({});", inst, value);
+}
+
+void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU32("{}=~{};", inst, value);
+}
+
+void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU32("{}=findMSB(int({}));", inst, value);
+}
+
+void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU32("{}=findMSB(uint({}));", inst, value);
+}
+
+void EmitSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
+ ctx.AddU32("{}=min(int({}),int({}));", inst, a, b);
+}
+
+void EmitUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
+ ctx.AddU32("{}=min(uint({}),uint({}));", inst, a, b);
+}
+
+void EmitSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
+ ctx.AddU32("{}=max(int({}),int({}));", inst, a, b);
+}
+
+void EmitUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
+ ctx.AddU32("{}=max(uint({}),uint({}));", inst, a, b);
+}
+
+void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min,
+ std::string_view max) {
+ const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)};
+ ctx.Add("{}=clamp(int({}),int({}),int({}));", result, value, min, max);
+ SetZeroFlag(ctx, inst, result);
+ SetSignFlag(ctx, inst, result);
+}
+
+void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min,
+ std::string_view max) {
+ const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)};
+ ctx.Add("{}=clamp(uint({}),uint({}),uint({}));", result, value, min, max);
+ SetZeroFlag(ctx, inst, result);
+ SetSignFlag(ctx, inst, result);
+}
+
+void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) {
+ ctx.AddU1("{}=int({})<int({});", inst, lhs, rhs);
+}
+
+void EmitULessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) {
+ ctx.AddU1("{}=uint({})<uint({});", inst, lhs, rhs);
+}
+
+void EmitIEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) {
+ ctx.AddU1("{}={}=={};", inst, lhs, rhs);
+}
+
+void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ ctx.AddU1("{}=int({})<=int({});", inst, lhs, rhs);
+}
+
+void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ ctx.AddU1("{}=uint({})<=uint({});", inst, lhs, rhs);
+}
+
+void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ ctx.AddU1("{}=int({})>int({});", inst, lhs, rhs);
+}
+
+void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ ctx.AddU1("{}=uint({})>uint({});", inst, lhs, rhs);
+}
+
+void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) {
+ ctx.AddU1("{}={}!={};", inst, lhs, rhs);
+}
+
+void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ ctx.AddU1("{}=int({})>=int({});", inst, lhs, rhs);
+}
+
+void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs,
+ std::string_view rhs) {
+ ctx.AddU1("{}=uint({})>=uint({});", inst, lhs, rhs);
+}
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp
new file mode 100644
index 000000000..338ff4bd6
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp
@@ -0,0 +1,28 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glsl/emit_context.h"
+#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLSL {
+
+void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
+ ctx.AddU1("{}={}||{};", inst, a, b);
+}
+
+void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
+ ctx.AddU1("{}={}&&{};", inst, a, b);
+}
+
+void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
+ ctx.AddU1("{}={}^^{};", inst, a, b);
+}
+
+void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
+ ctx.AddU1("{}=!{};", inst, value);
+}
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp
new file mode 100644
index 000000000..e3957491f
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp
@@ -0,0 +1,202 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glsl/emit_context.h"
+#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/profile.h"
+
+namespace Shader::Backend::GLSL {
+namespace {
+constexpr char cas_loop[]{"for(;;){{uint old_value={};uint "
+ "cas_result=atomicCompSwap({},old_value,bitfieldInsert({},{},{},{}));"
+ "if(cas_result==old_value){{break;}}}}"};
+
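+// SSBOs are addressed as arrays of 32-bit words, so byte and halfword stores are emulated
+// with a compare-and-swap loop that bit-inserts the value into the containing word and
+// retries until the swap succeeds.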
+void SsboWriteCas(EmitContext& ctx, const IR::Value& binding, std::string_view offset_var,
+ std::string_view value, std::string_view bit_offset, u32 num_bits) {
+ const auto ssbo{fmt::format("{}_ssbo{}[{}>>2]", ctx.stage_name, binding.U32(), offset_var)};
+ ctx.Add(cas_loop, ssbo, ssbo, ssbo, value, bit_offset, num_bits);
+}
+} // Anonymous namespace
+
+void EmitLoadGlobalU8(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitLoadGlobalS8(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitLoadGlobalU16(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitLoadGlobalS16(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, std::string_view address) {
+ if (ctx.profile.support_int64) {
+ return ctx.AddU32("{}=LoadGlobal32({});", inst, address);
+ }
+ LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation");
+ ctx.AddU32("{}=0u;", inst);
+}
+
+void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, std::string_view address) {
+ if (ctx.profile.support_int64) {
+ return ctx.AddU32x2("{}=LoadGlobal64({});", inst, address);
+ }
+ LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation");
+ ctx.AddU32x2("{}=uvec2(0);", inst);
+}
+
+void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, std::string_view address) {
+ if (ctx.profile.support_int64) {
+ return ctx.AddU32x4("{}=LoadGlobal128({});", inst, address);
+ }
+ LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation");
+ ctx.AddU32x4("{}=uvec4(0);", inst);
+}
+
+void EmitWriteGlobalU8(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitWriteGlobalS8(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitWriteGlobalU16(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitWriteGlobalS16(EmitContext&) {
+ NotImplemented();
+}
+
+void EmitWriteGlobal32(EmitContext& ctx, std::string_view address, std::string_view value) {
+ if (ctx.profile.support_int64) {
+ return ctx.Add("WriteGlobal32({},{});", address, value);
+ }
+ LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation");
+}
+
+void EmitWriteGlobal64(EmitContext& ctx, std::string_view address, std::string_view value) {
+ if (ctx.profile.support_int64) {
+ return ctx.Add("WriteGlobal64({},{});", address, value);
+ }
+ LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation");
+}
+
+void EmitWriteGlobal128(EmitContext& ctx, std::string_view address, std::string_view value) {
+ if (ctx.profile.support_int64) {
+ return ctx.Add("WriteGlobal128({},{});", address, value);
+ }
+ LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation");
+}
+
+void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset) {
+ const auto offset_var{ctx.var_alloc.Consume(offset)};
+ ctx.AddU32("{}=bitfieldExtract({}_ssbo{}[{}>>2],int({}%4)*8,8);", inst, ctx.stage_name,
+ binding.U32(), offset_var, offset_var);
+}
+
+void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset) {
+ const auto offset_var{ctx.var_alloc.Consume(offset)};
+ ctx.AddU32("{}=bitfieldExtract(int({}_ssbo{}[{}>>2]),int({}%4)*8,8);", inst, ctx.stage_name,
+ binding.U32(), offset_var, offset_var);
+}
+
+void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset) {
+ const auto offset_var{ctx.var_alloc.Consume(offset)};
+ ctx.AddU32("{}=bitfieldExtract({}_ssbo{}[{}>>2],int(({}>>1)%2)*16,16);", inst, ctx.stage_name,
+ binding.U32(), offset_var, offset_var);
+}
+
+void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset) {
+ const auto offset_var{ctx.var_alloc.Consume(offset)};
+ ctx.AddU32("{}=bitfieldExtract(int({}_ssbo{}[{}>>2]),int(({}>>1)%2)*16,16);", inst,
+ ctx.stage_name, binding.U32(), offset_var, offset_var);
+}
+
+void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset) {
+ const auto offset_var{ctx.var_alloc.Consume(offset)};
+ ctx.AddU32("{}={}_ssbo{}[{}>>2];", inst, ctx.stage_name, binding.U32(), offset_var);
+}
+
+void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset) {
+ const auto offset_var{ctx.var_alloc.Consume(offset)};
+ ctx.AddU32x2("{}=uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}+4)>>2]);", inst, ctx.stage_name,
+ binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var);
+}
+
+void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+ const IR::Value& offset) {
+ const auto offset_var{ctx.var_alloc.Consume(offset)};
+ ctx.AddU32x4("{}=uvec4({}_ssbo{}[{}>>2],{}_ssbo{}[({}+4)>>2],{}_ssbo{}[({}+8)>>2],{}_ssbo{}[({}"
+ "+12)>>2]);",
+ inst, ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(),
+ offset_var, ctx.stage_name, binding.U32(), offset_var, ctx.stage_name,
+ binding.U32(), offset_var);
+}
+
+void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ std::string_view value) {
+ const auto offset_var{ctx.var_alloc.Consume(offset)};
+ const auto bit_offset{fmt::format("int({}%4)*8", offset_var)};
+ SsboWriteCas(ctx, binding, offset_var, value, bit_offset, 8);
+}
+
+void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ std::string_view value) {
+ const auto offset_var{ctx.var_alloc.Consume(offset)};
+ const auto bit_offset{fmt::format("int({}%4)*8", offset_var)};
+ SsboWriteCas(ctx, binding, offset_var, value, bit_offset, 8);
+}
+
+void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ std::string_view value) {
+ const auto offset_var{ctx.var_alloc.Consume(offset)};
+ const auto bit_offset{fmt::format("int(({}>>1)%2)*16", offset_var)};
+ SsboWriteCas(ctx, binding, offset_var, value, bit_offset, 16);
+}
+
+void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ std::string_view value) {
+ const auto offset_var{ctx.var_alloc.Consume(offset)};
+ const auto bit_offset{fmt::format("int(({}>>1)%2)*16", offset_var)};
+ SsboWriteCas(ctx, binding, offset_var, value, bit_offset, 16);
+}
+
+void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ std::string_view value) {
+ const auto offset_var{ctx.var_alloc.Consume(offset)};
+ ctx.Add("{}_ssbo{}[{}>>2]={};", ctx.stage_name, binding.U32(), offset_var, value);
+}
+
+void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ std::string_view value) {
+ const auto offset_var{ctx.var_alloc.Consume(offset)};
+ ctx.Add("{}_ssbo{}[{}>>2]={}.x;", ctx.stage_name, binding.U32(), offset_var, value);
+ ctx.Add("{}_ssbo{}[({}+4)>>2]={}.y;", ctx.stage_name, binding.U32(), offset_var, value);
+}
+
+void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ std::string_view value) {
+ const auto offset_var{ctx.var_alloc.Consume(offset)};
+ ctx.Add("{}_ssbo{}[{}>>2]={}.x;", ctx.stage_name, binding.U32(), offset_var, value);
+ ctx.Add("{}_ssbo{}[({}+4)>>2]={}.y;", ctx.stage_name, binding.U32(), offset_var, value);
+ ctx.Add("{}_ssbo{}[({}+8)>>2]={}.z;", ctx.stage_name, binding.U32(), offset_var, value);
+ ctx.Add("{}_ssbo{}[({}+12)>>2]={}.w;", ctx.stage_name, binding.U32(), offset_var, value);
+}
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp
new file mode 100644
index 000000000..f420fe388
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp
@@ -0,0 +1,105 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glsl/emit_context.h"
+#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+#ifdef _MSC_VER
+#pragma warning(disable : 4100)
+#endif
+
+namespace Shader::Backend::GLSL {
+
+void EmitGetRegister(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitSetRegister(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetPred(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitSetPred(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitSetGotoVariable(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetGotoVariable(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitSetIndirectBranchVariable(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetIndirectBranchVariable(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetZFlag(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetSFlag(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetCFlag(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetOFlag(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitSetZFlag(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitSetSFlag(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitSetCFlag(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitSetOFlag(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetZeroFromOp(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetSignFromOp(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetCarryFromOp(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetOverflowFromOp(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetSparseFromOp(EmitContext& ctx) {
+ NotImplemented();
+}
+
+void EmitGetInBoundsFromOp(EmitContext& ctx) {
+ NotImplemented();
+}
+
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp
new file mode 100644
index 000000000..49fba9073
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp
@@ -0,0 +1,55 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glsl/emit_context.h"
+#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLSL {
+void EmitSelectU1(EmitContext& ctx, IR::Inst& inst, std::string_view cond,
+ std::string_view true_value, std::string_view false_value) {
+ ctx.AddU1("{}={}?{}:{};", inst, cond, true_value, false_value);
+}
+
+void EmitSelectU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond,
+ [[maybe_unused]] std::string_view true_value,
+ [[maybe_unused]] std::string_view false_value) {
+ NotImplemented();
+}
+
+void EmitSelectU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond,
+ [[maybe_unused]] std::string_view true_value,
+ [[maybe_unused]] std::string_view false_value) {
+ NotImplemented();
+}
+
+void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, std::string_view cond,
+ std::string_view true_value, std::string_view false_value) {
+ ctx.AddU32("{}={}?{}:{};", inst, cond, true_value, false_value);
+}
+
+void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, std::string_view cond,
+ std::string_view true_value, std::string_view false_value) {
+ ctx.AddU64("{}={}?{}:{};", inst, cond, true_value, false_value);
+}
+
+void EmitSelectF16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond,
+ [[maybe_unused]] std::string_view true_value,
+ [[maybe_unused]] std::string_view false_value) {
+ NotImplemented();
+}
+
+void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, std::string_view cond,
+ std::string_view true_value, std::string_view false_value) {
+ ctx.AddF32("{}={}?{}:{};", inst, cond, true_value, false_value);
+}
+
+void EmitSelectF64(EmitContext& ctx, IR::Inst& inst, std::string_view cond,
+ std::string_view true_value, std::string_view false_value) {
+ ctx.AddF64("{}={}?{}:{};", inst, cond, true_value, false_value);
+}
+
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp
new file mode 100644
index 000000000..518b78f06
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp
@@ -0,0 +1,79 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glsl/emit_context.h"
+#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLSL {
+namespace {
+constexpr char cas_loop[]{"for(;;){{uint old_value={};uint "
+ "cas_result=atomicCompSwap({},old_value,bitfieldInsert({},{},{},{}));"
+ "if(cas_result==old_value){{break;}}}}"};
+
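+// Shared memory is addressed as 32-bit words, so byte and halfword stores reuse the same
+// compare-and-swap insertion loop as the SSBO path.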
+void SharedWriteCas(EmitContext& ctx, std::string_view offset, std::string_view value,
+ std::string_view bit_offset, u32 num_bits) {
+ const auto smem{fmt::format("smem[{}>>2]", offset)};
+ ctx.Add(cas_loop, smem, smem, smem, value, bit_offset, num_bits);
+}
+} // Anonymous namespace
+
+void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, std::string_view offset) {
+ ctx.AddU32("{}=bitfieldExtract(smem[{}>>2],int({}%4)*8,8);", inst, offset, offset);
+}
+
+void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, std::string_view offset) {
+ ctx.AddU32("{}=bitfieldExtract(int(smem[{}>>2]),int({}%4)*8,8);", inst, offset, offset);
+}
+
+void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, std::string_view offset) {
+ ctx.AddU32("{}=bitfieldExtract(smem[{}>>2],int(({}>>1)%2)*16,16);", inst, offset, offset);
+}
+
+void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, std::string_view offset) {
+ ctx.AddU32("{}=bitfieldExtract(int(smem[{}>>2]),int(({}>>1)%2)*16,16);", inst, offset, offset);
+}
+
+void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, std::string_view offset) {
+ ctx.AddU32("{}=smem[{}>>2];", inst, offset);
+}
+
+void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, std::string_view offset) {
+ ctx.AddU32x2("{}=uvec2(smem[{}>>2],smem[({}+4)>>2]);", inst, offset, offset);
+}
+
+void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, std::string_view offset) {
+ ctx.AddU32x4("{}=uvec4(smem[{}>>2],smem[({}+4)>>2],smem[({}+8)>>2],smem[({}+12)>>2]);", inst,
+ offset, offset, offset, offset);
+}
+
+void EmitWriteSharedU8(EmitContext& ctx, std::string_view offset, std::string_view value) {
+ const auto bit_offset{fmt::format("int({}%4)*8", offset)};
+ SharedWriteCas(ctx, offset, value, bit_offset, 8);
+}
+
+void EmitWriteSharedU16(EmitContext& ctx, std::string_view offset, std::string_view value) {
+ const auto bit_offset{fmt::format("int(({}>>1)%2)*16", offset)};
+ SharedWriteCas(ctx, offset, value, bit_offset, 16);
+}
+
+void EmitWriteSharedU32(EmitContext& ctx, std::string_view offset, std::string_view value) {
+ ctx.Add("smem[{}>>2]={};", offset, value);
+}
+
+void EmitWriteSharedU64(EmitContext& ctx, std::string_view offset, std::string_view value) {
+ ctx.Add("smem[{}>>2]={}.x;", offset, value);
+ ctx.Add("smem[({}+4)>>2]={}.y;", offset, value);
+}
+
+void EmitWriteSharedU128(EmitContext& ctx, std::string_view offset, std::string_view value) {
+ ctx.Add("smem[{}>>2]={}.x;", offset, value);
+ ctx.Add("smem[({}+4)>>2]={}.y;", offset, value);
+ ctx.Add("smem[({}+8)>>2]={}.z;", offset, value);
+ ctx.Add("smem[({}+12)>>2]={}.w;", offset, value);
+}
+
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp
new file mode 100644
index 000000000..9b866f889
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp
@@ -0,0 +1,111 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glsl/emit_context.h"
+#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/profile.h"
+
+namespace Shader::Backend::GLSL {
+namespace {
+std::string_view OutputVertexIndex(EmitContext& ctx) {
+ return ctx.stage == Stage::TessellationControl ? "[gl_InvocationID]" : "";
+}
+
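+// Writes default values to every generic output attribute the shader stores, so components
+// it never assigns read back as 0 (or 1 for the w component); gl_Position defaults to
+// (0,0,0,1) in vertex and geometry stages.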
+void InitializeOutputVaryings(EmitContext& ctx) {
+ if (ctx.uses_geometry_passthrough) {
+ return;
+ }
+ if (ctx.stage == Stage::VertexB || ctx.stage == Stage::Geometry) {
+ ctx.Add("gl_Position=vec4(0,0,0,1);");
+ }
+ for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
+ if (!ctx.info.stores.Generic(index)) {
+ continue;
+ }
+ const auto& info_array{ctx.output_generics.at(index)};
+ const auto output_decorator{OutputVertexIndex(ctx)};
+ size_t element{};
+ while (element < info_array.size()) {
+ const auto& info{info_array.at(element)};
+ const auto varying_name{fmt::format("{}{}", info.name, output_decorator)};
+ switch (info.num_components) {
+ case 1: {
+ const char value{element == 3 ? '1' : '0'};
+ ctx.Add("{}={}.f;", varying_name, value);
+ break;
+ }
+ case 2:
+ case 3:
+ if (element + info.num_components < 4) {
+ ctx.Add("{}=vec{}(0);", varying_name, info.num_components);
+ } else {
+ // The last element is the w component and must be initialized to 1
+ const auto zeros{info.num_components == 3 ? "0,0," : "0,"};
+ ctx.Add("{}=vec{}({}1);", varying_name, info.num_components, zeros);
+ }
+ break;
+ case 4:
+ ctx.Add("{}=vec4(0,0,0,1);", varying_name);
+ break;
+ default:
+ break;
+ }
+ element += info.num_components;
+ }
+ }
+}
+} // Anonymous namespace
+
+void EmitPhi(EmitContext& ctx, IR::Inst& phi) {
+ const size_t num_args{phi.NumArgs()};
+ for (size_t i = 0; i < num_args; ++i) {
+ ctx.var_alloc.Consume(phi.Arg(i));
+ }
+ if (!phi.Definition<Id>().is_valid) {
+ // The phi node wasn't forward defined
+ ctx.var_alloc.PhiDefine(phi, phi.Arg(0).Type());
+ }
+}
+
+void EmitVoid(EmitContext&) {}
+
+void EmitReference(EmitContext& ctx, const IR::Value& value) {
+ ctx.var_alloc.Consume(value);
+}
+
+void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value& value) {
+ IR::Inst& phi{*phi_value.InstRecursive()};
+ const auto phi_type{phi.Arg(0).Type()};
+ if (!phi.Definition<Id>().is_valid) {
+ // The phi node wasn't forward defined
+ ctx.var_alloc.PhiDefine(phi, phi_type);
+ }
+ const auto phi_reg{ctx.var_alloc.Consume(IR::Value{&phi})};
+ const auto val_reg{ctx.var_alloc.Consume(value)};
+ if (phi_reg == val_reg) {
+ return;
+ }
+ ctx.Add("{}={};", phi_reg, val_reg);
+}
+
+void EmitPrologue(EmitContext& ctx) {
+ InitializeOutputVaryings(ctx);
+}
+
+void EmitEpilogue(EmitContext&) {}
+
+void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) {
+ ctx.Add("EmitStreamVertex(int({}));", ctx.var_alloc.Consume(stream));
+ InitializeOutputVaryings(ctx);
+}
+
+void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) {
+ ctx.Add("EndStreamPrimitive(int({}));", ctx.var_alloc.Consume(stream));
+}
+
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_undefined.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_undefined.cpp
new file mode 100644
index 000000000..15bf02dd6
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_undefined.cpp
@@ -0,0 +1,32 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glsl/emit_context.h"
+#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
+
+namespace Shader::Backend::GLSL {
+
+void EmitUndefU1(EmitContext& ctx, IR::Inst& inst) {
+ ctx.AddU1("{}=false;", inst);
+}
+
+void EmitUndefU8(EmitContext& ctx, IR::Inst& inst) {
+ ctx.AddU32("{}=0u;", inst);
+}
+
+void EmitUndefU16(EmitContext& ctx, IR::Inst& inst) {
+ ctx.AddU32("{}=0u;", inst);
+}
+
+void EmitUndefU32(EmitContext& ctx, IR::Inst& inst) {
+ ctx.AddU32("{}=0u;", inst);
+}
+
+void EmitUndefU64(EmitContext& ctx, IR::Inst& inst) {
+ ctx.AddU64("{}=0u;", inst);
+}
+
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
new file mode 100644
index 000000000..a982dd8a2
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
@@ -0,0 +1,217 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glsl/emit_context.h"
+#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/profile.h"
+
+namespace Shader::Backend::GLSL {
+namespace {
+void SetInBoundsFlag(EmitContext& ctx, IR::Inst& inst) {
+ IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)};
+ if (!in_bounds) {
+ return;
+ }
+ ctx.AddU1("{}=shfl_in_bounds;", *in_bounds);
+ in_bounds->Invalidate();
+}
+
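+// Helpers for the readInvocationARB fallback path: the segmentation mask selects the
+// invocation-id bits that identify the shuffle segment, while the clamp supplies the
+// remaining bits of the highest lane that may be read within that segment.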
+std::string ComputeMinThreadId(std::string_view thread_id, std::string_view segmentation_mask) {
+ return fmt::format("({}&{})", thread_id, segmentation_mask);
+}
+
+std::string ComputeMaxThreadId(std::string_view min_thread_id, std::string_view clamp,
+ std::string_view not_seg_mask) {
+ return fmt::format("({})|({}&{})", min_thread_id, clamp, not_seg_mask);
+}
+
+std::string GetMaxThreadId(std::string_view thread_id, std::string_view clamp,
+ std::string_view segmentation_mask) {
+ const auto not_seg_mask{fmt::format("(~{})", segmentation_mask)};
+ const auto min_thread_id{ComputeMinThreadId(thread_id, segmentation_mask)};
+ return ComputeMaxThreadId(min_thread_id, clamp, not_seg_mask);
+}
+
+void UseShuffleNv(EmitContext& ctx, IR::Inst& inst, std::string_view shfl_op,
+ std::string_view value, std::string_view index,
+ [[maybe_unused]] std::string_view clamp, std::string_view segmentation_mask) {
+ const auto width{fmt::format("32u>>(bitCount({}&31u))", segmentation_mask)};
+ ctx.AddU32("{}={}({},{},{},shfl_in_bounds);", inst, shfl_op, value, index, width);
+ SetInBoundsFlag(ctx, inst);
+}
+} // Anonymous namespace
+
+void EmitLaneId(EmitContext& ctx, IR::Inst& inst) {
+ ctx.AddU32("{}=gl_SubGroupInvocationARB&31u;", inst);
+}
+
+void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
+ if (!ctx.profile.warp_size_potentially_larger_than_guest) {
+ ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred);
+ } else {
+ const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")};
+ const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)};
+ ctx.AddU1("{}=({}&{})=={};", inst, ballot, active_mask, active_mask);
+ }
+}
+
+void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
+ if (!ctx.profile.warp_size_potentially_larger_than_guest) {
+ ctx.AddU1("{}=anyInvocationARB({});", inst, pred);
+ } else {
+ const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")};
+ const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)};
+ ctx.AddU1("{}=({}&{})!=0u;", inst, ballot, active_mask, active_mask);
+ }
+}
+
+void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
+ if (!ctx.profile.warp_size_potentially_larger_than_guest) {
+ ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred);
+ } else {
+ const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")};
+ const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)};
+ const auto value{fmt::format("({}^{})", ballot, active_mask)};
+ ctx.AddU1("{}=({}==0)||({}=={});", inst, value, value, active_mask);
+ }
+}
+
+void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
+ if (!ctx.profile.warp_size_potentially_larger_than_guest) {
+ ctx.AddU32("{}=uvec2(ballotARB({})).x;", inst, pred);
+ } else {
+ ctx.AddU32("{}=uvec2(ballotARB({}))[gl_SubGroupInvocationARB];", inst, pred);
+ }
+}
+
+void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) {
+ ctx.AddU32("{}=uint(gl_SubGroupEqMaskARB.x);", inst);
+}
+
+void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst) {
+ ctx.AddU32("{}=uint(gl_SubGroupLtMaskARB.x);", inst);
+}
+
+void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst) {
+ ctx.AddU32("{}=uint(gl_SubGroupLeMaskARB.x);", inst);
+}
+
+void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst) {
+ ctx.AddU32("{}=uint(gl_SubGroupGtMaskARB.x);", inst);
+}
+
+void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) {
+ ctx.AddU32("{}=uint(gl_SubGroupGeMaskARB.x);", inst);
+}
+
+void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value,
+ std::string_view index, std::string_view clamp,
+ std::string_view segmentation_mask) {
+ if (ctx.profile.support_gl_warp_intrinsics) {
+ UseShuffleNv(ctx, inst, "shuffleNV", value, index, clamp, segmentation_mask);
+ return;
+ }
+ const auto not_seg_mask{fmt::format("(~{})", segmentation_mask)};
+ const auto thread_id{"gl_SubGroupInvocationARB"};
+ const auto min_thread_id{ComputeMinThreadId(thread_id, segmentation_mask)};
+ const auto max_thread_id{ComputeMaxThreadId(min_thread_id, clamp, not_seg_mask)};
+
+ const auto lhs{fmt::format("({}&{})", index, not_seg_mask)};
+ const auto src_thread_id{fmt::format("({})|({})", lhs, min_thread_id)};
+ ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id);
+ SetInBoundsFlag(ctx, inst);
+ ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
+}
+
+void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index,
+ std::string_view clamp, std::string_view segmentation_mask) {
+ if (ctx.profile.support_gl_warp_intrinsics) {
+ UseShuffleNv(ctx, inst, "shuffleUpNV", value, index, clamp, segmentation_mask);
+ return;
+ }
+ const auto thread_id{"gl_SubGroupInvocationARB"};
+ const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)};
+ const auto src_thread_id{fmt::format("({}-{})", thread_id, index)};
+ ctx.Add("shfl_in_bounds=int({})>=int({});", src_thread_id, max_thread_id);
+ SetInBoundsFlag(ctx, inst);
+ ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
+}
+
+void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value,
+ std::string_view index, std::string_view clamp,
+ std::string_view segmentation_mask) {
+ if (ctx.profile.support_gl_warp_intrinsics) {
+ UseShuffleNv(ctx, inst, "shuffleDownNV", value, index, clamp, segmentation_mask);
+ return;
+ }
+ const auto thread_id{"gl_SubGroupInvocationARB"};
+ const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)};
+ const auto src_thread_id{fmt::format("({}+{})", thread_id, index)};
+ ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id);
+ SetInBoundsFlag(ctx, inst);
+ ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
+}
+
+void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value,
+ std::string_view index, std::string_view clamp,
+ std::string_view segmentation_mask) {
+ if (ctx.profile.support_gl_warp_intrinsics) {
+ UseShuffleNv(ctx, inst, "shuffleXorNV", value, index, clamp, segmentation_mask);
+ return;
+ }
+ const auto thread_id{"gl_SubGroupInvocationARB"};
+ const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)};
+ const auto src_thread_id{fmt::format("({}^{})", thread_id, index)};
+ ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id);
+ SetInBoundsFlag(ctx, inst);
+ ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
+}
+
+void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, std::string_view op_a, std::string_view op_b,
+ std::string_view swizzle) {
+ const auto mask{fmt::format("({}>>((gl_SubGroupInvocationARB&3)<<1))&3", swizzle)};
+ const auto modifier_a{fmt::format("FSWZ_A[{}]", mask)};
+ const auto modifier_b{fmt::format("FSWZ_B[{}]", mask)};
+ ctx.AddF32("{}=({}*{})+({}*{});", inst, op_a, modifier_a, op_b, modifier_b);
+}
+
+void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) {
+ if (ctx.profile.support_gl_derivative_control) {
+ ctx.AddF32("{}=dFdxFine({});", inst, op_a);
+ } else {
+ LOG_WARNING(Shader_GLSL, "Device does not support dFdxFine, fallback to dFdx");
+ ctx.AddF32("{}=dFdx({});", inst, op_a);
+ }
+}
+
+void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) {
+ if (ctx.profile.support_gl_derivative_control) {
+ ctx.AddF32("{}=dFdyFine({});", inst, op_a);
+ } else {
+ LOG_WARNING(Shader_GLSL, "Device does not support dFdyFine, fallback to dFdy");
+ ctx.AddF32("{}=dFdy({});", inst, op_a);
+ }
+}
+
+void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) {
+ if (ctx.profile.support_gl_derivative_control) {
+ ctx.AddF32("{}=dFdxCoarse({});", inst, op_a);
+ } else {
+ LOG_WARNING(Shader_GLSL, "Device does not support dFdxCoarse, fallback to dFdx");
+ ctx.AddF32("{}=dFdx({});", inst, op_a);
+ }
+}
+
+void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) {
+ if (ctx.profile.support_gl_derivative_control) {
+ ctx.AddF32("{}=dFdyCoarse({});", inst, op_a);
+ } else {
+ LOG_WARNING(Shader_GLSL, "Device does not support dFdyCoarse, fallback to dFdy");
+ ctx.AddF32("{}=dFdy({});", inst, op_a);
+ }
+}
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/var_alloc.cpp b/src/shader_recompiler/backend/glsl/var_alloc.cpp
new file mode 100644
index 000000000..194f926ca
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/var_alloc.cpp
@@ -0,0 +1,308 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string>
+#include <string_view>
+
+#include <fmt/format.h>
+
+#include "shader_recompiler/backend/glsl/var_alloc.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLSL {
+namespace {
+std::string TypePrefix(GlslVarType type) {
+ switch (type) {
+ case GlslVarType::U1:
+ return "b_";
+ case GlslVarType::F16x2:
+ return "f16x2_";
+ case GlslVarType::U32:
+ return "u_";
+ case GlslVarType::F32:
+ return "f_";
+ case GlslVarType::U64:
+ return "u64_";
+ case GlslVarType::F64:
+ return "d_";
+ case GlslVarType::U32x2:
+ return "u2_";
+ case GlslVarType::F32x2:
+ return "f2_";
+ case GlslVarType::U32x3:
+ return "u3_";
+ case GlslVarType::F32x3:
+ return "f3_";
+ case GlslVarType::U32x4:
+ return "u4_";
+ case GlslVarType::F32x4:
+ return "f4_";
+ case GlslVarType::PrecF32:
+ return "pf_";
+ case GlslVarType::PrecF64:
+ return "pd_";
+ case GlslVarType::Void:
+ return "";
+ default:
+ throw NotImplementedException("Type {}", type);
+ }
+}
+
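+// Renders an immediate as a valid GLSL floating-point literal: F32 nan/inf become utof()
+// bit patterns, scientific notation is wrapped in a cast, and a decimal point and type
+// suffix are appended when missing.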
+std::string FormatFloat(std::string_view value, IR::Type type) {
+ // TODO: Confirm FP64 nan/inf
+ if (type == IR::Type::F32) {
+ if (value == "nan") {
+ return "utof(0x7fc00000)";
+ }
+ if (value == "inf") {
+ return "utof(0x7f800000)";
+ }
+ if (value == "-inf") {
+ return "utof(0xff800000)";
+ }
+ }
+ if (value.find_first_of('e') != std::string_view::npos) {
+ // scientific notation
+ const auto cast{type == IR::Type::F32 ? "float" : "double"};
+ return fmt::format("{}({})", cast, value);
+ }
+ const bool needs_dot{value.find_first_of('.') == std::string_view::npos};
+ const bool needs_suffix{!value.ends_with('f')};
+ const auto suffix{type == IR::Type::F32 ? "f" : "lf"};
+ return fmt::format("{}{}{}", value, needs_dot ? "." : "", needs_suffix ? suffix : "");
+}
+
+std::string MakeImm(const IR::Value& value) {
+ switch (value.Type()) {
+ case IR::Type::U1:
+ return fmt::format("{}", value.U1() ? "true" : "false");
+ case IR::Type::U32:
+ return fmt::format("{}u", value.U32());
+ case IR::Type::F32:
+ return FormatFloat(fmt::format("{}", value.F32()), IR::Type::F32);
+ case IR::Type::U64:
+ return fmt::format("{}ul", value.U64());
+ case IR::Type::F64:
+ return FormatFloat(fmt::format("{}", value.F64()), IR::Type::F64);
+ case IR::Type::Void:
+ return "";
+ default:
+ throw NotImplementedException("Immediate type {}", value.Type());
+ }
+}
+} // Anonymous namespace
+
+std::string VarAlloc::Representation(u32 index, GlslVarType type) const {
+ const auto prefix{TypePrefix(type)};
+ return fmt::format("{}{}", prefix, index);
+}
+
+std::string VarAlloc::Representation(Id id) const {
+ return Representation(id.index, id.type);
+}
+
+std::string VarAlloc::Define(IR::Inst& inst, GlslVarType type) {
+ if (inst.HasUses()) {
+ inst.SetDefinition<Id>(Alloc(type));
+ return Representation(inst.Definition<Id>());
+ } else {
+ Id id{};
+ id.type.Assign(type);
+ GetUseTracker(type).uses_temp = true;
+ inst.SetDefinition<Id>(id);
+ return 't' + Representation(inst.Definition<Id>());
+ }
+}
+
+std::string VarAlloc::Define(IR::Inst& inst, IR::Type type) {
+ return Define(inst, RegType(type));
+}
+
+std::string VarAlloc::PhiDefine(IR::Inst& inst, IR::Type type) {
+ return AddDefine(inst, RegType(type));
+}
+
+std::string VarAlloc::AddDefine(IR::Inst& inst, GlslVarType type) {
+ if (inst.HasUses()) {
+ inst.SetDefinition<Id>(Alloc(type));
+ return Representation(inst.Definition<Id>());
+ }
+ return "";
+}
+
+std::string VarAlloc::Consume(const IR::Value& value) {
+ return value.IsImmediate() ? MakeImm(value) : ConsumeInst(*value.InstRecursive());
+}
+
+std::string VarAlloc::ConsumeInst(IR::Inst& inst) {
+ inst.DestructiveRemoveUsage();
+ if (!inst.HasUses()) {
+ Free(inst.Definition<Id>());
+ }
+ return Representation(inst.Definition<Id>());
+}
+
+std::string VarAlloc::GetGlslType(IR::Type type) const {
+ return GetGlslType(RegType(type));
+}
+
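+// Hands out the first free slot tracked for this type, or grows the pool with a new index
+// when every existing slot is in use.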
+Id VarAlloc::Alloc(GlslVarType type) {
+ auto& use_tracker{GetUseTracker(type)};
+ const auto num_vars{use_tracker.var_use.size()};
+ for (size_t var = 0; var < num_vars; ++var) {
+ if (use_tracker.var_use[var]) {
+ continue;
+ }
+ use_tracker.num_used = std::max(use_tracker.num_used, var + 1);
+ use_tracker.var_use[var] = true;
+ Id ret{};
+ ret.is_valid.Assign(1);
+ ret.type.Assign(type);
+ ret.index.Assign(static_cast<u32>(var));
+ return ret;
+ }
+ // Allocate a new variable
+ use_tracker.var_use.push_back(true);
+ Id ret{};
+ ret.is_valid.Assign(1);
+ ret.type.Assign(type);
+ ret.index.Assign(static_cast<u32>(use_tracker.num_used));
+ ++use_tracker.num_used;
+ return ret;
+}
+
+void VarAlloc::Free(Id id) {
+ if (id.is_valid == 0) {
+ throw LogicError("Freeing invalid variable");
+ }
+ auto& use_tracker{GetUseTracker(id.type)};
+ use_tracker.var_use[id.index] = false;
+}
+
+GlslVarType VarAlloc::RegType(IR::Type type) const {
+ switch (type) {
+ case IR::Type::U1:
+ return GlslVarType::U1;
+ case IR::Type::U32:
+ return GlslVarType::U32;
+ case IR::Type::F32:
+ return GlslVarType::F32;
+ case IR::Type::U64:
+ return GlslVarType::U64;
+ case IR::Type::F64:
+ return GlslVarType::F64;
+ default:
+ throw NotImplementedException("IR type {}", type);
+ }
+}
+
+std::string VarAlloc::GetGlslType(GlslVarType type) const {
+ switch (type) {
+ case GlslVarType::U1:
+ return "bool";
+ case GlslVarType::F16x2:
+ return "f16vec2";
+ case GlslVarType::U32:
+ return "uint";
+ case GlslVarType::F32:
+ case GlslVarType::PrecF32:
+ return "float";
+ case GlslVarType::U64:
+ return "uint64_t";
+ case GlslVarType::F64:
+ case GlslVarType::PrecF64:
+ return "double";
+ case GlslVarType::U32x2:
+ return "uvec2";
+ case GlslVarType::F32x2:
+ return "vec2";
+ case GlslVarType::U32x3:
+ return "uvec3";
+ case GlslVarType::F32x3:
+ return "vec3";
+ case GlslVarType::U32x4:
+ return "uvec4";
+ case GlslVarType::F32x4:
+ return "vec4";
+ case GlslVarType::Void:
+ return "";
+ default:
+ throw NotImplementedException("Type {}", type);
+ }
+}
+
+VarAlloc::UseTracker& VarAlloc::GetUseTracker(GlslVarType type) {
+ switch (type) {
+ case GlslVarType::U1:
+ return var_bool;
+ case GlslVarType::F16x2:
+ return var_f16x2;
+ case GlslVarType::U32:
+ return var_u32;
+ case GlslVarType::F32:
+ return var_f32;
+ case GlslVarType::U64:
+ return var_u64;
+ case GlslVarType::F64:
+ return var_f64;
+ case GlslVarType::U32x2:
+ return var_u32x2;
+ case GlslVarType::F32x2:
+ return var_f32x2;
+ case GlslVarType::U32x3:
+ return var_u32x3;
+ case GlslVarType::F32x3:
+ return var_f32x3;
+ case GlslVarType::U32x4:
+ return var_u32x4;
+ case GlslVarType::F32x4:
+ return var_f32x4;
+ case GlslVarType::PrecF32:
+ return var_precf32;
+ case GlslVarType::PrecF64:
+ return var_precf64;
+ default:
+ throw NotImplementedException("Type {}", type);
+ }
+}
+
+const VarAlloc::UseTracker& VarAlloc::GetUseTracker(GlslVarType type) const {
+ switch (type) {
+ case GlslVarType::U1:
+ return var_bool;
+ case GlslVarType::F16x2:
+ return var_f16x2;
+ case GlslVarType::U32:
+ return var_u32;
+ case GlslVarType::F32:
+ return var_f32;
+ case GlslVarType::U64:
+ return var_u64;
+ case GlslVarType::F64:
+ return var_f64;
+ case GlslVarType::U32x2:
+ return var_u32x2;
+ case GlslVarType::F32x2:
+ return var_f32x2;
+ case GlslVarType::U32x3:
+ return var_u32x3;
+ case GlslVarType::F32x3:
+ return var_f32x3;
+ case GlslVarType::U32x4:
+ return var_u32x4;
+ case GlslVarType::F32x4:
+ return var_f32x4;
+ case GlslVarType::PrecF32:
+ return var_precf32;
+ case GlslVarType::PrecF64:
+ return var_precf64;
+ default:
+ throw NotImplementedException("Type {}", type);
+ }
+}
+
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/var_alloc.h b/src/shader_recompiler/backend/glsl/var_alloc.h
new file mode 100644
index 000000000..8b49f32a6
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/var_alloc.h
@@ -0,0 +1,105 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <bitset>
+#include <string>
+#include <vector>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+
+namespace Shader::IR {
+class Inst;
+class Value;
+enum class Type;
+} // namespace Shader::IR
+
+namespace Shader::Backend::GLSL {
+enum class GlslVarType : u32 {
+ U1,
+ F16x2,
+ U32,
+ F32,
+ U64,
+ F64,
+ U32x2,
+ F32x2,
+ U32x3,
+ F32x3,
+ U32x4,
+ F32x4,
+ PrecF32,
+ PrecF64,
+ Void,
+};
+
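+// Packs a variable definition into 32 bits: a validity flag, the GLSL type, and the
+// per-type index assigned by VarAlloc.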
+struct Id {
+ union {
+ u32 raw;
+ BitField<0, 1, u32> is_valid;
+ BitField<1, 4, GlslVarType> type;
+ BitField<6, 26, u32> index;
+ };
+
+ bool operator==(Id rhs) const noexcept {
+ return raw == rhs.raw;
+ }
+ bool operator!=(Id rhs) const noexcept {
+ return !operator==(rhs);
+ }
+};
+static_assert(sizeof(Id) == sizeof(u32));
+
+class VarAlloc {
+public:
+ struct UseTracker {
+ bool uses_temp{};
+ size_t num_used{};
+ std::vector<bool> var_use;
+ };
+
+ /// Used for explicit usages of variables, may revert to temporaries
+ std::string Define(IR::Inst& inst, GlslVarType type);
+ std::string Define(IR::Inst& inst, IR::Type type);
+
+ /// Used to assign variables used by the IR. May return a blank string if
+ /// the instruction's result is unused in the IR.
+ std::string AddDefine(IR::Inst& inst, GlslVarType type);
+ std::string PhiDefine(IR::Inst& inst, IR::Type type);
+
+ std::string Consume(const IR::Value& value);
+ std::string ConsumeInst(IR::Inst& inst);
+
+ std::string GetGlslType(GlslVarType type) const;
+ std::string GetGlslType(IR::Type type) const;
+
+ const UseTracker& GetUseTracker(GlslVarType type) const;
+ std::string Representation(u32 index, GlslVarType type) const;
+
+private:
+ GlslVarType RegType(IR::Type type) const;
+ Id Alloc(GlslVarType type);
+ void Free(Id id);
+ UseTracker& GetUseTracker(GlslVarType type);
+ std::string Representation(Id id) const;
+
+ UseTracker var_bool{};
+ UseTracker var_f16x2{};
+ UseTracker var_u32{};
+ UseTracker var_u32x2{};
+ UseTracker var_u32x3{};
+ UseTracker var_u32x4{};
+ UseTracker var_f32{};
+ UseTracker var_f32x2{};
+ UseTracker var_f32x3{};
+ UseTracker var_f32x4{};
+ UseTracker var_u64{};
+ UseTracker var_f64{};
+ UseTracker var_precf32{};
+ UseTracker var_precf64{};
+};
+
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp
new file mode 100644
index 000000000..2d29d8c14
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_context.cpp
@@ -0,0 +1,1368 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <climits>
+#include <string_view>
+
+#include <fmt/format.h>
+
+#include "common/common_types.h"
+#include "common/div_ceil.h"
+#include "shader_recompiler/backend/spirv/emit_context.h"
+
+namespace Shader::Backend::SPIRV {
+namespace {
+enum class Operation {
+ Increment,
+ Decrement,
+ FPAdd,
+ FPMin,
+ FPMax,
+};
+
+struct AttrInfo {
+ Id pointer;
+ Id id;
+ bool needs_cast;
+};
+
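+// Builds the SPIR-V image type for a sampled texture descriptor (Sampled operand 1, unknown
+// format); storage images are handled by the ImageDescriptor overload below, which carries
+// an explicit format.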
+Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) {
+ const spv::ImageFormat format{spv::ImageFormat::Unknown};
+ const Id type{ctx.F32[1]};
+ const bool depth{desc.is_depth};
+ switch (desc.type) {
+ case TextureType::Color1D:
+ return ctx.TypeImage(type, spv::Dim::Dim1D, depth, false, false, 1, format);
+ case TextureType::ColorArray1D:
+ return ctx.TypeImage(type, spv::Dim::Dim1D, depth, true, false, 1, format);
+ case TextureType::Color2D:
+ return ctx.TypeImage(type, spv::Dim::Dim2D, depth, false, false, 1, format);
+ case TextureType::ColorArray2D:
+ return ctx.TypeImage(type, spv::Dim::Dim2D, depth, true, false, 1, format);
+ case TextureType::Color3D:
+ return ctx.TypeImage(type, spv::Dim::Dim3D, depth, false, false, 1, format);
+ case TextureType::ColorCube:
+ return ctx.TypeImage(type, spv::Dim::Cube, depth, false, false, 1, format);
+ case TextureType::ColorArrayCube:
+ return ctx.TypeImage(type, spv::Dim::Cube, depth, true, false, 1, format);
+ case TextureType::Buffer:
+ break;
+ }
+ throw InvalidArgument("Invalid texture type {}", desc.type);
+}
+
+spv::ImageFormat GetImageFormat(ImageFormat format) {
+ switch (format) {
+ case ImageFormat::Typeless:
+ return spv::ImageFormat::Unknown;
+ case ImageFormat::R8_UINT:
+ return spv::ImageFormat::R8ui;
+ case ImageFormat::R8_SINT:
+ return spv::ImageFormat::R8i;
+ case ImageFormat::R16_UINT:
+ return spv::ImageFormat::R16ui;
+ case ImageFormat::R16_SINT:
+ return spv::ImageFormat::R16i;
+ case ImageFormat::R32_UINT:
+ return spv::ImageFormat::R32ui;
+ case ImageFormat::R32G32_UINT:
+ return spv::ImageFormat::Rg32ui;
+ case ImageFormat::R32G32B32A32_UINT:
+ return spv::ImageFormat::Rgba32ui;
+ }
+ throw InvalidArgument("Invalid image format {}", format);
+}
+
+Id ImageType(EmitContext& ctx, const ImageDescriptor& desc) {
+ const spv::ImageFormat format{GetImageFormat(desc.format)};
+ const Id type{ctx.U32[1]};
+ switch (desc.type) {
+ case TextureType::Color1D:
+ return ctx.TypeImage(type, spv::Dim::Dim1D, false, false, false, 2, format);
+ case TextureType::ColorArray1D:
+ return ctx.TypeImage(type, spv::Dim::Dim1D, false, true, false, 2, format);
+ case TextureType::Color2D:
+ return ctx.TypeImage(type, spv::Dim::Dim2D, false, false, false, 2, format);
+ case TextureType::ColorArray2D:
+ return ctx.TypeImage(type, spv::Dim::Dim2D, false, true, false, 2, format);
+ case TextureType::Color3D:
+ return ctx.TypeImage(type, spv::Dim::Dim3D, false, false, false, 2, format);
+ case TextureType::Buffer:
+ throw NotImplementedException("Image buffer");
+ default:
+ break;
+ }
+ throw InvalidArgument("Invalid texture type {}", desc.type);
+}
+
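+// Declares a global variable of the given storage class, optionally tagging it as a built-in,
+// and registers it with the module's interface list.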
+Id DefineVariable(EmitContext& ctx, Id type, std::optional<spv::BuiltIn> builtin,
+ spv::StorageClass storage_class) {
+ const Id pointer_type{ctx.TypePointer(storage_class, type)};
+ const Id id{ctx.AddGlobalVariable(pointer_type, storage_class)};
+ if (builtin) {
+ ctx.Decorate(id, spv::Decoration::BuiltIn, *builtin);
+ }
+ ctx.interfaces.push_back(id);
+ return id;
+}
+
+u32 NumVertices(InputTopology input_topology) {
+ switch (input_topology) {
+ case InputTopology::Points:
+ return 1;
+ case InputTopology::Lines:
+ return 2;
+ case InputTopology::LinesAdjacency:
+ return 4;
+ case InputTopology::Triangles:
+ return 3;
+ case InputTopology::TrianglesAdjacency:
+ return 6;
+ }
+ throw InvalidArgument("Invalid input topology {}", input_topology);
+}
+
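+// Declares a stage input. Per-invocation inputs are arrayed in the tessellation stages (with a
+// fixed upper bound of 32 entries) and in geometry shaders (sized by the input primitive).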
+Id DefineInput(EmitContext& ctx, Id type, bool per_invocation,
+ std::optional<spv::BuiltIn> builtin = std::nullopt) {
+ switch (ctx.stage) {
+ case Stage::TessellationControl:
+ case Stage::TessellationEval:
+ if (per_invocation) {
+ type = ctx.TypeArray(type, ctx.Const(32u));
+ }
+ break;
+ case Stage::Geometry:
+ if (per_invocation) {
+ const u32 num_vertices{NumVertices(ctx.runtime_info.input_topology)};
+ type = ctx.TypeArray(type, ctx.Const(num_vertices));
+ }
+ break;
+ default:
+ break;
+ }
+ return DefineVariable(ctx, type, builtin, spv::StorageClass::Input);
+}
+
+Id DefineOutput(EmitContext& ctx, Id type, std::optional<u32> invocations,
+ std::optional<spv::BuiltIn> builtin = std::nullopt) {
+ if (invocations && ctx.stage == Stage::TessellationControl) {
+ type = ctx.TypeArray(type, ctx.Const(*invocations));
+ }
+ return DefineVariable(ctx, type, builtin, spv::StorageClass::Output);
+}
+
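+// Splits a generic output attribute into one variable per contiguous component range so that
+// each transform feedback varying can carry its own XfbBuffer/XfbStride/Offset decorations.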
+void DefineGenericOutput(EmitContext& ctx, size_t index, std::optional<u32> invocations) {
+ static constexpr std::string_view swizzle{"xyzw"};
+ const size_t base_attr_index{static_cast<size_t>(IR::Attribute::Generic0X) + index * 4};
+ u32 element{0};
+ while (element < 4) {
+ const u32 remainder{4 - element};
+ const TransformFeedbackVarying* xfb_varying{};
+ if (!ctx.runtime_info.xfb_varyings.empty()) {
+ xfb_varying = &ctx.runtime_info.xfb_varyings[base_attr_index + element];
+ xfb_varying = xfb_varying && xfb_varying->components > 0 ? xfb_varying : nullptr;
+ }
+ const u32 num_components{xfb_varying ? xfb_varying->components : remainder};
+
+ const Id id{DefineOutput(ctx, ctx.F32[num_components], invocations)};
+ ctx.Decorate(id, spv::Decoration::Location, static_cast<u32>(index));
+ if (element > 0) {
+ ctx.Decorate(id, spv::Decoration::Component, element);
+ }
+ if (xfb_varying) {
+ ctx.Decorate(id, spv::Decoration::XfbBuffer, xfb_varying->buffer);
+ ctx.Decorate(id, spv::Decoration::XfbStride, xfb_varying->stride);
+ ctx.Decorate(id, spv::Decoration::Offset, xfb_varying->offset);
+ }
+ if (num_components < 4 || element > 0) {
+ const std::string_view subswizzle{swizzle.substr(element, num_components)};
+ ctx.Name(id, fmt::format("out_attr{}_{}", index, subswizzle));
+ } else {
+ ctx.Name(id, fmt::format("out_attr{}", index));
+ }
+ const GenericElementInfo info{
+ .id = id,
+ .first_element = element,
+ .num_components = num_components,
+ };
+ std::fill_n(ctx.output_generics[index].begin() + element, num_components, info);
+ element += num_components;
+ }
+}
+
+Id GetAttributeType(EmitContext& ctx, AttributeType type) {
+ switch (type) {
+ case AttributeType::Float:
+ return ctx.F32[4];
+ case AttributeType::SignedInt:
+ return ctx.TypeVector(ctx.TypeInt(32, true), 4);
+ case AttributeType::UnsignedInt:
+ return ctx.U32[4];
+ case AttributeType::Disabled:
+ break;
+ }
+ throw InvalidArgument("Invalid attribute type {}", type);
+}
+
+std::optional<AttrInfo> AttrTypes(EmitContext& ctx, u32 index) {
+ const AttributeType type{ctx.runtime_info.generic_input_types.at(index)};
+ switch (type) {
+ case AttributeType::Float:
+ return AttrInfo{ctx.input_f32, ctx.F32[1], false};
+ case AttributeType::UnsignedInt:
+ return AttrInfo{ctx.input_u32, ctx.U32[1], true};
+ case AttributeType::SignedInt:
+ return AttrInfo{ctx.input_s32, ctx.TypeInt(32, true), true};
+ case AttributeType::Disabled:
+ return std::nullopt;
+ }
+ throw InvalidArgument("Invalid attribute type {}", type);
+}
+
+std::string_view StageName(Stage stage) {
+ switch (stage) {
+ case Stage::VertexA:
+ return "vs_a";
+ case Stage::VertexB:
+ return "vs";
+ case Stage::TessellationControl:
+ return "tcs";
+ case Stage::TessellationEval:
+ return "tes";
+ case Stage::Geometry:
+ return "gs";
+ case Stage::Fragment:
+ return "fs";
+ case Stage::Compute:
+ return "cs";
+ }
+ throw InvalidArgument("Invalid stage {}", stage);
+}
+
+template <typename... Args>
+void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... args) {
+ ctx.Name(object, fmt::format(fmt::runtime(format_str), StageName(ctx.stage),
+ std::forward<Args>(args)...)
+ .c_str());
+}
+
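+// Declares one uniform block per constant buffer descriptor, viewed as a 64 KiB array of the
+// given element type, and caches the element pointer type in UniformDefinitions.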
+void DefineConstBuffers(EmitContext& ctx, const Info& info, Id UniformDefinitions::*member_type,
+ u32 binding, Id type, char type_char, u32 element_size) {
+ const Id array_type{ctx.TypeArray(type, ctx.Const(65536U / element_size))};
+ ctx.Decorate(array_type, spv::Decoration::ArrayStride, element_size);
+
+ const Id struct_type{ctx.TypeStruct(array_type)};
+ Name(ctx, struct_type, "{}_cbuf_block_{}{}", ctx.stage, type_char, element_size * CHAR_BIT);
+ ctx.Decorate(struct_type, spv::Decoration::Block);
+ ctx.MemberName(struct_type, 0, "data");
+ ctx.MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
+
+ const Id struct_pointer_type{ctx.TypePointer(spv::StorageClass::Uniform, struct_type)};
+ const Id uniform_type{ctx.TypePointer(spv::StorageClass::Uniform, type)};
+ ctx.uniform_types.*member_type = uniform_type;
+
+ for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) {
+ const Id id{ctx.AddGlobalVariable(struct_pointer_type, spv::StorageClass::Uniform)};
+ ctx.Decorate(id, spv::Decoration::Binding, binding);
+ ctx.Decorate(id, spv::Decoration::DescriptorSet, 0U);
+ ctx.Name(id, fmt::format("c{}", desc.index));
+ for (size_t i = 0; i < desc.count; ++i) {
+ ctx.cbufs[desc.index + i].*member_type = id;
+ }
+ if (ctx.profile.supported_spirv >= 0x00010400) {
+ ctx.interfaces.push_back(id);
+ }
+ binding += desc.count;
+ }
+}
+
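+// Declares storage buffers as runtime arrays of the given element type and records both the
+// block pointer and the element pointer types for later access chains.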
+void DefineSsbos(EmitContext& ctx, StorageTypeDefinition& type_def,
+ Id StorageDefinitions::*member_type, const Info& info, u32 binding, Id type,
+ u32 stride) {
+ const Id array_type{ctx.TypeRuntimeArray(type)};
+ ctx.Decorate(array_type, spv::Decoration::ArrayStride, stride);
+
+ const Id struct_type{ctx.TypeStruct(array_type)};
+ ctx.Decorate(struct_type, spv::Decoration::Block);
+ ctx.MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
+
+ const Id struct_pointer{ctx.TypePointer(spv::StorageClass::StorageBuffer, struct_type)};
+ type_def.array = struct_pointer;
+ type_def.element = ctx.TypePointer(spv::StorageClass::StorageBuffer, type);
+
+ u32 index{};
+ for (const StorageBufferDescriptor& desc : info.storage_buffers_descriptors) {
+ const Id id{ctx.AddGlobalVariable(struct_pointer, spv::StorageClass::StorageBuffer)};
+ ctx.Decorate(id, spv::Decoration::Binding, binding);
+ ctx.Decorate(id, spv::Decoration::DescriptorSet, 0U);
+ ctx.Name(id, fmt::format("ssbo{}", index));
+ if (ctx.profile.supported_spirv >= 0x00010400) {
+ ctx.interfaces.push_back(id);
+ }
+ for (size_t i = 0; i < desc.count; ++i) {
+ ctx.ssbos[index + i].*member_type = id;
+ }
+ index += desc.count;
+ binding += desc.count;
+ }
+}
+
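+// Builds the value-combining helper used by CAS loops: given the loaded value and the operand,
+// it returns the value to attempt to store (wrapping increment/decrement or an FP add/min/max).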
+Id CasFunction(EmitContext& ctx, Operation operation, Id value_type) {
+ const Id func_type{ctx.TypeFunction(value_type, value_type, value_type)};
+ const Id func{ctx.OpFunction(value_type, spv::FunctionControlMask::MaskNone, func_type)};
+ const Id op_a{ctx.OpFunctionParameter(value_type)};
+ const Id op_b{ctx.OpFunctionParameter(value_type)};
+ ctx.AddLabel();
+ Id result{};
+ switch (operation) {
+ case Operation::Increment: {
+ const Id pred{ctx.OpUGreaterThanEqual(ctx.U1, op_a, op_b)};
+ const Id incr{ctx.OpIAdd(value_type, op_a, ctx.Constant(value_type, 1))};
+ result = ctx.OpSelect(value_type, pred, ctx.u32_zero_value, incr);
+ break;
+ }
+ case Operation::Decrement: {
+ const Id lhs{ctx.OpIEqual(ctx.U1, op_a, ctx.Constant(value_type, 0u))};
+ const Id rhs{ctx.OpUGreaterThan(ctx.U1, op_a, op_b)};
+ const Id pred{ctx.OpLogicalOr(ctx.U1, lhs, rhs)};
+ const Id decr{ctx.OpISub(value_type, op_a, ctx.Constant(value_type, 1))};
+ result = ctx.OpSelect(value_type, pred, op_b, decr);
+ break;
+ }
+ case Operation::FPAdd:
+ result = ctx.OpFAdd(value_type, op_a, op_b);
+ break;
+ case Operation::FPMin:
+ result = ctx.OpFMin(value_type, op_a, op_b);
+ break;
+ case Operation::FPMax:
+ result = ctx.OpFMax(value_type, op_a, op_b);
+ break;
+ default:
+ break;
+ }
+ ctx.OpReturnValue(result);
+ ctx.OpFunctionEnd();
+ return func;
+}
+
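+// Emits a compare-and-exchange retry loop that emulates atomics the backend cannot express
+// directly, operating on either workgroup shared memory or a storage buffer. Values that do not
+// match the backing 32-bit word are bitcast or packed/unpacked around the exchange.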
+Id CasLoop(EmitContext& ctx, Operation operation, Id array_pointer, Id element_pointer,
+ Id value_type, Id memory_type, spv::Scope scope) {
+ const bool is_shared{scope == spv::Scope::Workgroup};
+ const bool is_struct{!is_shared || ctx.profile.support_explicit_workgroup_layout};
+ const Id cas_func{CasFunction(ctx, operation, value_type)};
+ const Id zero{ctx.u32_zero_value};
+ const Id scope_id{ctx.Const(static_cast<u32>(scope))};
+
+ const Id loop_header{ctx.OpLabel()};
+ const Id continue_block{ctx.OpLabel()};
+ const Id merge_block{ctx.OpLabel()};
+ const Id func_type{is_shared
+ ? ctx.TypeFunction(value_type, ctx.U32[1], value_type)
+ : ctx.TypeFunction(value_type, ctx.U32[1], value_type, array_pointer)};
+
+ const Id func{ctx.OpFunction(value_type, spv::FunctionControlMask::MaskNone, func_type)};
+ const Id index{ctx.OpFunctionParameter(ctx.U32[1])};
+ const Id op_b{ctx.OpFunctionParameter(value_type)};
+ const Id base{is_shared ? ctx.shared_memory_u32 : ctx.OpFunctionParameter(array_pointer)};
+ ctx.AddLabel();
+ ctx.OpBranch(loop_header);
+ ctx.AddLabel(loop_header);
+
+ ctx.OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone);
+ ctx.OpBranch(continue_block);
+
+ ctx.AddLabel(continue_block);
+ const Id word_pointer{is_struct ? ctx.OpAccessChain(element_pointer, base, zero, index)
+ : ctx.OpAccessChain(element_pointer, base, index)};
+ if (value_type.value == ctx.F32[2].value) {
+ const Id u32_value{ctx.OpLoad(ctx.U32[1], word_pointer)};
+ const Id value{ctx.OpUnpackHalf2x16(ctx.F32[2], u32_value)};
+ const Id new_value{ctx.OpFunctionCall(value_type, cas_func, value, op_b)};
+ const Id u32_new_value{ctx.OpPackHalf2x16(ctx.U32[1], new_value)};
+ const Id atomic_res{ctx.OpAtomicCompareExchange(ctx.U32[1], word_pointer, scope_id, zero,
+ zero, u32_new_value, u32_value)};
+ const Id success{ctx.OpIEqual(ctx.U1, atomic_res, u32_value)};
+ ctx.OpBranchConditional(success, merge_block, loop_header);
+
+ ctx.AddLabel(merge_block);
+ ctx.OpReturnValue(ctx.OpUnpackHalf2x16(ctx.F32[2], atomic_res));
+ } else {
+ const Id value{ctx.OpLoad(memory_type, word_pointer)};
+ const bool matching_type{value_type.value == memory_type.value};
+ const Id bitcast_value{matching_type ? value : ctx.OpBitcast(value_type, value)};
+ const Id cal_res{ctx.OpFunctionCall(value_type, cas_func, bitcast_value, op_b)};
+ const Id new_value{matching_type ? cal_res : ctx.OpBitcast(memory_type, cal_res)};
+ const Id atomic_res{ctx.OpAtomicCompareExchange(ctx.U32[1], word_pointer, scope_id, zero,
+ zero, new_value, value)};
+ const Id success{ctx.OpIEqual(ctx.U1, atomic_res, value)};
+ ctx.OpBranchConditional(success, merge_block, loop_header);
+
+ ctx.AddLabel(merge_block);
+ ctx.OpReturnValue(ctx.OpBitcast(value_type, atomic_res));
+ }
+ ctx.OpFunctionEnd();
+ return func;
+}
+
+template <typename Desc>
+std::string NameOf(Stage stage, const Desc& desc, std::string_view prefix) {
+ if (desc.count > 1) {
+ return fmt::format("{}_{}{}_{:02x}x{}", StageName(stage), prefix, desc.cbuf_index,
+ desc.cbuf_offset, desc.count);
+ } else {
+ return fmt::format("{}_{}{}_{:02x}", StageName(stage), prefix, desc.cbuf_index,
+ desc.cbuf_offset);
+ }
+}
+
+Id DescType(EmitContext& ctx, Id sampled_type, Id pointer_type, u32 count) {
+ if (count > 1) {
+ const Id array_type{ctx.TypeArray(sampled_type, ctx.Const(count))};
+ return ctx.TypePointer(spv::StorageClass::UniformConstant, array_type);
+ } else {
+ return pointer_type;
+ }
+}
+} // Anonymous namespace
+
+void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) {
+ defs[0] = sirit_ctx.Name(base_type, name);
+
+ std::array<char, 6> def_name;
+ for (int i = 1; i < 4; ++i) {
+ const std::string_view def_name_view(
+ def_name.data(),
+ fmt::format_to_n(def_name.data(), def_name.size(), "{}x{}", name, i + 1).size);
+ defs[static_cast<size_t>(i)] =
+ sirit_ctx.Name(sirit_ctx.TypeVector(base_type, i + 1), def_name_view);
+ }
+}
+
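+// Declares every type, constant, interface variable and resource the translated program uses.
+// When the driver exposes a unified binding model, all resource kinds share a single binding
+// counter instead of one counter per descriptor type.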
+EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_,
+ IR::Program& program, Bindings& bindings)
+ : Sirit::Module(profile_.supported_spirv), profile{profile_},
+ runtime_info{runtime_info_}, stage{program.stage} {
+ const bool is_unified{profile.unified_descriptor_binding};
+ u32& uniform_binding{is_unified ? bindings.unified : bindings.uniform_buffer};
+ u32& storage_binding{is_unified ? bindings.unified : bindings.storage_buffer};
+ u32& texture_binding{is_unified ? bindings.unified : bindings.texture};
+ u32& image_binding{is_unified ? bindings.unified : bindings.image};
+ AddCapability(spv::Capability::Shader);
+ DefineCommonTypes(program.info);
+ DefineCommonConstants();
+ DefineInterfaces(program);
+ DefineLocalMemory(program);
+ DefineSharedMemory(program);
+ DefineSharedMemoryFunctions(program);
+ DefineConstantBuffers(program.info, uniform_binding);
+ DefineStorageBuffers(program.info, storage_binding);
+ DefineTextureBuffers(program.info, texture_binding);
+ DefineImageBuffers(program.info, image_binding);
+ DefineTextures(program.info, texture_binding);
+ DefineImages(program.info, image_binding);
+ DefineAttributeMemAccess(program.info);
+ DefineGlobalMemoryFunctions(program.info);
+}
+
+EmitContext::~EmitContext() = default;
+
+Id EmitContext::Def(const IR::Value& value) {
+ if (!value.IsImmediate()) {
+ return value.InstRecursive()->Definition<Id>();
+ }
+ switch (value.Type()) {
+ case IR::Type::Void:
+ // Void instructions are used for optional arguments (e.g. texture offsets)
+ // They are not meant to be used in the SPIR-V module
+ return Id{};
+ case IR::Type::U1:
+ return value.U1() ? true_value : false_value;
+ case IR::Type::U32:
+ return Const(value.U32());
+ case IR::Type::U64:
+ return Constant(U64, value.U64());
+ case IR::Type::F32:
+ return Const(value.F32());
+ case IR::Type::F64:
+ return Constant(F64[1], value.F64());
+ default:
+ throw NotImplementedException("Immediate type {}", value.Type());
+ }
+}
+
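+// BitOffset8/BitOffset16 return the bit offset of a byte or half-word inside its aligned 32-bit
+// word; they are used when narrow loads and stores are emulated on top of 32-bit memory.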
+Id EmitContext::BitOffset8(const IR::Value& offset) {
+ if (offset.IsImmediate()) {
+ return Const((offset.U32() % 4) * 8);
+ }
+ return OpBitwiseAnd(U32[1], OpShiftLeftLogical(U32[1], Def(offset), Const(3u)), Const(24u));
+}
+
+Id EmitContext::BitOffset16(const IR::Value& offset) {
+ if (offset.IsImmediate()) {
+ return Const(((offset.U32() / 2) % 2) * 16);
+ }
+ return OpBitwiseAnd(U32[1], OpShiftLeftLogical(U32[1], Def(offset), Const(3u)), Const(16u));
+}
+
+void EmitContext::DefineCommonTypes(const Info& info) {
+ void_id = TypeVoid();
+
+ U1 = Name(TypeBool(), "u1");
+
+ F32.Define(*this, TypeFloat(32), "f32");
+ U32.Define(*this, TypeInt(32, false), "u32");
+ S32.Define(*this, TypeInt(32, true), "s32");
+
+ private_u32 = Name(TypePointer(spv::StorageClass::Private, U32[1]), "private_u32");
+
+ input_f32 = Name(TypePointer(spv::StorageClass::Input, F32[1]), "input_f32");
+ input_u32 = Name(TypePointer(spv::StorageClass::Input, U32[1]), "input_u32");
+ input_s32 = Name(TypePointer(spv::StorageClass::Input, TypeInt(32, true)), "input_s32");
+
+ output_f32 = Name(TypePointer(spv::StorageClass::Output, F32[1]), "output_f32");
+ output_u32 = Name(TypePointer(spv::StorageClass::Output, U32[1]), "output_u32");
+
+ if (info.uses_int8 && profile.support_int8) {
+ AddCapability(spv::Capability::Int8);
+ U8 = Name(TypeInt(8, false), "u8");
+ S8 = Name(TypeInt(8, true), "s8");
+ }
+ if (info.uses_int16 && profile.support_int16) {
+ AddCapability(spv::Capability::Int16);
+ U16 = Name(TypeInt(16, false), "u16");
+ S16 = Name(TypeInt(16, true), "s16");
+ }
+ if (info.uses_int64) {
+ AddCapability(spv::Capability::Int64);
+ U64 = Name(TypeInt(64, false), "u64");
+ }
+ if (info.uses_fp16) {
+ AddCapability(spv::Capability::Float16);
+ F16.Define(*this, TypeFloat(16), "f16");
+ }
+ if (info.uses_fp64) {
+ AddCapability(spv::Capability::Float64);
+ F64.Define(*this, TypeFloat(64), "f64");
+ }
+}
+
+void EmitContext::DefineCommonConstants() {
+ true_value = ConstantTrue(U1);
+ false_value = ConstantFalse(U1);
+ u32_zero_value = Const(0U);
+ f32_zero_value = Const(0.0f);
+}
+
+void EmitContext::DefineInterfaces(const IR::Program& program) {
+ DefineInputs(program);
+ DefineOutputs(program);
+}
+
+void EmitContext::DefineLocalMemory(const IR::Program& program) {
+ if (program.local_memory_size == 0) {
+ return;
+ }
+ const u32 num_elements{Common::DivCeil(program.local_memory_size, 4U)};
+ const Id type{TypeArray(U32[1], Const(num_elements))};
+ const Id pointer{TypePointer(spv::StorageClass::Private, type)};
+ local_memory = AddGlobalVariable(pointer, spv::StorageClass::Private);
+ if (profile.supported_spirv >= 0x00010400) {
+ interfaces.push_back(local_memory);
+ }
+}
+
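+// Shared memory is either declared per element width as aliased blocks when
+// SPV_KHR_workgroup_memory_explicit_layout is supported, or emulated with a plain u32 array plus
+// CAS-based helper functions for 8-bit and 16-bit stores.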
+void EmitContext::DefineSharedMemory(const IR::Program& program) {
+ if (program.shared_memory_size == 0) {
+ return;
+ }
+ const auto make{[&](Id element_type, u32 element_size) {
+ const u32 num_elements{Common::DivCeil(program.shared_memory_size, element_size)};
+ const Id array_type{TypeArray(element_type, Const(num_elements))};
+ Decorate(array_type, spv::Decoration::ArrayStride, element_size);
+
+ const Id struct_type{TypeStruct(array_type)};
+ MemberDecorate(struct_type, 0U, spv::Decoration::Offset, 0U);
+ Decorate(struct_type, spv::Decoration::Block);
+
+ const Id pointer{TypePointer(spv::StorageClass::Workgroup, struct_type)};
+ const Id element_pointer{TypePointer(spv::StorageClass::Workgroup, element_type)};
+ const Id variable{AddGlobalVariable(pointer, spv::StorageClass::Workgroup)};
+ Decorate(variable, spv::Decoration::Aliased);
+ interfaces.push_back(variable);
+
+ return std::make_tuple(variable, element_pointer, pointer);
+ }};
+ if (profile.support_explicit_workgroup_layout) {
+ AddExtension("SPV_KHR_workgroup_memory_explicit_layout");
+ AddCapability(spv::Capability::WorkgroupMemoryExplicitLayoutKHR);
+ if (program.info.uses_int8) {
+ AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout8BitAccessKHR);
+ std::tie(shared_memory_u8, shared_u8, std::ignore) = make(U8, 1);
+ }
+ if (program.info.uses_int16) {
+ AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout16BitAccessKHR);
+ std::tie(shared_memory_u16, shared_u16, std::ignore) = make(U16, 2);
+ }
+ if (program.info.uses_int64) {
+ std::tie(shared_memory_u64, shared_u64, std::ignore) = make(U64, 8);
+ }
+ std::tie(shared_memory_u32, shared_u32, shared_memory_u32_type) = make(U32[1], 4);
+ std::tie(shared_memory_u32x2, shared_u32x2, std::ignore) = make(U32[2], 8);
+ std::tie(shared_memory_u32x4, shared_u32x4, std::ignore) = make(U32[4], 16);
+ return;
+ }
+ const u32 num_elements{Common::DivCeil(program.shared_memory_size, 4U)};
+ const Id type{TypeArray(U32[1], Const(num_elements))};
+ shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type);
+
+ shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]);
+ shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup);
+ interfaces.push_back(shared_memory_u32);
+
+ const Id func_type{TypeFunction(void_id, U32[1], U32[1])};
+ const auto make_function{[&](u32 mask, u32 size) {
+ const Id loop_header{OpLabel()};
+ const Id continue_block{OpLabel()};
+ const Id merge_block{OpLabel()};
+
+ const Id func{OpFunction(void_id, spv::FunctionControlMask::MaskNone, func_type)};
+ const Id offset{OpFunctionParameter(U32[1])};
+ const Id insert_value{OpFunctionParameter(U32[1])};
+ AddLabel();
+ OpBranch(loop_header);
+
+ AddLabel(loop_header);
+ const Id word_offset{OpShiftRightArithmetic(U32[1], offset, Const(2U))};
+ const Id shift_offset{OpShiftLeftLogical(U32[1], offset, Const(3U))};
+ const Id bit_offset{OpBitwiseAnd(U32[1], shift_offset, Const(mask))};
+ const Id count{Const(size)};
+ OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone);
+ OpBranch(continue_block);
+
+ AddLabel(continue_block);
+ const Id word_pointer{OpAccessChain(shared_u32, shared_memory_u32, word_offset)};
+ const Id old_value{OpLoad(U32[1], word_pointer)};
+ const Id new_value{OpBitFieldInsert(U32[1], old_value, insert_value, bit_offset, count)};
+ const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, Const(1U), u32_zero_value,
+ u32_zero_value, new_value, old_value)};
+ const Id success{OpIEqual(U1, atomic_res, old_value)};
+ OpBranchConditional(success, merge_block, loop_header);
+
+ AddLabel(merge_block);
+ OpReturn();
+ OpFunctionEnd();
+ return func;
+ }};
+ if (program.info.uses_int8) {
+ shared_store_u8_func = make_function(24, 8);
+ }
+ if (program.info.uses_int16) {
+ shared_store_u16_func = make_function(16, 16);
+ }
+}
+
+void EmitContext::DefineSharedMemoryFunctions(const IR::Program& program) {
+ if (program.info.uses_shared_increment) {
+ increment_cas_shared = CasLoop(*this, Operation::Increment, shared_memory_u32_type,
+ shared_u32, U32[1], U32[1], spv::Scope::Workgroup);
+ }
+ if (program.info.uses_shared_decrement) {
+ decrement_cas_shared = CasLoop(*this, Operation::Decrement, shared_memory_u32_type,
+ shared_u32, U32[1], U32[1], spv::Scope::Workgroup);
+ }
+}
+
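+// Builds helper functions for dynamically indexed attribute accesses: a switch over the
+// attribute index dispatches to the matching input or output variable.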
+void EmitContext::DefineAttributeMemAccess(const Info& info) {
+ const auto make_load{[&] {
+ const bool is_array{stage == Stage::Geometry};
+ const Id end_block{OpLabel()};
+ const Id default_label{OpLabel()};
+
+ const Id func_type_load{is_array ? TypeFunction(F32[1], U32[1], U32[1])
+ : TypeFunction(F32[1], U32[1])};
+ const Id func{OpFunction(F32[1], spv::FunctionControlMask::MaskNone, func_type_load)};
+ const Id offset{OpFunctionParameter(U32[1])};
+ const Id vertex{is_array ? OpFunctionParameter(U32[1]) : Id{}};
+
+ AddLabel();
+ const Id base_index{OpShiftRightArithmetic(U32[1], offset, Const(2U))};
+ const Id masked_index{OpBitwiseAnd(U32[1], base_index, Const(3U))};
+ const Id compare_index{OpShiftRightArithmetic(U32[1], base_index, Const(2U))};
+ std::vector<Sirit::Literal> literals;
+ std::vector<Id> labels;
+ if (info.loads.AnyComponent(IR::Attribute::PositionX)) {
+ literals.push_back(static_cast<u32>(IR::Attribute::PositionX) >> 2);
+ labels.push_back(OpLabel());
+ }
+ const u32 base_attribute_value = static_cast<u32>(IR::Attribute::Generic0X) >> 2;
+ for (u32 index = 0; index < static_cast<u32>(IR::NUM_GENERICS); ++index) {
+ if (!info.loads.Generic(index)) {
+ continue;
+ }
+ literals.push_back(base_attribute_value + index);
+ labels.push_back(OpLabel());
+ }
+ OpSelectionMerge(end_block, spv::SelectionControlMask::MaskNone);
+ OpSwitch(compare_index, default_label, literals, labels);
+ AddLabel(default_label);
+ OpReturnValue(Const(0.0f));
+ size_t label_index{0};
+ if (info.loads.AnyComponent(IR::Attribute::PositionX)) {
+ AddLabel(labels[label_index]);
+ const Id pointer{is_array
+ ? OpAccessChain(input_f32, input_position, vertex, masked_index)
+ : OpAccessChain(input_f32, input_position, masked_index)};
+ const Id result{OpLoad(F32[1], pointer)};
+ OpReturnValue(result);
+ ++label_index;
+ }
+ for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
+ if (!info.loads.Generic(index)) {
+ continue;
+ }
+ AddLabel(labels[label_index]);
+ const auto type{AttrTypes(*this, static_cast<u32>(index))};
+ if (!type) {
+ OpReturnValue(Const(0.0f));
+ ++label_index;
+ continue;
+ }
+ const Id generic_id{input_generics.at(index)};
+ const Id pointer{is_array
+ ? OpAccessChain(type->pointer, generic_id, vertex, masked_index)
+ : OpAccessChain(type->pointer, generic_id, masked_index)};
+ const Id value{OpLoad(type->id, pointer)};
+ const Id result{type->needs_cast ? OpBitcast(F32[1], value) : value};
+ OpReturnValue(result);
+ ++label_index;
+ }
+ AddLabel(end_block);
+ OpUnreachable();
+ OpFunctionEnd();
+ return func;
+ }};
+ const auto make_store{[&] {
+ const Id end_block{OpLabel()};
+ const Id default_label{OpLabel()};
+
+ const Id func_type_store{TypeFunction(void_id, U32[1], F32[1])};
+ const Id func{OpFunction(void_id, spv::FunctionControlMask::MaskNone, func_type_store)};
+ const Id offset{OpFunctionParameter(U32[1])};
+ const Id store_value{OpFunctionParameter(F32[1])};
+ AddLabel();
+ const Id base_index{OpShiftRightArithmetic(U32[1], offset, Const(2U))};
+ const Id masked_index{OpBitwiseAnd(U32[1], base_index, Const(3U))};
+ const Id compare_index{OpShiftRightArithmetic(U32[1], base_index, Const(2U))};
+ std::vector<Sirit::Literal> literals;
+ std::vector<Id> labels;
+ if (info.stores.AnyComponent(IR::Attribute::PositionX)) {
+ literals.push_back(static_cast<u32>(IR::Attribute::PositionX) >> 2);
+ labels.push_back(OpLabel());
+ }
+ const u32 base_attribute_value = static_cast<u32>(IR::Attribute::Generic0X) >> 2;
+ for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
+ if (!info.stores.Generic(index)) {
+ continue;
+ }
+ literals.push_back(base_attribute_value + static_cast<u32>(index));
+ labels.push_back(OpLabel());
+ }
+ if (info.stores.ClipDistances()) {
+ literals.push_back(static_cast<u32>(IR::Attribute::ClipDistance0) >> 2);
+ labels.push_back(OpLabel());
+ literals.push_back(static_cast<u32>(IR::Attribute::ClipDistance4) >> 2);
+ labels.push_back(OpLabel());
+ }
+ OpSelectionMerge(end_block, spv::SelectionControlMask::MaskNone);
+ OpSwitch(compare_index, default_label, literals, labels);
+ AddLabel(default_label);
+ OpReturn();
+ size_t label_index{0};
+ if (info.stores.AnyComponent(IR::Attribute::PositionX)) {
+ AddLabel(labels[label_index]);
+ const Id pointer{OpAccessChain(output_f32, output_position, masked_index)};
+ OpStore(pointer, store_value);
+ OpReturn();
+ ++label_index;
+ }
+ for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
+ if (!info.stores.Generic(index)) {
+ continue;
+ }
+ if (output_generics[index][0].num_components != 4) {
+ throw NotImplementedException("Physical stores and transform feedbacks");
+ }
+ AddLabel(labels[label_index]);
+ const Id generic_id{output_generics[index][0].id};
+ const Id pointer{OpAccessChain(output_f32, generic_id, masked_index)};
+ OpStore(pointer, store_value);
+ OpReturn();
+ ++label_index;
+ }
+ if (info.stores.ClipDistances()) {
+ AddLabel(labels[label_index]);
+ const Id pointer{OpAccessChain(output_f32, clip_distances, masked_index)};
+ OpStore(pointer, store_value);
+ OpReturn();
+ ++label_index;
+ AddLabel(labels[label_index]);
+ const Id fixed_index{OpIAdd(U32[1], masked_index, Const(4U))};
+ const Id pointer2{OpAccessChain(output_f32, clip_distances, fixed_index)};
+ OpStore(pointer2, store_value);
+ OpReturn();
+ ++label_index;
+ }
+ AddLabel(end_block);
+ OpUnreachable();
+ OpFunctionEnd();
+ return func;
+ }};
+ if (info.loads_indexed_attributes) {
+ indexed_load_func = make_load();
+ }
+ if (info.stores_indexed_attributes) {
+ indexed_store_func = make_store();
+ }
+}
+
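+// Lowers global memory accesses to storage buffer accesses. Each helper compares the 64-bit
+// address against the base/size pairs read from the constant buffer entries that describe the
+// bound SSBOs; misses yield a null constant for loads and a no-op for stores.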
+void EmitContext::DefineGlobalMemoryFunctions(const Info& info) {
+ if (!info.uses_global_memory || !profile.support_int64) {
+ return;
+ }
+ using DefPtr = Id StorageDefinitions::*;
+ const Id zero{u32_zero_value};
+ const auto define_body{[&](DefPtr ssbo_member, Id addr, Id element_pointer, u32 shift,
+ auto&& callback) {
+ AddLabel();
+ const size_t num_buffers{info.storage_buffers_descriptors.size()};
+ for (size_t index = 0; index < num_buffers; ++index) {
+ if (!info.nvn_buffer_used[index]) {
+ continue;
+ }
+ const auto& ssbo{info.storage_buffers_descriptors[index]};
+ const Id ssbo_addr_cbuf_offset{Const(ssbo.cbuf_offset / 8)};
+ const Id ssbo_size_cbuf_offset{Const(ssbo.cbuf_offset / 4 + 2)};
+ const Id ssbo_addr_pointer{OpAccessChain(
+ uniform_types.U32x2, cbufs[ssbo.cbuf_index].U32x2, zero, ssbo_addr_cbuf_offset)};
+ const Id ssbo_size_pointer{OpAccessChain(uniform_types.U32, cbufs[ssbo.cbuf_index].U32,
+ zero, ssbo_size_cbuf_offset)};
+
+ const Id ssbo_addr{OpBitcast(U64, OpLoad(U32[2], ssbo_addr_pointer))};
+ const Id ssbo_size{OpUConvert(U64, OpLoad(U32[1], ssbo_size_pointer))};
+ const Id ssbo_end{OpIAdd(U64, ssbo_addr, ssbo_size)};
+ const Id cond{OpLogicalAnd(U1, OpUGreaterThanEqual(U1, addr, ssbo_addr),
+ OpULessThan(U1, addr, ssbo_end))};
+ const Id then_label{OpLabel()};
+ const Id else_label{OpLabel()};
+ OpSelectionMerge(else_label, spv::SelectionControlMask::MaskNone);
+ OpBranchConditional(cond, then_label, else_label);
+ AddLabel(then_label);
+ const Id ssbo_id{ssbos[index].*ssbo_member};
+ const Id ssbo_offset{OpUConvert(U32[1], OpISub(U64, addr, ssbo_addr))};
+ const Id ssbo_index{OpShiftRightLogical(U32[1], ssbo_offset, Const(shift))};
+ const Id ssbo_pointer{OpAccessChain(element_pointer, ssbo_id, zero, ssbo_index)};
+ callback(ssbo_pointer);
+ AddLabel(else_label);
+ }
+ }};
+ const auto define_load{[&](DefPtr ssbo_member, Id element_pointer, Id type, u32 shift) {
+ const Id function_type{TypeFunction(type, U64)};
+ const Id func_id{OpFunction(type, spv::FunctionControlMask::MaskNone, function_type)};
+ const Id addr{OpFunctionParameter(U64)};
+ define_body(ssbo_member, addr, element_pointer, shift,
+ [&](Id ssbo_pointer) { OpReturnValue(OpLoad(type, ssbo_pointer)); });
+ OpReturnValue(ConstantNull(type));
+ OpFunctionEnd();
+ return func_id;
+ }};
+ const auto define_write{[&](DefPtr ssbo_member, Id element_pointer, Id type, u32 shift) {
+ const Id function_type{TypeFunction(void_id, U64, type)};
+ const Id func_id{OpFunction(void_id, spv::FunctionControlMask::MaskNone, function_type)};
+ const Id addr{OpFunctionParameter(U64)};
+ const Id data{OpFunctionParameter(type)};
+ define_body(ssbo_member, addr, element_pointer, shift, [&](Id ssbo_pointer) {
+ OpStore(ssbo_pointer, data);
+ OpReturn();
+ });
+ OpReturn();
+ OpFunctionEnd();
+ return func_id;
+ }};
+ const auto define{
+ [&](DefPtr ssbo_member, const StorageTypeDefinition& type_def, Id type, size_t size) {
+ const Id element_type{type_def.element};
+ const u32 shift{static_cast<u32>(std::countr_zero(size))};
+ const Id load_func{define_load(ssbo_member, element_type, type, shift)};
+ const Id write_func{define_write(ssbo_member, element_type, type, shift)};
+ return std::make_pair(load_func, write_func);
+ }};
+ std::tie(load_global_func_u32, write_global_func_u32) =
+ define(&StorageDefinitions::U32, storage_types.U32, U32[1], sizeof(u32));
+ std::tie(load_global_func_u32x2, write_global_func_u32x2) =
+ define(&StorageDefinitions::U32x2, storage_types.U32x2, U32[2], sizeof(u32[2]));
+ std::tie(load_global_func_u32x4, write_global_func_u32x4) =
+ define(&StorageDefinitions::U32x4, storage_types.U32x4, U32[4], sizeof(u32[4]));
+}
+
+void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) {
+ if (info.constant_buffer_descriptors.empty()) {
+ return;
+ }
+ if (!profile.support_descriptor_aliasing) {
+ DefineConstBuffers(*this, info, &UniformDefinitions::U32x4, binding, U32[4], 'u',
+ sizeof(u32[4]));
+ for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) {
+ binding += desc.count;
+ }
+ return;
+ }
+ IR::Type types{info.used_constant_buffer_types};
+ if (True(types & IR::Type::U8)) {
+ if (profile.support_int8) {
+ DefineConstBuffers(*this, info, &UniformDefinitions::U8, binding, U8, 'u', sizeof(u8));
+ DefineConstBuffers(*this, info, &UniformDefinitions::S8, binding, S8, 's', sizeof(s8));
+ } else {
+ types |= IR::Type::U32;
+ }
+ }
+ if (True(types & IR::Type::U16)) {
+ if (profile.support_int16) {
+ DefineConstBuffers(*this, info, &UniformDefinitions::U16, binding, U16, 'u',
+ sizeof(u16));
+ DefineConstBuffers(*this, info, &UniformDefinitions::S16, binding, S16, 's',
+ sizeof(s16));
+ } else {
+ types |= IR::Type::U32;
+ }
+ }
+ if (True(types & IR::Type::U32)) {
+ DefineConstBuffers(*this, info, &UniformDefinitions::U32, binding, U32[1], 'u',
+ sizeof(u32));
+ }
+ if (True(types & IR::Type::F32)) {
+ DefineConstBuffers(*this, info, &UniformDefinitions::F32, binding, F32[1], 'f',
+ sizeof(f32));
+ }
+ if (True(types & IR::Type::U32x2)) {
+ DefineConstBuffers(*this, info, &UniformDefinitions::U32x2, binding, U32[2], 'u',
+ sizeof(u32[2]));
+ }
+ binding += static_cast<u32>(info.constant_buffer_descriptors.size());
+}
+
+void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) {
+ if (info.storage_buffers_descriptors.empty()) {
+ return;
+ }
+ AddExtension("SPV_KHR_storage_buffer_storage_class");
+
+ const IR::Type used_types{profile.support_descriptor_aliasing ? info.used_storage_buffer_types
+ : IR::Type::U32};
+ if (profile.support_int8 && True(used_types & IR::Type::U8)) {
+ DefineSsbos(*this, storage_types.U8, &StorageDefinitions::U8, info, binding, U8,
+ sizeof(u8));
+ DefineSsbos(*this, storage_types.S8, &StorageDefinitions::S8, info, binding, S8,
+ sizeof(u8));
+ }
+ if (profile.support_int16 && True(used_types & IR::Type::U16)) {
+ DefineSsbos(*this, storage_types.U16, &StorageDefinitions::U16, info, binding, U16,
+ sizeof(u16));
+ DefineSsbos(*this, storage_types.S16, &StorageDefinitions::S16, info, binding, S16,
+ sizeof(u16));
+ }
+ if (True(used_types & IR::Type::U32)) {
+ DefineSsbos(*this, storage_types.U32, &StorageDefinitions::U32, info, binding, U32[1],
+ sizeof(u32));
+ }
+ if (True(used_types & IR::Type::F32)) {
+ DefineSsbos(*this, storage_types.F32, &StorageDefinitions::F32, info, binding, F32[1],
+ sizeof(f32));
+ }
+ if (True(used_types & IR::Type::U64)) {
+ DefineSsbos(*this, storage_types.U64, &StorageDefinitions::U64, info, binding, U64,
+ sizeof(u64));
+ }
+ if (True(used_types & IR::Type::U32x2)) {
+ DefineSsbos(*this, storage_types.U32x2, &StorageDefinitions::U32x2, info, binding, U32[2],
+ sizeof(u32[2]));
+ }
+ if (True(used_types & IR::Type::U32x4)) {
+ DefineSsbos(*this, storage_types.U32x4, &StorageDefinitions::U32x4, info, binding, U32[4],
+ sizeof(u32[4]));
+ }
+ for (const StorageBufferDescriptor& desc : info.storage_buffers_descriptors) {
+ binding += desc.count;
+ }
+ const bool needs_function{
+ info.uses_global_increment || info.uses_global_decrement || info.uses_atomic_f32_add ||
+ info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max ||
+ info.uses_atomic_f32x2_add || info.uses_atomic_f32x2_min || info.uses_atomic_f32x2_max};
+ if (needs_function) {
+ AddCapability(spv::Capability::VariablePointersStorageBuffer);
+ }
+ if (info.uses_global_increment) {
+ increment_cas_ssbo = CasLoop(*this, Operation::Increment, storage_types.U32.array,
+ storage_types.U32.element, U32[1], U32[1], spv::Scope::Device);
+ }
+ if (info.uses_global_decrement) {
+ decrement_cas_ssbo = CasLoop(*this, Operation::Decrement, storage_types.U32.array,
+ storage_types.U32.element, U32[1], U32[1], spv::Scope::Device);
+ }
+ if (info.uses_atomic_f32_add) {
+ f32_add_cas = CasLoop(*this, Operation::FPAdd, storage_types.U32.array,
+ storage_types.U32.element, F32[1], U32[1], spv::Scope::Device);
+ }
+ if (info.uses_atomic_f16x2_add) {
+ f16x2_add_cas = CasLoop(*this, Operation::FPAdd, storage_types.U32.array,
+ storage_types.U32.element, F16[2], F16[2], spv::Scope::Device);
+ }
+ if (info.uses_atomic_f16x2_min) {
+ f16x2_min_cas = CasLoop(*this, Operation::FPMin, storage_types.U32.array,
+ storage_types.U32.element, F16[2], F16[2], spv::Scope::Device);
+ }
+ if (info.uses_atomic_f16x2_max) {
+ f16x2_max_cas = CasLoop(*this, Operation::FPMax, storage_types.U32.array,
+ storage_types.U32.element, F16[2], F16[2], spv::Scope::Device);
+ }
+ if (info.uses_atomic_f32x2_add) {
+ f32x2_add_cas = CasLoop(*this, Operation::FPAdd, storage_types.U32.array,
+ storage_types.U32.element, F32[2], F32[2], spv::Scope::Device);
+ }
+ if (info.uses_atomic_f32x2_min) {
+ f32x2_min_cas = CasLoop(*this, Operation::FPMin, storage_types.U32.array,
+ storage_types.U32.element, F32[2], F32[2], spv::Scope::Device);
+ }
+ if (info.uses_atomic_f32x2_max) {
+ f32x2_max_cas = CasLoop(*this, Operation::FPMax, storage_types.U32.array,
+ storage_types.U32.element, F32[2], F32[2], spv::Scope::Device);
+ }
+}
+
+void EmitContext::DefineTextureBuffers(const Info& info, u32& binding) {
+ if (info.texture_buffer_descriptors.empty()) {
+ return;
+ }
+ const spv::ImageFormat format{spv::ImageFormat::Unknown};
+ image_buffer_type = TypeImage(F32[1], spv::Dim::Buffer, 0U, false, false, 1, format);
+ sampled_texture_buffer_type = TypeSampledImage(image_buffer_type);
+
+ const Id type{TypePointer(spv::StorageClass::UniformConstant, sampled_texture_buffer_type)};
+ texture_buffers.reserve(info.texture_buffer_descriptors.size());
+ for (const TextureBufferDescriptor& desc : info.texture_buffer_descriptors) {
+ if (desc.count != 1) {
+ throw NotImplementedException("Array of texture buffers");
+ }
+ const Id id{AddGlobalVariable(type, spv::StorageClass::UniformConstant)};
+ Decorate(id, spv::Decoration::Binding, binding);
+ Decorate(id, spv::Decoration::DescriptorSet, 0U);
+ Name(id, NameOf(stage, desc, "texbuf"));
+ texture_buffers.push_back({
+ .id = id,
+ .count = desc.count,
+ });
+ if (profile.supported_spirv >= 0x00010400) {
+ interfaces.push_back(id);
+ }
+ ++binding;
+ }
+}
+
+void EmitContext::DefineImageBuffers(const Info& info, u32& binding) {
+ image_buffers.reserve(info.image_buffer_descriptors.size());
+ for (const ImageBufferDescriptor& desc : info.image_buffer_descriptors) {
+ if (desc.count != 1) {
+ throw NotImplementedException("Array of image buffers");
+ }
+ const spv::ImageFormat format{GetImageFormat(desc.format)};
+ const Id image_type{TypeImage(U32[1], spv::Dim::Buffer, false, false, false, 2, format)};
+ const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)};
+ const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)};
+ Decorate(id, spv::Decoration::Binding, binding);
+ Decorate(id, spv::Decoration::DescriptorSet, 0U);
+ Name(id, NameOf(stage, desc, "imgbuf"));
+ image_buffers.push_back({
+ .id = id,
+ .image_type = image_type,
+ .count = desc.count,
+ });
+ if (profile.supported_spirv >= 0x00010400) {
+ interfaces.push_back(id);
+ }
+ ++binding;
+ }
+}
+
+void EmitContext::DefineTextures(const Info& info, u32& binding) {
+ textures.reserve(info.texture_descriptors.size());
+ for (const TextureDescriptor& desc : info.texture_descriptors) {
+ const Id image_type{ImageType(*this, desc)};
+ const Id sampled_type{TypeSampledImage(image_type)};
+ const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, sampled_type)};
+ const Id desc_type{DescType(*this, sampled_type, pointer_type, desc.count)};
+ const Id id{AddGlobalVariable(desc_type, spv::StorageClass::UniformConstant)};
+ Decorate(id, spv::Decoration::Binding, binding);
+ Decorate(id, spv::Decoration::DescriptorSet, 0U);
+ Name(id, NameOf(stage, desc, "tex"));
+ textures.push_back({
+ .id = id,
+ .sampled_type = sampled_type,
+ .pointer_type = pointer_type,
+ .image_type = image_type,
+ .count = desc.count,
+ });
+ if (profile.supported_spirv >= 0x00010400) {
+ interfaces.push_back(id);
+ }
+ ++binding;
+ }
+ if (info.uses_atomic_image_u32) {
+ image_u32 = TypePointer(spv::StorageClass::Image, U32[1]);
+ }
+}
+
+void EmitContext::DefineImages(const Info& info, u32& binding) {
+ images.reserve(info.image_descriptors.size());
+ for (const ImageDescriptor& desc : info.image_descriptors) {
+ if (desc.count != 1) {
+ throw NotImplementedException("Array of images");
+ }
+ const Id image_type{ImageType(*this, desc)};
+ const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)};
+ const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)};
+ Decorate(id, spv::Decoration::Binding, binding);
+ Decorate(id, spv::Decoration::DescriptorSet, 0U);
+ Name(id, NameOf(stage, desc, "img"));
+ images.push_back({
+ .id = id,
+ .image_type = image_type,
+ .count = desc.count,
+ });
+ if (profile.supported_spirv >= 0x00010400) {
+ interfaces.push_back(id);
+ }
+ ++binding;
+ }
+}
+
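+// Declares the built-in and generic inputs the program reads, including geometry passthrough
+// decorations and per-attribute interpolation qualifiers for fragment shaders.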
+void EmitContext::DefineInputs(const IR::Program& program) {
+ const Info& info{program.info};
+ const VaryingState loads{info.loads.mask | info.passthrough.mask};
+
+ if (info.uses_workgroup_id) {
+ workgroup_id = DefineInput(*this, U32[3], false, spv::BuiltIn::WorkgroupId);
+ }
+ if (info.uses_local_invocation_id) {
+ local_invocation_id = DefineInput(*this, U32[3], false, spv::BuiltIn::LocalInvocationId);
+ }
+ if (info.uses_invocation_id) {
+ invocation_id = DefineInput(*this, U32[1], false, spv::BuiltIn::InvocationId);
+ }
+ if (info.uses_sample_id) {
+ sample_id = DefineInput(*this, U32[1], false, spv::BuiltIn::SampleId);
+ }
+ if (info.uses_is_helper_invocation) {
+ is_helper_invocation = DefineInput(*this, U1, false, spv::BuiltIn::HelperInvocation);
+ }
+ if (info.uses_subgroup_mask) {
+ subgroup_mask_eq = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupEqMaskKHR);
+ subgroup_mask_lt = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupLtMaskKHR);
+ subgroup_mask_le = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupLeMaskKHR);
+ subgroup_mask_gt = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupGtMaskKHR);
+ subgroup_mask_ge = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupGeMaskKHR);
+ }
+ if (info.uses_subgroup_invocation_id || info.uses_subgroup_shuffles ||
+ (profile.warp_size_potentially_larger_than_guest &&
+ (info.uses_subgroup_vote || info.uses_subgroup_mask))) {
+ subgroup_local_invocation_id =
+ DefineInput(*this, U32[1], false, spv::BuiltIn::SubgroupLocalInvocationId);
+ }
+ if (info.uses_fswzadd) {
+ const Id f32_one{Const(1.0f)};
+ const Id f32_minus_one{Const(-1.0f)};
+ const Id f32_zero{Const(0.0f)};
+ fswzadd_lut_a = ConstantComposite(F32[4], f32_minus_one, f32_one, f32_minus_one, f32_zero);
+ fswzadd_lut_b =
+ ConstantComposite(F32[4], f32_minus_one, f32_minus_one, f32_one, f32_minus_one);
+ }
+ if (loads[IR::Attribute::PrimitiveId]) {
+ primitive_id = DefineInput(*this, U32[1], false, spv::BuiltIn::PrimitiveId);
+ }
+ if (loads.AnyComponent(IR::Attribute::PositionX)) {
+        const bool is_fragment{stage == Stage::Fragment};
+        const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::FragCoord : spv::BuiltIn::Position};
+ input_position = DefineInput(*this, F32[4], true, built_in);
+ if (profile.support_geometry_shader_passthrough) {
+ if (info.passthrough.AnyComponent(IR::Attribute::PositionX)) {
+ Decorate(input_position, spv::Decoration::PassthroughNV);
+ }
+ }
+ }
+ if (loads[IR::Attribute::InstanceId]) {
+ if (profile.support_vertex_instance_id) {
+ instance_id = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceId);
+ } else {
+ instance_index = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceIndex);
+ base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance);
+ }
+ }
+ if (loads[IR::Attribute::VertexId]) {
+ if (profile.support_vertex_instance_id) {
+ vertex_id = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexId);
+ } else {
+ vertex_index = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexIndex);
+ base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex);
+ }
+ }
+ if (loads[IR::Attribute::FrontFace]) {
+ front_face = DefineInput(*this, U1, true, spv::BuiltIn::FrontFacing);
+ }
+ if (loads[IR::Attribute::PointSpriteS] || loads[IR::Attribute::PointSpriteT]) {
+ point_coord = DefineInput(*this, F32[2], true, spv::BuiltIn::PointCoord);
+ }
+ if (loads[IR::Attribute::TessellationEvaluationPointU] ||
+ loads[IR::Attribute::TessellationEvaluationPointV]) {
+ tess_coord = DefineInput(*this, F32[3], false, spv::BuiltIn::TessCoord);
+ }
+ for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
+ const AttributeType input_type{runtime_info.generic_input_types[index]};
+ if (!runtime_info.previous_stage_stores.Generic(index)) {
+ continue;
+ }
+ if (!loads.Generic(index)) {
+ continue;
+ }
+ if (input_type == AttributeType::Disabled) {
+ continue;
+ }
+ const Id type{GetAttributeType(*this, input_type)};
+ const Id id{DefineInput(*this, type, true)};
+ Decorate(id, spv::Decoration::Location, static_cast<u32>(index));
+ Name(id, fmt::format("in_attr{}", index));
+ input_generics[index] = id;
+
+ if (info.passthrough.Generic(index) && profile.support_geometry_shader_passthrough) {
+ Decorate(id, spv::Decoration::PassthroughNV);
+ }
+ if (stage != Stage::Fragment) {
+ continue;
+ }
+ switch (info.interpolation[index]) {
+ case Interpolation::Smooth:
+ // Default
+ // Decorate(id, spv::Decoration::Smooth);
+ break;
+ case Interpolation::NoPerspective:
+ Decorate(id, spv::Decoration::NoPerspective);
+ break;
+ case Interpolation::Flat:
+ Decorate(id, spv::Decoration::Flat);
+ break;
+ }
+ }
+ if (stage == Stage::TessellationEval) {
+ for (size_t index = 0; index < info.uses_patches.size(); ++index) {
+ if (!info.uses_patches[index]) {
+ continue;
+ }
+ const Id id{DefineInput(*this, F32[4], false)};
+ Decorate(id, spv::Decoration::Patch);
+ Decorate(id, spv::Decoration::Location, static_cast<u32>(index));
+ patches[index] = id;
+ }
+ }
+}
+
+void EmitContext::DefineOutputs(const IR::Program& program) {
+ const Info& info{program.info};
+ const std::optional<u32> invocations{program.invocations};
+ if (info.stores.AnyComponent(IR::Attribute::PositionX) || stage == Stage::VertexB) {
+ output_position = DefineOutput(*this, F32[4], invocations, spv::BuiltIn::Position);
+ }
+ if (info.stores[IR::Attribute::PointSize] || runtime_info.fixed_state_point_size) {
+ if (stage == Stage::Fragment) {
+ throw NotImplementedException("Storing PointSize in fragment stage");
+ }
+ output_point_size = DefineOutput(*this, F32[1], invocations, spv::BuiltIn::PointSize);
+ }
+ if (info.stores.ClipDistances()) {
+ if (stage == Stage::Fragment) {
+ throw NotImplementedException("Storing ClipDistance in fragment stage");
+ }
+ const Id type{TypeArray(F32[1], Const(8U))};
+ clip_distances = DefineOutput(*this, type, invocations, spv::BuiltIn::ClipDistance);
+ }
+ if (info.stores[IR::Attribute::Layer] &&
+ (profile.support_viewport_index_layer_non_geometry || stage == Stage::Geometry)) {
+ if (stage == Stage::Fragment) {
+ throw NotImplementedException("Storing Layer in fragment stage");
+ }
+ layer = DefineOutput(*this, U32[1], invocations, spv::BuiltIn::Layer);
+ }
+ if (info.stores[IR::Attribute::ViewportIndex] &&
+ (profile.support_viewport_index_layer_non_geometry || stage == Stage::Geometry)) {
+ if (stage == Stage::Fragment) {
+ throw NotImplementedException("Storing ViewportIndex in fragment stage");
+ }
+ viewport_index = DefineOutput(*this, U32[1], invocations, spv::BuiltIn::ViewportIndex);
+ }
+ if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) {
+ viewport_mask = DefineOutput(*this, TypeArray(U32[1], Const(1u)), std::nullopt,
+ spv::BuiltIn::ViewportMaskNV);
+ }
+ for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
+ if (info.stores.Generic(index)) {
+ DefineGenericOutput(*this, index, invocations);
+ }
+ }
+ switch (stage) {
+ case Stage::TessellationControl:
+ if (info.stores_tess_level_outer) {
+ const Id type{TypeArray(F32[1], Const(4U))};
+ output_tess_level_outer =
+ DefineOutput(*this, type, std::nullopt, spv::BuiltIn::TessLevelOuter);
+ Decorate(output_tess_level_outer, spv::Decoration::Patch);
+ }
+ if (info.stores_tess_level_inner) {
+ const Id type{TypeArray(F32[1], Const(2U))};
+ output_tess_level_inner =
+ DefineOutput(*this, type, std::nullopt, spv::BuiltIn::TessLevelInner);
+ Decorate(output_tess_level_inner, spv::Decoration::Patch);
+ }
+ for (size_t index = 0; index < info.uses_patches.size(); ++index) {
+ if (!info.uses_patches[index]) {
+ continue;
+ }
+ const Id id{DefineOutput(*this, F32[4], std::nullopt)};
+ Decorate(id, spv::Decoration::Patch);
+ Decorate(id, spv::Decoration::Location, static_cast<u32>(index));
+ patches[index] = id;
+ }
+ break;
+ case Stage::Fragment:
+ for (u32 index = 0; index < 8; ++index) {
+ if (!info.stores_frag_color[index] && !profile.need_declared_frag_colors) {
+ continue;
+ }
+ frag_color[index] = DefineOutput(*this, F32[4], std::nullopt);
+ Decorate(frag_color[index], spv::Decoration::Location, index);
+ Name(frag_color[index], fmt::format("frag_color{}", index));
+ }
+ if (info.stores_frag_depth) {
+ frag_depth = DefineOutput(*this, F32[1], std::nullopt);
+ Decorate(frag_depth, spv::Decoration::BuiltIn, spv::BuiltIn::FragDepth);
+ }
+ if (info.stores_sample_mask) {
+ sample_mask = DefineOutput(*this, U32[1], std::nullopt);
+ Decorate(sample_mask, spv::Decoration::BuiltIn, spv::BuiltIn::SampleMask);
+ }
+ break;
+ default:
+ break;
+ }
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h
new file mode 100644
index 000000000..e277bc358
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_context.h
@@ -0,0 +1,307 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <string_view>
+
+#include <sirit/sirit.h>
+
+#include "shader_recompiler/backend/bindings.h"
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/profile.h"
+#include "shader_recompiler/runtime_info.h"
+#include "shader_recompiler/shader_info.h"
+
+namespace Shader::Backend::SPIRV {
+
+using Sirit::Id;
+
+class VectorTypes {
+public:
+ void Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name);
+
+ [[nodiscard]] Id operator[](size_t size) const noexcept {
+ return defs[size - 1];
+ }
+
+private:
+ std::array<Id, 4> defs{};
+};
+
+struct TextureDefinition {
+ Id id;
+ Id sampled_type;
+ Id pointer_type;
+ Id image_type;
+ u32 count;
+};
+
+struct TextureBufferDefinition {
+ Id id;
+ u32 count;
+};
+
+struct ImageBufferDefinition {
+ Id id;
+ Id image_type;
+ u32 count;
+};
+
+struct ImageDefinition {
+ Id id;
+ Id image_type;
+ u32 count;
+};
+
+struct UniformDefinitions {
+ Id U8{};
+ Id S8{};
+ Id U16{};
+ Id S16{};
+ Id U32{};
+ Id F32{};
+ Id U32x2{};
+ Id U32x4{};
+};
+
+struct StorageTypeDefinition {
+ Id array{};
+ Id element{};
+};
+
+struct StorageTypeDefinitions {
+ StorageTypeDefinition U8{};
+ StorageTypeDefinition S8{};
+ StorageTypeDefinition U16{};
+ StorageTypeDefinition S16{};
+ StorageTypeDefinition U32{};
+ StorageTypeDefinition U64{};
+ StorageTypeDefinition F32{};
+ StorageTypeDefinition U32x2{};
+ StorageTypeDefinition U32x4{};
+};
+
+struct StorageDefinitions {
+ Id U8{};
+ Id S8{};
+ Id U16{};
+ Id S16{};
+ Id U32{};
+ Id F32{};
+ Id U64{};
+ Id U32x2{};
+ Id U32x4{};
+};
+
+struct GenericElementInfo {
+ Id id{};
+ u32 first_element{};
+ u32 num_components{};
+};
+
+class EmitContext final : public Sirit::Module {
+public:
+ explicit EmitContext(const Profile& profile, const RuntimeInfo& runtime_info,
+ IR::Program& program, Bindings& binding);
+ ~EmitContext();
+
+ [[nodiscard]] Id Def(const IR::Value& value);
+
+ [[nodiscard]] Id BitOffset8(const IR::Value& offset);
+ [[nodiscard]] Id BitOffset16(const IR::Value& offset);
+
+ Id Const(u32 value) {
+ return Constant(U32[1], value);
+ }
+
+ Id Const(u32 element_1, u32 element_2) {
+ return ConstantComposite(U32[2], Const(element_1), Const(element_2));
+ }
+
+ Id Const(u32 element_1, u32 element_2, u32 element_3) {
+ return ConstantComposite(U32[3], Const(element_1), Const(element_2), Const(element_3));
+ }
+
+ Id Const(u32 element_1, u32 element_2, u32 element_3, u32 element_4) {
+ return ConstantComposite(U32[4], Const(element_1), Const(element_2), Const(element_3),
+ Const(element_4));
+ }
+
+ Id SConst(s32 value) {
+ return Constant(S32[1], value);
+ }
+
+ Id SConst(s32 element_1, s32 element_2) {
+ return ConstantComposite(S32[2], SConst(element_1), SConst(element_2));
+ }
+
+ Id SConst(s32 element_1, s32 element_2, s32 element_3) {
+ return ConstantComposite(S32[3], SConst(element_1), SConst(element_2), SConst(element_3));
+ }
+
+ Id SConst(s32 element_1, s32 element_2, s32 element_3, s32 element_4) {
+ return ConstantComposite(S32[4], SConst(element_1), SConst(element_2), SConst(element_3),
+ SConst(element_4));
+ }
+
+ Id Const(f32 value) {
+ return Constant(F32[1], value);
+ }
+
+ const Profile& profile;
+ const RuntimeInfo& runtime_info;
+ Stage stage{};
+
+ Id void_id{};
+ Id U1{};
+ Id U8{};
+ Id S8{};
+ Id U16{};
+ Id S16{};
+ Id U64{};
+ VectorTypes F32;
+ VectorTypes U32;
+ VectorTypes S32;
+ VectorTypes F16;
+ VectorTypes F64;
+
+ Id true_value{};
+ Id false_value{};
+ Id u32_zero_value{};
+ Id f32_zero_value{};
+
+ UniformDefinitions uniform_types;
+ StorageTypeDefinitions storage_types;
+
+ Id private_u32{};
+
+ Id shared_u8{};
+ Id shared_u16{};
+ Id shared_u32{};
+ Id shared_u64{};
+ Id shared_u32x2{};
+ Id shared_u32x4{};
+
+ Id input_f32{};
+ Id input_u32{};
+ Id input_s32{};
+
+ Id output_f32{};
+ Id output_u32{};
+
+ Id image_buffer_type{};
+ Id sampled_texture_buffer_type{};
+ Id image_u32{};
+
+ std::array<UniformDefinitions, Info::MAX_CBUFS> cbufs{};
+ std::array<StorageDefinitions, Info::MAX_SSBOS> ssbos{};
+ std::vector<TextureBufferDefinition> texture_buffers;
+ std::vector<ImageBufferDefinition> image_buffers;
+ std::vector<TextureDefinition> textures;
+ std::vector<ImageDefinition> images;
+
+ Id workgroup_id{};
+ Id local_invocation_id{};
+ Id invocation_id{};
+ Id sample_id{};
+ Id is_helper_invocation{};
+ Id subgroup_local_invocation_id{};
+ Id subgroup_mask_eq{};
+ Id subgroup_mask_lt{};
+ Id subgroup_mask_le{};
+ Id subgroup_mask_gt{};
+ Id subgroup_mask_ge{};
+ Id instance_id{};
+ Id instance_index{};
+ Id base_instance{};
+ Id vertex_id{};
+ Id vertex_index{};
+ Id base_vertex{};
+ Id front_face{};
+ Id point_coord{};
+ Id tess_coord{};
+ Id clip_distances{};
+ Id layer{};
+ Id viewport_index{};
+ Id viewport_mask{};
+ Id primitive_id{};
+
+ Id fswzadd_lut_a{};
+ Id fswzadd_lut_b{};
+
+ Id indexed_load_func{};
+ Id indexed_store_func{};
+
+ Id local_memory{};
+
+ Id shared_memory_u8{};
+ Id shared_memory_u16{};
+ Id shared_memory_u32{};
+ Id shared_memory_u64{};
+ Id shared_memory_u32x2{};
+ Id shared_memory_u32x4{};
+
+ Id shared_memory_u32_type{};
+
+ Id shared_store_u8_func{};
+ Id shared_store_u16_func{};
+ Id increment_cas_shared{};
+ Id increment_cas_ssbo{};
+ Id decrement_cas_shared{};
+ Id decrement_cas_ssbo{};
+ Id f32_add_cas{};
+ Id f16x2_add_cas{};
+ Id f16x2_min_cas{};
+ Id f16x2_max_cas{};
+ Id f32x2_add_cas{};
+ Id f32x2_min_cas{};
+ Id f32x2_max_cas{};
+
+ Id load_global_func_u32{};
+ Id load_global_func_u32x2{};
+ Id load_global_func_u32x4{};
+ Id write_global_func_u32{};
+ Id write_global_func_u32x2{};
+ Id write_global_func_u32x4{};
+
+ Id input_position{};
+ std::array<Id, 32> input_generics{};
+
+ Id output_point_size{};
+ Id output_position{};
+ std::array<std::array<GenericElementInfo, 4>, 32> output_generics{};
+
+ Id output_tess_level_outer{};
+ Id output_tess_level_inner{};
+ std::array<Id, 30> patches{};
+
+ std::array<Id, 8> frag_color{};
+ Id sample_mask{};
+ Id frag_depth{};
+
+ std::vector<Id> interfaces;
+
+private:
+ void DefineCommonTypes(const Info& info);
+ void DefineCommonConstants();
+ void DefineInterfaces(const IR::Program& program);
+ void DefineLocalMemory(const IR::Program& program);
+ void DefineSharedMemory(const IR::Program& program);
+ void DefineSharedMemoryFunctions(const IR::Program& program);
+ void DefineConstantBuffers(const Info& info, u32& binding);
+ void DefineStorageBuffers(const Info& info, u32& binding);
+ void DefineTextureBuffers(const Info& info, u32& binding);
+ void DefineImageBuffers(const Info& info, u32& binding);
+ void DefineTextures(const Info& info, u32& binding);
+ void DefineImages(const Info& info, u32& binding);
+ void DefineAttributeMemAccess(const Info& info);
+ void DefineGlobalMemoryFunctions(const Info& info);
+
+ void DefineInputs(const IR::Program& program);
+ void DefineOutputs(const IR::Program& program);
+};
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
new file mode 100644
index 000000000..d7a86e270
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -0,0 +1,541 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <span>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include "common/settings.h"
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/program.h"
+
+namespace Shader::Backend::SPIRV {
+namespace {
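+// Compile-time traits exposing the return type, argument count, and per-argument
+// types of an Emit* function pointer.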
+template <class Func>
+struct FuncTraits {};
+
+template <class ReturnType_, class... Args>
+struct FuncTraits<ReturnType_ (*)(Args...)> {
+ using ReturnType = ReturnType_;
+
+ static constexpr size_t NUM_ARGS = sizeof...(Args);
+
+ template <size_t I>
+ using ArgType = std::tuple_element_t<I, std::tuple<Args...>>;
+};
+
+template <auto func, typename... Args>
+void SetDefinition(EmitContext& ctx, IR::Inst* inst, Args... args) {
+ inst->SetDefinition<Id>(func(ctx, std::forward<Args>(args)...));
+}
+
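+// Unwraps an IR::Value into the concrete argument type an Emit* handler expects
+// (SPIR-V Id, raw IR value, immediate u32, attribute, patch, or register).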
+template <typename ArgType>
+ArgType Arg(EmitContext& ctx, const IR::Value& arg) {
+ if constexpr (std::is_same_v<ArgType, Id>) {
+ return ctx.Def(arg);
+ } else if constexpr (std::is_same_v<ArgType, const IR::Value&>) {
+ return arg;
+ } else if constexpr (std::is_same_v<ArgType, u32>) {
+ return arg.U32();
+ } else if constexpr (std::is_same_v<ArgType, IR::Attribute>) {
+ return arg.Attribute();
+ } else if constexpr (std::is_same_v<ArgType, IR::Patch>) {
+ return arg.Patch();
+ } else if constexpr (std::is_same_v<ArgType, IR::Reg>) {
+ return arg.Reg();
+ }
+}
+
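+// Calls an Emit* handler, converting each IR argument through Arg(). Handlers that
+// also take the IR::Inst* itself shift the remaining argument indices by one.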
+template <auto func, bool is_first_arg_inst, size_t... I>
+void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) {
+ using Traits = FuncTraits<decltype(func)>;
+ if constexpr (std::is_same_v<typename Traits::ReturnType, Id>) {
+ if constexpr (is_first_arg_inst) {
+ SetDefinition<func>(
+ ctx, inst, inst,
+ Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...);
+ } else {
+ SetDefinition<func>(
+ ctx, inst, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...);
+ }
+ } else {
+ if constexpr (is_first_arg_inst) {
+ func(ctx, inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...);
+ } else {
+ func(ctx, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...);
+ }
+ }
+}
+
+template <auto func>
+void Invoke(EmitContext& ctx, IR::Inst* inst) {
+ using Traits = FuncTraits<decltype(func)>;
+ static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments");
+ if constexpr (Traits::NUM_ARGS == 1) {
+ Invoke<func, false>(ctx, inst, std::make_index_sequence<0>{});
+ } else {
+ using FirstArgType = typename Traits::template ArgType<1>;
+ static constexpr bool is_first_arg_inst = std::is_same_v<FirstArgType, IR::Inst*>;
+ using Indices = std::make_index_sequence<Traits::NUM_ARGS - (is_first_arg_inst ? 2 : 1)>;
+ Invoke<func, is_first_arg_inst>(ctx, inst, Indices{});
+ }
+}
+
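+// Dispatches a single IR instruction to its Emit* handler through the opcode table.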
+void EmitInst(EmitContext& ctx, IR::Inst* inst) {
+ switch (inst->GetOpcode()) {
+#define OPCODE(name, result_type, ...) \
+ case IR::Opcode::name: \
+ return Invoke<&Emit##name>(ctx, inst);
+#include "shader_recompiler/frontend/ir/opcodes.inc"
+#undef OPCODE
+ }
+ throw LogicError("Invalid opcode {}", inst->GetOpcode());
+}
+
+Id TypeId(const EmitContext& ctx, IR::Type type) {
+ switch (type) {
+ case IR::Type::U1:
+ return ctx.U1;
+ case IR::Type::U32:
+ return ctx.U32[1];
+ default:
+ throw NotImplementedException("Phi node type {}", type);
+ }
+}
+
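+// Walks the structured syntax list in order, adding a label per block and emitting the
+// branch, selection-merge, and loop-merge instructions that encode control flow. Repeat
+// conditions are optionally guarded by a decrementing safety counter to bound runaway loops.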
+void Traverse(EmitContext& ctx, IR::Program& program) {
+ IR::Block* current_block{};
+ for (const IR::AbstractSyntaxNode& node : program.syntax_list) {
+ switch (node.type) {
+ case IR::AbstractSyntaxNode::Type::Block: {
+ const Id label{node.data.block->Definition<Id>()};
+ if (current_block) {
+ ctx.OpBranch(label);
+ }
+ current_block = node.data.block;
+ ctx.AddLabel(label);
+ for (IR::Inst& inst : node.data.block->Instructions()) {
+ EmitInst(ctx, &inst);
+ }
+ break;
+ }
+ case IR::AbstractSyntaxNode::Type::If: {
+ const Id if_label{node.data.if_node.body->Definition<Id>()};
+ const Id endif_label{node.data.if_node.merge->Definition<Id>()};
+ ctx.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone);
+ ctx.OpBranchConditional(ctx.Def(node.data.if_node.cond), if_label, endif_label);
+ break;
+ }
+ case IR::AbstractSyntaxNode::Type::Loop: {
+ const Id body_label{node.data.loop.body->Definition<Id>()};
+ const Id continue_label{node.data.loop.continue_block->Definition<Id>()};
+ const Id endloop_label{node.data.loop.merge->Definition<Id>()};
+
+ ctx.OpLoopMerge(endloop_label, continue_label, spv::LoopControlMask::MaskNone);
+ ctx.OpBranch(body_label);
+ break;
+ }
+ case IR::AbstractSyntaxNode::Type::Break: {
+ const Id break_label{node.data.break_node.merge->Definition<Id>()};
+ const Id skip_label{node.data.break_node.skip->Definition<Id>()};
+ ctx.OpBranchConditional(ctx.Def(node.data.break_node.cond), break_label, skip_label);
+ break;
+ }
+ case IR::AbstractSyntaxNode::Type::EndIf:
+ if (current_block) {
+ ctx.OpBranch(node.data.end_if.merge->Definition<Id>());
+ }
+ break;
+ case IR::AbstractSyntaxNode::Type::Repeat: {
+ Id cond{ctx.Def(node.data.repeat.cond)};
+ if (!Settings::values.disable_shader_loop_safety_checks) {
+ const Id pointer_type{ctx.TypePointer(spv::StorageClass::Private, ctx.U32[1])};
+ const Id safety_counter{ctx.AddGlobalVariable(
+ pointer_type, spv::StorageClass::Private, ctx.Const(0x2000u))};
+ if (ctx.profile.supported_spirv >= 0x00010400) {
+ ctx.interfaces.push_back(safety_counter);
+ }
+ const Id old_counter{ctx.OpLoad(ctx.U32[1], safety_counter)};
+ const Id new_counter{ctx.OpISub(ctx.U32[1], old_counter, ctx.Const(1u))};
+ ctx.OpStore(safety_counter, new_counter);
+
+ const Id safety_cond{
+ ctx.OpSGreaterThanEqual(ctx.U1, new_counter, ctx.u32_zero_value)};
+ cond = ctx.OpLogicalAnd(ctx.U1, cond, safety_cond);
+ }
+ const Id loop_header_label{node.data.repeat.loop_header->Definition<Id>()};
+ const Id merge_label{node.data.repeat.merge->Definition<Id>()};
+ ctx.OpBranchConditional(cond, loop_header_label, merge_label);
+ break;
+ }
+ case IR::AbstractSyntaxNode::Type::Return:
+ ctx.OpReturn();
+ break;
+ case IR::AbstractSyntaxNode::Type::Unreachable:
+ ctx.OpUnreachable();
+ break;
+ }
+ if (node.type != IR::AbstractSyntaxNode::Type::Block) {
+ current_block = nullptr;
+ }
+ }
+}
+
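+// Emits the void main() function: every block receives a forward-declared label, then
+// Traverse fills in the body.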
+Id DefineMain(EmitContext& ctx, IR::Program& program) {
+ const Id void_function{ctx.TypeFunction(ctx.void_id)};
+ const Id main{ctx.OpFunction(ctx.void_id, spv::FunctionControlMask::MaskNone, void_function)};
+ for (IR::Block* const block : program.blocks) {
+ block->SetDefinition(ctx.OpLabel());
+ }
+ Traverse(ctx, program);
+ ctx.OpFunctionEnd();
+ return main;
+}
+
+spv::ExecutionMode ExecutionMode(TessPrimitive primitive) {
+ switch (primitive) {
+ case TessPrimitive::Isolines:
+ return spv::ExecutionMode::Isolines;
+ case TessPrimitive::Triangles:
+ return spv::ExecutionMode::Triangles;
+ case TessPrimitive::Quads:
+ return spv::ExecutionMode::Quads;
+ }
+ throw InvalidArgument("Tessellation primitive {}", primitive);
+}
+
+spv::ExecutionMode ExecutionMode(TessSpacing spacing) {
+ switch (spacing) {
+ case TessSpacing::Equal:
+ return spv::ExecutionMode::SpacingEqual;
+ case TessSpacing::FractionalOdd:
+ return spv::ExecutionMode::SpacingFractionalOdd;
+ case TessSpacing::FractionalEven:
+ return spv::ExecutionMode::SpacingFractionalEven;
+ }
+ throw InvalidArgument("Tessellation spacing {}", spacing);
+}
+
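+// Declares OpEntryPoint together with the stage-dependent execution modes and capabilities.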
+void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
+ const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size());
+ spv::ExecutionModel execution_model{};
+ switch (program.stage) {
+ case Stage::Compute: {
+ const std::array<u32, 3> workgroup_size{program.workgroup_size};
+ execution_model = spv::ExecutionModel::GLCompute;
+ ctx.AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0],
+ workgroup_size[1], workgroup_size[2]);
+ break;
+ }
+ case Stage::VertexB:
+ execution_model = spv::ExecutionModel::Vertex;
+ break;
+ case Stage::TessellationControl:
+ execution_model = spv::ExecutionModel::TessellationControl;
+ ctx.AddCapability(spv::Capability::Tessellation);
+ ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices, program.invocations);
+ break;
+ case Stage::TessellationEval:
+ execution_model = spv::ExecutionModel::TessellationEvaluation;
+ ctx.AddCapability(spv::Capability::Tessellation);
+ ctx.AddExecutionMode(main, ExecutionMode(ctx.runtime_info.tess_primitive));
+ ctx.AddExecutionMode(main, ExecutionMode(ctx.runtime_info.tess_spacing));
+ ctx.AddExecutionMode(main, ctx.runtime_info.tess_clockwise
+ ? spv::ExecutionMode::VertexOrderCw
+ : spv::ExecutionMode::VertexOrderCcw);
+ break;
+ case Stage::Geometry:
+ execution_model = spv::ExecutionModel::Geometry;
+ ctx.AddCapability(spv::Capability::Geometry);
+ ctx.AddCapability(spv::Capability::GeometryStreams);
+ switch (ctx.runtime_info.input_topology) {
+ case InputTopology::Points:
+ ctx.AddExecutionMode(main, spv::ExecutionMode::InputPoints);
+ break;
+ case InputTopology::Lines:
+ ctx.AddExecutionMode(main, spv::ExecutionMode::InputLines);
+ break;
+ case InputTopology::LinesAdjacency:
+ ctx.AddExecutionMode(main, spv::ExecutionMode::InputLinesAdjacency);
+ break;
+ case InputTopology::Triangles:
+ ctx.AddExecutionMode(main, spv::ExecutionMode::Triangles);
+ break;
+ case InputTopology::TrianglesAdjacency:
+ ctx.AddExecutionMode(main, spv::ExecutionMode::InputTrianglesAdjacency);
+ break;
+ }
+ switch (program.output_topology) {
+ case OutputTopology::PointList:
+ ctx.AddExecutionMode(main, spv::ExecutionMode::OutputPoints);
+ break;
+ case OutputTopology::LineStrip:
+ ctx.AddExecutionMode(main, spv::ExecutionMode::OutputLineStrip);
+ break;
+ case OutputTopology::TriangleStrip:
+ ctx.AddExecutionMode(main, spv::ExecutionMode::OutputTriangleStrip);
+ break;
+ }
+ if (program.info.stores[IR::Attribute::PointSize]) {
+ ctx.AddCapability(spv::Capability::GeometryPointSize);
+ }
+ ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices, program.output_vertices);
+ ctx.AddExecutionMode(main, spv::ExecutionMode::Invocations, program.invocations);
+ if (program.is_geometry_passthrough) {
+ if (ctx.profile.support_geometry_shader_passthrough) {
+ ctx.AddExtension("SPV_NV_geometry_shader_passthrough");
+ ctx.AddCapability(spv::Capability::GeometryShaderPassthroughNV);
+ } else {
+                LOG_WARNING(Shader_SPIRV, "Geometry shader passthrough used without host support");
+ }
+ }
+ break;
+ case Stage::Fragment:
+ execution_model = spv::ExecutionModel::Fragment;
+ if (ctx.profile.lower_left_origin_mode) {
+ ctx.AddExecutionMode(main, spv::ExecutionMode::OriginLowerLeft);
+ } else {
+ ctx.AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft);
+ }
+ if (program.info.stores_frag_depth) {
+ ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing);
+ }
+ if (ctx.runtime_info.force_early_z) {
+ ctx.AddExecutionMode(main, spv::ExecutionMode::EarlyFragmentTests);
+ }
+ break;
+ default:
+ throw NotImplementedException("Stage {}", program.stage);
+ }
+ ctx.AddEntryPoint(execution_model, main, "main", interfaces);
+}
+
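+// Requests the fp32/fp16 denormal behavior (flush or preserve) recorded in the shader
+// info, when the host profile supports the corresponding float controls.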
+void SetupDenormControl(const Profile& profile, const IR::Program& program, EmitContext& ctx,
+ Id main_func) {
+ const Info& info{program.info};
+ if (info.uses_fp32_denorms_flush && info.uses_fp32_denorms_preserve) {
+ LOG_DEBUG(Shader_SPIRV, "Fp32 denorm flush and preserve on the same shader");
+ } else if (info.uses_fp32_denorms_flush) {
+ if (profile.support_fp32_denorm_flush) {
+ ctx.AddCapability(spv::Capability::DenormFlushToZero);
+ ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormFlushToZero, 32U);
+ } else {
+ // Drivers will most likely flush denorms by default, no need to warn
+ }
+ } else if (info.uses_fp32_denorms_preserve) {
+ if (profile.support_fp32_denorm_preserve) {
+ ctx.AddCapability(spv::Capability::DenormPreserve);
+ ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 32U);
+ } else {
+ LOG_DEBUG(Shader_SPIRV, "Fp32 denorm preserve used in shader without host support");
+ }
+ }
+ if (!profile.support_separate_denorm_behavior || profile.has_broken_fp16_float_controls) {
+ // No separate denorm behavior
+ return;
+ }
+ if (info.uses_fp16_denorms_flush && info.uses_fp16_denorms_preserve) {
+ LOG_DEBUG(Shader_SPIRV, "Fp16 denorm flush and preserve on the same shader");
+ } else if (info.uses_fp16_denorms_flush) {
+ if (profile.support_fp16_denorm_flush) {
+ ctx.AddCapability(spv::Capability::DenormFlushToZero);
+ ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormFlushToZero, 16U);
+ } else {
+ // Same as fp32, no need to warn as most drivers will flush by default
+ }
+ } else if (info.uses_fp16_denorms_preserve) {
+ if (profile.support_fp16_denorm_preserve) {
+ ctx.AddCapability(spv::Capability::DenormPreserve);
+ ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 16U);
+ } else {
+ LOG_DEBUG(Shader_SPIRV, "Fp16 denorm preserve used in shader without host support");
+ }
+ }
+}
+
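+// Requests SignedZeroInfNanPreserve where the host supports it: unconditionally for
+// fp32, and for fp16/fp64 only when the shader uses those widths.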
+void SetupSignedNanCapabilities(const Profile& profile, const IR::Program& program,
+ EmitContext& ctx, Id main_func) {
+ if (profile.has_broken_fp16_float_controls && program.info.uses_fp16) {
+ return;
+ }
+ if (program.info.uses_fp16 && profile.support_fp16_signed_zero_nan_preserve) {
+ ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve);
+ ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 16U);
+ }
+ if (profile.support_fp32_signed_zero_nan_preserve) {
+ ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve);
+ ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 32U);
+ }
+ if (program.info.uses_fp64 && profile.support_fp64_signed_zero_nan_preserve) {
+ ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve);
+ ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 64U);
+ }
+}
+
+void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ctx) {
+ if (info.uses_sampled_1d) {
+ ctx.AddCapability(spv::Capability::Sampled1D);
+ }
+ if (info.uses_sparse_residency) {
+ ctx.AddCapability(spv::Capability::SparseResidency);
+ }
+ if (info.uses_demote_to_helper_invocation && profile.support_demote_to_helper_invocation) {
+ ctx.AddExtension("SPV_EXT_demote_to_helper_invocation");
+ ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT);
+ }
+ if (info.stores[IR::Attribute::ViewportIndex]) {
+ ctx.AddCapability(spv::Capability::MultiViewport);
+ }
+ if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) {
+ ctx.AddExtension("SPV_NV_viewport_array2");
+ ctx.AddCapability(spv::Capability::ShaderViewportMaskNV);
+ }
+ if (info.stores[IR::Attribute::Layer] || info.stores[IR::Attribute::ViewportIndex]) {
+ if (profile.support_viewport_index_layer_non_geometry && ctx.stage != Stage::Geometry) {
+ ctx.AddExtension("SPV_EXT_shader_viewport_index_layer");
+ ctx.AddCapability(spv::Capability::ShaderViewportIndexLayerEXT);
+ }
+ }
+ if (!profile.support_vertex_instance_id &&
+ (info.loads[IR::Attribute::InstanceId] || info.loads[IR::Attribute::VertexId])) {
+ ctx.AddExtension("SPV_KHR_shader_draw_parameters");
+ ctx.AddCapability(spv::Capability::DrawParameters);
+ }
+ if ((info.uses_subgroup_vote || info.uses_subgroup_invocation_id ||
+ info.uses_subgroup_shuffles) &&
+ profile.support_vote) {
+ ctx.AddExtension("SPV_KHR_shader_ballot");
+ ctx.AddCapability(spv::Capability::SubgroupBallotKHR);
+ if (!profile.warp_size_potentially_larger_than_guest) {
+            // Vote ops are only used when not taking the long path
+ ctx.AddExtension("SPV_KHR_subgroup_vote");
+ ctx.AddCapability(spv::Capability::SubgroupVoteKHR);
+ }
+ }
+ if (info.uses_int64_bit_atomics && profile.support_int64_atomics) {
+ ctx.AddCapability(spv::Capability::Int64Atomics);
+ }
+ if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) {
+ ctx.AddCapability(spv::Capability::StorageImageReadWithoutFormat);
+ }
+ if (info.uses_typeless_image_writes) {
+ ctx.AddCapability(spv::Capability::StorageImageWriteWithoutFormat);
+ }
+ if (info.uses_image_buffers) {
+ ctx.AddCapability(spv::Capability::ImageBuffer);
+ }
+ if (info.uses_sample_id) {
+ ctx.AddCapability(spv::Capability::SampleRateShading);
+ }
+ if (!ctx.runtime_info.xfb_varyings.empty()) {
+ ctx.AddCapability(spv::Capability::TransformFeedback);
+ }
+ if (info.uses_derivatives) {
+ ctx.AddCapability(spv::Capability::DerivativeControl);
+ }
+ // TODO: Track this usage
+ ctx.AddCapability(spv::Capability::ImageGatherExtended);
+ ctx.AddCapability(spv::Capability::ImageQuery);
+ ctx.AddCapability(spv::Capability::SampledBuffer);
+}
+
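+// Resolves deferred phi operands by walking phi instructions in block order; the
+// callback returns the definition for each requested operand index.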
+void PatchPhiNodes(IR::Program& program, EmitContext& ctx) {
+ auto inst{program.blocks.front()->begin()};
+ size_t block_index{0};
+ ctx.PatchDeferredPhi([&](size_t phi_arg) {
+ if (phi_arg == 0) {
+ ++inst;
+ if (inst == program.blocks[block_index]->end() ||
+ inst->GetOpcode() != IR::Opcode::Phi) {
+ do {
+ ++block_index;
+ inst = program.blocks[block_index]->begin();
+ } while (inst->GetOpcode() != IR::Opcode::Phi);
+ }
+ }
+ return ctx.Def(inst->Arg(phi_arg));
+ });
+}
+} // Anonymous namespace
+
+std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info,
+ IR::Program& program, Bindings& bindings) {
+ EmitContext ctx{profile, runtime_info, program, bindings};
+ const Id main{DefineMain(ctx, program)};
+ DefineEntryPoint(program, ctx, main);
+ if (profile.support_float_controls) {
+ ctx.AddExtension("SPV_KHR_float_controls");
+ SetupDenormControl(profile, program, ctx, main);
+ SetupSignedNanCapabilities(profile, program, ctx, main);
+ }
+ SetupCapabilities(profile, program.info, ctx);
+ PatchPhiNodes(program, ctx);
+ return ctx.Assemble();
+}
+
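+// Phi operands may reference values that are not yet defined, so the phi is emitted in
+// deferred form and later resolved by PatchPhiNodes.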
+Id EmitPhi(EmitContext& ctx, IR::Inst* inst) {
+ const size_t num_args{inst->NumArgs()};
+ boost::container::small_vector<Id, 32> blocks;
+ blocks.reserve(num_args);
+ for (size_t index = 0; index < num_args; ++index) {
+ blocks.push_back(inst->PhiBlock(index)->Definition<Id>());
+ }
+ // The type of a phi instruction is stored in its flags
+ const Id result_type{TypeId(ctx, inst->Flags<IR::Type>())};
+ return ctx.DeferredOpPhi(result_type, std::span(blocks.data(), blocks.size()));
+}
+
+void EmitVoid(EmitContext&) {}
+
+Id EmitIdentity(EmitContext& ctx, const IR::Value& value) {
+ const Id id{ctx.Def(value)};
+ if (!Sirit::ValidId(id)) {
+ throw NotImplementedException("Forward identity declaration");
+ }
+ return id;
+}
+
+Id EmitConditionRef(EmitContext& ctx, const IR::Value& value) {
+ const Id id{ctx.Def(value)};
+ if (!Sirit::ValidId(id)) {
+ throw NotImplementedException("Forward identity declaration");
+ }
+ return id;
+}
+
+void EmitReference(EmitContext&) {}
+
+void EmitPhiMove(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitGetZeroFromOp(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitGetSignFromOp(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitGetCarryFromOp(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitGetOverflowFromOp(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitGetSparseFromOp(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitGetInBoundsFromOp(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
new file mode 100644
index 000000000..db0c935fe
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -0,0 +1,27 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <vector>
+
+#include <sirit/sirit.h>
+
+#include "common/common_types.h"
+#include "shader_recompiler/backend/bindings.h"
+#include "shader_recompiler/backend/spirv/emit_context.h"
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/profile.h"
+
+namespace Shader::Backend::SPIRV {
+
+[[nodiscard]] std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info,
+ IR::Program& program, Bindings& bindings);
+
+[[nodiscard]] inline std::vector<u32> EmitSPIRV(const Profile& profile, IR::Program& program) {
+ Bindings binding;
+ return EmitSPIRV(profile, {}, program, binding);
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
new file mode 100644
index 000000000..9af8bb9e1
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
@@ -0,0 +1,448 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
+
+namespace Shader::Backend::SPIRV {
+namespace {
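+// Builds a pointer to a 32-bit word of workgroup shared memory from a byte offset.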
+Id SharedPointer(EmitContext& ctx, Id offset, u32 index_offset = 0) {
+ const Id shift_id{ctx.Const(2U)};
+ Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
+ if (index_offset > 0) {
+ index = ctx.OpIAdd(ctx.U32[1], index, ctx.Const(index_offset));
+ }
+ return ctx.profile.support_explicit_workgroup_layout
+ ? ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index)
+ : ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index);
+}
+
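+// Converts a byte offset into an element index for a storage buffer access, folding
+// the division when the offset is an immediate.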
+Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element_size) {
+ if (offset.IsImmediate()) {
+ const u32 imm_offset{static_cast<u32>(offset.U32() / element_size)};
+ return ctx.Const(imm_offset);
+ }
+ const u32 shift{static_cast<u32>(std::countr_zero(element_size))};
+ const Id index{ctx.Def(offset)};
+ if (shift == 0) {
+ return index;
+ }
+ const Id shift_id{ctx.Const(shift)};
+ return ctx.OpShiftRightLogical(ctx.U32[1], index, shift_id);
+}
+
+Id StoragePointer(EmitContext& ctx, const StorageTypeDefinition& type_def,
+ Id StorageDefinitions::*member_ptr, const IR::Value& binding,
+ const IR::Value& offset, size_t element_size) {
+ if (!binding.IsImmediate()) {
+ throw NotImplementedException("Dynamic storage buffer indexing");
+ }
+ const Id ssbo{ctx.ssbos[binding.U32()].*member_ptr};
+ const Id index{StorageIndex(ctx, offset, element_size)};
+ return ctx.OpAccessChain(type_def.element, ssbo, ctx.u32_zero_value, index);
+}
+
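+// Memory scope and semantics shared by every atomic emitted here: device scope with
+// relaxed (None) semantics.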
+std::pair<Id, Id> AtomicArgs(EmitContext& ctx) {
+ const Id scope{ctx.Const(static_cast<u32>(spv::Scope::Device))};
+ const Id semantics{ctx.u32_zero_value};
+ return {scope, semantics};
+}
+
+Id SharedAtomicU32(EmitContext& ctx, Id offset, Id value,
+ Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
+ const Id pointer{SharedPointer(ctx, offset)};
+ const auto [scope, semantics]{AtomicArgs(ctx)};
+ return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value);
+}
+
+Id StorageAtomicU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value,
+ Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
+ const Id pointer{StoragePointer(ctx, ctx.storage_types.U32, &StorageDefinitions::U32, binding,
+ offset, sizeof(u32))};
+ const auto [scope, semantics]{AtomicArgs(ctx)};
+ return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value);
+}
+
+Id StorageAtomicU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value,
+ Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id),
+ Id (Sirit::Module::*non_atomic_func)(Id, Id, Id)) {
+ if (ctx.profile.support_int64_atomics) {
+ const Id pointer{StoragePointer(ctx, ctx.storage_types.U64, &StorageDefinitions::U64,
+ binding, offset, sizeof(u64))};
+ const auto [scope, semantics]{AtomicArgs(ctx)};
+ return (ctx.*atomic_func)(ctx.U64, pointer, scope, semantics, value);
+ }
+    LOG_ERROR(Shader_SPIRV, "Int64 atomics not supported, falling back to non-atomic");
+ const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2,
+ binding, offset, sizeof(u32[2]))};
+ const Id original_value{ctx.OpBitcast(ctx.U64, ctx.OpLoad(ctx.U32[2], pointer))};
+ const Id result{(ctx.*non_atomic_func)(ctx.U64, value, original_value)};
+ ctx.OpStore(pointer, ctx.OpBitcast(ctx.U32[2], result));
+ return original_value;
+}
+} // Anonymous namespace
+
+Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id offset, Id value) {
+ return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicIAdd);
+}
+
+Id EmitSharedAtomicSMin32(EmitContext& ctx, Id offset, Id value) {
+ return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicSMin);
+}
+
+Id EmitSharedAtomicUMin32(EmitContext& ctx, Id offset, Id value) {
+ return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicUMin);
+}
+
+Id EmitSharedAtomicSMax32(EmitContext& ctx, Id offset, Id value) {
+ return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicSMax);
+}
+
+Id EmitSharedAtomicUMax32(EmitContext& ctx, Id offset, Id value) {
+ return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicUMax);
+}
+
+Id EmitSharedAtomicInc32(EmitContext& ctx, Id offset, Id value) {
+ const Id shift_id{ctx.Const(2U)};
+ const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
+ return ctx.OpFunctionCall(ctx.U32[1], ctx.increment_cas_shared, index, value);
+}
+
+Id EmitSharedAtomicDec32(EmitContext& ctx, Id offset, Id value) {
+ const Id shift_id{ctx.Const(2U)};
+ const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
+ return ctx.OpFunctionCall(ctx.U32[1], ctx.decrement_cas_shared, index, value);
+}
+
+Id EmitSharedAtomicAnd32(EmitContext& ctx, Id offset, Id value) {
+ return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicAnd);
+}
+
+Id EmitSharedAtomicOr32(EmitContext& ctx, Id offset, Id value) {
+ return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicOr);
+}
+
+Id EmitSharedAtomicXor32(EmitContext& ctx, Id offset, Id value) {
+ return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicXor);
+}
+
+Id EmitSharedAtomicExchange32(EmitContext& ctx, Id offset, Id value) {
+ return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicExchange);
+}
+
+Id EmitSharedAtomicExchange64(EmitContext& ctx, Id offset, Id value) {
+ if (ctx.profile.support_int64_atomics && ctx.profile.support_explicit_workgroup_layout) {
+ const Id shift_id{ctx.Const(3U)};
+ const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
+ const Id pointer{
+ ctx.OpAccessChain(ctx.shared_u64, ctx.shared_memory_u64, ctx.u32_zero_value, index)};
+ const auto [scope, semantics]{AtomicArgs(ctx)};
+ return ctx.OpAtomicExchange(ctx.U64, pointer, scope, semantics, value);
+ }
+    LOG_ERROR(Shader_SPIRV, "Int64 atomics not supported, falling back to non-atomic");
+ const Id pointer_1{SharedPointer(ctx, offset, 0)};
+ const Id pointer_2{SharedPointer(ctx, offset, 1)};
+ const Id value_1{ctx.OpLoad(ctx.U32[1], pointer_1)};
+ const Id value_2{ctx.OpLoad(ctx.U32[1], pointer_2)};
+ const Id new_vector{ctx.OpBitcast(ctx.U32[2], value)};
+ ctx.OpStore(pointer_1, ctx.OpCompositeExtract(ctx.U32[1], new_vector, 0U));
+ ctx.OpStore(pointer_2, ctx.OpCompositeExtract(ctx.U32[1], new_vector, 1U));
+ return ctx.OpBitcast(ctx.U64, ctx.OpCompositeConstruct(ctx.U32[2], value_1, value_2));
+}
+
+Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicIAdd);
+}
+
+Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicSMin);
+}
+
+Id EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicUMin);
+}
+
+Id EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicSMax);
+}
+
+Id EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicUMax);
+}
+
+Id EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ const Id ssbo{ctx.ssbos[binding.U32()].U32};
+ const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
+ return ctx.OpFunctionCall(ctx.U32[1], ctx.increment_cas_ssbo, base_index, value, ssbo);
+}
+
+Id EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ const Id ssbo{ctx.ssbos[binding.U32()].U32};
+ const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
+ return ctx.OpFunctionCall(ctx.U32[1], ctx.decrement_cas_ssbo, base_index, value, ssbo);
+}
+
+Id EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicAnd);
+}
+
+Id EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicOr);
+}
+
+Id EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicXor);
+}
+
+Id EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicExchange);
+}
+
+Id EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicIAdd,
+ &Sirit::Module::OpIAdd);
+}
+
+Id EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicSMin,
+ &Sirit::Module::OpSMin);
+}
+
+Id EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicUMin,
+ &Sirit::Module::OpUMin);
+}
+
+Id EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicSMax,
+ &Sirit::Module::OpSMax);
+}
+
+Id EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicUMax,
+ &Sirit::Module::OpUMax);
+}
+
+Id EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicAnd,
+ &Sirit::Module::OpBitwiseAnd);
+}
+
+Id EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicOr,
+ &Sirit::Module::OpBitwiseOr);
+}
+
+Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicXor,
+ &Sirit::Module::OpBitwiseXor);
+}
+
+Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ if (ctx.profile.support_int64_atomics) {
+ const Id pointer{StoragePointer(ctx, ctx.storage_types.U64, &StorageDefinitions::U64,
+ binding, offset, sizeof(u64))};
+ const auto [scope, semantics]{AtomicArgs(ctx)};
+ return ctx.OpAtomicExchange(ctx.U64, pointer, scope, semantics, value);
+ }
+    LOG_ERROR(Shader_SPIRV, "Int64 atomics not supported, falling back to non-atomic");
+ const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2,
+ binding, offset, sizeof(u32[2]))};
+ const Id original{ctx.OpBitcast(ctx.U64, ctx.OpLoad(ctx.U32[2], pointer))};
+ ctx.OpStore(pointer, value);
+ return original;
+}
+
+Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ const Id ssbo{ctx.ssbos[binding.U32()].U32};
+ const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
+ return ctx.OpFunctionCall(ctx.F32[1], ctx.f32_add_cas, base_index, value, ssbo);
+}
+
+Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ const Id ssbo{ctx.ssbos[binding.U32()].U32};
+ const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
+ const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_add_cas, base_index, value, ssbo)};
+ return ctx.OpBitcast(ctx.U32[1], result);
+}
+
+Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ const Id ssbo{ctx.ssbos[binding.U32()].U32};
+ const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
+ const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_add_cas, base_index, value, ssbo)};
+ return ctx.OpPackHalf2x16(ctx.U32[1], result);
+}
+
+Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ const Id ssbo{ctx.ssbos[binding.U32()].U32};
+ const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
+ const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_min_cas, base_index, value, ssbo)};
+ return ctx.OpBitcast(ctx.U32[1], result);
+}
+
+Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ const Id ssbo{ctx.ssbos[binding.U32()].U32};
+ const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
+ const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_min_cas, base_index, value, ssbo)};
+ return ctx.OpPackHalf2x16(ctx.U32[1], result);
+}
+
+Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ const Id ssbo{ctx.ssbos[binding.U32()].U32};
+ const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
+ const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_max_cas, base_index, value, ssbo)};
+ return ctx.OpBitcast(ctx.U32[1], result);
+}
+
+Id EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ const Id ssbo{ctx.ssbos[binding.U32()].U32};
+ const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
+ const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_max_cas, base_index, value, ssbo)};
+ return ctx.OpPackHalf2x16(ctx.U32[1], result);
+}
+
+Id EmitGlobalAtomicIAdd32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicSMin32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicUMin32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicSMax32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicUMax32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicInc32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicDec32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicAnd32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicOr32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicXor32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicExchange32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicIAdd64(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicSMin64(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicUMin64(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicSMax64(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicUMax64(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicInc64(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicDec64(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicAnd64(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicOr64(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicXor64(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicExchange64(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicAddF32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicAddF16x2(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicAddF32x2(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicMinF16x2(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicMinF32x2(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicMaxF16x2(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicMaxF32x2(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp
new file mode 100644
index 000000000..e0b52a001
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp
@@ -0,0 +1,38 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+
+namespace Shader::Backend::SPIRV {
+namespace {
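+// Emits OpMemoryBarrier with acquire/release semantics covering uniform, workgroup,
+// atomic counter, and image memory at the given scope.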
+void MemoryBarrier(EmitContext& ctx, spv::Scope scope) {
+ const auto semantics{
+ spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory |
+ spv::MemorySemanticsMask::WorkgroupMemory | spv::MemorySemanticsMask::AtomicCounterMemory |
+ spv::MemorySemanticsMask::ImageMemory};
+ ctx.OpMemoryBarrier(ctx.Const(static_cast<u32>(scope)), ctx.Const(static_cast<u32>(semantics)));
+}
+} // Anonymous namespace
+
+void EmitBarrier(EmitContext& ctx) {
+ const auto execution{spv::Scope::Workgroup};
+ const auto memory{spv::Scope::Workgroup};
+ const auto memory_semantics{spv::MemorySemanticsMask::AcquireRelease |
+ spv::MemorySemanticsMask::WorkgroupMemory};
+ ctx.OpControlBarrier(ctx.Const(static_cast<u32>(execution)),
+ ctx.Const(static_cast<u32>(memory)),
+ ctx.Const(static_cast<u32>(memory_semantics)));
+}
+
+void EmitWorkgroupMemoryBarrier(EmitContext& ctx) {
+ MemoryBarrier(ctx, spv::Scope::Workgroup);
+}
+
+void EmitDeviceMemoryBarrier(EmitContext& ctx) {
+ MemoryBarrier(ctx, spv::Scope::Device);
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp
new file mode 100644
index 000000000..bb11f4f4e
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp
@@ -0,0 +1,66 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
+
+namespace Shader::Backend::SPIRV {
+
+void EmitBitCastU16F16(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBitCastU32F32(EmitContext& ctx, Id value) {
+ return ctx.OpBitcast(ctx.U32[1], value);
+}
+
+void EmitBitCastU64F64(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitBitCastF16U16(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBitCastF32U32(EmitContext& ctx, Id value) {
+ return ctx.OpBitcast(ctx.F32[1], value);
+}
+
+void EmitBitCastF64U64(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitPackUint2x32(EmitContext& ctx, Id value) {
+ return ctx.OpBitcast(ctx.U64, value);
+}
+
+Id EmitUnpackUint2x32(EmitContext& ctx, Id value) {
+ return ctx.OpBitcast(ctx.U32[2], value);
+}
+
+Id EmitPackFloat2x16(EmitContext& ctx, Id value) {
+ return ctx.OpBitcast(ctx.U32[1], value);
+}
+
+Id EmitUnpackFloat2x16(EmitContext& ctx, Id value) {
+ return ctx.OpBitcast(ctx.F16[2], value);
+}
+
+Id EmitPackHalf2x16(EmitContext& ctx, Id value) {
+ return ctx.OpPackHalf2x16(ctx.U32[1], value);
+}
+
+Id EmitUnpackHalf2x16(EmitContext& ctx, Id value) {
+ return ctx.OpUnpackHalf2x16(ctx.F32[2], value);
+}
+
+Id EmitPackDouble2x32(EmitContext& ctx, Id value) {
+ return ctx.OpBitcast(ctx.F64[1], value);
+}
+
+Id EmitUnpackDouble2x32(EmitContext& ctx, Id value) {
+ return ctx.OpBitcast(ctx.U32[2], value);
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp
new file mode 100644
index 000000000..10ff4ecab
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp
@@ -0,0 +1,155 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+
+namespace Shader::Backend::SPIRV {
+
+Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2) {
+ return ctx.OpCompositeConstruct(ctx.U32[2], e1, e2);
+}
+
+Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3) {
+ return ctx.OpCompositeConstruct(ctx.U32[3], e1, e2, e3);
+}
+
+Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) {
+ return ctx.OpCompositeConstruct(ctx.U32[4], e1, e2, e3, e4);
+}
+
+Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index) {
+ return ctx.OpCompositeExtract(ctx.U32[1], composite, index);
+}
+
+Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index) {
+ return ctx.OpCompositeExtract(ctx.U32[1], composite, index);
+}
+
+Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index) {
+ return ctx.OpCompositeExtract(ctx.U32[1], composite, index);
+}
+
+Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index) {
+ return ctx.OpCompositeInsert(ctx.U32[2], object, composite, index);
+}
+
+Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index) {
+ return ctx.OpCompositeInsert(ctx.U32[3], object, composite, index);
+}
+
+Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index) {
+ return ctx.OpCompositeInsert(ctx.U32[4], object, composite, index);
+}
+
+Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2) {
+ return ctx.OpCompositeConstruct(ctx.F16[2], e1, e2);
+}
+
+Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3) {
+ return ctx.OpCompositeConstruct(ctx.F16[3], e1, e2, e3);
+}
+
+Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) {
+ return ctx.OpCompositeConstruct(ctx.F16[4], e1, e2, e3, e4);
+}
+
+Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index) {
+ return ctx.OpCompositeExtract(ctx.F16[1], composite, index);
+}
+
+Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index) {
+ return ctx.OpCompositeExtract(ctx.F16[1], composite, index);
+}
+
+Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index) {
+ return ctx.OpCompositeExtract(ctx.F16[1], composite, index);
+}
+
+Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index) {
+ return ctx.OpCompositeInsert(ctx.F16[2], object, composite, index);
+}
+
+Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index) {
+ return ctx.OpCompositeInsert(ctx.F16[3], object, composite, index);
+}
+
+Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index) {
+ return ctx.OpCompositeInsert(ctx.F16[4], object, composite, index);
+}
+
+Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2) {
+ return ctx.OpCompositeConstruct(ctx.F32[2], e1, e2);
+}
+
+Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3) {
+ return ctx.OpCompositeConstruct(ctx.F32[3], e1, e2, e3);
+}
+
+Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) {
+ return ctx.OpCompositeConstruct(ctx.F32[4], e1, e2, e3, e4);
+}
+
+Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index) {
+ return ctx.OpCompositeExtract(ctx.F32[1], composite, index);
+}
+
+Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index) {
+ return ctx.OpCompositeExtract(ctx.F32[1], composite, index);
+}
+
+Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index) {
+ return ctx.OpCompositeExtract(ctx.F32[1], composite, index);
+}
+
+Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index) {
+ return ctx.OpCompositeInsert(ctx.F32[2], object, composite, index);
+}
+
+Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index) {
+ return ctx.OpCompositeInsert(ctx.F32[3], object, composite, index);
+}
+
+Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index) {
+ return ctx.OpCompositeInsert(ctx.F32[4], object, composite, index);
+}
+
+void EmitCompositeConstructF64x2(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitCompositeConstructF64x3(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitCompositeConstructF64x4(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitCompositeExtractF64x2(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitCompositeExtractF64x3(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitCompositeExtractF64x4(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index) {
+ return ctx.OpCompositeInsert(ctx.F64[2], object, composite, index);
+}
+
+Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index) {
+ return ctx.OpCompositeInsert(ctx.F64[3], object, composite, index);
+}
+
+Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index) {
+ return ctx.OpCompositeInsert(ctx.F64[4], object, composite, index);
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
new file mode 100644
index 000000000..fb8c02a77
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -0,0 +1,505 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <tuple>
+#include <utility>
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
+
+namespace Shader::Backend::SPIRV {
+namespace {
+struct AttrInfo {
+ Id pointer;
+ Id id;
+ bool needs_cast;
+};
+
+std::optional<AttrInfo> AttrTypes(EmitContext& ctx, u32 index) {
+ const AttributeType type{ctx.runtime_info.generic_input_types.at(index)};
+ switch (type) {
+ case AttributeType::Float:
+ return AttrInfo{ctx.input_f32, ctx.F32[1], false};
+ case AttributeType::UnsignedInt:
+ return AttrInfo{ctx.input_u32, ctx.U32[1], true};
+ case AttributeType::SignedInt:
+ return AttrInfo{ctx.input_s32, ctx.TypeInt(32, true), true};
+ case AttributeType::Disabled:
+ return std::nullopt;
+ }
+ throw InvalidArgument("Invalid attribute type {}", type);
+}
+
+template <typename... Args>
+Id AttrPointer(EmitContext& ctx, Id pointer_type, Id vertex, Id base, Args&&... args) {
+ switch (ctx.stage) {
+ case Stage::TessellationControl:
+ case Stage::TessellationEval:
+ case Stage::Geometry:
+ return ctx.OpAccessChain(pointer_type, base, vertex, std::forward<Args>(args)...);
+ default:
+ return ctx.OpAccessChain(pointer_type, base, std::forward<Args>(args)...);
+ }
+}
+
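+// Output variables in tessellation control shaders are arrayed per vertex, so access
+// chains are additionally indexed by the invocation id.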
+template <typename... Args>
+Id OutputAccessChain(EmitContext& ctx, Id result_type, Id base, Args&&... args) {
+ if (ctx.stage == Stage::TessellationControl) {
+ const Id invocation_id{ctx.OpLoad(ctx.U32[1], ctx.invocation_id)};
+ return ctx.OpAccessChain(result_type, base, invocation_id, std::forward<Args>(args)...);
+ } else {
+ return ctx.OpAccessChain(result_type, base, std::forward<Args>(args)...);
+ }
+}
+
+struct OutAttr {
+ OutAttr(Id pointer_) : pointer{pointer_} {}
+ OutAttr(Id pointer_, Id type_) : pointer{pointer_}, type{type_} {}
+
+ Id pointer{};
+ Id type{};
+};
+
+std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
+ if (IR::IsGeneric(attr)) {
+ const u32 index{IR::GenericAttributeIndex(attr)};
+ const u32 element{IR::GenericAttributeElement(attr)};
+ const GenericElementInfo& info{ctx.output_generics.at(index).at(element)};
+ if (info.num_components == 1) {
+ return info.id;
+ } else {
+ const u32 index_element{element - info.first_element};
+ const Id index_id{ctx.Const(index_element)};
+ return OutputAccessChain(ctx, ctx.output_f32, info.id, index_id);
+ }
+ }
+ switch (attr) {
+ case IR::Attribute::PointSize:
+ return ctx.output_point_size;
+ case IR::Attribute::PositionX:
+ case IR::Attribute::PositionY:
+ case IR::Attribute::PositionZ:
+ case IR::Attribute::PositionW: {
+ const u32 element{static_cast<u32>(attr) % 4};
+ const Id element_id{ctx.Const(element)};
+ return OutputAccessChain(ctx, ctx.output_f32, ctx.output_position, element_id);
+ }
+ case IR::Attribute::ClipDistance0:
+ case IR::Attribute::ClipDistance1:
+ case IR::Attribute::ClipDistance2:
+ case IR::Attribute::ClipDistance3:
+ case IR::Attribute::ClipDistance4:
+ case IR::Attribute::ClipDistance5:
+ case IR::Attribute::ClipDistance6:
+ case IR::Attribute::ClipDistance7: {
+ const u32 base{static_cast<u32>(IR::Attribute::ClipDistance0)};
+ const u32 index{static_cast<u32>(attr) - base};
+ const Id clip_num{ctx.Const(index)};
+ return OutputAccessChain(ctx, ctx.output_f32, ctx.clip_distances, clip_num);
+ }
+ case IR::Attribute::Layer:
+ if (ctx.profile.support_viewport_index_layer_non_geometry ||
+ ctx.stage == Shader::Stage::Geometry) {
+ return OutAttr{ctx.layer, ctx.U32[1]};
+ }
+ return std::nullopt;
+ case IR::Attribute::ViewportIndex:
+ if (ctx.profile.support_viewport_index_layer_non_geometry ||
+ ctx.stage == Shader::Stage::Geometry) {
+ return OutAttr{ctx.viewport_index, ctx.U32[1]};
+ }
+ return std::nullopt;
+ case IR::Attribute::ViewportMask:
+ if (!ctx.profile.support_viewport_mask) {
+ return std::nullopt;
+ }
+ return OutAttr{ctx.OpAccessChain(ctx.output_u32, ctx.viewport_mask, ctx.u32_zero_value),
+ ctx.U32[1]};
+ default:
+        throw NotImplementedException("Write attribute {}", attr);
+ }
+}
+
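+// Loads a value from a constant buffer: immediate offsets are divided by the element
+// size at compile time, dynamic offsets are shifted right at runtime.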
+Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr, u32 element_size,
+ const IR::Value& binding, const IR::Value& offset) {
+ if (!binding.IsImmediate()) {
+ throw NotImplementedException("Constant buffer indexing");
+ }
+ const Id cbuf{ctx.cbufs[binding.U32()].*member_ptr};
+ const Id uniform_type{ctx.uniform_types.*member_ptr};
+ if (!offset.IsImmediate()) {
+ Id index{ctx.Def(offset)};
+ if (element_size > 1) {
+ const u32 log2_element_size{static_cast<u32>(std::countr_zero(element_size))};
+ const Id shift{ctx.Const(log2_element_size)};
+ index = ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), shift);
+ }
+ const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, index)};
+ return ctx.OpLoad(result_type, access_chain);
+ }
+    // Hardware has been proven to read the aligned offset (e.g. LDC.U32 at 6 reads offset 4)
+ const Id imm_offset{ctx.Const(offset.U32() / element_size)};
+ const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, imm_offset)};
+ return ctx.OpLoad(result_type, access_chain);
+}
+
+Id GetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
+ return GetCbuf(ctx, ctx.U32[1], &UniformDefinitions::U32, sizeof(u32), binding, offset);
+}
+
+Id GetCbufU32x4(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
+ return GetCbuf(ctx, ctx.U32[4], &UniformDefinitions::U32x4, sizeof(u32[4]), binding, offset);
+}
+
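+// Selects one 32-bit component out of a vec4 constant buffer load; used when descriptor
+// aliasing is unavailable and whole vectors are fetched instead.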
+Id GetCbufElement(EmitContext& ctx, Id vector, const IR::Value& offset, u32 index_offset) {
+ if (offset.IsImmediate()) {
+ const u32 element{(offset.U32() / 4) % 4 + index_offset};
+ return ctx.OpCompositeExtract(ctx.U32[1], vector, element);
+ }
+ const Id shift{ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), ctx.Const(2u))};
+ Id element{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.Const(3u))};
+ if (index_offset > 0) {
+ element = ctx.OpIAdd(ctx.U32[1], element, ctx.Const(index_offset));
+ }
+ return ctx.OpVectorExtractDynamic(ctx.U32[1], vector, element);
+}
+} // Anonymous namespace
+
+void EmitGetRegister(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitSetRegister(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitGetPred(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitSetPred(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitSetGotoVariable(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitGetGotoVariable(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitSetIndirectBranchVariable(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitGetIndirectBranchVariable(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
+ if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int8) {
+ const Id load{GetCbuf(ctx, ctx.U8, &UniformDefinitions::U8, sizeof(u8), binding, offset)};
+ return ctx.OpUConvert(ctx.U32[1], load);
+ }
+ Id element{};
+ if (ctx.profile.support_descriptor_aliasing) {
+ element = GetCbufU32(ctx, binding, offset);
+ } else {
+ const Id vector{GetCbufU32x4(ctx, binding, offset)};
+ element = GetCbufElement(ctx, vector, offset, 0u);
+ }
+ const Id bit_offset{ctx.BitOffset8(offset)};
+ return ctx.OpBitFieldUExtract(ctx.U32[1], element, bit_offset, ctx.Const(8u));
+}
+
+Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
+ if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int8) {
+ const Id load{GetCbuf(ctx, ctx.S8, &UniformDefinitions::S8, sizeof(s8), binding, offset)};
+ return ctx.OpSConvert(ctx.U32[1], load);
+ }
+ Id element{};
+ if (ctx.profile.support_descriptor_aliasing) {
+ element = GetCbufU32(ctx, binding, offset);
+ } else {
+ const Id vector{GetCbufU32x4(ctx, binding, offset)};
+ element = GetCbufElement(ctx, vector, offset, 0u);
+ }
+ const Id bit_offset{ctx.BitOffset8(offset)};
+ return ctx.OpBitFieldSExtract(ctx.U32[1], element, bit_offset, ctx.Const(8u));
+}
+
+Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
+ if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int16) {
+ const Id load{
+ GetCbuf(ctx, ctx.U16, &UniformDefinitions::U16, sizeof(u16), binding, offset)};
+ return ctx.OpUConvert(ctx.U32[1], load);
+ }
+ Id element{};
+ if (ctx.profile.support_descriptor_aliasing) {
+ element = GetCbufU32(ctx, binding, offset);
+ } else {
+ const Id vector{GetCbufU32x4(ctx, binding, offset)};
+ element = GetCbufElement(ctx, vector, offset, 0u);
+ }
+ const Id bit_offset{ctx.BitOffset16(offset)};
+ return ctx.OpBitFieldUExtract(ctx.U32[1], element, bit_offset, ctx.Const(16u));
+}
+
+Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
+ if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int16) {
+ const Id load{
+ GetCbuf(ctx, ctx.S16, &UniformDefinitions::S16, sizeof(s16), binding, offset)};
+ return ctx.OpSConvert(ctx.U32[1], load);
+ }
+ Id element{};
+ if (ctx.profile.support_descriptor_aliasing) {
+ element = GetCbufU32(ctx, binding, offset);
+ } else {
+ const Id vector{GetCbufU32x4(ctx, binding, offset)};
+ element = GetCbufElement(ctx, vector, offset, 0u);
+ }
+ const Id bit_offset{ctx.BitOffset16(offset)};
+ return ctx.OpBitFieldSExtract(ctx.U32[1], element, bit_offset, ctx.Const(16u));
+}
+
+Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
+ if (ctx.profile.support_descriptor_aliasing) {
+ return GetCbufU32(ctx, binding, offset);
+ } else {
+ const Id vector{GetCbufU32x4(ctx, binding, offset)};
+ return GetCbufElement(ctx, vector, offset, 0u);
+ }
+}
+
+Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
+ if (ctx.profile.support_descriptor_aliasing) {
+ return GetCbuf(ctx, ctx.F32[1], &UniformDefinitions::F32, sizeof(f32), binding, offset);
+ } else {
+ const Id vector{GetCbufU32x4(ctx, binding, offset)};
+ return ctx.OpBitcast(ctx.F32[1], GetCbufElement(ctx, vector, offset, 0u));
+ }
+}
+
+Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
+ if (ctx.profile.support_descriptor_aliasing) {
+ return GetCbuf(ctx, ctx.U32[2], &UniformDefinitions::U32x2, sizeof(u32[2]), binding,
+ offset);
+ } else {
+ const Id vector{GetCbufU32x4(ctx, binding, offset)};
+ return ctx.OpCompositeConstruct(ctx.U32[2], GetCbufElement(ctx, vector, offset, 0u),
+ GetCbufElement(ctx, vector, offset, 1u));
+ }
+}
+
+Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
+ const u32 element{static_cast<u32>(attr) % 4};
+ if (IR::IsGeneric(attr)) {
+ const u32 index{IR::GenericAttributeIndex(attr)};
+ const std::optional<AttrInfo> type{AttrTypes(ctx, index)};
+ if (!type) {
+ // Attribute is disabled
+ return ctx.Const(element == 3 ? 1.0f : 0.0f);
+ }
+ if (!ctx.runtime_info.previous_stage_stores.Generic(index, element)) {
+ // Varying component is not written
+ return ctx.Const(type && element == 3 ? 1.0f : 0.0f);
+ }
+ const Id generic_id{ctx.input_generics.at(index)};
+ const Id pointer{AttrPointer(ctx, type->pointer, vertex, generic_id, ctx.Const(element))};
+ const Id value{ctx.OpLoad(type->id, pointer)};
+ return type->needs_cast ? ctx.OpBitcast(ctx.F32[1], value) : value;
+ }
+ switch (attr) {
+ case IR::Attribute::PrimitiveId:
+ return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.primitive_id));
+ case IR::Attribute::PositionX:
+ case IR::Attribute::PositionY:
+ case IR::Attribute::PositionZ:
+ case IR::Attribute::PositionW:
+ return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position,
+ ctx.Const(element)));
+ case IR::Attribute::InstanceId:
+ if (ctx.profile.support_vertex_instance_id) {
+ return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id));
+ } else {
+ const Id index{ctx.OpLoad(ctx.U32[1], ctx.instance_index)};
+ const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_instance)};
+ return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base));
+ }
+ case IR::Attribute::VertexId:
+ if (ctx.profile.support_vertex_instance_id) {
+ return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.vertex_id));
+ } else {
+ const Id index{ctx.OpLoad(ctx.U32[1], ctx.vertex_index)};
+ const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)};
+ return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base));
+ }
+ case IR::Attribute::FrontFace:
+ return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1, ctx.front_face),
+ ctx.Const(std::numeric_limits<u32>::max()), ctx.u32_zero_value);
+ case IR::Attribute::PointSpriteS:
+ return ctx.OpLoad(ctx.F32[1],
+ ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, ctx.u32_zero_value));
+ case IR::Attribute::PointSpriteT:
+ return ctx.OpLoad(ctx.F32[1],
+ ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, ctx.Const(1U)));
+ case IR::Attribute::TessellationEvaluationPointU:
+ return ctx.OpLoad(ctx.F32[1],
+ ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.u32_zero_value));
+ case IR::Attribute::TessellationEvaluationPointV:
+ return ctx.OpLoad(ctx.F32[1],
+ ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.Const(1U)));
+
+ default:
+ throw NotImplementedException("Read attribute {}", attr);
+ }
+}
+
+void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, [[maybe_unused]] Id vertex) {
+ const std::optional<OutAttr> output{OutputAttrPointer(ctx, attr)};
+ if (!output) {
+ return;
+ }
+ if (Sirit::ValidId(output->type)) {
+ value = ctx.OpBitcast(output->type, value);
+ }
+ ctx.OpStore(output->pointer, value);
+}
+
+Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex) {
+ switch (ctx.stage) {
+ case Stage::TessellationControl:
+ case Stage::TessellationEval:
+ case Stage::Geometry:
+ return ctx.OpFunctionCall(ctx.F32[1], ctx.indexed_load_func, offset, vertex);
+ default:
+ return ctx.OpFunctionCall(ctx.F32[1], ctx.indexed_load_func, offset);
+ }
+}
+
+void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, [[maybe_unused]] Id vertex) {
+ ctx.OpFunctionCall(ctx.void_id, ctx.indexed_store_func, offset, value);
+}
+
+Id EmitGetPatch(EmitContext& ctx, IR::Patch patch) {
+ if (!IR::IsGeneric(patch)) {
+ throw NotImplementedException("Non-generic patch load");
+ }
+ const u32 index{IR::GenericPatchIndex(patch)};
+ const Id element{ctx.Const(IR::GenericPatchElement(patch))};
+ const Id type{ctx.stage == Stage::TessellationControl ? ctx.output_f32 : ctx.input_f32};
+ const Id pointer{ctx.OpAccessChain(type, ctx.patches.at(index), element)};
+ return ctx.OpLoad(ctx.F32[1], pointer);
+}
+
+void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) {
+ const Id pointer{[&] {
+ if (IR::IsGeneric(patch)) {
+ const u32 index{IR::GenericPatchIndex(patch)};
+ const Id element{ctx.Const(IR::GenericPatchElement(patch))};
+ return ctx.OpAccessChain(ctx.output_f32, ctx.patches.at(index), element);
+ }
+ switch (patch) {
+ case IR::Patch::TessellationLodLeft:
+ case IR::Patch::TessellationLodRight:
+ case IR::Patch::TessellationLodTop:
+ case IR::Patch::TessellationLodBottom: {
+ const u32 index{static_cast<u32>(patch) - u32(IR::Patch::TessellationLodLeft)};
+ const Id index_id{ctx.Const(index)};
+ return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_outer, index_id);
+ }
+ case IR::Patch::TessellationLodInteriorU:
+ return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner,
+ ctx.u32_zero_value);
+ case IR::Patch::TessellationLodInteriorV:
+ return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, ctx.Const(1u));
+ default:
+ throw NotImplementedException("Patch {}", patch);
+ }
+ }()};
+ ctx.OpStore(pointer, value);
+}
+
+void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value) {
+ const Id component_id{ctx.Const(component)};
+ const Id pointer{ctx.OpAccessChain(ctx.output_f32, ctx.frag_color.at(index), component_id)};
+ ctx.OpStore(pointer, value);
+}
+
+void EmitSetSampleMask(EmitContext& ctx, Id value) {
+ ctx.OpStore(ctx.sample_mask, value);
+}
+
+void EmitSetFragDepth(EmitContext& ctx, Id value) {
+ ctx.OpStore(ctx.frag_depth, value);
+}
+
+void EmitGetZFlag(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitGetSFlag(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitGetCFlag(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitGetOFlag(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitSetZFlag(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitSetSFlag(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitSetCFlag(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitSetOFlag(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitWorkgroupId(EmitContext& ctx) {
+ return ctx.OpLoad(ctx.U32[3], ctx.workgroup_id);
+}
+
+Id EmitLocalInvocationId(EmitContext& ctx) {
+ return ctx.OpLoad(ctx.U32[3], ctx.local_invocation_id);
+}
+
+Id EmitInvocationId(EmitContext& ctx) {
+ return ctx.OpLoad(ctx.U32[1], ctx.invocation_id);
+}
+
+Id EmitSampleId(EmitContext& ctx) {
+ return ctx.OpLoad(ctx.U32[1], ctx.sample_id);
+}
+
+Id EmitIsHelperInvocation(EmitContext& ctx) {
+ return ctx.OpLoad(ctx.U1, ctx.is_helper_invocation);
+}
+
+Id EmitYDirection(EmitContext& ctx) {
+ return ctx.Const(ctx.runtime_info.y_negate ? -1.0f : 1.0f);
+}
+
+Id EmitLoadLocal(EmitContext& ctx, Id word_offset) {
+ const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)};
+ return ctx.OpLoad(ctx.U32[1], pointer);
+}
+
+void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value) {
+ const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)};
+ ctx.OpStore(pointer, value);
+}
+
+} // namespace Shader::Backend::SPIRV
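
As a reading aid, the element selection in GetCbufElement above boils down to byte-offset arithmetic on vec4-aligned constant-buffer storage. A minimal standalone C++ sketch of that math, with illustrative names only (not emitter code):

```cpp
#include <cassert>
#include <cstdint>

// Each uniform vector holds four 32-bit words; a byte offset selects one of them.
constexpr std::uint32_t CbufElement(std::uint32_t byte_offset, std::uint32_t index_offset = 0) {
    return (byte_offset / 4) % 4 + index_offset;
}

// Shift/mask form matching the code path taken for non-immediate offsets.
constexpr std::uint32_t CbufElementDynamic(std::uint32_t byte_offset, std::uint32_t index_offset = 0) {
    return ((byte_offset >> 2) & 3) + index_offset;
}

int main() {
    assert(CbufElement(0) == 0);   // first word of the first vec4
    assert(CbufElement(12) == 3);  // last word of the first vec4
    assert(CbufElement(20) == 1);  // second word of the next vec4
    assert(CbufElement(20) == CbufElementDynamic(20));
    return 0;
}
```

The same index_offset parameter is what lets EmitGetCbufU32x2 pull two consecutive words out of a single fetched vector.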
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp
new file mode 100644
index 000000000..d33486f28
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp
@@ -0,0 +1,28 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
+
+namespace Shader::Backend::SPIRV {
+
+void EmitJoin(EmitContext&) {
+ throw NotImplementedException("Join shouldn't be emitted");
+}
+
+void EmitDemoteToHelperInvocation(EmitContext& ctx) {
+ if (ctx.profile.support_demote_to_helper_invocation) {
+ ctx.OpDemoteToHelperInvocationEXT();
+ } else {
+ const Id kill_label{ctx.OpLabel()};
+ const Id impossible_label{ctx.OpLabel()};
+ ctx.OpSelectionMerge(impossible_label, spv::SelectionControlMask::MaskNone);
+ ctx.OpBranchConditional(ctx.true_value, kill_label, impossible_label);
+ ctx.AddLabel(kill_label);
+ ctx.OpKill();
+ ctx.AddLabel(impossible_label);
+ }
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp
new file mode 100644
index 000000000..fd42b7a16
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp
@@ -0,0 +1,269 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
+
+namespace Shader::Backend::SPIRV {
+namespace {
+Id ExtractU16(EmitContext& ctx, Id value) {
+ if (ctx.profile.support_int16) {
+ return ctx.OpUConvert(ctx.U16, value);
+ } else {
+ return ctx.OpBitFieldUExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.Const(16u));
+ }
+}
+
+Id ExtractS16(EmitContext& ctx, Id value) {
+ if (ctx.profile.support_int16) {
+ return ctx.OpSConvert(ctx.S16, value);
+ } else {
+ return ctx.OpBitFieldSExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.Const(16u));
+ }
+}
+
+Id ExtractU8(EmitContext& ctx, Id value) {
+ if (ctx.profile.support_int8) {
+ return ctx.OpUConvert(ctx.U8, value);
+ } else {
+ return ctx.OpBitFieldUExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.Const(8u));
+ }
+}
+
+Id ExtractS8(EmitContext& ctx, Id value) {
+ if (ctx.profile.support_int8) {
+ return ctx.OpSConvert(ctx.S8, value);
+ } else {
+ return ctx.OpBitFieldSExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.Const(8u));
+ }
+}
+} // Anonymous namespace
+
+Id EmitConvertS16F16(EmitContext& ctx, Id value) {
+ if (ctx.profile.support_int16) {
+ return ctx.OpSConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value));
+ } else {
+ return ExtractS16(ctx, ctx.OpConvertFToS(ctx.U32[1], value));
+ }
+}
+
+Id EmitConvertS16F32(EmitContext& ctx, Id value) {
+ if (ctx.profile.support_int16) {
+ return ctx.OpSConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value));
+ } else {
+ return ExtractS16(ctx, ctx.OpConvertFToS(ctx.U32[1], value));
+ }
+}
+
+Id EmitConvertS16F64(EmitContext& ctx, Id value) {
+ if (ctx.profile.support_int16) {
+ return ctx.OpSConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value));
+ } else {
+ return ExtractS16(ctx, ctx.OpConvertFToS(ctx.U32[1], value));
+ }
+}
+
+Id EmitConvertS32F16(EmitContext& ctx, Id value) {
+ return ctx.OpConvertFToS(ctx.U32[1], value);
+}
+
+Id EmitConvertS32F32(EmitContext& ctx, Id value) {
+ if (ctx.profile.has_broken_signed_operations) {
+ return ctx.OpBitcast(ctx.U32[1], ctx.OpConvertFToS(ctx.S32[1], value));
+ } else {
+ return ctx.OpConvertFToS(ctx.U32[1], value);
+ }
+}
+
+Id EmitConvertS32F64(EmitContext& ctx, Id value) {
+ return ctx.OpConvertFToS(ctx.U32[1], value);
+}
+
+Id EmitConvertS64F16(EmitContext& ctx, Id value) {
+ return ctx.OpConvertFToS(ctx.U64, value);
+}
+
+Id EmitConvertS64F32(EmitContext& ctx, Id value) {
+ return ctx.OpConvertFToS(ctx.U64, value);
+}
+
+Id EmitConvertS64F64(EmitContext& ctx, Id value) {
+ return ctx.OpConvertFToS(ctx.U64, value);
+}
+
+Id EmitConvertU16F16(EmitContext& ctx, Id value) {
+ if (ctx.profile.support_int16) {
+ return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value));
+ } else {
+ return ExtractU16(ctx, ctx.OpConvertFToU(ctx.U32[1], value));
+ }
+}
+
+Id EmitConvertU16F32(EmitContext& ctx, Id value) {
+ if (ctx.profile.support_int16) {
+ return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value));
+ } else {
+ return ExtractU16(ctx, ctx.OpConvertFToU(ctx.U32[1], value));
+ }
+}
+
+Id EmitConvertU16F64(EmitContext& ctx, Id value) {
+ if (ctx.profile.support_int16) {
+ return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value));
+ } else {
+ return ExtractU16(ctx, ctx.OpConvertFToU(ctx.U32[1], value));
+ }
+}
+
+Id EmitConvertU32F16(EmitContext& ctx, Id value) {
+ return ctx.OpConvertFToU(ctx.U32[1], value);
+}
+
+Id EmitConvertU32F32(EmitContext& ctx, Id value) {
+ return ctx.OpConvertFToU(ctx.U32[1], value);
+}
+
+Id EmitConvertU32F64(EmitContext& ctx, Id value) {
+ return ctx.OpConvertFToU(ctx.U32[1], value);
+}
+
+Id EmitConvertU64F16(EmitContext& ctx, Id value) {
+ return ctx.OpConvertFToU(ctx.U64, value);
+}
+
+Id EmitConvertU64F32(EmitContext& ctx, Id value) {
+ return ctx.OpConvertFToU(ctx.U64, value);
+}
+
+Id EmitConvertU64F64(EmitContext& ctx, Id value) {
+ return ctx.OpConvertFToU(ctx.U64, value);
+}
+
+Id EmitConvertU64U32(EmitContext& ctx, Id value) {
+ return ctx.OpUConvert(ctx.U64, value);
+}
+
+Id EmitConvertU32U64(EmitContext& ctx, Id value) {
+ return ctx.OpUConvert(ctx.U32[1], value);
+}
+
+Id EmitConvertF16F32(EmitContext& ctx, Id value) {
+ return ctx.OpFConvert(ctx.F16[1], value);
+}
+
+Id EmitConvertF32F16(EmitContext& ctx, Id value) {
+ return ctx.OpFConvert(ctx.F32[1], value);
+}
+
+Id EmitConvertF32F64(EmitContext& ctx, Id value) {
+ return ctx.OpFConvert(ctx.F32[1], value);
+}
+
+Id EmitConvertF64F32(EmitContext& ctx, Id value) {
+ return ctx.OpFConvert(ctx.F64[1], value);
+}
+
+Id EmitConvertF16S8(EmitContext& ctx, Id value) {
+ return ctx.OpConvertSToF(ctx.F16[1], ExtractS8(ctx, value));
+}
+
+Id EmitConvertF16S16(EmitContext& ctx, Id value) {
+ return ctx.OpConvertSToF(ctx.F16[1], ExtractS16(ctx, value));
+}
+
+Id EmitConvertF16S32(EmitContext& ctx, Id value) {
+ return ctx.OpConvertSToF(ctx.F16[1], value);
+}
+
+Id EmitConvertF16S64(EmitContext& ctx, Id value) {
+ return ctx.OpConvertSToF(ctx.F16[1], value);
+}
+
+Id EmitConvertF16U8(EmitContext& ctx, Id value) {
+ return ctx.OpConvertUToF(ctx.F16[1], ExtractU8(ctx, value));
+}
+
+Id EmitConvertF16U16(EmitContext& ctx, Id value) {
+ return ctx.OpConvertUToF(ctx.F16[1], ExtractU16(ctx, value));
+}
+
+Id EmitConvertF16U32(EmitContext& ctx, Id value) {
+ return ctx.OpConvertUToF(ctx.F16[1], value);
+}
+
+Id EmitConvertF16U64(EmitContext& ctx, Id value) {
+ return ctx.OpConvertUToF(ctx.F16[1], value);
+}
+
+Id EmitConvertF32S8(EmitContext& ctx, Id value) {
+ return ctx.OpConvertSToF(ctx.F32[1], ExtractS8(ctx, value));
+}
+
+Id EmitConvertF32S16(EmitContext& ctx, Id value) {
+ return ctx.OpConvertSToF(ctx.F32[1], ExtractS16(ctx, value));
+}
+
+Id EmitConvertF32S32(EmitContext& ctx, Id value) {
+ if (ctx.profile.has_broken_signed_operations) {
+ value = ctx.OpBitcast(ctx.S32[1], value);
+ }
+ return ctx.OpConvertSToF(ctx.F32[1], value);
+}
+
+Id EmitConvertF32S64(EmitContext& ctx, Id value) {
+ return ctx.OpConvertSToF(ctx.F32[1], value);
+}
+
+Id EmitConvertF32U8(EmitContext& ctx, Id value) {
+ return ctx.OpConvertUToF(ctx.F32[1], ExtractU8(ctx, value));
+}
+
+Id EmitConvertF32U16(EmitContext& ctx, Id value) {
+ return ctx.OpConvertUToF(ctx.F32[1], ExtractU16(ctx, value));
+}
+
+Id EmitConvertF32U32(EmitContext& ctx, Id value) {
+ return ctx.OpConvertUToF(ctx.F32[1], value);
+}
+
+Id EmitConvertF32U64(EmitContext& ctx, Id value) {
+ return ctx.OpConvertUToF(ctx.F32[1], value);
+}
+
+Id EmitConvertF64S8(EmitContext& ctx, Id value) {
+ return ctx.OpConvertSToF(ctx.F64[1], ExtractS8(ctx, value));
+}
+
+Id EmitConvertF64S16(EmitContext& ctx, Id value) {
+ return ctx.OpConvertSToF(ctx.F64[1], ExtractS16(ctx, value));
+}
+
+Id EmitConvertF64S32(EmitContext& ctx, Id value) {
+ if (ctx.profile.has_broken_signed_operations) {
+ value = ctx.OpBitcast(ctx.S32[1], value);
+ }
+ return ctx.OpConvertSToF(ctx.F64[1], value);
+}
+
+Id EmitConvertF64S64(EmitContext& ctx, Id value) {
+ return ctx.OpConvertSToF(ctx.F64[1], value);
+}
+
+Id EmitConvertF64U8(EmitContext& ctx, Id value) {
+ return ctx.OpConvertUToF(ctx.F64[1], ExtractU8(ctx, value));
+}
+
+Id EmitConvertF64U16(EmitContext& ctx, Id value) {
+ return ctx.OpConvertUToF(ctx.F64[1], ExtractU16(ctx, value));
+}
+
+Id EmitConvertF64U32(EmitContext& ctx, Id value) {
+ return ctx.OpConvertUToF(ctx.F64[1], value);
+}
+
+Id EmitConvertF64U64(EmitContext& ctx, Id value) {
+ return ctx.OpConvertUToF(ctx.F64[1], value);
+}
+
+} // namespace Shader::Backend::SPIRV
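
The Extract* fallbacks above reduce to ordinary bit-field extraction on a 32-bit word when the host lacks native 8/16-bit integer types. A scalar C++ analogue of the zero- and sign-extending variants (illustrative only, not emitter code):

```cpp
#include <cstdint>

// Unsigned extract: keep the low bits, zero-extend the rest.
constexpr std::uint32_t ExtractU16(std::uint32_t value) {
    return value & 0xFFFFu;
}
constexpr std::uint32_t ExtractU8(std::uint32_t value) {
    return value & 0xFFu;
}

// Signed extract: shift the field to the top, then arithmetic-shift back to sign-extend.
constexpr std::int32_t ExtractS16(std::uint32_t value) {
    return static_cast<std::int32_t>(value << 16) >> 16;
}
constexpr std::int32_t ExtractS8(std::uint32_t value) {
    return static_cast<std::int32_t>(value << 24) >> 24;
}

static_assert(ExtractU16(0xABCD'1234u) == 0x1234u);
static_assert(ExtractS16(0x0000'FFFFu) == -1);
static_assert(ExtractS8(0x0000'0080u) == -128);
```

OpBitFieldUExtract/OpBitFieldSExtract with offset 0 and a count of 8 or 16 perform the same extraction on the GPU side.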
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
new file mode 100644
index 000000000..61cf25f9c
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
@@ -0,0 +1,396 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+
+namespace Shader::Backend::SPIRV {
+namespace {
+Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) {
+ const auto flags{inst->Flags<IR::FpControl>()};
+ if (flags.no_contraction) {
+ ctx.Decorate(op, spv::Decoration::NoContraction);
+ }
+ return op;
+}
+
+Id Clamp(EmitContext& ctx, Id type, Id value, Id zero, Id one) {
+ if (ctx.profile.has_broken_spirv_clamp) {
+ return ctx.OpFMin(type, ctx.OpFMax(type, value, zero), one);
+ } else {
+ return ctx.OpFClamp(type, value, zero, one);
+ }
+}
+
+Id FPOrdNotEqual(EmitContext& ctx, Id lhs, Id rhs) {
+ if (ctx.profile.ignore_nan_fp_comparisons) {
+ const Id comp{ctx.OpFUnordNotEqual(ctx.U1, lhs, rhs)};
+ const Id lhs_not_nan{ctx.OpLogicalNot(ctx.U1, ctx.OpIsNan(ctx.U1, lhs))};
+ const Id rhs_not_nan{ctx.OpLogicalNot(ctx.U1, ctx.OpIsNan(ctx.U1, rhs))};
+ return ctx.OpLogicalAnd(ctx.U1, ctx.OpLogicalAnd(ctx.U1, comp, lhs_not_nan), rhs_not_nan);
+ } else {
+ return ctx.OpFOrdNotEqual(ctx.U1, lhs, rhs);
+ }
+}
+
+Id FPUnordCompare(Id (EmitContext::*comp_func)(Id, Id, Id), EmitContext& ctx, Id lhs, Id rhs) {
+ if (ctx.profile.ignore_nan_fp_comparisons) {
+ const Id lhs_nan{ctx.OpIsNan(ctx.U1, lhs)};
+ const Id rhs_nan{ctx.OpIsNan(ctx.U1, rhs)};
+ const Id comp{(ctx.*comp_func)(ctx.U1, lhs, rhs)};
+ return ctx.OpLogicalOr(ctx.U1, ctx.OpLogicalOr(ctx.U1, comp, lhs_nan), rhs_nan);
+ } else {
+ return (ctx.*comp_func)(ctx.U1, lhs, rhs);
+ }
+}
+} // Anonymous namespace
+
+Id EmitFPAbs16(EmitContext& ctx, Id value) {
+ return ctx.OpFAbs(ctx.F16[1], value);
+}
+
+Id EmitFPAbs32(EmitContext& ctx, Id value) {
+ return ctx.OpFAbs(ctx.F32[1], value);
+}
+
+Id EmitFPAbs64(EmitContext& ctx, Id value) {
+ return ctx.OpFAbs(ctx.F64[1], value);
+}
+
+Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
+ return Decorate(ctx, inst, ctx.OpFAdd(ctx.F16[1], a, b));
+}
+
+Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
+ return Decorate(ctx, inst, ctx.OpFAdd(ctx.F32[1], a, b));
+}
+
+Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
+ return Decorate(ctx, inst, ctx.OpFAdd(ctx.F64[1], a, b));
+}
+
+Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) {
+ return Decorate(ctx, inst, ctx.OpFma(ctx.F16[1], a, b, c));
+}
+
+Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) {
+ return Decorate(ctx, inst, ctx.OpFma(ctx.F32[1], a, b, c));
+}
+
+Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) {
+ return Decorate(ctx, inst, ctx.OpFma(ctx.F64[1], a, b, c));
+}
+
+Id EmitFPMax32(EmitContext& ctx, Id a, Id b) {
+ return ctx.OpFMax(ctx.F32[1], a, b);
+}
+
+Id EmitFPMax64(EmitContext& ctx, Id a, Id b) {
+ return ctx.OpFMax(ctx.F64[1], a, b);
+}
+
+Id EmitFPMin32(EmitContext& ctx, Id a, Id b) {
+ return ctx.OpFMin(ctx.F32[1], a, b);
+}
+
+Id EmitFPMin64(EmitContext& ctx, Id a, Id b) {
+ return ctx.OpFMin(ctx.F64[1], a, b);
+}
+
+Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
+ return Decorate(ctx, inst, ctx.OpFMul(ctx.F16[1], a, b));
+}
+
+Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
+ return Decorate(ctx, inst, ctx.OpFMul(ctx.F32[1], a, b));
+}
+
+Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
+ return Decorate(ctx, inst, ctx.OpFMul(ctx.F64[1], a, b));
+}
+
+Id EmitFPNeg16(EmitContext& ctx, Id value) {
+ return ctx.OpFNegate(ctx.F16[1], value);
+}
+
+Id EmitFPNeg32(EmitContext& ctx, Id value) {
+ return ctx.OpFNegate(ctx.F32[1], value);
+}
+
+Id EmitFPNeg64(EmitContext& ctx, Id value) {
+ return ctx.OpFNegate(ctx.F64[1], value);
+}
+
+Id EmitFPSin(EmitContext& ctx, Id value) {
+ return ctx.OpSin(ctx.F32[1], value);
+}
+
+Id EmitFPCos(EmitContext& ctx, Id value) {
+ return ctx.OpCos(ctx.F32[1], value);
+}
+
+Id EmitFPExp2(EmitContext& ctx, Id value) {
+ return ctx.OpExp2(ctx.F32[1], value);
+}
+
+Id EmitFPLog2(EmitContext& ctx, Id value) {
+ return ctx.OpLog2(ctx.F32[1], value);
+}
+
+Id EmitFPRecip32(EmitContext& ctx, Id value) {
+ return ctx.OpFDiv(ctx.F32[1], ctx.Const(1.0f), value);
+}
+
+Id EmitFPRecip64(EmitContext& ctx, Id value) {
+ return ctx.OpFDiv(ctx.F64[1], ctx.Constant(ctx.F64[1], f64{1.0}), value);
+}
+
+Id EmitFPRecipSqrt32(EmitContext& ctx, Id value) {
+ return ctx.OpInverseSqrt(ctx.F32[1], value);
+}
+
+Id EmitFPRecipSqrt64(EmitContext& ctx, Id value) {
+ return ctx.OpInverseSqrt(ctx.F64[1], value);
+}
+
+Id EmitFPSqrt(EmitContext& ctx, Id value) {
+ return ctx.OpSqrt(ctx.F32[1], value);
+}
+
+Id EmitFPSaturate16(EmitContext& ctx, Id value) {
+ const Id zero{ctx.Constant(ctx.F16[1], u16{0})};
+ const Id one{ctx.Constant(ctx.F16[1], u16{0x3c00})};
+ return Clamp(ctx, ctx.F16[1], value, zero, one);
+}
+
+Id EmitFPSaturate32(EmitContext& ctx, Id value) {
+ const Id zero{ctx.Const(f32{0.0})};
+ const Id one{ctx.Const(f32{1.0})};
+ return Clamp(ctx, ctx.F32[1], value, zero, one);
+}
+
+Id EmitFPSaturate64(EmitContext& ctx, Id value) {
+ const Id zero{ctx.Constant(ctx.F64[1], f64{0.0})};
+ const Id one{ctx.Constant(ctx.F64[1], f64{1.0})};
+ return Clamp(ctx, ctx.F64[1], value, zero, one);
+}
+
+Id EmitFPClamp16(EmitContext& ctx, Id value, Id min_value, Id max_value) {
+ return Clamp(ctx, ctx.F16[1], value, min_value, max_value);
+}
+
+Id EmitFPClamp32(EmitContext& ctx, Id value, Id min_value, Id max_value) {
+ return Clamp(ctx, ctx.F32[1], value, min_value, max_value);
+}
+
+Id EmitFPClamp64(EmitContext& ctx, Id value, Id min_value, Id max_value) {
+ return Clamp(ctx, ctx.F64[1], value, min_value, max_value);
+}
+
+Id EmitFPRoundEven16(EmitContext& ctx, Id value) {
+ return ctx.OpRoundEven(ctx.F16[1], value);
+}
+
+Id EmitFPRoundEven32(EmitContext& ctx, Id value) {
+ return ctx.OpRoundEven(ctx.F32[1], value);
+}
+
+Id EmitFPRoundEven64(EmitContext& ctx, Id value) {
+ return ctx.OpRoundEven(ctx.F64[1], value);
+}
+
+Id EmitFPFloor16(EmitContext& ctx, Id value) {
+ return ctx.OpFloor(ctx.F16[1], value);
+}
+
+Id EmitFPFloor32(EmitContext& ctx, Id value) {
+ return ctx.OpFloor(ctx.F32[1], value);
+}
+
+Id EmitFPFloor64(EmitContext& ctx, Id value) {
+ return ctx.OpFloor(ctx.F64[1], value);
+}
+
+Id EmitFPCeil16(EmitContext& ctx, Id value) {
+ return ctx.OpCeil(ctx.F16[1], value);
+}
+
+Id EmitFPCeil32(EmitContext& ctx, Id value) {
+ return ctx.OpCeil(ctx.F32[1], value);
+}
+
+Id EmitFPCeil64(EmitContext& ctx, Id value) {
+ return ctx.OpCeil(ctx.F64[1], value);
+}
+
+Id EmitFPTrunc16(EmitContext& ctx, Id value) {
+ return ctx.OpTrunc(ctx.F16[1], value);
+}
+
+Id EmitFPTrunc32(EmitContext& ctx, Id value) {
+ return ctx.OpTrunc(ctx.F32[1], value);
+}
+
+Id EmitFPTrunc64(EmitContext& ctx, Id value) {
+ return ctx.OpTrunc(ctx.F64[1], value);
+}
+
+Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpFOrdEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpFOrdEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpFOrdEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPUnordEqual16(EmitContext& ctx, Id lhs, Id rhs) {
+ return FPUnordCompare(&EmitContext::OpFUnordEqual, ctx, lhs, rhs);
+}
+
+Id EmitFPUnordEqual32(EmitContext& ctx, Id lhs, Id rhs) {
+ return FPUnordCompare(&EmitContext::OpFUnordEqual, ctx, lhs, rhs);
+}
+
+Id EmitFPUnordEqual64(EmitContext& ctx, Id lhs, Id rhs) {
+ return FPUnordCompare(&EmitContext::OpFUnordEqual, ctx, lhs, rhs);
+}
+
+Id EmitFPOrdNotEqual16(EmitContext& ctx, Id lhs, Id rhs) {
+ return FPOrdNotEqual(ctx, lhs, rhs);
+}
+
+Id EmitFPOrdNotEqual32(EmitContext& ctx, Id lhs, Id rhs) {
+ return FPOrdNotEqual(ctx, lhs, rhs);
+}
+
+Id EmitFPOrdNotEqual64(EmitContext& ctx, Id lhs, Id rhs) {
+ return FPOrdNotEqual(ctx, lhs, rhs);
+}
+
+Id EmitFPUnordNotEqual16(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpFUnordNotEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPUnordNotEqual32(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpFUnordNotEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPUnordNotEqual64(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpFUnordNotEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPOrdLessThan16(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpFOrdLessThan(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPOrdLessThan32(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpFOrdLessThan(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPOrdLessThan64(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpFOrdLessThan(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPUnordLessThan16(EmitContext& ctx, Id lhs, Id rhs) {
+ return FPUnordCompare(&EmitContext::OpFUnordLessThan, ctx, lhs, rhs);
+}
+
+Id EmitFPUnordLessThan32(EmitContext& ctx, Id lhs, Id rhs) {
+ return FPUnordCompare(&EmitContext::OpFUnordLessThan, ctx, lhs, rhs);
+}
+
+Id EmitFPUnordLessThan64(EmitContext& ctx, Id lhs, Id rhs) {
+ return FPUnordCompare(&EmitContext::OpFUnordLessThan, ctx, lhs, rhs);
+}
+
+Id EmitFPOrdGreaterThan16(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpFOrdGreaterThan(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPOrdGreaterThan32(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpFOrdGreaterThan(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPOrdGreaterThan64(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpFOrdGreaterThan(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPUnordGreaterThan16(EmitContext& ctx, Id lhs, Id rhs) {
+ return FPUnordCompare(&EmitContext::OpFUnordGreaterThan, ctx, lhs, rhs);
+}
+
+Id EmitFPUnordGreaterThan32(EmitContext& ctx, Id lhs, Id rhs) {
+ return FPUnordCompare(&EmitContext::OpFUnordGreaterThan, ctx, lhs, rhs);
+}
+
+Id EmitFPUnordGreaterThan64(EmitContext& ctx, Id lhs, Id rhs) {
+ return FPUnordCompare(&EmitContext::OpFUnordGreaterThan, ctx, lhs, rhs);
+}
+
+Id EmitFPOrdLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpFOrdLessThanEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPOrdLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpFOrdLessThanEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPOrdLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpFOrdLessThanEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPUnordLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs) {
+ return FPUnordCompare(&EmitContext::OpFUnordLessThanEqual, ctx, lhs, rhs);
+}
+
+Id EmitFPUnordLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs) {
+ return FPUnordCompare(&EmitContext::OpFUnordLessThanEqual, ctx, lhs, rhs);
+}
+
+Id EmitFPUnordLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs) {
+ return FPUnordCompare(&EmitContext::OpFUnordLessThanEqual, ctx, lhs, rhs);
+}
+
+Id EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpFOrdGreaterThanEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPOrdGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpFOrdGreaterThanEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpFOrdGreaterThanEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs) {
+ return FPUnordCompare(&EmitContext::OpFUnordGreaterThanEqual, ctx, lhs, rhs);
+}
+
+Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs) {
+ return FPUnordCompare(&EmitContext::OpFUnordGreaterThanEqual, ctx, lhs, rhs);
+}
+
+Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) {
+ return FPUnordCompare(&EmitContext::OpFUnordGreaterThanEqual, ctx, lhs, rhs);
+}
+
+Id EmitFPIsNan16(EmitContext& ctx, Id value) {
+ return ctx.OpIsNan(ctx.U1, value);
+}
+
+Id EmitFPIsNan32(EmitContext& ctx, Id value) {
+ return ctx.OpIsNan(ctx.U1, value);
+}
+
+Id EmitFPIsNan64(EmitContext& ctx, Id value) {
+ return ctx.OpIsNan(ctx.U1, value);
+}
+
+} // namespace Shader::Backend::SPIRV
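
The ignore_nan_fp_comparisons paths above rebuild the ordered/unordered comparison semantics from explicit IsNan checks for drivers that mishandle NaN operands. A scalar sketch of the intended behaviour (plain C++, illustration only):

```cpp
#include <cassert>
#include <cmath>
#include <limits>

// Ordered "not equal": only true when neither operand is NaN and the values differ.
bool FPOrdNotEqual(float lhs, float rhs) {
    return !std::isnan(lhs) && !std::isnan(rhs) && lhs != rhs;
}

// Unordered compare: true when either operand is NaN or the underlying comparison holds.
template <typename Compare>
bool FPUnordCompare(Compare comp, float lhs, float rhs) {
    return std::isnan(lhs) || std::isnan(rhs) || comp(lhs, rhs);
}

int main() {
    const float nan = std::numeric_limits<float>::quiet_NaN();
    assert(FPOrdNotEqual(1.0f, 2.0f));
    assert(!FPOrdNotEqual(nan, 2.0f)); // a NaN operand makes ordered compares false
    assert(FPUnordCompare([](float a, float b) { return a < b; }, nan, 2.0f)); // ...and unordered ones true
    return 0;
}
```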
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
new file mode 100644
index 000000000..3588f052b
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
@@ -0,0 +1,462 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <boost/container/static_vector.hpp>
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+
+namespace Shader::Backend::SPIRV {
+namespace {
+class ImageOperands {
+public:
+ explicit ImageOperands(EmitContext& ctx, bool has_bias, bool has_lod, bool has_lod_clamp,
+ Id lod, const IR::Value& offset) {
+ if (has_bias) {
+ const Id bias{has_lod_clamp ? ctx.OpCompositeExtract(ctx.F32[1], lod, 0) : lod};
+ Add(spv::ImageOperandsMask::Bias, bias);
+ }
+ if (has_lod) {
+ const Id lod_value{has_lod_clamp ? ctx.OpCompositeExtract(ctx.F32[1], lod, 0) : lod};
+ Add(spv::ImageOperandsMask::Lod, lod_value);
+ }
+ AddOffset(ctx, offset);
+ if (has_lod_clamp) {
+ const Id lod_clamp{has_bias ? ctx.OpCompositeExtract(ctx.F32[1], lod, 1) : lod};
+ Add(spv::ImageOperandsMask::MinLod, lod_clamp);
+ }
+ }
+
+ explicit ImageOperands(EmitContext& ctx, const IR::Value& offset, const IR::Value& offset2) {
+ if (offset2.IsEmpty()) {
+ if (offset.IsEmpty()) {
+ return;
+ }
+ Add(spv::ImageOperandsMask::Offset, ctx.Def(offset));
+ return;
+ }
+ const std::array values{offset.InstRecursive(), offset2.InstRecursive()};
+ if (!values[0]->AreAllArgsImmediates() || !values[1]->AreAllArgsImmediates()) {
+ LOG_WARNING(Shader_SPIRV, "Not all arguments in PTP are immediate, ignoring");
+ return;
+ }
+ const IR::Opcode opcode{values[0]->GetOpcode()};
+ if (opcode != values[1]->GetOpcode() || opcode != IR::Opcode::CompositeConstructU32x4) {
+ throw LogicError("Invalid PTP arguments");
+ }
+ auto read{[&](unsigned int a, unsigned int b) { return values[a]->Arg(b).U32(); }};
+
+ const Id offsets{ctx.ConstantComposite(
+ ctx.TypeArray(ctx.U32[2], ctx.Const(4U)), ctx.Const(read(0, 0), read(0, 1)),
+ ctx.Const(read(0, 2), read(0, 3)), ctx.Const(read(1, 0), read(1, 1)),
+ ctx.Const(read(1, 2), read(1, 3)))};
+ Add(spv::ImageOperandsMask::ConstOffsets, offsets);
+ }
+
+ explicit ImageOperands(Id offset, Id lod, Id ms) {
+ if (Sirit::ValidId(lod)) {
+ Add(spv::ImageOperandsMask::Lod, lod);
+ }
+ if (Sirit::ValidId(offset)) {
+ Add(spv::ImageOperandsMask::Offset, offset);
+ }
+ if (Sirit::ValidId(ms)) {
+ Add(spv::ImageOperandsMask::Sample, ms);
+ }
+ }
+
+ explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivates, u32 num_derivates,
+ Id offset, Id lod_clamp) {
+ if (!Sirit::ValidId(derivates)) {
+ throw LogicError("Derivates must be present");
+ }
+ boost::container::static_vector<Id, 3> deriv_x_accum;
+ boost::container::static_vector<Id, 3> deriv_y_accum;
+ for (u32 i = 0; i < num_derivates; ++i) {
+ deriv_x_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2));
+ deriv_y_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2 + 1));
+ }
+ const Id derivates_X{ctx.OpCompositeConstruct(
+ ctx.F32[num_derivates], std::span{deriv_x_accum.data(), deriv_x_accum.size()})};
+ const Id derivates_Y{ctx.OpCompositeConstruct(
+ ctx.F32[num_derivates], std::span{deriv_y_accum.data(), deriv_y_accum.size()})};
+ Add(spv::ImageOperandsMask::Grad, derivates_X, derivates_Y);
+ if (Sirit::ValidId(offset)) {
+ Add(spv::ImageOperandsMask::Offset, offset);
+ }
+ if (has_lod_clamp) {
+ Add(spv::ImageOperandsMask::MinLod, lod_clamp);
+ }
+ }
+
+ std::span<const Id> Span() const noexcept {
+ return std::span{operands.data(), operands.size()};
+ }
+
+ std::optional<spv::ImageOperandsMask> MaskOptional() const noexcept {
+ return mask != spv::ImageOperandsMask{} ? std::make_optional(mask) : std::nullopt;
+ }
+
+ spv::ImageOperandsMask Mask() const noexcept {
+ return mask;
+ }
+
+private:
+ void AddOffset(EmitContext& ctx, const IR::Value& offset) {
+ if (offset.IsEmpty()) {
+ return;
+ }
+ if (offset.IsImmediate()) {
+ Add(spv::ImageOperandsMask::ConstOffset, ctx.SConst(static_cast<s32>(offset.U32())));
+ return;
+ }
+ IR::Inst* const inst{offset.InstRecursive()};
+ if (inst->AreAllArgsImmediates()) {
+ switch (inst->GetOpcode()) {
+ case IR::Opcode::CompositeConstructU32x2:
+ Add(spv::ImageOperandsMask::ConstOffset,
+ ctx.SConst(static_cast<s32>(inst->Arg(0).U32()),
+ static_cast<s32>(inst->Arg(1).U32())));
+ return;
+ case IR::Opcode::CompositeConstructU32x3:
+ Add(spv::ImageOperandsMask::ConstOffset,
+ ctx.SConst(static_cast<s32>(inst->Arg(0).U32()),
+ static_cast<s32>(inst->Arg(1).U32()),
+ static_cast<s32>(inst->Arg(2).U32())));
+ return;
+ case IR::Opcode::CompositeConstructU32x4:
+ Add(spv::ImageOperandsMask::ConstOffset,
+ ctx.SConst(static_cast<s32>(inst->Arg(0).U32()),
+ static_cast<s32>(inst->Arg(1).U32()),
+ static_cast<s32>(inst->Arg(2).U32()),
+ static_cast<s32>(inst->Arg(3).U32())));
+ return;
+ default:
+ break;
+ }
+ }
+ Add(spv::ImageOperandsMask::Offset, ctx.Def(offset));
+ }
+
+ void Add(spv::ImageOperandsMask new_mask, Id value) {
+ mask = static_cast<spv::ImageOperandsMask>(static_cast<unsigned>(mask) |
+ static_cast<unsigned>(new_mask));
+ operands.push_back(value);
+ }
+
+ void Add(spv::ImageOperandsMask new_mask, Id value_1, Id value_2) {
+ mask = static_cast<spv::ImageOperandsMask>(static_cast<unsigned>(mask) |
+ static_cast<unsigned>(new_mask));
+ operands.push_back(value_1);
+ operands.push_back(value_2);
+ }
+
+ boost::container::static_vector<Id, 4> operands;
+ spv::ImageOperandsMask mask{};
+};
+
+Id Texture(EmitContext& ctx, IR::TextureInstInfo info, [[maybe_unused]] const IR::Value& index) {
+ const TextureDefinition& def{ctx.textures.at(info.descriptor_index)};
+ if (def.count > 1) {
+ const Id pointer{ctx.OpAccessChain(def.pointer_type, def.id, ctx.Def(index))};
+ return ctx.OpLoad(def.sampled_type, pointer);
+ } else {
+ return ctx.OpLoad(def.sampled_type, def.id);
+ }
+}
+
+Id TextureImage(EmitContext& ctx, IR::TextureInstInfo info, const IR::Value& index) {
+ if (!index.IsImmediate() || index.U32() != 0) {
+ throw NotImplementedException("Indirect image indexing");
+ }
+ if (info.type == TextureType::Buffer) {
+ const TextureBufferDefinition& def{ctx.texture_buffers.at(info.descriptor_index)};
+ if (def.count > 1) {
+ throw NotImplementedException("Indirect texture sample");
+ }
+ const Id sampler_id{def.id};
+ const Id id{ctx.OpLoad(ctx.sampled_texture_buffer_type, sampler_id)};
+ return ctx.OpImage(ctx.image_buffer_type, id);
+ } else {
+ const TextureDefinition& def{ctx.textures.at(info.descriptor_index)};
+ if (def.count > 1) {
+ throw NotImplementedException("Indirect texture sample");
+ }
+ return ctx.OpImage(def.image_type, ctx.OpLoad(def.sampled_type, def.id));
+ }
+}
+
+Id Image(EmitContext& ctx, const IR::Value& index, IR::TextureInstInfo info) {
+ if (!index.IsImmediate() || index.U32() != 0) {
+ throw NotImplementedException("Indirect image indexing");
+ }
+ if (info.type == TextureType::Buffer) {
+ const ImageBufferDefinition def{ctx.image_buffers.at(info.descriptor_index)};
+ return ctx.OpLoad(def.image_type, def.id);
+ } else {
+ const ImageDefinition def{ctx.images.at(info.descriptor_index)};
+ return ctx.OpLoad(def.image_type, def.id);
+ }
+}
+
+Id Decorate(EmitContext& ctx, IR::Inst* inst, Id sample) {
+ const auto info{inst->Flags<IR::TextureInstInfo>()};
+ if (info.relaxed_precision != 0) {
+ ctx.Decorate(sample, spv::Decoration::RelaxedPrecision);
+ }
+ return sample;
+}
+
+template <typename MethodPtrType, typename... Args>
+Id Emit(MethodPtrType sparse_ptr, MethodPtrType non_sparse_ptr, EmitContext& ctx, IR::Inst* inst,
+ Id result_type, Args&&... args) {
+ IR::Inst* const sparse{inst->GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+ if (!sparse) {
+ return Decorate(ctx, inst, (ctx.*non_sparse_ptr)(result_type, std::forward<Args>(args)...));
+ }
+ const Id struct_type{ctx.TypeStruct(ctx.U32[1], result_type)};
+ const Id sample{(ctx.*sparse_ptr)(struct_type, std::forward<Args>(args)...)};
+ const Id resident_code{ctx.OpCompositeExtract(ctx.U32[1], sample, 0U)};
+ sparse->SetDefinition(ctx.OpImageSparseTexelsResident(ctx.U1, resident_code));
+ sparse->Invalidate();
+ Decorate(ctx, inst, sample);
+ return ctx.OpCompositeExtract(result_type, sample, 1U);
+}
+} // Anonymous namespace
+
+Id EmitBindlessImageSampleImplicitLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitBindlessImageSampleExplicitLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitBindlessImageSampleDrefImplicitLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitBindlessImageGather(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitBindlessImageGatherDref(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitBindlessImageFetch(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitBindlessImageQueryDimensions(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitBindlessImageQueryLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitBindlessImageGradient(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitBindlessImageRead(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitBindlessImageWrite(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitBoundImageSampleImplicitLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitBoundImageSampleExplicitLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitBoundImageSampleDrefImplicitLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitBoundImageSampleDrefExplicitLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitBoundImageGather(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitBoundImageGatherDref(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitBoundImageFetch(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitBoundImageQueryDimensions(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitBoundImageQueryLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitBoundImageGradient(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitBoundImageRead(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitBoundImageWrite(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id bias_lc, const IR::Value& offset) {
+ const auto info{inst->Flags<IR::TextureInstInfo>()};
+ if (ctx.stage == Stage::Fragment) {
+ const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0,
+ bias_lc, offset);
+ return Emit(&EmitContext::OpImageSparseSampleImplicitLod,
+ &EmitContext::OpImageSampleImplicitLod, ctx, inst, ctx.F32[4],
+ Texture(ctx, info, index), coords, operands.MaskOptional(), operands.Span());
+ } else {
+ // We can't use implicit LODs on non-fragment stages in SPIR-V. Maxwell hardware behaves as
+ // if the LOD were explicitly zero. This may change on Turing with implicit compute
+ // derivatives.
+ const Id lod{ctx.Const(0.0f)};
+ const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod, offset);
+ return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
+ &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4],
+ Texture(ctx, info, index), coords, operands.Mask(), operands.Span());
+ }
+}
+
+Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id lod, const IR::Value& offset) {
+ const auto info{inst->Flags<IR::TextureInstInfo>()};
+ const ImageOperands operands(ctx, false, true, false, lod, offset);
+ return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
+ &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4],
+ Texture(ctx, info, index), coords, operands.Mask(), operands.Span());
+}
+
+Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
+ Id coords, Id dref, Id bias_lc, const IR::Value& offset) {
+ const auto info{inst->Flags<IR::TextureInstInfo>()};
+ const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0, bias_lc,
+ offset);
+ return Emit(&EmitContext::OpImageSparseSampleDrefImplicitLod,
+ &EmitContext::OpImageSampleDrefImplicitLod, ctx, inst, ctx.F32[1],
+ Texture(ctx, info, index), coords, dref, operands.MaskOptional(), operands.Span());
+}
+
+Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
+ Id coords, Id dref, Id lod, const IR::Value& offset) {
+ const auto info{inst->Flags<IR::TextureInstInfo>()};
+ const ImageOperands operands(ctx, false, true, false, lod, offset);
+ return Emit(&EmitContext::OpImageSparseSampleDrefExplicitLod,
+ &EmitContext::OpImageSampleDrefExplicitLod, ctx, inst, ctx.F32[1],
+ Texture(ctx, info, index), coords, dref, operands.Mask(), operands.Span());
+}
+
+Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ const IR::Value& offset, const IR::Value& offset2) {
+ const auto info{inst->Flags<IR::TextureInstInfo>()};
+ const ImageOperands operands(ctx, offset, offset2);
+ return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst,
+ ctx.F32[4], Texture(ctx, info, index), coords, ctx.Const(info.gather_component),
+ operands.MaskOptional(), operands.Span());
+}
+
+Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ const IR::Value& offset, const IR::Value& offset2, Id dref) {
+ const auto info{inst->Flags<IR::TextureInstInfo>()};
+ const ImageOperands operands(ctx, offset, offset2);
+ return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst,
+ ctx.F32[4], Texture(ctx, info, index), coords, dref, operands.MaskOptional(),
+ operands.Span());
+}
+
+Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset,
+ Id lod, Id ms) {
+ const auto info{inst->Flags<IR::TextureInstInfo>()};
+ if (info.type == TextureType::Buffer) {
+ lod = Id{};
+ }
+ const ImageOperands operands(offset, lod, ms);
+ return Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst, ctx.F32[4],
+ TextureImage(ctx, info, index), coords, operands.MaskOptional(), operands.Span());
+}
+
+Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod) {
+ const auto info{inst->Flags<IR::TextureInstInfo>()};
+ const Id image{TextureImage(ctx, info, index)};
+ const Id zero{ctx.u32_zero_value};
+ const auto mips{[&] { return ctx.OpImageQueryLevels(ctx.U32[1], image); }};
+ switch (info.type) {
+ case TextureType::Color1D:
+ return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[1], image, lod),
+ zero, zero, mips());
+ case TextureType::ColorArray1D:
+ case TextureType::Color2D:
+ case TextureType::ColorCube:
+ return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[2], image, lod),
+ zero, mips());
+ case TextureType::ColorArray2D:
+ case TextureType::Color3D:
+ case TextureType::ColorArrayCube:
+ return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[3], image, lod),
+ mips());
+ case TextureType::Buffer:
+ return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySize(ctx.U32[1], image), zero,
+ zero, mips());
+ }
+ throw LogicError("Unspecified image type {}", info.type.Value());
+}
+
+Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) {
+ const auto info{inst->Flags<IR::TextureInstInfo>()};
+ const Id zero{ctx.f32_zero_value};
+ const Id sampler{Texture(ctx, info, index)};
+ return ctx.OpCompositeConstruct(ctx.F32[4], ctx.OpImageQueryLod(ctx.F32[2], sampler, coords),
+ zero, zero);
+}
+
+Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id derivates, Id offset, Id lod_clamp) {
+ const auto info{inst->Flags<IR::TextureInstInfo>()};
+ const ImageOperands operands(ctx, info.has_lod_clamp != 0, derivates, info.num_derivates,
+ offset, lod_clamp);
+ return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
+ &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4],
+ Texture(ctx, info, index), coords, operands.Mask(), operands.Span());
+}
+
+Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) {
+ const auto info{inst->Flags<IR::TextureInstInfo>()};
+ if (info.image_format == ImageFormat::Typeless && !ctx.profile.support_typeless_image_loads) {
+ LOG_WARNING(Shader_SPIRV, "Typeless image read not supported by host");
+ return ctx.ConstantNull(ctx.U32[4]);
+ }
+ return Emit(&EmitContext::OpImageSparseRead, &EmitContext::OpImageRead, ctx, inst, ctx.U32[4],
+ Image(ctx, index, info), coords, std::nullopt, std::span<const Id>{});
+}
+
+void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color) {
+ const auto info{inst->Flags<IR::TextureInstInfo>()};
+ ctx.OpImageWrite(Image(ctx, index, info), coords, color);
+}
+
+} // namespace Shader::Backend::SPIRV
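
The ImageOperands helper above accumulates spv::ImageOperandsMask bits while pushing the matching operand Ids in the same order, combining the scoped-enum bits through explicit casts. A reduced sketch of that accumulation pattern (toy Flag enum, not the real SPIR-V type):

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

enum class Flag : std::uint32_t { None = 0, Bias = 1, Lod = 2, Offset = 8, MinLod = 32 };

class Operands {
public:
    // OR the new bit into the accumulated mask and remember the operand in emission order.
    void Add(Flag new_flag, std::uint32_t value) {
        mask = static_cast<Flag>(static_cast<std::uint32_t>(mask) |
                                 static_cast<std::uint32_t>(new_flag));
        operands.push_back(value);
    }

    Flag Mask() const noexcept {
        return mask;
    }

    const std::vector<std::uint32_t>& Span() const noexcept {
        return operands;
    }

private:
    std::vector<std::uint32_t> operands;
    Flag mask{};
};

int main() {
    Operands ops;
    ops.Add(Flag::Lod, 7);
    ops.Add(Flag::Offset, 9);
    assert(static_cast<std::uint32_t>(ops.Mask()) == (2u | 8u));
    assert(ops.Span().size() == 2 && ops.Span()[0] == 7);
    return 0;
}
```

The real class additionally exposes MaskOptional() so that instructions with no image operands can omit the mask entirely.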
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp
new file mode 100644
index 000000000..d7f1a365a
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp
@@ -0,0 +1,183 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+
+namespace Shader::Backend::SPIRV {
+namespace {
+Id Image(EmitContext& ctx, const IR::Value& index, IR::TextureInstInfo info) {
+ if (!index.IsImmediate()) {
+ throw NotImplementedException("Indirect image indexing");
+ }
+ if (info.type == TextureType::Buffer) {
+ const ImageBufferDefinition def{ctx.image_buffers.at(index.U32())};
+ return def.id;
+ } else {
+ const ImageDefinition def{ctx.images.at(index.U32())};
+ return def.id;
+ }
+}
+
+std::pair<Id, Id> AtomicArgs(EmitContext& ctx) {
+ const Id scope{ctx.Const(static_cast<u32>(spv::Scope::Device))};
+ const Id semantics{ctx.u32_zero_value};
+ return {scope, semantics};
+}
+
+Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id value,
+ Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
+ const auto info{inst->Flags<IR::TextureInstInfo>()};
+ const Id image{Image(ctx, index, info)};
+ const Id pointer{ctx.OpImageTexelPointer(ctx.image_u32, image, coords, ctx.Const(0U))};
+ const auto [scope, semantics]{AtomicArgs(ctx)};
+ return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value);
+}
+} // Anonymous namespace
+
+Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id value) {
+ return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicIAdd);
+}
+
+Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id value) {
+ return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicSMin);
+}
+
+Id EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id value) {
+ return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicUMin);
+}
+
+Id EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id value) {
+ return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicSMax);
+}
+
+Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id value) {
+ return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicUMax);
+}
+
+Id EmitImageAtomicInc32(EmitContext&, IR::Inst*, const IR::Value&, Id, Id) {
+ // TODO: This is not yet implemented
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitImageAtomicDec32(EmitContext&, IR::Inst*, const IR::Value&, Id, Id) {
+ // TODO: This is not yet implemented
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id value) {
+ return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicAnd);
+}
+
+Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id value) {
+ return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicOr);
+}
+
+Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id value) {
+ return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicXor);
+}
+
+Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id value) {
+ return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicExchange);
+}
+
+Id EmitBindlessImageAtomicIAdd32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBindlessImageAtomicSMin32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBindlessImageAtomicUMin32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBindlessImageAtomicSMax32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBindlessImageAtomicUMax32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBindlessImageAtomicInc32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBindlessImageAtomicDec32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBindlessImageAtomicAnd32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBindlessImageAtomicOr32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBindlessImageAtomicXor32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBindlessImageAtomicExchange32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBoundImageAtomicIAdd32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBoundImageAtomicSMin32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBoundImageAtomicUMin32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBoundImageAtomicSMax32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBoundImageAtomicUMax32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBoundImageAtomicInc32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBoundImageAtomicDec32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBoundImageAtomicAnd32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBoundImageAtomicOr32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBoundImageAtomicXor32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBoundImageAtomicExchange32(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+} // namespace Shader::Backend::SPIRV
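
ImageAtomicU32 above collapses the per-opcode atomic emitters into one helper by accepting a pointer to the Sirit::Module member function that emits the instruction. In isolation, the (object.*member_pointer)(args...) dispatch it relies on looks like this (toy types, nothing from Sirit):

```cpp
#include <cassert>

struct Module {
    int OpAtomicIAdd(int a, int b) { return a + b; }
    int OpAtomicAnd(int a, int b) { return a & b; }
};

// Dispatch through a pointer-to-member-function, mirroring ImageAtomicU32's atomic_func parameter.
int Dispatch(Module& module, int (Module::*atomic_func)(int, int), int a, int b) {
    return (module.*atomic_func)(a, b);
}

int main() {
    Module module;
    assert(Dispatch(module, &Module::OpAtomicIAdd, 2, 3) == 5);
    assert(Dispatch(module, &Module::OpAtomicAnd, 6, 3) == 2);
    return 0;
}
```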
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
new file mode 100644
index 000000000..c9db1c164
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -0,0 +1,581 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <sirit/sirit.h>
+
+#include "common/common_types.h"
+
+namespace Shader::IR {
+enum class Attribute : u64;
+enum class Patch : u64;
+class Inst;
+class Value;
+} // namespace Shader::IR
+
+namespace Shader::Backend::SPIRV {
+
+using Sirit::Id;
+
+class EmitContext;
+
+// Microinstruction emitters
+Id EmitPhi(EmitContext& ctx, IR::Inst* inst);
+void EmitVoid(EmitContext& ctx);
+Id EmitIdentity(EmitContext& ctx, const IR::Value& value);
+Id EmitConditionRef(EmitContext& ctx, const IR::Value& value);
+void EmitReference(EmitContext&);
+void EmitPhiMove(EmitContext&);
+void EmitJoin(EmitContext& ctx);
+void EmitDemoteToHelperInvocation(EmitContext& ctx);
+void EmitBarrier(EmitContext& ctx);
+void EmitWorkgroupMemoryBarrier(EmitContext& ctx);
+void EmitDeviceMemoryBarrier(EmitContext& ctx);
+void EmitPrologue(EmitContext& ctx);
+void EmitEpilogue(EmitContext& ctx);
+void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream);
+void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream);
+void EmitGetRegister(EmitContext& ctx);
+void EmitSetRegister(EmitContext& ctx);
+void EmitGetPred(EmitContext& ctx);
+void EmitSetPred(EmitContext& ctx);
+void EmitSetGotoVariable(EmitContext& ctx);
+void EmitGetGotoVariable(EmitContext& ctx);
+void EmitSetIndirectBranchVariable(EmitContext& ctx);
+void EmitGetIndirectBranchVariable(EmitContext& ctx);
+Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex);
+void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, Id vertex);
+Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex);
+void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, Id vertex);
+Id EmitGetPatch(EmitContext& ctx, IR::Patch patch);
+void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value);
+void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value);
+void EmitSetSampleMask(EmitContext& ctx, Id value);
+void EmitSetFragDepth(EmitContext& ctx, Id value);
+void EmitGetZFlag(EmitContext& ctx);
+void EmitGetSFlag(EmitContext& ctx);
+void EmitGetCFlag(EmitContext& ctx);
+void EmitGetOFlag(EmitContext& ctx);
+void EmitSetZFlag(EmitContext& ctx);
+void EmitSetSFlag(EmitContext& ctx);
+void EmitSetCFlag(EmitContext& ctx);
+void EmitSetOFlag(EmitContext& ctx);
+Id EmitWorkgroupId(EmitContext& ctx);
+Id EmitLocalInvocationId(EmitContext& ctx);
+Id EmitInvocationId(EmitContext& ctx);
+Id EmitSampleId(EmitContext& ctx);
+Id EmitIsHelperInvocation(EmitContext& ctx);
+Id EmitYDirection(EmitContext& ctx);
+Id EmitLoadLocal(EmitContext& ctx, Id word_offset);
+void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value);
+Id EmitUndefU1(EmitContext& ctx);
+Id EmitUndefU8(EmitContext& ctx);
+Id EmitUndefU16(EmitContext& ctx);
+Id EmitUndefU32(EmitContext& ctx);
+Id EmitUndefU64(EmitContext& ctx);
+void EmitLoadGlobalU8(EmitContext& ctx);
+void EmitLoadGlobalS8(EmitContext& ctx);
+void EmitLoadGlobalU16(EmitContext& ctx);
+void EmitLoadGlobalS16(EmitContext& ctx);
+Id EmitLoadGlobal32(EmitContext& ctx, Id address);
+Id EmitLoadGlobal64(EmitContext& ctx, Id address);
+Id EmitLoadGlobal128(EmitContext& ctx, Id address);
+void EmitWriteGlobalU8(EmitContext& ctx);
+void EmitWriteGlobalS8(EmitContext& ctx);
+void EmitWriteGlobalU16(EmitContext& ctx);
+void EmitWriteGlobalS16(EmitContext& ctx);
+void EmitWriteGlobal32(EmitContext& ctx, Id address, Id value);
+void EmitWriteGlobal64(EmitContext& ctx, Id address, Id value);
+void EmitWriteGlobal128(EmitContext& ctx, Id address, Id value);
+Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+Id EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+Id EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+Id EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+Id EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+Id EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitLoadSharedU8(EmitContext& ctx, Id offset);
+Id EmitLoadSharedS8(EmitContext& ctx, Id offset);
+Id EmitLoadSharedU16(EmitContext& ctx, Id offset);
+Id EmitLoadSharedS16(EmitContext& ctx, Id offset);
+Id EmitLoadSharedU32(EmitContext& ctx, Id offset);
+Id EmitLoadSharedU64(EmitContext& ctx, Id offset);
+Id EmitLoadSharedU128(EmitContext& ctx, Id offset);
+void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value);
+void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value);
+void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value);
+void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value);
+void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value);
+Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2);
+Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3);
+Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
+Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index);
+Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index);
+Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index);
+Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index);
+Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index);
+Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index);
+Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2);
+Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3);
+Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
+Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index);
+Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index);
+Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index);
+Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index);
+Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index);
+Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index);
+Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2);
+Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3);
+Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
+Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index);
+Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index);
+Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index);
+Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index);
+Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index);
+Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index);
+void EmitCompositeConstructF64x2(EmitContext& ctx);
+void EmitCompositeConstructF64x3(EmitContext& ctx);
+void EmitCompositeConstructF64x4(EmitContext& ctx);
+void EmitCompositeExtractF64x2(EmitContext& ctx);
+void EmitCompositeExtractF64x3(EmitContext& ctx);
+void EmitCompositeExtractF64x4(EmitContext& ctx);
+Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index);
+Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index);
+Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index);
+Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value);
+Id EmitSelectU8(EmitContext& ctx, Id cond, Id true_value, Id false_value);
+Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value);
+Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value);
+Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value);
+Id EmitSelectF16(EmitContext& ctx, Id cond, Id true_value, Id false_value);
+Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value);
+Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value);
+void EmitBitCastU16F16(EmitContext& ctx);
+Id EmitBitCastU32F32(EmitContext& ctx, Id value);
+void EmitBitCastU64F64(EmitContext& ctx);
+void EmitBitCastF16U16(EmitContext& ctx);
+Id EmitBitCastF32U32(EmitContext& ctx, Id value);
+void EmitBitCastF64U64(EmitContext& ctx);
+Id EmitPackUint2x32(EmitContext& ctx, Id value);
+Id EmitUnpackUint2x32(EmitContext& ctx, Id value);
+Id EmitPackFloat2x16(EmitContext& ctx, Id value);
+Id EmitUnpackFloat2x16(EmitContext& ctx, Id value);
+Id EmitPackHalf2x16(EmitContext& ctx, Id value);
+Id EmitUnpackHalf2x16(EmitContext& ctx, Id value);
+Id EmitPackDouble2x32(EmitContext& ctx, Id value);
+Id EmitUnpackDouble2x32(EmitContext& ctx, Id value);
+void EmitGetZeroFromOp(EmitContext& ctx);
+void EmitGetSignFromOp(EmitContext& ctx);
+void EmitGetCarryFromOp(EmitContext& ctx);
+void EmitGetOverflowFromOp(EmitContext& ctx);
+void EmitGetSparseFromOp(EmitContext& ctx);
+void EmitGetInBoundsFromOp(EmitContext& ctx);
+Id EmitFPAbs16(EmitContext& ctx, Id value);
+Id EmitFPAbs32(EmitContext& ctx, Id value);
+Id EmitFPAbs64(EmitContext& ctx, Id value);
+Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c);
+Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c);
+Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c);
+Id EmitFPMax32(EmitContext& ctx, Id a, Id b);
+Id EmitFPMax64(EmitContext& ctx, Id a, Id b);
+Id EmitFPMin32(EmitContext& ctx, Id a, Id b);
+Id EmitFPMin64(EmitContext& ctx, Id a, Id b);
+Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitFPNeg16(EmitContext& ctx, Id value);
+Id EmitFPNeg32(EmitContext& ctx, Id value);
+Id EmitFPNeg64(EmitContext& ctx, Id value);
+Id EmitFPSin(EmitContext& ctx, Id value);
+Id EmitFPCos(EmitContext& ctx, Id value);
+Id EmitFPExp2(EmitContext& ctx, Id value);
+Id EmitFPLog2(EmitContext& ctx, Id value);
+Id EmitFPRecip32(EmitContext& ctx, Id value);
+Id EmitFPRecip64(EmitContext& ctx, Id value);
+Id EmitFPRecipSqrt32(EmitContext& ctx, Id value);
+Id EmitFPRecipSqrt64(EmitContext& ctx, Id value);
+Id EmitFPSqrt(EmitContext& ctx, Id value);
+Id EmitFPSaturate16(EmitContext& ctx, Id value);
+Id EmitFPSaturate32(EmitContext& ctx, Id value);
+Id EmitFPSaturate64(EmitContext& ctx, Id value);
+Id EmitFPClamp16(EmitContext& ctx, Id value, Id min_value, Id max_value);
+Id EmitFPClamp32(EmitContext& ctx, Id value, Id min_value, Id max_value);
+Id EmitFPClamp64(EmitContext& ctx, Id value, Id min_value, Id max_value);
+Id EmitFPRoundEven16(EmitContext& ctx, Id value);
+Id EmitFPRoundEven32(EmitContext& ctx, Id value);
+Id EmitFPRoundEven64(EmitContext& ctx, Id value);
+Id EmitFPFloor16(EmitContext& ctx, Id value);
+Id EmitFPFloor32(EmitContext& ctx, Id value);
+Id EmitFPFloor64(EmitContext& ctx, Id value);
+Id EmitFPCeil16(EmitContext& ctx, Id value);
+Id EmitFPCeil32(EmitContext& ctx, Id value);
+Id EmitFPCeil64(EmitContext& ctx, Id value);
+Id EmitFPTrunc16(EmitContext& ctx, Id value);
+Id EmitFPTrunc32(EmitContext& ctx, Id value);
+Id EmitFPTrunc64(EmitContext& ctx, Id value);
+Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordEqual16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdNotEqual16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdNotEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdNotEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordNotEqual16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordNotEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordNotEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdLessThan16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdLessThan32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdLessThan64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordLessThan16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordLessThan32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordLessThan64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdGreaterThan16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdGreaterThan32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdGreaterThan64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordGreaterThan16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordGreaterThan32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordGreaterThan64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPIsNan16(EmitContext& ctx, Id value);
+Id EmitFPIsNan32(EmitContext& ctx, Id value);
+Id EmitFPIsNan64(EmitContext& ctx, Id value);
+Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitIAdd64(EmitContext& ctx, Id a, Id b);
+Id EmitISub32(EmitContext& ctx, Id a, Id b);
+Id EmitISub64(EmitContext& ctx, Id a, Id b);
+Id EmitIMul32(EmitContext& ctx, Id a, Id b);
+Id EmitINeg32(EmitContext& ctx, Id value);
+Id EmitINeg64(EmitContext& ctx, Id value);
+Id EmitIAbs32(EmitContext& ctx, Id value);
+Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift);
+Id EmitShiftLeftLogical64(EmitContext& ctx, Id base, Id shift);
+Id EmitShiftRightLogical32(EmitContext& ctx, Id base, Id shift);
+Id EmitShiftRightLogical64(EmitContext& ctx, Id base, Id shift);
+Id EmitShiftRightArithmetic32(EmitContext& ctx, Id base, Id shift);
+Id EmitShiftRightArithmetic64(EmitContext& ctx, Id base, Id shift);
+Id EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count);
+Id EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count);
+Id EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count);
+Id EmitBitReverse32(EmitContext& ctx, Id value);
+Id EmitBitCount32(EmitContext& ctx, Id value);
+Id EmitBitwiseNot32(EmitContext& ctx, Id value);
+Id EmitFindSMsb32(EmitContext& ctx, Id value);
+Id EmitFindUMsb32(EmitContext& ctx, Id value);
+Id EmitSMin32(EmitContext& ctx, Id a, Id b);
+Id EmitUMin32(EmitContext& ctx, Id a, Id b);
+Id EmitSMax32(EmitContext& ctx, Id a, Id b);
+Id EmitUMax32(EmitContext& ctx, Id a, Id b);
+Id EmitSClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max);
+Id EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max);
+Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitSharedAtomicSMin32(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitSharedAtomicUMin32(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitSharedAtomicSMax32(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitSharedAtomicUMax32(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitSharedAtomicInc32(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitSharedAtomicDec32(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitSharedAtomicAnd32(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitSharedAtomicOr32(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitSharedAtomicXor32(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitSharedAtomicExchange32(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitSharedAtomicExchange64(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value);
+Id EmitGlobalAtomicIAdd32(EmitContext& ctx);
+Id EmitGlobalAtomicSMin32(EmitContext& ctx);
+Id EmitGlobalAtomicUMin32(EmitContext& ctx);
+Id EmitGlobalAtomicSMax32(EmitContext& ctx);
+Id EmitGlobalAtomicUMax32(EmitContext& ctx);
+Id EmitGlobalAtomicInc32(EmitContext& ctx);
+Id EmitGlobalAtomicDec32(EmitContext& ctx);
+Id EmitGlobalAtomicAnd32(EmitContext& ctx);
+Id EmitGlobalAtomicOr32(EmitContext& ctx);
+Id EmitGlobalAtomicXor32(EmitContext& ctx);
+Id EmitGlobalAtomicExchange32(EmitContext& ctx);
+Id EmitGlobalAtomicIAdd64(EmitContext& ctx);
+Id EmitGlobalAtomicSMin64(EmitContext& ctx);
+Id EmitGlobalAtomicUMin64(EmitContext& ctx);
+Id EmitGlobalAtomicSMax64(EmitContext& ctx);
+Id EmitGlobalAtomicUMax64(EmitContext& ctx);
+Id EmitGlobalAtomicInc64(EmitContext& ctx);
+Id EmitGlobalAtomicDec64(EmitContext& ctx);
+Id EmitGlobalAtomicAnd64(EmitContext& ctx);
+Id EmitGlobalAtomicOr64(EmitContext& ctx);
+Id EmitGlobalAtomicXor64(EmitContext& ctx);
+Id EmitGlobalAtomicExchange64(EmitContext& ctx);
+Id EmitGlobalAtomicAddF32(EmitContext& ctx);
+Id EmitGlobalAtomicAddF16x2(EmitContext& ctx);
+Id EmitGlobalAtomicAddF32x2(EmitContext& ctx);
+Id EmitGlobalAtomicMinF16x2(EmitContext& ctx);
+Id EmitGlobalAtomicMinF32x2(EmitContext& ctx);
+Id EmitGlobalAtomicMaxF16x2(EmitContext& ctx);
+Id EmitGlobalAtomicMaxF32x2(EmitContext& ctx);
+Id EmitLogicalOr(EmitContext& ctx, Id a, Id b);
+Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b);
+Id EmitLogicalXor(EmitContext& ctx, Id a, Id b);
+Id EmitLogicalNot(EmitContext& ctx, Id value);
+Id EmitConvertS16F16(EmitContext& ctx, Id value);
+Id EmitConvertS16F32(EmitContext& ctx, Id value);
+Id EmitConvertS16F64(EmitContext& ctx, Id value);
+Id EmitConvertS32F16(EmitContext& ctx, Id value);
+Id EmitConvertS32F32(EmitContext& ctx, Id value);
+Id EmitConvertS32F64(EmitContext& ctx, Id value);
+Id EmitConvertS64F16(EmitContext& ctx, Id value);
+Id EmitConvertS64F32(EmitContext& ctx, Id value);
+Id EmitConvertS64F64(EmitContext& ctx, Id value);
+Id EmitConvertU16F16(EmitContext& ctx, Id value);
+Id EmitConvertU16F32(EmitContext& ctx, Id value);
+Id EmitConvertU16F64(EmitContext& ctx, Id value);
+Id EmitConvertU32F16(EmitContext& ctx, Id value);
+Id EmitConvertU32F32(EmitContext& ctx, Id value);
+Id EmitConvertU32F64(EmitContext& ctx, Id value);
+Id EmitConvertU64F16(EmitContext& ctx, Id value);
+Id EmitConvertU64F32(EmitContext& ctx, Id value);
+Id EmitConvertU64F64(EmitContext& ctx, Id value);
+Id EmitConvertU64U32(EmitContext& ctx, Id value);
+Id EmitConvertU32U64(EmitContext& ctx, Id value);
+Id EmitConvertF16F32(EmitContext& ctx, Id value);
+Id EmitConvertF32F16(EmitContext& ctx, Id value);
+Id EmitConvertF32F64(EmitContext& ctx, Id value);
+Id EmitConvertF64F32(EmitContext& ctx, Id value);
+Id EmitConvertF16S8(EmitContext& ctx, Id value);
+Id EmitConvertF16S16(EmitContext& ctx, Id value);
+Id EmitConvertF16S32(EmitContext& ctx, Id value);
+Id EmitConvertF16S64(EmitContext& ctx, Id value);
+Id EmitConvertF16U8(EmitContext& ctx, Id value);
+Id EmitConvertF16U16(EmitContext& ctx, Id value);
+Id EmitConvertF16U32(EmitContext& ctx, Id value);
+Id EmitConvertF16U64(EmitContext& ctx, Id value);
+Id EmitConvertF32S8(EmitContext& ctx, Id value);
+Id EmitConvertF32S16(EmitContext& ctx, Id value);
+Id EmitConvertF32S32(EmitContext& ctx, Id value);
+Id EmitConvertF32S64(EmitContext& ctx, Id value);
+Id EmitConvertF32U8(EmitContext& ctx, Id value);
+Id EmitConvertF32U16(EmitContext& ctx, Id value);
+Id EmitConvertF32U32(EmitContext& ctx, Id value);
+Id EmitConvertF32U64(EmitContext& ctx, Id value);
+Id EmitConvertF64S8(EmitContext& ctx, Id value);
+Id EmitConvertF64S16(EmitContext& ctx, Id value);
+Id EmitConvertF64S32(EmitContext& ctx, Id value);
+Id EmitConvertF64S64(EmitContext& ctx, Id value);
+Id EmitConvertF64U8(EmitContext& ctx, Id value);
+Id EmitConvertF64U16(EmitContext& ctx, Id value);
+Id EmitConvertF64U32(EmitContext& ctx, Id value);
+Id EmitConvertF64U64(EmitContext& ctx, Id value);
+Id EmitBindlessImageSampleImplicitLod(EmitContext&);
+Id EmitBindlessImageSampleExplicitLod(EmitContext&);
+Id EmitBindlessImageSampleDrefImplicitLod(EmitContext&);
+Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&);
+Id EmitBindlessImageGather(EmitContext&);
+Id EmitBindlessImageGatherDref(EmitContext&);
+Id EmitBindlessImageFetch(EmitContext&);
+Id EmitBindlessImageQueryDimensions(EmitContext&);
+Id EmitBindlessImageQueryLod(EmitContext&);
+Id EmitBindlessImageGradient(EmitContext&);
+Id EmitBindlessImageRead(EmitContext&);
+Id EmitBindlessImageWrite(EmitContext&);
+Id EmitBoundImageSampleImplicitLod(EmitContext&);
+Id EmitBoundImageSampleExplicitLod(EmitContext&);
+Id EmitBoundImageSampleDrefImplicitLod(EmitContext&);
+Id EmitBoundImageSampleDrefExplicitLod(EmitContext&);
+Id EmitBoundImageGather(EmitContext&);
+Id EmitBoundImageGatherDref(EmitContext&);
+Id EmitBoundImageFetch(EmitContext&);
+Id EmitBoundImageQueryDimensions(EmitContext&);
+Id EmitBoundImageQueryLod(EmitContext&);
+Id EmitBoundImageGradient(EmitContext&);
+Id EmitBoundImageRead(EmitContext&);
+Id EmitBoundImageWrite(EmitContext&);
+Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id bias_lc, const IR::Value& offset);
+Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id lod, const IR::Value& offset);
+Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
+ Id coords, Id dref, Id bias_lc, const IR::Value& offset);
+Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
+ Id coords, Id dref, Id lod, const IR::Value& offset);
+Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ const IR::Value& offset, const IR::Value& offset2);
+Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ const IR::Value& offset, const IR::Value& offset2, Id dref);
+Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset,
+ Id lod, Id ms);
+Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod);
+Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
+Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id derivates, Id offset, Id lod_clamp);
+Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
+void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color);
+Id EmitBindlessImageAtomicIAdd32(EmitContext&);
+Id EmitBindlessImageAtomicSMin32(EmitContext&);
+Id EmitBindlessImageAtomicUMin32(EmitContext&);
+Id EmitBindlessImageAtomicSMax32(EmitContext&);
+Id EmitBindlessImageAtomicUMax32(EmitContext&);
+Id EmitBindlessImageAtomicInc32(EmitContext&);
+Id EmitBindlessImageAtomicDec32(EmitContext&);
+Id EmitBindlessImageAtomicAnd32(EmitContext&);
+Id EmitBindlessImageAtomicOr32(EmitContext&);
+Id EmitBindlessImageAtomicXor32(EmitContext&);
+Id EmitBindlessImageAtomicExchange32(EmitContext&);
+Id EmitBoundImageAtomicIAdd32(EmitContext&);
+Id EmitBoundImageAtomicSMin32(EmitContext&);
+Id EmitBoundImageAtomicUMin32(EmitContext&);
+Id EmitBoundImageAtomicSMax32(EmitContext&);
+Id EmitBoundImageAtomicUMax32(EmitContext&);
+Id EmitBoundImageAtomicInc32(EmitContext&);
+Id EmitBoundImageAtomicDec32(EmitContext&);
+Id EmitBoundImageAtomicAnd32(EmitContext&);
+Id EmitBoundImageAtomicOr32(EmitContext&);
+Id EmitBoundImageAtomicXor32(EmitContext&);
+Id EmitBoundImageAtomicExchange32(EmitContext&);
+Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id value);
+Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id value);
+Id EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id value);
+Id EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id value);
+Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id value);
+Id EmitImageAtomicInc32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id value);
+Id EmitImageAtomicDec32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id value);
+Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id value);
+Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id value);
+Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id value);
+Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+ Id value);
+Id EmitLaneId(EmitContext& ctx);
+Id EmitVoteAll(EmitContext& ctx, Id pred);
+Id EmitVoteAny(EmitContext& ctx, Id pred);
+Id EmitVoteEqual(EmitContext& ctx, Id pred);
+Id EmitSubgroupBallot(EmitContext& ctx, Id pred);
+Id EmitSubgroupEqMask(EmitContext& ctx);
+Id EmitSubgroupLtMask(EmitContext& ctx);
+Id EmitSubgroupLeMask(EmitContext& ctx);
+Id EmitSubgroupGtMask(EmitContext& ctx);
+Id EmitSubgroupGeMask(EmitContext& ctx);
+Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
+ Id segmentation_mask);
+Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
+ Id segmentation_mask);
+Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
+ Id segmentation_mask);
+Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
+ Id segmentation_mask);
+Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle);
+Id EmitDPdxFine(EmitContext& ctx, Id op_a);
+Id EmitDPdyFine(EmitContext& ctx, Id op_a);
+Id EmitDPdxCoarse(EmitContext& ctx, Id op_a);
+Id EmitDPdyCoarse(EmitContext& ctx, Id op_a);
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
new file mode 100644
index 000000000..3501d7495
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
@@ -0,0 +1,270 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
+
+namespace Shader::Backend::SPIRV {
+namespace {
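+// Resolve the GetZeroFromOp/GetSignFromOp pseudo-instructions attached to an integer result,
+// defining them as comparisons of the result against zero.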
+void SetZeroFlag(EmitContext& ctx, IR::Inst* inst, Id result) {
+ IR::Inst* const zero{inst->GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp)};
+ if (!zero) {
+ return;
+ }
+ zero->SetDefinition(ctx.OpIEqual(ctx.U1, result, ctx.u32_zero_value));
+ zero->Invalidate();
+}
+
+void SetSignFlag(EmitContext& ctx, IR::Inst* inst, Id result) {
+ IR::Inst* const sign{inst->GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp)};
+ if (!sign) {
+ return;
+ }
+ sign->SetDefinition(ctx.OpSLessThan(ctx.U1, result, ctx.u32_zero_value));
+ sign->Invalidate();
+}
+} // Anonymous namespace
+
+Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
+ Id result{};
+ if (IR::Inst* const carry{inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp)}) {
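+ // OpIAddCarry yields a two-member struct: member 0 is the sum, member 1 is the carry bit.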
+ const Id carry_type{ctx.TypeStruct(ctx.U32[1], ctx.U32[1])};
+ const Id carry_result{ctx.OpIAddCarry(carry_type, a, b)};
+ result = ctx.OpCompositeExtract(ctx.U32[1], carry_result, 0U);
+
+ const Id carry_value{ctx.OpCompositeExtract(ctx.U32[1], carry_result, 1U)};
+ carry->SetDefinition(ctx.OpINotEqual(ctx.U1, carry_value, ctx.u32_zero_value));
+ carry->Invalidate();
+ } else {
+ result = ctx.OpIAdd(ctx.U32[1], a, b);
+ }
+ SetZeroFlag(ctx, inst, result);
+ SetSignFlag(ctx, inst, result);
+ if (IR::Inst* const overflow{inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp)}) {
+ // https://stackoverflow.com/questions/55468823/how-to-detect-integer-overflow-in-c
+ constexpr u32 s32_max{static_cast<u32>(std::numeric_limits<s32>::max())};
+ const Id is_positive{ctx.OpSGreaterThanEqual(ctx.U1, a, ctx.u32_zero_value)};
+ const Id sub_a{ctx.OpISub(ctx.U32[1], ctx.Const(s32_max), a)};
+
+ const Id positive_test{ctx.OpSGreaterThan(ctx.U1, b, sub_a)};
+ const Id negative_test{ctx.OpSLessThan(ctx.U1, b, sub_a)};
+ const Id carry_flag{ctx.OpSelect(ctx.U1, is_positive, positive_test, negative_test)};
+ overflow->SetDefinition(carry_flag);
+ overflow->Invalidate();
+ }
+ return result;
+}
+
+Id EmitIAdd64(EmitContext& ctx, Id a, Id b) {
+ return ctx.OpIAdd(ctx.U64, a, b);
+}
+
+Id EmitISub32(EmitContext& ctx, Id a, Id b) {
+ return ctx.OpISub(ctx.U32[1], a, b);
+}
+
+Id EmitISub64(EmitContext& ctx, Id a, Id b) {
+ return ctx.OpISub(ctx.U64, a, b);
+}
+
+Id EmitIMul32(EmitContext& ctx, Id a, Id b) {
+ return ctx.OpIMul(ctx.U32[1], a, b);
+}
+
+Id EmitINeg32(EmitContext& ctx, Id value) {
+ return ctx.OpSNegate(ctx.U32[1], value);
+}
+
+Id EmitINeg64(EmitContext& ctx, Id value) {
+ return ctx.OpSNegate(ctx.U64, value);
+}
+
+Id EmitIAbs32(EmitContext& ctx, Id value) {
+ return ctx.OpSAbs(ctx.U32[1], value);
+}
+
+Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift) {
+ return ctx.OpShiftLeftLogical(ctx.U32[1], base, shift);
+}
+
+Id EmitShiftLeftLogical64(EmitContext& ctx, Id base, Id shift) {
+ return ctx.OpShiftLeftLogical(ctx.U64, base, shift);
+}
+
+Id EmitShiftRightLogical32(EmitContext& ctx, Id base, Id shift) {
+ return ctx.OpShiftRightLogical(ctx.U32[1], base, shift);
+}
+
+Id EmitShiftRightLogical64(EmitContext& ctx, Id base, Id shift) {
+ return ctx.OpShiftRightLogical(ctx.U64, base, shift);
+}
+
+Id EmitShiftRightArithmetic32(EmitContext& ctx, Id base, Id shift) {
+ return ctx.OpShiftRightArithmetic(ctx.U32[1], base, shift);
+}
+
+Id EmitShiftRightArithmetic64(EmitContext& ctx, Id base, Id shift) {
+ return ctx.OpShiftRightArithmetic(ctx.U64, base, shift);
+}
+
+Id EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
+ const Id result{ctx.OpBitwiseAnd(ctx.U32[1], a, b)};
+ SetZeroFlag(ctx, inst, result);
+ SetSignFlag(ctx, inst, result);
+ return result;
+}
+
+Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
+ const Id result{ctx.OpBitwiseOr(ctx.U32[1], a, b)};
+ SetZeroFlag(ctx, inst, result);
+ SetSignFlag(ctx, inst, result);
+ return result;
+}
+
+Id EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
+ const Id result{ctx.OpBitwiseXor(ctx.U32[1], a, b)};
+ SetZeroFlag(ctx, inst, result);
+ SetSignFlag(ctx, inst, result);
+ return result;
+}
+
+Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count) {
+ return ctx.OpBitFieldInsert(ctx.U32[1], base, insert, offset, count);
+}
+
+Id EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count) {
+ const Id result{ctx.OpBitFieldSExtract(ctx.U32[1], base, offset, count)};
+ SetZeroFlag(ctx, inst, result);
+ SetSignFlag(ctx, inst, result);
+ return result;
+}
+
+Id EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count) {
+ const Id result{ctx.OpBitFieldUExtract(ctx.U32[1], base, offset, count)};
+ SetZeroFlag(ctx, inst, result);
+ SetSignFlag(ctx, inst, result);
+ return result;
+}
+
+Id EmitBitReverse32(EmitContext& ctx, Id value) {
+ return ctx.OpBitReverse(ctx.U32[1], value);
+}
+
+Id EmitBitCount32(EmitContext& ctx, Id value) {
+ return ctx.OpBitCount(ctx.U32[1], value);
+}
+
+Id EmitBitwiseNot32(EmitContext& ctx, Id value) {
+ return ctx.OpNot(ctx.U32[1], value);
+}
+
+Id EmitFindSMsb32(EmitContext& ctx, Id value) {
+ return ctx.OpFindSMsb(ctx.U32[1], value);
+}
+
+Id EmitFindUMsb32(EmitContext& ctx, Id value) {
+ return ctx.OpFindUMsb(ctx.U32[1], value);
+}
+
+Id EmitSMin32(EmitContext& ctx, Id a, Id b) {
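+ // Work around drivers with broken signed operations by bitcasting the operands through S32.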
+ const bool is_broken{ctx.profile.has_broken_signed_operations};
+ if (is_broken) {
+ a = ctx.OpBitcast(ctx.S32[1], a);
+ b = ctx.OpBitcast(ctx.S32[1], b);
+ }
+ const Id result{ctx.OpSMin(ctx.U32[1], a, b)};
+ return is_broken ? ctx.OpBitcast(ctx.U32[1], result) : result;
+}
+
+Id EmitUMin32(EmitContext& ctx, Id a, Id b) {
+ return ctx.OpUMin(ctx.U32[1], a, b);
+}
+
+Id EmitSMax32(EmitContext& ctx, Id a, Id b) {
+ const bool is_broken{ctx.profile.has_broken_signed_operations};
+ if (is_broken) {
+ a = ctx.OpBitcast(ctx.S32[1], a);
+ b = ctx.OpBitcast(ctx.S32[1], b);
+ }
+ const Id result{ctx.OpSMax(ctx.U32[1], a, b)};
+ return is_broken ? ctx.OpBitcast(ctx.U32[1], result) : result;
+}
+
+Id EmitUMax32(EmitContext& ctx, Id a, Id b) {
+ return ctx.OpUMax(ctx.U32[1], a, b);
+}
+
+Id EmitSClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max) {
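+ // With broken signed operations or a broken OpSClamp, compute through S32 bitcasts and
+ // emulate clamp with SMax(SMin(value, max), min) where needed.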
+ Id result{};
+ if (ctx.profile.has_broken_signed_operations || ctx.profile.has_broken_spirv_clamp) {
+ value = ctx.OpBitcast(ctx.S32[1], value);
+ min = ctx.OpBitcast(ctx.S32[1], min);
+ max = ctx.OpBitcast(ctx.S32[1], max);
+ if (ctx.profile.has_broken_spirv_clamp) {
+ result = ctx.OpSMax(ctx.S32[1], ctx.OpSMin(ctx.S32[1], value, max), min);
+ } else {
+ result = ctx.OpSClamp(ctx.S32[1], value, min, max);
+ }
+ result = ctx.OpBitcast(ctx.U32[1], result);
+ } else {
+ result = ctx.OpSClamp(ctx.U32[1], value, min, max);
+ }
+ SetZeroFlag(ctx, inst, result);
+ SetSignFlag(ctx, inst, result);
+ return result;
+}
+
+Id EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max) {
+ Id result{};
+ if (ctx.profile.has_broken_spirv_clamp) {
+ result = ctx.OpUMax(ctx.U32[1], ctx.OpUMin(ctx.U32[1], value, max), min);
+ } else {
+ result = ctx.OpUClamp(ctx.U32[1], value, min, max);
+ }
+ SetZeroFlag(ctx, inst, result);
+ SetSignFlag(ctx, inst, result);
+ return result;
+}
+
+Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpSLessThan(ctx.U1, lhs, rhs);
+}
+
+Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpULessThan(ctx.U1, lhs, rhs);
+}
+
+Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpIEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpSLessThanEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpULessThanEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpSGreaterThan(ctx.U1, lhs, rhs);
+}
+
+Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpUGreaterThan(ctx.U1, lhs, rhs);
+}
+
+Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpINotEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpSGreaterThanEqual(ctx.U1, lhs, rhs);
+}
+
+Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) {
+ return ctx.OpUGreaterThanEqual(ctx.U1, lhs, rhs);
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp
new file mode 100644
index 000000000..b9a9500fc
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp
@@ -0,0 +1,26 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
+
+namespace Shader::Backend::SPIRV {
+
+Id EmitLogicalOr(EmitContext& ctx, Id a, Id b) {
+ return ctx.OpLogicalOr(ctx.U1, a, b);
+}
+
+Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b) {
+ return ctx.OpLogicalAnd(ctx.U1, a, b);
+}
+
+Id EmitLogicalXor(EmitContext& ctx, Id a, Id b) {
+ return ctx.OpLogicalNotEqual(ctx.U1, a, b);
+}
+
+Id EmitLogicalNot(EmitContext& ctx, Id value) {
+ return ctx.OpLogicalNot(ctx.U1, value);
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp
new file mode 100644
index 000000000..679ee2684
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp
@@ -0,0 +1,275 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <bit>
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
+
+namespace Shader::Backend::SPIRV {
+namespace {
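+// Converts a byte offset into an element index; element sizes are assumed to be powers of two.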
+Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element_size,
+ u32 index_offset = 0) {
+ if (offset.IsImmediate()) {
+ const u32 imm_offset{static_cast<u32>(offset.U32() / element_size) + index_offset};
+ return ctx.Const(imm_offset);
+ }
+ const u32 shift{static_cast<u32>(std::countr_zero(element_size))};
+ Id index{ctx.Def(offset)};
+ if (shift != 0) {
+ const Id shift_id{ctx.Const(shift)};
+ index = ctx.OpShiftRightLogical(ctx.U32[1], index, shift_id);
+ }
+ if (index_offset != 0) {
+ index = ctx.OpIAdd(ctx.U32[1], index, ctx.Const(index_offset));
+ }
+ return index;
+}
+
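+// Builds an access chain into the SSBO member selected by member_ptr; only immediate bindings
+// are supported.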
+Id StoragePointer(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ const StorageTypeDefinition& type_def, size_t element_size,
+ Id StorageDefinitions::*member_ptr, u32 index_offset = 0) {
+ if (!binding.IsImmediate()) {
+ throw NotImplementedException("Dynamic storage buffer indexing");
+ }
+ const Id ssbo{ctx.ssbos[binding.U32()].*member_ptr};
+ const Id index{StorageIndex(ctx, offset, element_size, index_offset)};
+ return ctx.OpAccessChain(type_def.element, ssbo, ctx.u32_zero_value, index);
+}
+
+Id LoadStorage(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id result_type,
+ const StorageTypeDefinition& type_def, size_t element_size,
+ Id StorageDefinitions::*member_ptr, u32 index_offset = 0) {
+ const Id pointer{
+ StoragePointer(ctx, binding, offset, type_def, element_size, member_ptr, index_offset)};
+ return ctx.OpLoad(result_type, pointer);
+}
+
+Id LoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ u32 index_offset = 0) {
+ return LoadStorage(ctx, binding, offset, ctx.U32[1], ctx.storage_types.U32, sizeof(u32),
+ &StorageDefinitions::U32, index_offset);
+}
+
+void WriteStorage(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value,
+ const StorageTypeDefinition& type_def, size_t element_size,
+ Id StorageDefinitions::*member_ptr, u32 index_offset = 0) {
+ const Id pointer{
+ StoragePointer(ctx, binding, offset, type_def, element_size, member_ptr, index_offset)};
+ ctx.OpStore(pointer, value);
+}
+
+void WriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value,
+ u32 index_offset = 0) {
+ WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32, sizeof(u32),
+ &StorageDefinitions::U32, index_offset);
+}
+} // Anonymous namespace
+
+void EmitLoadGlobalU8(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitLoadGlobalS8(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitLoadGlobalU16(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitLoadGlobalS16(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitLoadGlobal32(EmitContext& ctx, Id address) {
+ if (ctx.profile.support_int64) {
+ return ctx.OpFunctionCall(ctx.U32[1], ctx.load_global_func_u32, address);
+ }
+ LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation");
+ return ctx.Const(0u);
+}
+
+Id EmitLoadGlobal64(EmitContext& ctx, Id address) {
+ if (ctx.profile.support_int64) {
+ return ctx.OpFunctionCall(ctx.U32[2], ctx.load_global_func_u32x2, address);
+ }
+ LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation");
+ return ctx.Const(0u, 0u);
+}
+
+Id EmitLoadGlobal128(EmitContext& ctx, Id address) {
+ if (ctx.profile.support_int64) {
+ return ctx.OpFunctionCall(ctx.U32[4], ctx.load_global_func_u32x4, address);
+ }
+ LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation");
+ return ctx.Const(0u, 0u, 0u, 0u);
+}
+
+void EmitWriteGlobalU8(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitWriteGlobalS8(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitWriteGlobalU16(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitWriteGlobalS16(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitWriteGlobal32(EmitContext& ctx, Id address, Id value) {
+ if (ctx.profile.support_int64) {
+ ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32, address, value);
+ return;
+ }
+ LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation");
+}
+
+void EmitWriteGlobal64(EmitContext& ctx, Id address, Id value) {
+ if (ctx.profile.support_int64) {
+ ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32x2, address, value);
+ return;
+ }
+ LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation");
+}
+
+void EmitWriteGlobal128(EmitContext& ctx, Id address, Id value) {
+ if (ctx.profile.support_int64) {
+ ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32x4, address, value);
+ return;
+ }
+ LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation");
+}
+
+Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
+ if (ctx.profile.support_int8 && ctx.profile.support_descriptor_aliasing) {
+ return ctx.OpUConvert(ctx.U32[1],
+ LoadStorage(ctx, binding, offset, ctx.U8, ctx.storage_types.U8,
+ sizeof(u8), &StorageDefinitions::U8));
+ } else {
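+ // When 8-bit storage types or descriptor aliasing are unavailable, load the containing
+ // 32-bit word and extract the byte in place.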
+ return ctx.OpBitFieldUExtract(ctx.U32[1], LoadStorage32(ctx, binding, offset),
+ ctx.BitOffset8(offset), ctx.Const(8u));
+ }
+}
+
+Id EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
+ if (ctx.profile.support_int8 && ctx.profile.support_descriptor_aliasing) {
+ return ctx.OpSConvert(ctx.U32[1],
+ LoadStorage(ctx, binding, offset, ctx.S8, ctx.storage_types.S8,
+ sizeof(s8), &StorageDefinitions::S8));
+ } else {
+ return ctx.OpBitFieldSExtract(ctx.U32[1], LoadStorage32(ctx, binding, offset),
+ ctx.BitOffset8(offset), ctx.Const(8u));
+ }
+}
+
+Id EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
+ if (ctx.profile.support_int16 && ctx.profile.support_descriptor_aliasing) {
+ return ctx.OpUConvert(ctx.U32[1],
+ LoadStorage(ctx, binding, offset, ctx.U16, ctx.storage_types.U16,
+ sizeof(u16), &StorageDefinitions::U16));
+ } else {
+ return ctx.OpBitFieldUExtract(ctx.U32[1], LoadStorage32(ctx, binding, offset),
+ ctx.BitOffset16(offset), ctx.Const(16u));
+ }
+}
+
+Id EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
+ if (ctx.profile.support_int16 && ctx.profile.support_descriptor_aliasing) {
+ return ctx.OpSConvert(ctx.U32[1],
+ LoadStorage(ctx, binding, offset, ctx.S16, ctx.storage_types.S16,
+ sizeof(s16), &StorageDefinitions::S16));
+ } else {
+ return ctx.OpBitFieldSExtract(ctx.U32[1], LoadStorage32(ctx, binding, offset),
+ ctx.BitOffset16(offset), ctx.Const(16u));
+ }
+}
+
+Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
+ return LoadStorage32(ctx, binding, offset);
+}
+
+Id EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
+ if (ctx.profile.support_descriptor_aliasing) {
+ return LoadStorage(ctx, binding, offset, ctx.U32[2], ctx.storage_types.U32x2,
+ sizeof(u32[2]), &StorageDefinitions::U32x2);
+ } else {
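+ // Without descriptor aliasing, compose the 64-bit value from two consecutive 32-bit loads.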
+ return ctx.OpCompositeConstruct(ctx.U32[2], LoadStorage32(ctx, binding, offset, 0),
+ LoadStorage32(ctx, binding, offset, 1));
+ }
+}
+
+Id EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
+ if (ctx.profile.support_descriptor_aliasing) {
+ return LoadStorage(ctx, binding, offset, ctx.U32[4], ctx.storage_types.U32x4,
+ sizeof(u32[4]), &StorageDefinitions::U32x4);
+ } else {
+ return ctx.OpCompositeConstruct(ctx.U32[4], LoadStorage32(ctx, binding, offset, 0),
+ LoadStorage32(ctx, binding, offset, 1),
+ LoadStorage32(ctx, binding, offset, 2),
+ LoadStorage32(ctx, binding, offset, 3));
+ }
+}
+
+void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.U8, value), ctx.storage_types.U8,
+ sizeof(u8), &StorageDefinitions::U8);
+}
+
+void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.S8, value), ctx.storage_types.S8,
+ sizeof(s8), &StorageDefinitions::S8);
+}
+
+void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.U16, value), ctx.storage_types.U16,
+ sizeof(u16), &StorageDefinitions::U16);
+}
+
+void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.S16, value), ctx.storage_types.S16,
+ sizeof(s16), &StorageDefinitions::S16);
+}
+
+void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ WriteStorage32(ctx, binding, offset, value);
+}
+
+void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ if (ctx.profile.support_descriptor_aliasing) {
+ WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32x2, sizeof(u32[2]),
+ &StorageDefinitions::U32x2);
+ } else {
+ for (u32 index = 0; index < 2; ++index) {
+ const Id element{ctx.OpCompositeExtract(ctx.U32[1], value, index)};
+ WriteStorage32(ctx, binding, offset, element, index);
+ }
+ }
+}
+
+void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+ Id value) {
+ if (ctx.profile.support_descriptor_aliasing) {
+ WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32x4, sizeof(u32[4]),
+ &StorageDefinitions::U32x4);
+ } else {
+ for (u32 index = 0; index < 4; ++index) {
+ const Id element{ctx.OpCompositeExtract(ctx.U32[1], value, index)};
+ WriteStorage32(ctx, binding, offset, element, index);
+ }
+ }
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp
new file mode 100644
index 000000000..c5b4f4720
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp
@@ -0,0 +1,42 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
+
+namespace Shader::Backend::SPIRV {
+
+Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value) {
+ return ctx.OpSelect(ctx.U1, cond, true_value, false_value);
+}
+
+Id EmitSelectU8(EmitContext&, Id, Id, Id) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value) {
+ return ctx.OpSelect(ctx.U16, cond, true_value, false_value);
+}
+
+Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value) {
+ return ctx.OpSelect(ctx.U32[1], cond, true_value, false_value);
+}
+
+Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value) {
+ return ctx.OpSelect(ctx.U64, cond, true_value, false_value);
+}
+
+Id EmitSelectF16(EmitContext& ctx, Id cond, Id true_value, Id false_value) {
+ return ctx.OpSelect(ctx.F16[1], cond, true_value, false_value);
+}
+
+Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value) {
+ return ctx.OpSelect(ctx.F32[1], cond, true_value, false_value);
+}
+
+Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value) {
+ return ctx.OpSelect(ctx.F64[1], cond, true_value, false_value);
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
new file mode 100644
index 000000000..9a79fc7a2
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
@@ -0,0 +1,174 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
+
+namespace Shader::Backend::SPIRV {
+namespace {
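+// Converts a byte offset into an element index (offset >> shift) and returns a pointer into the
+// given shared memory array.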
+Id Pointer(EmitContext& ctx, Id pointer_type, Id array, Id offset, u32 shift) {
+ const Id shift_id{ctx.Const(shift)};
+ const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
+ return ctx.OpAccessChain(pointer_type, array, ctx.u32_zero_value, index);
+}
+
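+// Loads the aligned 32-bit shared memory word containing the given byte offset.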
+Id Word(EmitContext& ctx, Id offset) {
+ const Id shift_id{ctx.Const(2U)};
+ const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
+ const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
+ return ctx.OpLoad(ctx.U32[1], pointer);
+}
+
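+// Computes the bit offset within a 32-bit word (offset * 8, masked) and the bit count used to
+// extract a sub-word value.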
+std::pair<Id, Id> ExtractArgs(EmitContext& ctx, Id offset, u32 mask, u32 count) {
+ const Id shift{ctx.OpShiftLeftLogical(ctx.U32[1], offset, ctx.Const(3U))};
+ const Id bit{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.Const(mask))};
+ const Id count_id{ctx.Const(count)};
+ return {bit, count_id};
+}
+} // Anonymous namespace
+
+Id EmitLoadSharedU8(EmitContext& ctx, Id offset) {
+ if (ctx.profile.support_explicit_workgroup_layout) {
+ const Id pointer{
+ ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
+ return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer));
+ } else {
+ const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)};
+ return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count);
+ }
+}
+
+Id EmitLoadSharedS8(EmitContext& ctx, Id offset) {
+ if (ctx.profile.support_explicit_workgroup_layout) {
+ const Id pointer{
+ ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
+ return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer));
+ } else {
+ const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)};
+ return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count);
+ }
+}
+
+Id EmitLoadSharedU16(EmitContext& ctx, Id offset) {
+ if (ctx.profile.support_explicit_workgroup_layout) {
+ const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
+ return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
+ } else {
+ const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)};
+ return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count);
+ }
+}
+
+Id EmitLoadSharedS16(EmitContext& ctx, Id offset) {
+ if (ctx.profile.support_explicit_workgroup_layout) {
+ const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
+ return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
+ } else {
+ const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)};
+ return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count);
+ }
+}
+
+Id EmitLoadSharedU32(EmitContext& ctx, Id offset) {
+ if (ctx.profile.support_explicit_workgroup_layout) {
+ const Id pointer{Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2)};
+ return ctx.OpLoad(ctx.U32[1], pointer);
+ } else {
+ return Word(ctx, offset);
+ }
+}
+
+Id EmitLoadSharedU64(EmitContext& ctx, Id offset) {
+ if (ctx.profile.support_explicit_workgroup_layout) {
+ const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)};
+ return ctx.OpLoad(ctx.U32[2], pointer);
+ } else {
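+ // Compose the 64-bit value from two consecutive 32-bit shared memory words.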
+ const Id shift_id{ctx.Const(2U)};
+ const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
+ const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.Const(1U))};
+ const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)};
+ const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)};
+ return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer),
+ ctx.OpLoad(ctx.U32[1], rhs_pointer));
+ }
+}
+
+Id EmitLoadSharedU128(EmitContext& ctx, Id offset) {
+ if (ctx.profile.support_explicit_workgroup_layout) {
+ const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)};
+ return ctx.OpLoad(ctx.U32[4], pointer);
+ }
+ const Id shift_id{ctx.Const(2U)};
+ const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
+ std::array<Id, 4> values{};
+ for (u32 i = 0; i < 4; ++i) {
+ const Id index{i == 0 ? base_index : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Const(i))};
+ const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
+ values[i] = ctx.OpLoad(ctx.U32[1], pointer);
+ }
+ return ctx.OpCompositeConstruct(ctx.U32[4], values);
+}
+
+void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value) {
+ if (ctx.profile.support_explicit_workgroup_layout) {
+ const Id pointer{
+ ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
+ ctx.OpStore(pointer, ctx.OpUConvert(ctx.U8, value));
+ } else {
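+        // Fallback: delegate sub-word stores to the emitted helper function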
+ ctx.OpFunctionCall(ctx.void_id, ctx.shared_store_u8_func, offset, value);
+ }
+}
+
+void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value) {
+ if (ctx.profile.support_explicit_workgroup_layout) {
+ const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
+ ctx.OpStore(pointer, ctx.OpUConvert(ctx.U16, value));
+ } else {
+ ctx.OpFunctionCall(ctx.void_id, ctx.shared_store_u16_func, offset, value);
+ }
+}
+
+void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) {
+ Id pointer{};
+ if (ctx.profile.support_explicit_workgroup_layout) {
+ pointer = Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2);
+ } else {
+ const Id shift{ctx.Const(2U)};
+ const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
+ pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset);
+ }
+ ctx.OpStore(pointer, value);
+}
+
+void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) {
+ if (ctx.profile.support_explicit_workgroup_layout) {
+ const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)};
+ ctx.OpStore(pointer, value);
+ return;
+ }
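+    // Fallback: split the 64-bit value into two 32-bit words stored at consecutive indices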
+ const Id shift{ctx.Const(2U)};
+ const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
+ const Id next_offset{ctx.OpIAdd(ctx.U32[1], word_offset, ctx.Const(1U))};
+ const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset)};
+ const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_offset)};
+ ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U));
+ ctx.OpStore(rhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U));
+}
+
+void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value) {
+ if (ctx.profile.support_explicit_workgroup_layout) {
+ const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)};
+ ctx.OpStore(pointer, value);
+ return;
+ }
+ const Id shift{ctx.Const(2U)};
+ const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
+ for (u32 i = 0; i < 4; ++i) {
+ const Id index{i == 0 ? base_index : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Const(i))};
+ const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
+ ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, i));
+ }
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp
new file mode 100644
index 000000000..9e7eb3cb1
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp
@@ -0,0 +1,150 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
+
+namespace Shader::Backend::SPIRV {
+namespace {
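+// Converts the depth range by storing Z = (Z + W) * 0.5, mapping Z from [-W, W] to [0, W]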
+void ConvertDepthMode(EmitContext& ctx) {
+ const Id type{ctx.F32[1]};
+ const Id position{ctx.OpLoad(ctx.F32[4], ctx.output_position)};
+ const Id z{ctx.OpCompositeExtract(type, position, 2u)};
+ const Id w{ctx.OpCompositeExtract(type, position, 3u)};
+ const Id screen_depth{ctx.OpFMul(type, ctx.OpFAdd(type, z, w), ctx.Constant(type, 0.5f))};
+ const Id vector{ctx.OpCompositeInsert(ctx.F32[4], screen_depth, position, 2u)};
+ ctx.OpStore(ctx.output_position, vector);
+}
+
+void SetFixedPipelinePointSize(EmitContext& ctx) {
+ if (ctx.runtime_info.fixed_state_point_size) {
+ const float point_size{*ctx.runtime_info.fixed_state_point_size};
+ ctx.OpStore(ctx.output_point_size, ctx.Const(point_size));
+ }
+}
+
+Id DefaultVarying(EmitContext& ctx, u32 num_components, u32 element, Id zero, Id one,
+ Id default_vector) {
+ switch (num_components) {
+ case 1:
+ return element == 3 ? one : zero;
+ case 2:
+ return ctx.ConstantComposite(ctx.F32[2], zero, element + 1 == 3 ? one : zero);
+ case 3:
+ return ctx.ConstantComposite(ctx.F32[3], zero, zero, element + 2 == 3 ? one : zero);
+ case 4:
+ return default_vector;
+ }
+ throw InvalidArgument("Bad element");
+}
+
+Id ComparisonFunction(EmitContext& ctx, CompareFunction comparison, Id operand_1, Id operand_2) {
+ switch (comparison) {
+ case CompareFunction::Never:
+ return ctx.false_value;
+ case CompareFunction::Less:
+ return ctx.OpFOrdLessThan(ctx.U1, operand_1, operand_2);
+ case CompareFunction::Equal:
+ return ctx.OpFOrdEqual(ctx.U1, operand_1, operand_2);
+ case CompareFunction::LessThanEqual:
+ return ctx.OpFOrdLessThanEqual(ctx.U1, operand_1, operand_2);
+ case CompareFunction::Greater:
+ return ctx.OpFOrdGreaterThan(ctx.U1, operand_1, operand_2);
+ case CompareFunction::NotEqual:
+ return ctx.OpFOrdNotEqual(ctx.U1, operand_1, operand_2);
+ case CompareFunction::GreaterThanEqual:
+ return ctx.OpFOrdGreaterThanEqual(ctx.U1, operand_1, operand_2);
+ case CompareFunction::Always:
+ return ctx.true_value;
+ }
+ throw InvalidArgument("Comparison function {}", comparison);
+}
+
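+// Emulates fixed-function alpha testing by discarding fragments whose render target 0 alpha
+// fails the comparison against the reference value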
+void AlphaTest(EmitContext& ctx) {
+ if (!ctx.runtime_info.alpha_test_func) {
+ return;
+ }
+ const auto comparison{*ctx.runtime_info.alpha_test_func};
+ if (comparison == CompareFunction::Always) {
+ return;
+ }
+ if (!Sirit::ValidId(ctx.frag_color[0])) {
+ return;
+ }
+
+ const Id type{ctx.F32[1]};
+ const Id rt0_color{ctx.OpLoad(ctx.F32[4], ctx.frag_color[0])};
+ const Id alpha{ctx.OpCompositeExtract(type, rt0_color, 3u)};
+
+ const Id true_label{ctx.OpLabel()};
+ const Id discard_label{ctx.OpLabel()};
+ const Id alpha_reference{ctx.Const(ctx.runtime_info.alpha_test_reference)};
+ const Id condition{ComparisonFunction(ctx, comparison, alpha, alpha_reference)};
+
+ ctx.OpSelectionMerge(true_label, spv::SelectionControlMask::MaskNone);
+ ctx.OpBranchConditional(condition, true_label, discard_label);
+ ctx.AddLabel(discard_label);
+ ctx.OpKill();
+ ctx.AddLabel(true_label);
+}
+} // Anonymous namespace
+
+void EmitPrologue(EmitContext& ctx) {
+ if (ctx.stage == Stage::VertexB) {
+ const Id zero{ctx.Const(0.0f)};
+ const Id one{ctx.Const(1.0f)};
+ const Id default_vector{ctx.ConstantComposite(ctx.F32[4], zero, zero, zero, one)};
+ ctx.OpStore(ctx.output_position, default_vector);
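+        // Initialize declared generic outputs to the default (0, 0, 0, 1) vector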
+ for (const auto& info : ctx.output_generics) {
+ if (info[0].num_components == 0) {
+ continue;
+ }
+ u32 element{0};
+ while (element < 4) {
+ const auto& element_info{info[element]};
+ const u32 num{element_info.num_components};
+ const Id value{DefaultVarying(ctx, num, element, zero, one, default_vector)};
+ ctx.OpStore(element_info.id, value);
+ element += num;
+ }
+ }
+ }
+ if (ctx.stage == Stage::VertexB || ctx.stage == Stage::Geometry) {
+ SetFixedPipelinePointSize(ctx);
+ }
+}
+
+void EmitEpilogue(EmitContext& ctx) {
+ if (ctx.stage == Stage::VertexB && ctx.runtime_info.convert_depth_mode) {
+ ConvertDepthMode(ctx);
+ }
+ if (ctx.stage == Stage::Fragment) {
+ AlphaTest(ctx);
+ }
+}
+
+void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) {
+ if (ctx.runtime_info.convert_depth_mode) {
+ ConvertDepthMode(ctx);
+ }
+ if (stream.IsImmediate()) {
+ ctx.OpEmitStreamVertex(ctx.Def(stream));
+ } else {
+ LOG_WARNING(Shader_SPIRV, "Stream is not immediate");
+ ctx.OpEmitStreamVertex(ctx.u32_zero_value);
+ }
+ // Restore fixed pipeline point size after emitting the vertex
+ SetFixedPipelinePointSize(ctx);
+}
+
+void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) {
+ if (stream.IsImmediate()) {
+ ctx.OpEndStreamPrimitive(ctx.Def(stream));
+ } else {
+ LOG_WARNING(Shader_SPIRV, "Stream is not immediate");
+ ctx.OpEndStreamPrimitive(ctx.u32_zero_value);
+ }
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp
new file mode 100644
index 000000000..c9f469e90
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp
@@ -0,0 +1,30 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
+
+namespace Shader::Backend::SPIRV {
+
+Id EmitUndefU1(EmitContext& ctx) {
+ return ctx.OpUndef(ctx.U1);
+}
+
+Id EmitUndefU8(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitUndefU16(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitUndefU32(EmitContext& ctx) {
+ return ctx.OpUndef(ctx.U32[1]);
+}
+
+Id EmitUndefU64(EmitContext&) {
+ throw NotImplementedException("SPIR-V Instruction");
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
new file mode 100644
index 000000000..78b1e1ba7
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
@@ -0,0 +1,203 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
+
+namespace Shader::Backend::SPIRV {
+namespace {
+Id WarpExtract(EmitContext& ctx, Id value) {
+    const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
+    // Select the 32-bit ballot word containing this invocation's bit (invocation id / 32),
+    // needed when the host warp can be wider than the 32-thread guest warp
+    const Id word_index{ctx.OpShiftRightLogical(ctx.U32[1], local_index, ctx.Const(5U))};
+    return ctx.OpVectorExtractDynamic(ctx.U32[1], value, word_index);
+}
+
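+// Loads a subgroup mask built-in and returns the 32-bit word that applies to the calling invocation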
+Id LoadMask(EmitContext& ctx, Id mask) {
+ const Id value{ctx.OpLoad(ctx.U32[4], mask)};
+ if (!ctx.profile.warp_size_potentially_larger_than_guest) {
+ return ctx.OpCompositeExtract(ctx.U32[1], value, 0U);
+ }
+ return WarpExtract(ctx, value);
+}
+
+void SetInBoundsFlag(IR::Inst* inst, Id result) {
+ IR::Inst* const in_bounds{inst->GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)};
+ if (!in_bounds) {
+ return;
+ }
+ in_bounds->SetDefinition(result);
+ in_bounds->Invalidate();
+}
+
+Id ComputeMinThreadId(EmitContext& ctx, Id thread_id, Id segmentation_mask) {
+ return ctx.OpBitwiseAnd(ctx.U32[1], thread_id, segmentation_mask);
+}
+
+Id ComputeMaxThreadId(EmitContext& ctx, Id min_thread_id, Id clamp, Id not_seg_mask) {
+ return ctx.OpBitwiseOr(ctx.U32[1], min_thread_id,
+ ctx.OpBitwiseAnd(ctx.U32[1], clamp, not_seg_mask));
+}
+
+Id GetMaxThreadId(EmitContext& ctx, Id thread_id, Id clamp, Id segmentation_mask) {
+ const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)};
+ const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)};
+ return ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask);
+}
+
+Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) {
+ return ctx.OpSelect(ctx.U32[1], in_range,
+ ctx.OpSubgroupReadInvocationKHR(ctx.U32[1], value, src_thread_id), value);
+}
+} // Anonymous namespace
+
+Id EmitLaneId(EmitContext& ctx) {
+ const Id id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
+ if (!ctx.profile.warp_size_potentially_larger_than_guest) {
+ return id;
+ }
+ return ctx.OpBitwiseAnd(ctx.U32[1], id, ctx.Const(31U));
+}
+
+Id EmitVoteAll(EmitContext& ctx, Id pred) {
+ if (!ctx.profile.warp_size_potentially_larger_than_guest) {
+ return ctx.OpSubgroupAllKHR(ctx.U1, pred);
+ }
+ const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)};
+ const Id active_mask{WarpExtract(ctx, mask_ballot)};
+ const Id ballot{WarpExtract(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))};
+ const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)};
+ return ctx.OpIEqual(ctx.U1, lhs, active_mask);
+}
+
+Id EmitVoteAny(EmitContext& ctx, Id pred) {
+ if (!ctx.profile.warp_size_potentially_larger_than_guest) {
+ return ctx.OpSubgroupAnyKHR(ctx.U1, pred);
+ }
+ const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)};
+ const Id active_mask{WarpExtract(ctx, mask_ballot)};
+ const Id ballot{WarpExtract(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))};
+ const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)};
+ return ctx.OpINotEqual(ctx.U1, lhs, ctx.u32_zero_value);
+}
+
+Id EmitVoteEqual(EmitContext& ctx, Id pred) {
+ if (!ctx.profile.warp_size_potentially_larger_than_guest) {
+ return ctx.OpSubgroupAllEqualKHR(ctx.U1, pred);
+ }
+ const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)};
+ const Id active_mask{WarpExtract(ctx, mask_ballot)};
+ const Id ballot{WarpExtract(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))};
+ const Id lhs{ctx.OpBitwiseXor(ctx.U32[1], ballot, active_mask)};
+ return ctx.OpLogicalOr(ctx.U1, ctx.OpIEqual(ctx.U1, lhs, ctx.u32_zero_value),
+ ctx.OpIEqual(ctx.U1, lhs, active_mask));
+}
+
+Id EmitSubgroupBallot(EmitContext& ctx, Id pred) {
+ const Id ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], pred)};
+ if (!ctx.profile.warp_size_potentially_larger_than_guest) {
+ return ctx.OpCompositeExtract(ctx.U32[1], ballot, 0U);
+ }
+ return WarpExtract(ctx, ballot);
+}
+
+Id EmitSubgroupEqMask(EmitContext& ctx) {
+ return LoadMask(ctx, ctx.subgroup_mask_eq);
+}
+
+Id EmitSubgroupLtMask(EmitContext& ctx) {
+ return LoadMask(ctx, ctx.subgroup_mask_lt);
+}
+
+Id EmitSubgroupLeMask(EmitContext& ctx) {
+ return LoadMask(ctx, ctx.subgroup_mask_le);
+}
+
+Id EmitSubgroupGtMask(EmitContext& ctx) {
+ return LoadMask(ctx, ctx.subgroup_mask_gt);
+}
+
+Id EmitSubgroupGeMask(EmitContext& ctx) {
+ return LoadMask(ctx, ctx.subgroup_mask_ge);
+}
+
+Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
+ Id segmentation_mask) {
+ const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)};
+ const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
+ const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)};
+ const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)};
+
+ const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], index, not_seg_mask)};
+ const Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)};
+ const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
+
+ SetInBoundsFlag(inst, in_range);
+ return SelectValue(ctx, in_range, value, src_thread_id);
+}
+
+Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
+ Id segmentation_mask) {
+ const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
+ const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
+ const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)};
+ const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)};
+
+ SetInBoundsFlag(inst, in_range);
+ return SelectValue(ctx, in_range, value, src_thread_id);
+}
+
+Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
+ Id segmentation_mask) {
+ const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
+ const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
+ const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)};
+ const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
+
+ SetInBoundsFlag(inst, in_range);
+ return SelectValue(ctx, in_range, value, src_thread_id);
+}
+
+Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
+ Id segmentation_mask) {
+ const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
+ const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
+ const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)};
+ const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
+
+ SetInBoundsFlag(inst, in_range);
+ return SelectValue(ctx, in_range, value, src_thread_id);
+}
+
+Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle) {
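+    // Select this invocation's two swizzle bits, (swizzle >> ((lane & 3) * 2)) & 3, to index the FSWZADD lookup tables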
+ const Id three{ctx.Const(3U)};
+ Id mask{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
+ mask = ctx.OpBitwiseAnd(ctx.U32[1], mask, three);
+ mask = ctx.OpShiftLeftLogical(ctx.U32[1], mask, ctx.Const(1U));
+ mask = ctx.OpShiftRightLogical(ctx.U32[1], swizzle, mask);
+ mask = ctx.OpBitwiseAnd(ctx.U32[1], mask, three);
+
+ const Id modifier_a{ctx.OpVectorExtractDynamic(ctx.F32[1], ctx.fswzadd_lut_a, mask)};
+ const Id modifier_b{ctx.OpVectorExtractDynamic(ctx.F32[1], ctx.fswzadd_lut_b, mask)};
+
+ const Id result_a{ctx.OpFMul(ctx.F32[1], op_a, modifier_a)};
+ const Id result_b{ctx.OpFMul(ctx.F32[1], op_b, modifier_b)};
+ return ctx.OpFAdd(ctx.F32[1], result_a, result_b);
+}
+
+Id EmitDPdxFine(EmitContext& ctx, Id op_a) {
+ return ctx.OpDPdxFine(ctx.F32[1], op_a);
+}
+
+Id EmitDPdyFine(EmitContext& ctx, Id op_a) {
+ return ctx.OpDPdyFine(ctx.F32[1], op_a);
+}
+
+Id EmitDPdxCoarse(EmitContext& ctx, Id op_a) {
+ return ctx.OpDPdxCoarse(ctx.F32[1], op_a);
+}
+
+Id EmitDPdyCoarse(EmitContext& ctx, Id op_a) {
+ return ctx.OpDPdyCoarse(ctx.F32[1], op_a);
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h
new file mode 100644
index 000000000..8369d0d84
--- /dev/null
+++ b/src/shader_recompiler/environment.h
@@ -0,0 +1,53 @@
+#pragma once
+
+#include <array>
+
+#include "common/common_types.h"
+#include "shader_recompiler/program_header.h"
+#include "shader_recompiler/shader_info.h"
+#include "shader_recompiler/stage.h"
+
+namespace Shader {
+
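+/// Interface used by the shader recompiler to read guest shader code and query pipeline-related state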
+class Environment {
+public:
+ virtual ~Environment() = default;
+
+ [[nodiscard]] virtual u64 ReadInstruction(u32 address) = 0;
+
+ [[nodiscard]] virtual u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) = 0;
+
+ [[nodiscard]] virtual TextureType ReadTextureType(u32 raw_handle) = 0;
+
+ [[nodiscard]] virtual u32 TextureBoundBuffer() const = 0;
+
+ [[nodiscard]] virtual u32 LocalMemorySize() const = 0;
+
+ [[nodiscard]] virtual u32 SharedMemorySize() const = 0;
+
+ [[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() const = 0;
+
+ [[nodiscard]] const ProgramHeader& SPH() const noexcept {
+ return sph;
+ }
+
+ [[nodiscard]] const std::array<u32, 8>& GpPassthroughMask() const noexcept {
+ return gp_passthrough_mask;
+ }
+
+ [[nodiscard]] Stage ShaderStage() const noexcept {
+ return stage;
+ }
+
+ [[nodiscard]] u32 StartAddress() const noexcept {
+ return start_address;
+ }
+
+protected:
+ ProgramHeader sph{};
+ std::array<u32, 8> gp_passthrough_mask{};
+ Stage stage{};
+ u32 start_address{};
+};
+
+} // namespace Shader
diff --git a/src/shader_recompiler/exception.h b/src/shader_recompiler/exception.h
new file mode 100644
index 000000000..277be8541
--- /dev/null
+++ b/src/shader_recompiler/exception.h
@@ -0,0 +1,66 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <exception>
+#include <string>
+#include <string_view>
+#include <utility>
+
+#include <fmt/format.h>
+
+namespace Shader {
+
+class Exception : public std::exception {
+public:
+ explicit Exception(std::string message) noexcept : err_message{std::move(message)} {}
+
+ [[nodiscard]] const char* what() const noexcept override {
+ return err_message.c_str();
+ }
+
+ void Prepend(std::string_view prepend) {
+ err_message.insert(0, prepend);
+ }
+
+ void Append(std::string_view append) {
+ err_message += append;
+ }
+
+private:
+ std::string err_message;
+};
+
+class LogicError : public Exception {
+public:
+ template <typename... Args>
+ explicit LogicError(const char* message, Args&&... args)
+ : Exception{fmt::format(fmt::runtime(message), std::forward<Args>(args)...)} {}
+};
+
+class RuntimeError : public Exception {
+public:
+ template <typename... Args>
+ explicit RuntimeError(const char* message, Args&&... args)
+ : Exception{fmt::format(fmt::runtime(message), std::forward<Args>(args)...)} {}
+};
+
+class NotImplementedException : public Exception {
+public:
+ template <typename... Args>
+ explicit NotImplementedException(const char* message, Args&&... args)
+ : Exception{fmt::format(fmt::runtime(message), std::forward<Args>(args)...)} {
+ Append(" is not implemented");
+ }
+};
+
+class InvalidArgument : public Exception {
+public:
+ template <typename... Args>
+ explicit InvalidArgument(const char* message, Args&&... args)
+ : Exception{fmt::format(fmt::runtime(message), std::forward<Args>(args)...)} {}
+};
+
+} // namespace Shader
diff --git a/src/shader_recompiler/frontend/ir/abstract_syntax_list.h b/src/shader_recompiler/frontend/ir/abstract_syntax_list.h
new file mode 100644
index 000000000..b61773487
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/abstract_syntax_list.h
@@ -0,0 +1,58 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <vector>
+
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::IR {
+
+class Block;
+
+struct AbstractSyntaxNode {
+ enum class Type {
+ Block,
+ If,
+ EndIf,
+ Loop,
+ Repeat,
+ Break,
+ Return,
+ Unreachable,
+ };
+ union Data {
+ Block* block;
+ struct {
+ U1 cond;
+ Block* body;
+ Block* merge;
+ } if_node;
+ struct {
+ Block* merge;
+ } end_if;
+ struct {
+ Block* body;
+ Block* continue_block;
+ Block* merge;
+ } loop;
+ struct {
+ U1 cond;
+ Block* loop_header;
+ Block* merge;
+ } repeat;
+ struct {
+ U1 cond;
+ Block* merge;
+ Block* skip;
+ } break_node;
+ };
+
+ Data data{};
+ Type type{};
+};
+using AbstractSyntaxList = std::vector<AbstractSyntaxNode>;
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/attribute.cpp b/src/shader_recompiler/frontend/ir/attribute.cpp
new file mode 100644
index 000000000..4d0b8b8e5
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/attribute.cpp
@@ -0,0 +1,454 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <fmt/format.h>
+
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/attribute.h"
+
+namespace Shader::IR {
+
+bool IsGeneric(Attribute attribute) noexcept {
+ return attribute >= Attribute::Generic0X && attribute <= Attribute::Generic31X;
+}
+
+u32 GenericAttributeIndex(Attribute attribute) {
+ if (!IsGeneric(attribute)) {
+ throw InvalidArgument("Attribute is not generic {}", attribute);
+ }
+ return (static_cast<u32>(attribute) - static_cast<u32>(Attribute::Generic0X)) / 4u;
+}
+
+u32 GenericAttributeElement(Attribute attribute) {
+ if (!IsGeneric(attribute)) {
+ throw InvalidArgument("Attribute is not generic {}", attribute);
+ }
+ return static_cast<u32>(attribute) % 4;
+}
+
+std::string NameOf(Attribute attribute) {
+ switch (attribute) {
+ case Attribute::PrimitiveId:
+ return "PrimitiveId";
+ case Attribute::Layer:
+ return "Layer";
+ case Attribute::ViewportIndex:
+ return "ViewportIndex";
+ case Attribute::PointSize:
+ return "PointSize";
+ case Attribute::PositionX:
+ return "Position.X";
+ case Attribute::PositionY:
+ return "Position.Y";
+ case Attribute::PositionZ:
+ return "Position.Z";
+ case Attribute::PositionW:
+ return "Position.W";
+ case Attribute::Generic0X:
+ return "Generic[0].X";
+ case Attribute::Generic0Y:
+ return "Generic[0].Y";
+ case Attribute::Generic0Z:
+ return "Generic[0].Z";
+ case Attribute::Generic0W:
+ return "Generic[0].W";
+ case Attribute::Generic1X:
+ return "Generic[1].X";
+ case Attribute::Generic1Y:
+ return "Generic[1].Y";
+ case Attribute::Generic1Z:
+ return "Generic[1].Z";
+ case Attribute::Generic1W:
+ return "Generic[1].W";
+ case Attribute::Generic2X:
+ return "Generic[2].X";
+ case Attribute::Generic2Y:
+ return "Generic[2].Y";
+ case Attribute::Generic2Z:
+ return "Generic[2].Z";
+ case Attribute::Generic2W:
+ return "Generic[2].W";
+ case Attribute::Generic3X:
+ return "Generic[3].X";
+ case Attribute::Generic3Y:
+ return "Generic[3].Y";
+ case Attribute::Generic3Z:
+ return "Generic[3].Z";
+ case Attribute::Generic3W:
+ return "Generic[3].W";
+ case Attribute::Generic4X:
+ return "Generic[4].X";
+ case Attribute::Generic4Y:
+ return "Generic[4].Y";
+ case Attribute::Generic4Z:
+ return "Generic[4].Z";
+ case Attribute::Generic4W:
+ return "Generic[4].W";
+ case Attribute::Generic5X:
+ return "Generic[5].X";
+ case Attribute::Generic5Y:
+ return "Generic[5].Y";
+ case Attribute::Generic5Z:
+ return "Generic[5].Z";
+ case Attribute::Generic5W:
+ return "Generic[5].W";
+ case Attribute::Generic6X:
+ return "Generic[6].X";
+ case Attribute::Generic6Y:
+ return "Generic[6].Y";
+ case Attribute::Generic6Z:
+ return "Generic[6].Z";
+ case Attribute::Generic6W:
+ return "Generic[6].W";
+ case Attribute::Generic7X:
+ return "Generic[7].X";
+ case Attribute::Generic7Y:
+ return "Generic[7].Y";
+ case Attribute::Generic7Z:
+ return "Generic[7].Z";
+ case Attribute::Generic7W:
+ return "Generic[7].W";
+ case Attribute::Generic8X:
+ return "Generic[8].X";
+ case Attribute::Generic8Y:
+ return "Generic[8].Y";
+ case Attribute::Generic8Z:
+ return "Generic[8].Z";
+ case Attribute::Generic8W:
+ return "Generic[8].W";
+ case Attribute::Generic9X:
+ return "Generic[9].X";
+ case Attribute::Generic9Y:
+ return "Generic[9].Y";
+ case Attribute::Generic9Z:
+ return "Generic[9].Z";
+ case Attribute::Generic9W:
+ return "Generic[9].W";
+ case Attribute::Generic10X:
+ return "Generic[10].X";
+ case Attribute::Generic10Y:
+ return "Generic[10].Y";
+ case Attribute::Generic10Z:
+ return "Generic[10].Z";
+ case Attribute::Generic10W:
+ return "Generic[10].W";
+ case Attribute::Generic11X:
+ return "Generic[11].X";
+ case Attribute::Generic11Y:
+ return "Generic[11].Y";
+ case Attribute::Generic11Z:
+ return "Generic[11].Z";
+ case Attribute::Generic11W:
+ return "Generic[11].W";
+ case Attribute::Generic12X:
+ return "Generic[12].X";
+ case Attribute::Generic12Y:
+ return "Generic[12].Y";
+ case Attribute::Generic12Z:
+ return "Generic[12].Z";
+ case Attribute::Generic12W:
+ return "Generic[12].W";
+ case Attribute::Generic13X:
+ return "Generic[13].X";
+ case Attribute::Generic13Y:
+ return "Generic[13].Y";
+ case Attribute::Generic13Z:
+ return "Generic[13].Z";
+ case Attribute::Generic13W:
+ return "Generic[13].W";
+ case Attribute::Generic14X:
+ return "Generic[14].X";
+ case Attribute::Generic14Y:
+ return "Generic[14].Y";
+ case Attribute::Generic14Z:
+ return "Generic[14].Z";
+ case Attribute::Generic14W:
+ return "Generic[14].W";
+ case Attribute::Generic15X:
+ return "Generic[15].X";
+ case Attribute::Generic15Y:
+ return "Generic[15].Y";
+ case Attribute::Generic15Z:
+ return "Generic[15].Z";
+ case Attribute::Generic15W:
+ return "Generic[15].W";
+ case Attribute::Generic16X:
+ return "Generic[16].X";
+ case Attribute::Generic16Y:
+ return "Generic[16].Y";
+ case Attribute::Generic16Z:
+ return "Generic[16].Z";
+ case Attribute::Generic16W:
+ return "Generic[16].W";
+ case Attribute::Generic17X:
+ return "Generic[17].X";
+ case Attribute::Generic17Y:
+ return "Generic[17].Y";
+ case Attribute::Generic17Z:
+ return "Generic[17].Z";
+ case Attribute::Generic17W:
+ return "Generic[17].W";
+ case Attribute::Generic18X:
+ return "Generic[18].X";
+ case Attribute::Generic18Y:
+ return "Generic[18].Y";
+ case Attribute::Generic18Z:
+ return "Generic[18].Z";
+ case Attribute::Generic18W:
+ return "Generic[18].W";
+ case Attribute::Generic19X:
+ return "Generic[19].X";
+ case Attribute::Generic19Y:
+ return "Generic[19].Y";
+ case Attribute::Generic19Z:
+ return "Generic[19].Z";
+ case Attribute::Generic19W:
+ return "Generic[19].W";
+ case Attribute::Generic20X:
+ return "Generic[20].X";
+ case Attribute::Generic20Y:
+ return "Generic[20].Y";
+ case Attribute::Generic20Z:
+ return "Generic[20].Z";
+ case Attribute::Generic20W:
+ return "Generic[20].W";
+ case Attribute::Generic21X:
+ return "Generic[21].X";
+ case Attribute::Generic21Y:
+ return "Generic[21].Y";
+ case Attribute::Generic21Z:
+ return "Generic[21].Z";
+ case Attribute::Generic21W:
+ return "Generic[21].W";
+ case Attribute::Generic22X:
+ return "Generic[22].X";
+ case Attribute::Generic22Y:
+ return "Generic[22].Y";
+ case Attribute::Generic22Z:
+ return "Generic[22].Z";
+ case Attribute::Generic22W:
+ return "Generic[22].W";
+ case Attribute::Generic23X:
+ return "Generic[23].X";
+ case Attribute::Generic23Y:
+ return "Generic[23].Y";
+ case Attribute::Generic23Z:
+ return "Generic[23].Z";
+ case Attribute::Generic23W:
+ return "Generic[23].W";
+ case Attribute::Generic24X:
+ return "Generic[24].X";
+ case Attribute::Generic24Y:
+ return "Generic[24].Y";
+ case Attribute::Generic24Z:
+ return "Generic[24].Z";
+ case Attribute::Generic24W:
+ return "Generic[24].W";
+ case Attribute::Generic25X:
+ return "Generic[25].X";
+ case Attribute::Generic25Y:
+ return "Generic[25].Y";
+ case Attribute::Generic25Z:
+ return "Generic[25].Z";
+ case Attribute::Generic25W:
+ return "Generic[25].W";
+ case Attribute::Generic26X:
+ return "Generic[26].X";
+ case Attribute::Generic26Y:
+ return "Generic[26].Y";
+ case Attribute::Generic26Z:
+ return "Generic[26].Z";
+ case Attribute::Generic26W:
+ return "Generic[26].W";
+ case Attribute::Generic27X:
+ return "Generic[27].X";
+ case Attribute::Generic27Y:
+ return "Generic[27].Y";
+ case Attribute::Generic27Z:
+ return "Generic[27].Z";
+ case Attribute::Generic27W:
+ return "Generic[27].W";
+ case Attribute::Generic28X:
+ return "Generic[28].X";
+ case Attribute::Generic28Y:
+ return "Generic[28].Y";
+ case Attribute::Generic28Z:
+ return "Generic[28].Z";
+ case Attribute::Generic28W:
+ return "Generic[28].W";
+ case Attribute::Generic29X:
+ return "Generic[29].X";
+ case Attribute::Generic29Y:
+ return "Generic[29].Y";
+ case Attribute::Generic29Z:
+ return "Generic[29].Z";
+ case Attribute::Generic29W:
+ return "Generic[29].W";
+ case Attribute::Generic30X:
+ return "Generic[30].X";
+ case Attribute::Generic30Y:
+ return "Generic[30].Y";
+ case Attribute::Generic30Z:
+ return "Generic[30].Z";
+ case Attribute::Generic30W:
+ return "Generic[30].W";
+ case Attribute::Generic31X:
+ return "Generic[31].X";
+ case Attribute::Generic31Y:
+ return "Generic[31].Y";
+ case Attribute::Generic31Z:
+ return "Generic[31].Z";
+ case Attribute::Generic31W:
+ return "Generic[31].W";
+ case Attribute::ColorFrontDiffuseR:
+ return "ColorFrontDiffuse.R";
+ case Attribute::ColorFrontDiffuseG:
+ return "ColorFrontDiffuse.G";
+ case Attribute::ColorFrontDiffuseB:
+ return "ColorFrontDiffuse.B";
+ case Attribute::ColorFrontDiffuseA:
+ return "ColorFrontDiffuse.A";
+ case Attribute::ColorFrontSpecularR:
+ return "ColorFrontSpecular.R";
+ case Attribute::ColorFrontSpecularG:
+ return "ColorFrontSpecular.G";
+ case Attribute::ColorFrontSpecularB:
+ return "ColorFrontSpecular.B";
+ case Attribute::ColorFrontSpecularA:
+ return "ColorFrontSpecular.A";
+ case Attribute::ColorBackDiffuseR:
+ return "ColorBackDiffuse.R";
+ case Attribute::ColorBackDiffuseG:
+ return "ColorBackDiffuse.G";
+ case Attribute::ColorBackDiffuseB:
+ return "ColorBackDiffuse.B";
+ case Attribute::ColorBackDiffuseA:
+ return "ColorBackDiffuse.A";
+ case Attribute::ColorBackSpecularR:
+ return "ColorBackSpecular.R";
+ case Attribute::ColorBackSpecularG:
+ return "ColorBackSpecular.G";
+ case Attribute::ColorBackSpecularB:
+ return "ColorBackSpecular.B";
+ case Attribute::ColorBackSpecularA:
+ return "ColorBackSpecular.A";
+ case Attribute::ClipDistance0:
+ return "ClipDistance[0]";
+ case Attribute::ClipDistance1:
+ return "ClipDistance[1]";
+ case Attribute::ClipDistance2:
+ return "ClipDistance[2]";
+ case Attribute::ClipDistance3:
+ return "ClipDistance[3]";
+ case Attribute::ClipDistance4:
+ return "ClipDistance[4]";
+ case Attribute::ClipDistance5:
+ return "ClipDistance[5]";
+ case Attribute::ClipDistance6:
+ return "ClipDistance[6]";
+ case Attribute::ClipDistance7:
+ return "ClipDistance[7]";
+ case Attribute::PointSpriteS:
+ return "PointSprite.S";
+ case Attribute::PointSpriteT:
+ return "PointSprite.T";
+ case Attribute::FogCoordinate:
+ return "FogCoordinate";
+ case Attribute::TessellationEvaluationPointU:
+ return "TessellationEvaluationPoint.U";
+ case Attribute::TessellationEvaluationPointV:
+ return "TessellationEvaluationPoint.V";
+ case Attribute::InstanceId:
+ return "InstanceId";
+ case Attribute::VertexId:
+ return "VertexId";
+ case Attribute::FixedFncTexture0S:
+ return "FixedFncTexture[0].S";
+ case Attribute::FixedFncTexture0T:
+ return "FixedFncTexture[0].T";
+ case Attribute::FixedFncTexture0R:
+ return "FixedFncTexture[0].R";
+ case Attribute::FixedFncTexture0Q:
+ return "FixedFncTexture[0].Q";
+ case Attribute::FixedFncTexture1S:
+ return "FixedFncTexture[1].S";
+ case Attribute::FixedFncTexture1T:
+ return "FixedFncTexture[1].T";
+ case Attribute::FixedFncTexture1R:
+ return "FixedFncTexture[1].R";
+ case Attribute::FixedFncTexture1Q:
+ return "FixedFncTexture[1].Q";
+ case Attribute::FixedFncTexture2S:
+ return "FixedFncTexture[2].S";
+ case Attribute::FixedFncTexture2T:
+ return "FixedFncTexture[2].T";
+ case Attribute::FixedFncTexture2R:
+ return "FixedFncTexture[2].R";
+ case Attribute::FixedFncTexture2Q:
+ return "FixedFncTexture[2].Q";
+ case Attribute::FixedFncTexture3S:
+ return "FixedFncTexture[3].S";
+ case Attribute::FixedFncTexture3T:
+ return "FixedFncTexture[3].T";
+ case Attribute::FixedFncTexture3R:
+ return "FixedFncTexture[3].R";
+ case Attribute::FixedFncTexture3Q:
+ return "FixedFncTexture[3].Q";
+ case Attribute::FixedFncTexture4S:
+ return "FixedFncTexture[4].S";
+ case Attribute::FixedFncTexture4T:
+ return "FixedFncTexture[4].T";
+ case Attribute::FixedFncTexture4R:
+ return "FixedFncTexture[4].R";
+ case Attribute::FixedFncTexture4Q:
+ return "FixedFncTexture[4].Q";
+ case Attribute::FixedFncTexture5S:
+ return "FixedFncTexture[5].S";
+ case Attribute::FixedFncTexture5T:
+ return "FixedFncTexture[5].T";
+ case Attribute::FixedFncTexture5R:
+ return "FixedFncTexture[5].R";
+ case Attribute::FixedFncTexture5Q:
+ return "FixedFncTexture[5].Q";
+ case Attribute::FixedFncTexture6S:
+ return "FixedFncTexture[6].S";
+ case Attribute::FixedFncTexture6T:
+ return "FixedFncTexture[6].T";
+ case Attribute::FixedFncTexture6R:
+ return "FixedFncTexture[6].R";
+ case Attribute::FixedFncTexture6Q:
+ return "FixedFncTexture[6].Q";
+ case Attribute::FixedFncTexture7S:
+ return "FixedFncTexture[7].S";
+ case Attribute::FixedFncTexture7T:
+ return "FixedFncTexture[7].T";
+ case Attribute::FixedFncTexture7R:
+ return "FixedFncTexture[7].R";
+ case Attribute::FixedFncTexture7Q:
+ return "FixedFncTexture[7].Q";
+ case Attribute::FixedFncTexture8S:
+ return "FixedFncTexture[8].S";
+ case Attribute::FixedFncTexture8T:
+ return "FixedFncTexture[8].T";
+ case Attribute::FixedFncTexture8R:
+ return "FixedFncTexture[8].R";
+ case Attribute::FixedFncTexture8Q:
+ return "FixedFncTexture[8].Q";
+ case Attribute::FixedFncTexture9S:
+ return "FixedFncTexture[9].S";
+ case Attribute::FixedFncTexture9T:
+ return "FixedFncTexture[9].T";
+ case Attribute::FixedFncTexture9R:
+ return "FixedFncTexture[9].R";
+ case Attribute::FixedFncTexture9Q:
+ return "FixedFncTexture[9].Q";
+ case Attribute::ViewportMask:
+ return "ViewportMask";
+ case Attribute::FrontFace:
+ return "FrontFace";
+ }
+ return fmt::format("<reserved attribute {}>", static_cast<int>(attribute));
+}
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/attribute.h b/src/shader_recompiler/frontend/ir/attribute.h
new file mode 100644
index 000000000..ca1199494
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/attribute.h
@@ -0,0 +1,250 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <fmt/format.h>
+
+#include "common/common_types.h"
+
+namespace Shader::IR {
+
+enum class Attribute : u64 {
+ PrimitiveId = 24,
+ Layer = 25,
+ ViewportIndex = 26,
+ PointSize = 27,
+ PositionX = 28,
+ PositionY = 29,
+ PositionZ = 30,
+ PositionW = 31,
+ Generic0X = 32,
+ Generic0Y = 33,
+ Generic0Z = 34,
+ Generic0W = 35,
+ Generic1X = 36,
+ Generic1Y = 37,
+ Generic1Z = 38,
+ Generic1W = 39,
+ Generic2X = 40,
+ Generic2Y = 41,
+ Generic2Z = 42,
+ Generic2W = 43,
+ Generic3X = 44,
+ Generic3Y = 45,
+ Generic3Z = 46,
+ Generic3W = 47,
+ Generic4X = 48,
+ Generic4Y = 49,
+ Generic4Z = 50,
+ Generic4W = 51,
+ Generic5X = 52,
+ Generic5Y = 53,
+ Generic5Z = 54,
+ Generic5W = 55,
+ Generic6X = 56,
+ Generic6Y = 57,
+ Generic6Z = 58,
+ Generic6W = 59,
+ Generic7X = 60,
+ Generic7Y = 61,
+ Generic7Z = 62,
+ Generic7W = 63,
+ Generic8X = 64,
+ Generic8Y = 65,
+ Generic8Z = 66,
+ Generic8W = 67,
+ Generic9X = 68,
+ Generic9Y = 69,
+ Generic9Z = 70,
+ Generic9W = 71,
+ Generic10X = 72,
+ Generic10Y = 73,
+ Generic10Z = 74,
+ Generic10W = 75,
+ Generic11X = 76,
+ Generic11Y = 77,
+ Generic11Z = 78,
+ Generic11W = 79,
+ Generic12X = 80,
+ Generic12Y = 81,
+ Generic12Z = 82,
+ Generic12W = 83,
+ Generic13X = 84,
+ Generic13Y = 85,
+ Generic13Z = 86,
+ Generic13W = 87,
+ Generic14X = 88,
+ Generic14Y = 89,
+ Generic14Z = 90,
+ Generic14W = 91,
+ Generic15X = 92,
+ Generic15Y = 93,
+ Generic15Z = 94,
+ Generic15W = 95,
+ Generic16X = 96,
+ Generic16Y = 97,
+ Generic16Z = 98,
+ Generic16W = 99,
+ Generic17X = 100,
+ Generic17Y = 101,
+ Generic17Z = 102,
+ Generic17W = 103,
+ Generic18X = 104,
+ Generic18Y = 105,
+ Generic18Z = 106,
+ Generic18W = 107,
+ Generic19X = 108,
+ Generic19Y = 109,
+ Generic19Z = 110,
+ Generic19W = 111,
+ Generic20X = 112,
+ Generic20Y = 113,
+ Generic20Z = 114,
+ Generic20W = 115,
+ Generic21X = 116,
+ Generic21Y = 117,
+ Generic21Z = 118,
+ Generic21W = 119,
+ Generic22X = 120,
+ Generic22Y = 121,
+ Generic22Z = 122,
+ Generic22W = 123,
+ Generic23X = 124,
+ Generic23Y = 125,
+ Generic23Z = 126,
+ Generic23W = 127,
+ Generic24X = 128,
+ Generic24Y = 129,
+ Generic24Z = 130,
+ Generic24W = 131,
+ Generic25X = 132,
+ Generic25Y = 133,
+ Generic25Z = 134,
+ Generic25W = 135,
+ Generic26X = 136,
+ Generic26Y = 137,
+ Generic26Z = 138,
+ Generic26W = 139,
+ Generic27X = 140,
+ Generic27Y = 141,
+ Generic27Z = 142,
+ Generic27W = 143,
+ Generic28X = 144,
+ Generic28Y = 145,
+ Generic28Z = 146,
+ Generic28W = 147,
+ Generic29X = 148,
+ Generic29Y = 149,
+ Generic29Z = 150,
+ Generic29W = 151,
+ Generic30X = 152,
+ Generic30Y = 153,
+ Generic30Z = 154,
+ Generic30W = 155,
+ Generic31X = 156,
+ Generic31Y = 157,
+ Generic31Z = 158,
+ Generic31W = 159,
+ ColorFrontDiffuseR = 160,
+ ColorFrontDiffuseG = 161,
+ ColorFrontDiffuseB = 162,
+ ColorFrontDiffuseA = 163,
+ ColorFrontSpecularR = 164,
+ ColorFrontSpecularG = 165,
+ ColorFrontSpecularB = 166,
+ ColorFrontSpecularA = 167,
+ ColorBackDiffuseR = 168,
+ ColorBackDiffuseG = 169,
+ ColorBackDiffuseB = 170,
+ ColorBackDiffuseA = 171,
+ ColorBackSpecularR = 172,
+ ColorBackSpecularG = 173,
+ ColorBackSpecularB = 174,
+ ColorBackSpecularA = 175,
+ ClipDistance0 = 176,
+ ClipDistance1 = 177,
+ ClipDistance2 = 178,
+ ClipDistance3 = 179,
+ ClipDistance4 = 180,
+ ClipDistance5 = 181,
+ ClipDistance6 = 182,
+ ClipDistance7 = 183,
+ PointSpriteS = 184,
+ PointSpriteT = 185,
+ FogCoordinate = 186,
+ TessellationEvaluationPointU = 188,
+ TessellationEvaluationPointV = 189,
+ InstanceId = 190,
+ VertexId = 191,
+ FixedFncTexture0S = 192,
+ FixedFncTexture0T = 193,
+ FixedFncTexture0R = 194,
+ FixedFncTexture0Q = 195,
+ FixedFncTexture1S = 196,
+ FixedFncTexture1T = 197,
+ FixedFncTexture1R = 198,
+ FixedFncTexture1Q = 199,
+ FixedFncTexture2S = 200,
+ FixedFncTexture2T = 201,
+ FixedFncTexture2R = 202,
+ FixedFncTexture2Q = 203,
+ FixedFncTexture3S = 204,
+ FixedFncTexture3T = 205,
+ FixedFncTexture3R = 206,
+ FixedFncTexture3Q = 207,
+ FixedFncTexture4S = 208,
+ FixedFncTexture4T = 209,
+ FixedFncTexture4R = 210,
+ FixedFncTexture4Q = 211,
+ FixedFncTexture5S = 212,
+ FixedFncTexture5T = 213,
+ FixedFncTexture5R = 214,
+ FixedFncTexture5Q = 215,
+ FixedFncTexture6S = 216,
+ FixedFncTexture6T = 217,
+ FixedFncTexture6R = 218,
+ FixedFncTexture6Q = 219,
+ FixedFncTexture7S = 220,
+ FixedFncTexture7T = 221,
+ FixedFncTexture7R = 222,
+ FixedFncTexture7Q = 223,
+ FixedFncTexture8S = 224,
+ FixedFncTexture8T = 225,
+ FixedFncTexture8R = 226,
+ FixedFncTexture8Q = 227,
+ FixedFncTexture9S = 228,
+ FixedFncTexture9T = 229,
+ FixedFncTexture9R = 230,
+ FixedFncTexture9Q = 231,
+ ViewportMask = 232,
+ FrontFace = 255,
+};
+
+constexpr size_t NUM_GENERICS = 32;
+
+[[nodiscard]] bool IsGeneric(Attribute attribute) noexcept;
+
+[[nodiscard]] u32 GenericAttributeIndex(Attribute attribute);
+
+[[nodiscard]] u32 GenericAttributeElement(Attribute attribute);
+
+[[nodiscard]] std::string NameOf(Attribute attribute);
+
+[[nodiscard]] constexpr IR::Attribute operator+(IR::Attribute attribute, size_t value) noexcept {
+ return static_cast<IR::Attribute>(static_cast<size_t>(attribute) + value);
+}
+
+} // namespace Shader::IR
+
+template <>
+struct fmt::formatter<Shader::IR::Attribute> {
+ constexpr auto parse(format_parse_context& ctx) {
+ return ctx.begin();
+ }
+ template <typename FormatContext>
+ auto format(const Shader::IR::Attribute& attribute, FormatContext& ctx) {
+ return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(attribute));
+ }
+};
diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp
new file mode 100644
index 000000000..7c08b25ce
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/basic_block.cpp
@@ -0,0 +1,149 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <initializer_list>
+#include <map>
+#include <memory>
+
+#include "common/bit_cast.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::IR {
+
+Block::Block(ObjectPool<Inst>& inst_pool_) : inst_pool{&inst_pool_} {}
+
+Block::~Block() = default;
+
+void Block::AppendNewInst(Opcode op, std::initializer_list<Value> args) {
+ PrependNewInst(end(), op, args);
+}
+
+Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op,
+ std::initializer_list<Value> args, u32 flags) {
+ Inst* const inst{inst_pool->Create(op, flags)};
+ const auto result_it{instructions.insert(insertion_point, *inst)};
+
+ if (inst->NumArgs() != args.size()) {
+ throw InvalidArgument("Invalid number of arguments {} in {}", args.size(), op);
+ }
+ std::ranges::for_each(args, [inst, index = size_t{0}](const Value& arg) mutable {
+ inst->SetArg(index, arg);
+ ++index;
+ });
+ return result_it;
+}
+
+void Block::AddBranch(Block* block) {
+ if (std::ranges::find(imm_successors, block) != imm_successors.end()) {
+ throw LogicError("Successor already inserted");
+ }
+ if (std::ranges::find(block->imm_predecessors, this) != block->imm_predecessors.end()) {
+ throw LogicError("Predecessor already inserted");
+ }
+ imm_successors.push_back(block);
+ block->imm_predecessors.push_back(this);
+}
+
+static std::string BlockToIndex(const std::map<const Block*, size_t>& block_to_index,
+ Block* block) {
+ if (const auto it{block_to_index.find(block)}; it != block_to_index.end()) {
+ return fmt::format("{{Block ${}}}", it->second);
+ }
+ return fmt::format("$<unknown block {:016x}>", reinterpret_cast<u64>(block));
+}
+
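+// Lazily assigns a printing index to an instruction the first time it is encountered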
+static size_t InstIndex(std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index,
+ const Inst* inst) {
+ const auto [it, is_inserted]{inst_to_index.emplace(inst, inst_index + 1)};
+ if (is_inserted) {
+ ++inst_index;
+ }
+ return it->second;
+}
+
+static std::string ArgToIndex(std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index,
+ const Value& arg) {
+ if (arg.IsEmpty()) {
+ return "<null>";
+ }
+ if (!arg.IsImmediate() || arg.IsIdentity()) {
+ return fmt::format("%{}", InstIndex(inst_to_index, inst_index, arg.Inst()));
+ }
+ switch (arg.Type()) {
+ case Type::U1:
+ return fmt::format("#{}", arg.U1() ? "true" : "false");
+ case Type::U8:
+ return fmt::format("#{}", arg.U8());
+ case Type::U16:
+ return fmt::format("#{}", arg.U16());
+ case Type::U32:
+ return fmt::format("#{}", arg.U32());
+ case Type::U64:
+ return fmt::format("#{}", arg.U64());
+ case Type::F32:
+ return fmt::format("#{}", arg.F32());
+ case Type::Reg:
+ return fmt::format("{}", arg.Reg());
+ case Type::Pred:
+ return fmt::format("{}", arg.Pred());
+ case Type::Attribute:
+ return fmt::format("{}", arg.Attribute());
+ default:
+ return "<unknown immediate type>";
+ }
+}
+
+std::string DumpBlock(const Block& block) {
+ size_t inst_index{0};
+ std::map<const Inst*, size_t> inst_to_index;
+ return DumpBlock(block, {}, inst_to_index, inst_index);
+}
+
+std::string DumpBlock(const Block& block, const std::map<const Block*, size_t>& block_to_index,
+ std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index) {
+ std::string ret{"Block"};
+ if (const auto it{block_to_index.find(&block)}; it != block_to_index.end()) {
+ ret += fmt::format(" ${}", it->second);
+ }
+ ret += '\n';
+ for (const Inst& inst : block) {
+ const Opcode op{inst.GetOpcode()};
+ ret += fmt::format("[{:016x}] ", reinterpret_cast<u64>(&inst));
+ if (TypeOf(op) != Type::Void) {
+ ret += fmt::format("%{:<5} = {}", InstIndex(inst_to_index, inst_index, &inst), op);
+ } else {
+ ret += fmt::format(" {}", op); // '%00000 = ' -> 1 + 5 + 3 = 9 spaces
+ }
+ const size_t arg_count{inst.NumArgs()};
+ for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) {
+ const Value arg{inst.Arg(arg_index)};
+ const std::string arg_str{ArgToIndex(inst_to_index, inst_index, arg)};
+ ret += arg_index != 0 ? ", " : " ";
+ if (op == Opcode::Phi) {
+ ret += fmt::format("[ {}, {} ]", arg_str,
+ BlockToIndex(block_to_index, inst.PhiBlock(arg_index)));
+ } else {
+ ret += arg_str;
+ }
+ if (op != Opcode::Phi) {
+ const Type actual_type{arg.Type()};
+ const Type expected_type{ArgTypeOf(op, arg_index)};
+ if (!AreTypesCompatible(actual_type, expected_type)) {
+ ret += fmt::format("<type error: {} != {}>", actual_type, expected_type);
+ }
+ }
+ }
+ if (TypeOf(op) != Type::Void) {
+ ret += fmt::format(" (uses: {})\n", inst.UseCount());
+ } else {
+ ret += '\n';
+ }
+ }
+ return ret;
+}
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h
new file mode 100644
index 000000000..7e134b4c7
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/basic_block.h
@@ -0,0 +1,185 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <initializer_list>
+#include <map>
+#include <span>
+#include <vector>
+
+#include <boost/intrusive/list.hpp>
+
+#include "common/bit_cast.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/condition.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/object_pool.h"
+
+namespace Shader::IR {
+
+class Block {
+public:
+ using InstructionList = boost::intrusive::list<Inst>;
+ using size_type = InstructionList::size_type;
+ using iterator = InstructionList::iterator;
+ using const_iterator = InstructionList::const_iterator;
+ using reverse_iterator = InstructionList::reverse_iterator;
+ using const_reverse_iterator = InstructionList::const_reverse_iterator;
+
+ explicit Block(ObjectPool<Inst>& inst_pool_);
+ ~Block();
+
+ Block(const Block&) = delete;
+ Block& operator=(const Block&) = delete;
+
+ Block(Block&&) = default;
+ Block& operator=(Block&&) = default;
+
+ /// Appends a new instruction to the end of this basic block.
+ void AppendNewInst(Opcode op, std::initializer_list<Value> args);
+
+ /// Prepends a new instruction to this basic block before the insertion point.
+ iterator PrependNewInst(iterator insertion_point, Opcode op,
+ std::initializer_list<Value> args = {}, u32 flags = 0);
+
+ /// Adds a new branch to this basic block.
+ void AddBranch(Block* block);
+
+ /// Gets a mutable reference to the instruction list for this basic block.
+ [[nodiscard]] InstructionList& Instructions() noexcept {
+ return instructions;
+ }
+ /// Gets an immutable reference to the instruction list for this basic block.
+ [[nodiscard]] const InstructionList& Instructions() const noexcept {
+ return instructions;
+ }
+
+ /// Gets an immutable span to the immediate predecessors.
+ [[nodiscard]] std::span<Block* const> ImmPredecessors() const noexcept {
+ return imm_predecessors;
+ }
+ /// Gets an immutable span to the immediate successors.
+ [[nodiscard]] std::span<Block* const> ImmSuccessors() const noexcept {
+ return imm_successors;
+ }
+
+    /// Intrusively store the host definition of this basic block.
+ template <typename DefinitionType>
+ void SetDefinition(DefinitionType def) {
+ definition = Common::BitCast<u32>(def);
+ }
+
+    /// Return the intrusively stored host definition of this basic block.
+ template <typename DefinitionType>
+ [[nodiscard]] DefinitionType Definition() const noexcept {
+ return Common::BitCast<DefinitionType>(definition);
+ }
+
+ void SetSsaRegValue(IR::Reg reg, const Value& value) noexcept {
+ ssa_reg_values[RegIndex(reg)] = value;
+ }
+ const Value& SsaRegValue(IR::Reg reg) const noexcept {
+ return ssa_reg_values[RegIndex(reg)];
+ }
+
+ void SsaSeal() noexcept {
+ is_ssa_sealed = true;
+ }
+ [[nodiscard]] bool IsSsaSealed() const noexcept {
+ return is_ssa_sealed;
+ }
+
+ [[nodiscard]] bool empty() const {
+ return instructions.empty();
+ }
+ [[nodiscard]] size_type size() const {
+ return instructions.size();
+ }
+
+ [[nodiscard]] Inst& front() {
+ return instructions.front();
+ }
+ [[nodiscard]] const Inst& front() const {
+ return instructions.front();
+ }
+
+ [[nodiscard]] Inst& back() {
+ return instructions.back();
+ }
+ [[nodiscard]] const Inst& back() const {
+ return instructions.back();
+ }
+
+ [[nodiscard]] iterator begin() {
+ return instructions.begin();
+ }
+ [[nodiscard]] const_iterator begin() const {
+ return instructions.begin();
+ }
+ [[nodiscard]] iterator end() {
+ return instructions.end();
+ }
+ [[nodiscard]] const_iterator end() const {
+ return instructions.end();
+ }
+
+ [[nodiscard]] reverse_iterator rbegin() {
+ return instructions.rbegin();
+ }
+ [[nodiscard]] const_reverse_iterator rbegin() const {
+ return instructions.rbegin();
+ }
+ [[nodiscard]] reverse_iterator rend() {
+ return instructions.rend();
+ }
+ [[nodiscard]] const_reverse_iterator rend() const {
+ return instructions.rend();
+ }
+
+ [[nodiscard]] const_iterator cbegin() const {
+ return instructions.cbegin();
+ }
+ [[nodiscard]] const_iterator cend() const {
+ return instructions.cend();
+ }
+
+ [[nodiscard]] const_reverse_iterator crbegin() const {
+ return instructions.crbegin();
+ }
+ [[nodiscard]] const_reverse_iterator crend() const {
+ return instructions.crend();
+ }
+
+private:
+ /// Memory pool for instruction list
+ ObjectPool<Inst>* inst_pool;
+
+ /// List of instructions in this block
+ InstructionList instructions;
+
+ /// Block immediate predecessors
+ std::vector<Block*> imm_predecessors;
+ /// Block immediate successors
+ std::vector<Block*> imm_successors;
+
+ /// Intrusively store the value of a register in the block.
+ std::array<Value, NUM_REGS> ssa_reg_values;
+ /// Intrusively store if the block is sealed in the SSA pass.
+ bool is_ssa_sealed{false};
+
+ /// Intrusively stored host definition of this block.
+ u32 definition{};
+};
+
+using BlockList = std::vector<Block*>;
+
+[[nodiscard]] std::string DumpBlock(const Block& block);
+
+[[nodiscard]] std::string DumpBlock(const Block& block,
+ const std::map<const Block*, size_t>& block_to_index,
+ std::map<const Inst*, size_t>& inst_to_index,
+ size_t& inst_index);
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/breadth_first_search.h b/src/shader_recompiler/frontend/ir/breadth_first_search.h
new file mode 100644
index 000000000..a52ccbd58
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/breadth_first_search.h
@@ -0,0 +1,56 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <algorithm>
+#include <optional>
+#include <queue>
+#include <type_traits>
+
+#include <boost/container/small_vector.hpp>
+
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::IR {
+
+template <typename Pred>
+auto BreadthFirstSearch(const Value& value, Pred&& pred)
+ -> std::invoke_result_t<Pred, const Inst*> {
+ if (value.IsImmediate()) {
+ // Nothing to do with immediates
+ return std::nullopt;
+ }
+    // Breadth-first search visiting the rightmost arguments first
+    // The small_vector capacity was determined from shaders in Super Smash Bros. Ultimate
+ boost::container::small_vector<const Inst*, 2> visited;
+ std::queue<const Inst*> queue;
+ queue.push(value.InstRecursive());
+
+ while (!queue.empty()) {
+ // Pop one instruction from the queue
+ const Inst* const inst{queue.front()};
+ queue.pop();
+ if (const std::optional result = pred(inst)) {
+ // This is the instruction we were looking for
+ return result;
+ }
+        // Visit the rightmost arguments first
+ for (size_t arg = inst->NumArgs(); arg--;) {
+ const Value arg_value{inst->Arg(arg)};
+ if (arg_value.IsImmediate()) {
+ continue;
+ }
+ // Queue instruction if it hasn't been visited
+ const Inst* const arg_inst{arg_value.InstRecursive()};
+ if (std::ranges::find(visited, arg_inst) == visited.end()) {
+ visited.push_back(arg_inst);
+ queue.push(arg_inst);
+ }
+ }
+ }
+ // SSA tree has been traversed and the result hasn't been found
+ return std::nullopt;
+}
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/condition.cpp b/src/shader_recompiler/frontend/ir/condition.cpp
new file mode 100644
index 000000000..fc18ea2a2
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/condition.cpp
@@ -0,0 +1,29 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string>
+
+#include <fmt/format.h>
+
+#include "shader_recompiler/frontend/ir/condition.h"
+
+namespace Shader::IR {
+
+std::string NameOf(Condition condition) {
+ std::string ret;
+ if (condition.GetFlowTest() != FlowTest::T) {
+ ret = fmt::to_string(condition.GetFlowTest());
+ }
+ const auto [pred, negated]{condition.GetPred()};
+ if (!ret.empty()) {
+ ret += '&';
+ }
+ if (negated) {
+ ret += '!';
+ }
+ ret += fmt::to_string(pred);
+ return ret;
+}
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/condition.h b/src/shader_recompiler/frontend/ir/condition.h
new file mode 100644
index 000000000..aa8597c60
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/condition.h
@@ -0,0 +1,60 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <compare>
+#include <string>
+
+#include <fmt/format.h>
+
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/flow_test.h"
+#include "shader_recompiler/frontend/ir/pred.h"
+
+namespace Shader::IR {
+
+class Condition {
+public:
+ Condition() noexcept = default;
+
+ explicit Condition(FlowTest flow_test_, Pred pred_, bool pred_negated_ = false) noexcept
+ : flow_test{static_cast<u16>(flow_test_)}, pred{static_cast<u8>(pred_)},
+ pred_negated{pred_negated_ ? u8{1} : u8{0}} {}
+
+ explicit Condition(Pred pred_, bool pred_negated_ = false) noexcept
+ : Condition(FlowTest::T, pred_, pred_negated_) {}
+
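+    // A raw boolean condition maps to the PT predicate, negated when the value is false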
+ explicit Condition(bool value) : Condition(Pred::PT, !value) {}
+
+ auto operator<=>(const Condition&) const noexcept = default;
+
+ [[nodiscard]] IR::FlowTest GetFlowTest() const noexcept {
+ return static_cast<IR::FlowTest>(flow_test);
+ }
+
+ [[nodiscard]] std::pair<IR::Pred, bool> GetPred() const noexcept {
+ return {static_cast<IR::Pred>(pred), pred_negated != 0};
+ }
+
+private:
+ u16 flow_test;
+ u8 pred;
+ u8 pred_negated;
+};
+
+std::string NameOf(Condition condition);
+
+} // namespace Shader::IR
+
+template <>
+struct fmt::formatter<Shader::IR::Condition> {
+ constexpr auto parse(format_parse_context& ctx) {
+ return ctx.begin();
+ }
+ template <typename FormatContext>
+ auto format(const Shader::IR::Condition& cond, FormatContext& ctx) {
+ return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(cond));
+ }
+};
diff --git a/src/shader_recompiler/frontend/ir/flow_test.cpp b/src/shader_recompiler/frontend/ir/flow_test.cpp
new file mode 100644
index 000000000..6ebb4ad89
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/flow_test.cpp
@@ -0,0 +1,83 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string>
+
+#include <fmt/format.h>
+
+#include "shader_recompiler/frontend/ir/flow_test.h"
+
+namespace Shader::IR {
+
+std::string NameOf(FlowTest flow_test) {
+ switch (flow_test) {
+ case FlowTest::F:
+ return "F";
+ case FlowTest::LT:
+ return "LT";
+ case FlowTest::EQ:
+ return "EQ";
+ case FlowTest::LE:
+ return "LE";
+ case FlowTest::GT:
+ return "GT";
+ case FlowTest::NE:
+ return "NE";
+ case FlowTest::GE:
+ return "GE";
+ case FlowTest::NUM:
+ return "NUM";
+ case FlowTest::NaN:
+ return "NAN";
+ case FlowTest::LTU:
+ return "LTU";
+ case FlowTest::EQU:
+ return "EQU";
+ case FlowTest::LEU:
+ return "LEU";
+ case FlowTest::GTU:
+ return "GTU";
+ case FlowTest::NEU:
+ return "NEU";
+ case FlowTest::GEU:
+ return "GEU";
+ case FlowTest::T:
+ return "T";
+ case FlowTest::OFF:
+ return "OFF";
+ case FlowTest::LO:
+ return "LO";
+ case FlowTest::SFF:
+ return "SFF";
+ case FlowTest::LS:
+ return "LS";
+ case FlowTest::HI:
+ return "HI";
+ case FlowTest::SFT:
+ return "SFT";
+ case FlowTest::HS:
+ return "HS";
+ case FlowTest::OFT:
+ return "OFT";
+ case FlowTest::CSM_TA:
+ return "CSM_TA";
+ case FlowTest::CSM_TR:
+ return "CSM_TR";
+ case FlowTest::CSM_MX:
+ return "CSM_MX";
+ case FlowTest::FCSM_TA:
+ return "FCSM_TA";
+ case FlowTest::FCSM_TR:
+ return "FCSM_TR";
+ case FlowTest::FCSM_MX:
+ return "FCSM_MX";
+ case FlowTest::RLE:
+ return "RLE";
+ case FlowTest::RGT:
+ return "RGT";
+ }
+ return fmt::format("<invalid flow test {}>", static_cast<int>(flow_test));
+}
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/flow_test.h b/src/shader_recompiler/frontend/ir/flow_test.h
new file mode 100644
index 000000000..09e113773
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/flow_test.h
@@ -0,0 +1,62 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <string>
+
+#include <fmt/format.h>
+
+#include "common/common_types.h"
+
+namespace Shader::IR {
+
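+// Condition-code flow tests evaluated against the Z/S/C/O flags; see
+// GetFlowTest in ir_emitter.cpp for how each test lowers to flag logic. The
+// enum order presumably mirrors the hardware encoding of the test field.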
+enum class FlowTest : u64 {
+ F,
+ LT,
+ EQ,
+ LE,
+ GT,
+ NE,
+ GE,
+ NUM,
+ NaN,
+ LTU,
+ EQU,
+ LEU,
+ GTU,
+ NEU,
+ GEU,
+ T,
+ OFF,
+ LO,
+ SFF,
+ LS,
+ HI,
+ SFT,
+ HS,
+ OFT,
+ CSM_TA,
+ CSM_TR,
+ CSM_MX,
+ FCSM_TA,
+ FCSM_TR,
+ FCSM_MX,
+ RLE,
+ RGT,
+};
+
+[[nodiscard]] std::string NameOf(FlowTest flow_test);
+
+} // namespace Shader::IR
+
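+// Formatter so a FlowTest can appear directly in fmt format strings, e.g. the
+// NotImplementedException messages in ir_emitter.cpp and NameOf(Condition).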
+template <>
+struct fmt::formatter<Shader::IR::FlowTest> {
+ constexpr auto parse(format_parse_context& ctx) {
+ return ctx.begin();
+ }
+ template <typename FormatContext>
+ auto format(const Shader::IR::FlowTest& flow_test, FormatContext& ctx) {
+ return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(flow_test));
+ }
+};
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
new file mode 100644
index 000000000..13159a68d
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -0,0 +1,2017 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_cast.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::IR {
+namespace {
+[[noreturn]] void ThrowInvalidType(Type type) {
+ throw InvalidArgument("Invalid type {}", type);
+}
+
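+// Packs the optional bias/LOD and LOD-clamp operands of a texture operation:
+// both present -> a two-element composite, one present -> that value alone,
+// neither -> an empty Value.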
+Value MakeLodClampPair(IREmitter& ir, const F32& bias_lod, const F32& lod_clamp) {
+ if (!bias_lod.IsEmpty() && !lod_clamp.IsEmpty()) {
+ return ir.CompositeConstruct(bias_lod, lod_clamp);
+ } else if (!bias_lod.IsEmpty()) {
+ return bias_lod;
+ } else if (!lod_clamp.IsEmpty()) {
+ return lod_clamp;
+ } else {
+ return Value{};
+ }
+}
+} // Anonymous namespace
+
+U1 IREmitter::Imm1(bool value) const {
+ return U1{Value{value}};
+}
+
+U8 IREmitter::Imm8(u8 value) const {
+ return U8{Value{value}};
+}
+
+U16 IREmitter::Imm16(u16 value) const {
+ return U16{Value{value}};
+}
+
+U32 IREmitter::Imm32(u32 value) const {
+ return U32{Value{value}};
+}
+
+U32 IREmitter::Imm32(s32 value) const {
+ return U32{Value{static_cast<u32>(value)}};
+}
+
+F32 IREmitter::Imm32(f32 value) const {
+ return F32{Value{value}};
+}
+
+U64 IREmitter::Imm64(u64 value) const {
+ return U64{Value{value}};
+}
+
+U64 IREmitter::Imm64(s64 value) const {
+ return U64{Value{static_cast<u64>(value)}};
+}
+
+F64 IREmitter::Imm64(f64 value) const {
+ return F64{Value{value}};
+}
+
+U1 IREmitter::ConditionRef(const U1& value) {
+ return Inst<U1>(Opcode::ConditionRef, value);
+}
+
+void IREmitter::Reference(const Value& value) {
+ Inst(Opcode::Reference, value);
+}
+
+void IREmitter::PhiMove(IR::Inst& phi, const Value& value) {
+ Inst(Opcode::PhiMove, Value{&phi}, value);
+}
+
+void IREmitter::Prologue() {
+ Inst(Opcode::Prologue);
+}
+
+void IREmitter::Epilogue() {
+ Inst(Opcode::Epilogue);
+}
+
+void IREmitter::DemoteToHelperInvocation() {
+ Inst(Opcode::DemoteToHelperInvocation);
+}
+
+void IREmitter::EmitVertex(const U32& stream) {
+ Inst(Opcode::EmitVertex, stream);
+}
+
+void IREmitter::EndPrimitive(const U32& stream) {
+ Inst(Opcode::EndPrimitive, stream);
+}
+
+void IREmitter::Barrier() {
+ Inst(Opcode::Barrier);
+}
+
+void IREmitter::WorkgroupMemoryBarrier() {
+ Inst(Opcode::WorkgroupMemoryBarrier);
+}
+
+void IREmitter::DeviceMemoryBarrier() {
+ Inst(Opcode::DeviceMemoryBarrier);
+}
+
+U32 IREmitter::GetReg(IR::Reg reg) {
+ return Inst<U32>(Opcode::GetRegister, reg);
+}
+
+void IREmitter::SetReg(IR::Reg reg, const U32& value) {
+ Inst(Opcode::SetRegister, reg, value);
+}
+
+U1 IREmitter::GetPred(IR::Pred pred, bool is_negated) {
+ if (pred == Pred::PT) {
+ return Imm1(!is_negated);
+ }
+ const U1 value{Inst<U1>(Opcode::GetPred, pred)};
+ if (is_negated) {
+ return Inst<U1>(Opcode::LogicalNot, value);
+ } else {
+ return value;
+ }
+}
+
+void IREmitter::SetPred(IR::Pred pred, const U1& value) {
+ if (pred != IR::Pred::PT) {
+ Inst(Opcode::SetPred, pred, value);
+ }
+}
+
+U1 IREmitter::GetGotoVariable(u32 id) {
+ return Inst<U1>(Opcode::GetGotoVariable, id);
+}
+
+void IREmitter::SetGotoVariable(u32 id, const U1& value) {
+ Inst(Opcode::SetGotoVariable, id, value);
+}
+
+U32 IREmitter::GetIndirectBranchVariable() {
+ return Inst<U32>(Opcode::GetIndirectBranchVariable);
+}
+
+void IREmitter::SetIndirectBranchVariable(const U32& value) {
+ Inst(Opcode::SetIndirectBranchVariable, value);
+}
+
+U32 IREmitter::GetCbuf(const U32& binding, const U32& byte_offset) {
+ return Inst<U32>(Opcode::GetCbufU32, binding, byte_offset);
+}
+
+Value IREmitter::GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize,
+ bool is_signed) {
+ switch (bitsize) {
+ case 8:
+ return Inst<U32>(is_signed ? Opcode::GetCbufS8 : Opcode::GetCbufU8, binding, byte_offset);
+ case 16:
+ return Inst<U32>(is_signed ? Opcode::GetCbufS16 : Opcode::GetCbufU16, binding, byte_offset);
+ case 32:
+ return Inst<U32>(Opcode::GetCbufU32, binding, byte_offset);
+ case 64:
+ return Inst(Opcode::GetCbufU32x2, binding, byte_offset);
+ default:
+ throw InvalidArgument("Invalid bit size {}", bitsize);
+ }
+}
+
+F32 IREmitter::GetFloatCbuf(const U32& binding, const U32& byte_offset) {
+ return Inst<F32>(Opcode::GetCbufF32, binding, byte_offset);
+}
+
+U1 IREmitter::GetZFlag() {
+ return Inst<U1>(Opcode::GetZFlag);
+}
+
+U1 IREmitter::GetSFlag() {
+ return Inst<U1>(Opcode::GetSFlag);
+}
+
+U1 IREmitter::GetCFlag() {
+ return Inst<U1>(Opcode::GetCFlag);
+}
+
+U1 IREmitter::GetOFlag() {
+ return Inst<U1>(Opcode::GetOFlag);
+}
+
+void IREmitter::SetZFlag(const U1& value) {
+ Inst(Opcode::SetZFlag, value);
+}
+
+void IREmitter::SetSFlag(const U1& value) {
+ Inst(Opcode::SetSFlag, value);
+}
+
+void IREmitter::SetCFlag(const U1& value) {
+ Inst(Opcode::SetCFlag, value);
+}
+
+void IREmitter::SetOFlag(const U1& value) {
+ Inst(Opcode::SetOFlag, value);
+}
+
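+// Lowers a flow test to a boolean expression over the Z/S/C/O condition flags,
+// e.g. LT is (S && !Z) ^ O, while the unsigned-style tests (LO/LS/HI/HS) are
+// built from C and Z.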
+static U1 GetFlowTest(IREmitter& ir, FlowTest flow_test) {
+ switch (flow_test) {
+ case FlowTest::F:
+ return ir.Imm1(false);
+ case FlowTest::LT:
+ return ir.LogicalXor(ir.LogicalAnd(ir.GetSFlag(), ir.LogicalNot(ir.GetZFlag())),
+ ir.GetOFlag());
+ case FlowTest::EQ:
+ return ir.LogicalAnd(ir.LogicalNot(ir.GetSFlag()), ir.GetZFlag());
+ case FlowTest::LE:
+ return ir.LogicalXor(ir.GetSFlag(), ir.LogicalOr(ir.GetZFlag(), ir.GetOFlag()));
+ case FlowTest::GT:
+ return ir.LogicalAnd(ir.LogicalXor(ir.LogicalNot(ir.GetSFlag()), ir.GetOFlag()),
+ ir.LogicalNot(ir.GetZFlag()));
+ case FlowTest::NE:
+ return ir.LogicalNot(ir.GetZFlag());
+ case FlowTest::GE:
+ return ir.LogicalNot(ir.LogicalXor(ir.GetSFlag(), ir.GetOFlag()));
+ case FlowTest::NUM:
+ return ir.LogicalOr(ir.LogicalNot(ir.GetSFlag()), ir.LogicalNot(ir.GetZFlag()));
+ case FlowTest::NaN:
+ return ir.LogicalAnd(ir.GetSFlag(), ir.GetZFlag());
+ case FlowTest::LTU:
+ return ir.LogicalXor(ir.GetSFlag(), ir.GetOFlag());
+ case FlowTest::EQU:
+ return ir.GetZFlag();
+ case FlowTest::LEU:
+ return ir.LogicalOr(ir.LogicalXor(ir.GetSFlag(), ir.GetOFlag()), ir.GetZFlag());
+ case FlowTest::GTU:
+ return ir.LogicalXor(ir.LogicalNot(ir.GetSFlag()),
+ ir.LogicalOr(ir.GetZFlag(), ir.GetOFlag()));
+ case FlowTest::NEU:
+ return ir.LogicalOr(ir.GetSFlag(), ir.LogicalNot(ir.GetZFlag()));
+ case FlowTest::GEU:
+ return ir.LogicalXor(ir.LogicalOr(ir.LogicalNot(ir.GetSFlag()), ir.GetZFlag()),
+ ir.GetOFlag());
+ case FlowTest::T:
+ return ir.Imm1(true);
+ case FlowTest::OFF:
+ return ir.LogicalNot(ir.GetOFlag());
+ case FlowTest::LO:
+ return ir.LogicalNot(ir.GetCFlag());
+ case FlowTest::SFF:
+ return ir.LogicalNot(ir.GetSFlag());
+ case FlowTest::LS:
+ return ir.LogicalOr(ir.GetZFlag(), ir.LogicalNot(ir.GetCFlag()));
+ case FlowTest::HI:
+ return ir.LogicalAnd(ir.GetCFlag(), ir.LogicalNot(ir.GetZFlag()));
+ case FlowTest::SFT:
+ return ir.GetSFlag();
+ case FlowTest::HS:
+ return ir.GetCFlag();
+ case FlowTest::OFT:
+ return ir.GetOFlag();
+ case FlowTest::RLE:
+ return ir.LogicalOr(ir.GetSFlag(), ir.GetZFlag());
+ case FlowTest::RGT:
+ return ir.LogicalAnd(ir.LogicalNot(ir.GetSFlag()), ir.LogicalNot(ir.GetZFlag()));
+ case FlowTest::FCSM_TR:
+ LOG_WARNING(Shader, "(STUBBED) FCSM_TR");
+ return ir.Imm1(false);
+ case FlowTest::CSM_TA:
+ case FlowTest::CSM_TR:
+ case FlowTest::CSM_MX:
+ case FlowTest::FCSM_TA:
+ case FlowTest::FCSM_MX:
+ default:
+ throw NotImplementedException("Flow test {}", flow_test);
+ }
+}
+
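+// A condition holds when its predicate holds and its flow test passes; the
+// always-true T test is skipped so plain predicates emit no extra logic.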
+U1 IREmitter::Condition(IR::Condition cond) {
+ const FlowTest flow_test{cond.GetFlowTest()};
+ const auto [pred, is_negated]{cond.GetPred()};
+ if (flow_test == FlowTest::T) {
+ return GetPred(pred, is_negated);
+ }
+ return LogicalAnd(GetPred(pred, is_negated), GetFlowTest(*this, flow_test));
+}
+
+U1 IREmitter::GetFlowTestResult(FlowTest test) {
+ return GetFlowTest(*this, test);
+}
+
+F32 IREmitter::GetAttribute(IR::Attribute attribute) {
+ return GetAttribute(attribute, Imm32(0));
+}
+
+F32 IREmitter::GetAttribute(IR::Attribute attribute, const U32& vertex) {
+ return Inst<F32>(Opcode::GetAttribute, attribute, vertex);
+}
+
+void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex) {
+ Inst(Opcode::SetAttribute, attribute, value, vertex);
+}
+
+F32 IREmitter::GetAttributeIndexed(const U32& phys_address) {
+ return GetAttributeIndexed(phys_address, Imm32(0));
+}
+
+F32 IREmitter::GetAttributeIndexed(const U32& phys_address, const U32& vertex) {
+ return Inst<F32>(Opcode::GetAttributeIndexed, phys_address, vertex);
+}
+
+void IREmitter::SetAttributeIndexed(const U32& phys_address, const F32& value, const U32& vertex) {
+ Inst(Opcode::SetAttributeIndexed, phys_address, value, vertex);
+}
+
+F32 IREmitter::GetPatch(Patch patch) {
+ return Inst<F32>(Opcode::GetPatch, patch);
+}
+
+void IREmitter::SetPatch(Patch patch, const F32& value) {
+ Inst(Opcode::SetPatch, patch, value);
+}
+
+void IREmitter::SetFragColor(u32 index, u32 component, const F32& value) {
+ Inst(Opcode::SetFragColor, Imm32(index), Imm32(component), value);
+}
+
+void IREmitter::SetSampleMask(const U32& value) {
+ Inst(Opcode::SetSampleMask, value);
+}
+
+void IREmitter::SetFragDepth(const F32& value) {
+ Inst(Opcode::SetFragDepth, value);
+}
+
+U32 IREmitter::WorkgroupIdX() {
+ return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 0)};
+}
+
+U32 IREmitter::WorkgroupIdY() {
+ return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 1)};
+}
+
+U32 IREmitter::WorkgroupIdZ() {
+ return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 2)};
+}
+
+Value IREmitter::LocalInvocationId() {
+ return Inst(Opcode::LocalInvocationId);
+}
+
+U32 IREmitter::LocalInvocationIdX() {
+ return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 0)};
+}
+
+U32 IREmitter::LocalInvocationIdY() {
+ return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 1)};
+}
+
+U32 IREmitter::LocalInvocationIdZ() {
+ return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 2)};
+}
+
+U32 IREmitter::InvocationId() {
+ return Inst<U32>(Opcode::InvocationId);
+}
+
+U32 IREmitter::SampleId() {
+ return Inst<U32>(Opcode::SampleId);
+}
+
+U1 IREmitter::IsHelperInvocation() {
+ return Inst<U1>(Opcode::IsHelperInvocation);
+}
+
+F32 IREmitter::YDirection() {
+ return Inst<F32>(Opcode::YDirection);
+}
+
+U32 IREmitter::LaneId() {
+ return Inst<U32>(Opcode::LaneId);
+}
+
+U32 IREmitter::LoadGlobalU8(const U64& address) {
+ return Inst<U32>(Opcode::LoadGlobalU8, address);
+}
+
+U32 IREmitter::LoadGlobalS8(const U64& address) {
+ return Inst<U32>(Opcode::LoadGlobalS8, address);
+}
+
+U32 IREmitter::LoadGlobalU16(const U64& address) {
+ return Inst<U32>(Opcode::LoadGlobalU16, address);
+}
+
+U32 IREmitter::LoadGlobalS16(const U64& address) {
+ return Inst<U32>(Opcode::LoadGlobalS16, address);
+}
+
+U32 IREmitter::LoadGlobal32(const U64& address) {
+ return Inst<U32>(Opcode::LoadGlobal32, address);
+}
+
+Value IREmitter::LoadGlobal64(const U64& address) {
+ return Inst<Value>(Opcode::LoadGlobal64, address);
+}
+
+Value IREmitter::LoadGlobal128(const U64& address) {
+ return Inst<Value>(Opcode::LoadGlobal128, address);
+}
+
+void IREmitter::WriteGlobalU8(const U64& address, const U32& value) {
+ Inst(Opcode::WriteGlobalU8, address, value);
+}
+
+void IREmitter::WriteGlobalS8(const U64& address, const U32& value) {
+ Inst(Opcode::WriteGlobalS8, address, value);
+}
+
+void IREmitter::WriteGlobalU16(const U64& address, const U32& value) {
+ Inst(Opcode::WriteGlobalU16, address, value);
+}
+
+void IREmitter::WriteGlobalS16(const U64& address, const U32& value) {
+ Inst(Opcode::WriteGlobalS16, address, value);
+}
+
+void IREmitter::WriteGlobal32(const U64& address, const U32& value) {
+ Inst(Opcode::WriteGlobal32, address, value);
+}
+
+void IREmitter::WriteGlobal64(const U64& address, const IR::Value& vector) {
+ Inst(Opcode::WriteGlobal64, address, vector);
+}
+
+void IREmitter::WriteGlobal128(const U64& address, const IR::Value& vector) {
+ Inst(Opcode::WriteGlobal128, address, vector);
+}
+
+U32 IREmitter::LoadLocal(const IR::U32& word_offset) {
+ return Inst<U32>(Opcode::LoadLocal, word_offset);
+}
+
+void IREmitter::WriteLocal(const IR::U32& word_offset, const IR::U32& value) {
+ Inst(Opcode::WriteLocal, word_offset, value);
+}
+
+Value IREmitter::LoadShared(int bit_size, bool is_signed, const IR::U32& offset) {
+ switch (bit_size) {
+ case 8:
+ return Inst(is_signed ? Opcode::LoadSharedS8 : Opcode::LoadSharedU8, offset);
+ case 16:
+ return Inst(is_signed ? Opcode::LoadSharedS16 : Opcode::LoadSharedU16, offset);
+ case 32:
+ return Inst(Opcode::LoadSharedU32, offset);
+ case 64:
+ return Inst(Opcode::LoadSharedU64, offset);
+ case 128:
+ return Inst(Opcode::LoadSharedU128, offset);
+ }
+ throw InvalidArgument("Invalid bit size {}", bit_size);
+}
+
+void IREmitter::WriteShared(int bit_size, const IR::U32& offset, const IR::Value& value) {
+ switch (bit_size) {
+ case 8:
+ Inst(Opcode::WriteSharedU8, offset, value);
+ break;
+ case 16:
+ Inst(Opcode::WriteSharedU16, offset, value);
+ break;
+ case 32:
+ Inst(Opcode::WriteSharedU32, offset, value);
+ break;
+ case 64:
+ Inst(Opcode::WriteSharedU64, offset, value);
+ break;
+ case 128:
+ Inst(Opcode::WriteSharedU128, offset, value);
+ break;
+ default:
+ throw InvalidArgument("Invalid bit size {}", bit_size);
+ }
+}
+
+U1 IREmitter::GetZeroFromOp(const Value& op) {
+ return Inst<U1>(Opcode::GetZeroFromOp, op);
+}
+
+U1 IREmitter::GetSignFromOp(const Value& op) {
+ return Inst<U1>(Opcode::GetSignFromOp, op);
+}
+
+U1 IREmitter::GetCarryFromOp(const Value& op) {
+ return Inst<U1>(Opcode::GetCarryFromOp, op);
+}
+
+U1 IREmitter::GetOverflowFromOp(const Value& op) {
+ return Inst<U1>(Opcode::GetOverflowFromOp, op);
+}
+
+U1 IREmitter::GetSparseFromOp(const Value& op) {
+ return Inst<U1>(Opcode::GetSparseFromOp, op);
+}
+
+U1 IREmitter::GetInBoundsFromOp(const Value& op) {
+ return Inst<U1>(Opcode::GetInBoundsFromOp, op);
+}
+
+F16F32F64 IREmitter::FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control) {
+ if (a.Type() != b.Type()) {
+ throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
+ }
+ switch (a.Type()) {
+ case Type::F16:
+ return Inst<F16>(Opcode::FPAdd16, Flags{control}, a, b);
+ case Type::F32:
+ return Inst<F32>(Opcode::FPAdd32, Flags{control}, a, b);
+ case Type::F64:
+ return Inst<F64>(Opcode::FPAdd64, Flags{control}, a, b);
+ default:
+ ThrowInvalidType(a.Type());
+ }
+}
+
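+// Composite construct/extract/insert dispatch on the element or vector type;
+// constructing from mismatched element types throws InvalidArgument.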
+Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2) {
+ if (e1.Type() != e2.Type()) {
+ throw InvalidArgument("Mismatching types {} and {}", e1.Type(), e2.Type());
+ }
+ switch (e1.Type()) {
+ case Type::U32:
+ return Inst(Opcode::CompositeConstructU32x2, e1, e2);
+ case Type::F16:
+ return Inst(Opcode::CompositeConstructF16x2, e1, e2);
+ case Type::F32:
+ return Inst(Opcode::CompositeConstructF32x2, e1, e2);
+ case Type::F64:
+ return Inst(Opcode::CompositeConstructF64x2, e1, e2);
+ default:
+ ThrowInvalidType(e1.Type());
+ }
+}
+
+Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Value& e3) {
+ if (e1.Type() != e2.Type() || e1.Type() != e3.Type()) {
+ throw InvalidArgument("Mismatching types {}, {}, and {}", e1.Type(), e2.Type(), e3.Type());
+ }
+ switch (e1.Type()) {
+ case Type::U32:
+ return Inst(Opcode::CompositeConstructU32x3, e1, e2, e3);
+ case Type::F16:
+ return Inst(Opcode::CompositeConstructF16x3, e1, e2, e3);
+ case Type::F32:
+ return Inst(Opcode::CompositeConstructF32x3, e1, e2, e3);
+ case Type::F64:
+ return Inst(Opcode::CompositeConstructF64x3, e1, e2, e3);
+ default:
+ ThrowInvalidType(e1.Type());
+ }
+}
+
+Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Value& e3,
+ const Value& e4) {
+ if (e1.Type() != e2.Type() || e1.Type() != e3.Type() || e1.Type() != e4.Type()) {
+ throw InvalidArgument("Mismatching types {}, {}, {}, and {}", e1.Type(), e2.Type(),
+ e3.Type(), e4.Type());
+ }
+ switch (e1.Type()) {
+ case Type::U32:
+ return Inst(Opcode::CompositeConstructU32x4, e1, e2, e3, e4);
+ case Type::F16:
+ return Inst(Opcode::CompositeConstructF16x4, e1, e2, e3, e4);
+ case Type::F32:
+ return Inst(Opcode::CompositeConstructF32x4, e1, e2, e3, e4);
+ case Type::F64:
+ return Inst(Opcode::CompositeConstructF64x4, e1, e2, e3, e4);
+ default:
+ ThrowInvalidType(e1.Type());
+ }
+}
+
+Value IREmitter::CompositeExtract(const Value& vector, size_t element) {
+ const auto read{[&](Opcode opcode, size_t limit) -> Value {
+ if (element >= limit) {
+ throw InvalidArgument("Out of bounds element {}", element);
+ }
+ return Inst(opcode, vector, Value{static_cast<u32>(element)});
+ }};
+ switch (vector.Type()) {
+ case Type::U32x2:
+ return read(Opcode::CompositeExtractU32x2, 2);
+ case Type::U32x3:
+ return read(Opcode::CompositeExtractU32x3, 3);
+ case Type::U32x4:
+ return read(Opcode::CompositeExtractU32x4, 4);
+ case Type::F16x2:
+ return read(Opcode::CompositeExtractF16x2, 2);
+ case Type::F16x3:
+ return read(Opcode::CompositeExtractF16x3, 3);
+ case Type::F16x4:
+ return read(Opcode::CompositeExtractF16x4, 4);
+ case Type::F32x2:
+ return read(Opcode::CompositeExtractF32x2, 2);
+ case Type::F32x3:
+ return read(Opcode::CompositeExtractF32x3, 3);
+ case Type::F32x4:
+ return read(Opcode::CompositeExtractF32x4, 4);
+ case Type::F64x2:
+ return read(Opcode::CompositeExtractF64x2, 2);
+ case Type::F64x3:
+ return read(Opcode::CompositeExtractF64x3, 3);
+ case Type::F64x4:
+ return read(Opcode::CompositeExtractF64x4, 4);
+ default:
+ ThrowInvalidType(vector.Type());
+ }
+}
+
+Value IREmitter::CompositeInsert(const Value& vector, const Value& object, size_t element) {
+ const auto insert{[&](Opcode opcode, size_t limit) {
+ if (element >= limit) {
+ throw InvalidArgument("Out of bounds element {}", element);
+ }
+ return Inst(opcode, vector, object, Value{static_cast<u32>(element)});
+ }};
+ switch (vector.Type()) {
+ case Type::U32x2:
+ return insert(Opcode::CompositeInsertU32x2, 2);
+ case Type::U32x3:
+ return insert(Opcode::CompositeInsertU32x3, 3);
+ case Type::U32x4:
+ return insert(Opcode::CompositeInsertU32x4, 4);
+ case Type::F16x2:
+ return insert(Opcode::CompositeInsertF16x2, 2);
+ case Type::F16x3:
+ return insert(Opcode::CompositeInsertF16x3, 3);
+ case Type::F16x4:
+ return insert(Opcode::CompositeInsertF16x4, 4);
+ case Type::F32x2:
+ return insert(Opcode::CompositeInsertF32x2, 2);
+ case Type::F32x3:
+ return insert(Opcode::CompositeInsertF32x3, 3);
+ case Type::F32x4:
+ return insert(Opcode::CompositeInsertF32x4, 4);
+ case Type::F64x2:
+ return insert(Opcode::CompositeInsertF64x2, 2);
+ case Type::F64x3:
+ return insert(Opcode::CompositeInsertF64x3, 3);
+ case Type::F64x4:
+ return insert(Opcode::CompositeInsertF64x4, 4);
+ default:
+ ThrowInvalidType(vector.Type());
+ }
+}
+
+Value IREmitter::Select(const U1& condition, const Value& true_value, const Value& false_value) {
+ if (true_value.Type() != false_value.Type()) {
+ throw InvalidArgument("Mismatching types {} and {}", true_value.Type(), false_value.Type());
+ }
+ switch (true_value.Type()) {
+ case Type::U1:
+ return Inst(Opcode::SelectU1, condition, true_value, false_value);
+ case Type::U8:
+ return Inst(Opcode::SelectU8, condition, true_value, false_value);
+ case Type::U16:
+ return Inst(Opcode::SelectU16, condition, true_value, false_value);
+ case Type::U32:
+ return Inst(Opcode::SelectU32, condition, true_value, false_value);
+ case Type::U64:
+ return Inst(Opcode::SelectU64, condition, true_value, false_value);
+ case Type::F32:
+ return Inst(Opcode::SelectF32, condition, true_value, false_value);
+ case Type::F64:
+ return Inst(Opcode::SelectF64, condition, true_value, false_value);
+ default:
+ throw InvalidArgument("Invalid type {}", true_value.Type());
+ }
+}
+
+template <>
+IR::U32 IREmitter::BitCast<IR::U32, IR::F32>(const IR::F32& value) {
+ return Inst<IR::U32>(Opcode::BitCastU32F32, value);
+}
+
+template <>
+IR::F32 IREmitter::BitCast<IR::F32, IR::U32>(const IR::U32& value) {
+ return Inst<IR::F32>(Opcode::BitCastF32U32, value);
+}
+
+template <>
+IR::U16 IREmitter::BitCast<IR::U16, IR::F16>(const IR::F16& value) {
+ return Inst<IR::U16>(Opcode::BitCastU16F16, value);
+}
+
+template <>
+IR::F16 IREmitter::BitCast<IR::F16, IR::U16>(const IR::U16& value) {
+ return Inst<IR::F16>(Opcode::BitCastF16U16, value);
+}
+
+template <>
+IR::U64 IREmitter::BitCast<IR::U64, IR::F64>(const IR::F64& value) {
+ return Inst<IR::U64>(Opcode::BitCastU64F64, value);
+}
+
+template <>
+IR::F64 IREmitter::BitCast<IR::F64, IR::U64>(const IR::U64& value) {
+ return Inst<IR::F64>(Opcode::BitCastF64U64, value);
+}
+
+U64 IREmitter::PackUint2x32(const Value& vector) {
+ return Inst<U64>(Opcode::PackUint2x32, vector);
+}
+
+Value IREmitter::UnpackUint2x32(const U64& value) {
+ return Inst<Value>(Opcode::UnpackUint2x32, value);
+}
+
+U32 IREmitter::PackFloat2x16(const Value& vector) {
+ return Inst<U32>(Opcode::PackFloat2x16, vector);
+}
+
+Value IREmitter::UnpackFloat2x16(const U32& value) {
+ return Inst(Opcode::UnpackFloat2x16, value);
+}
+
+U32 IREmitter::PackHalf2x16(const Value& vector) {
+ return Inst<U32>(Opcode::PackHalf2x16, vector);
+}
+
+Value IREmitter::UnpackHalf2x16(const U32& value) {
+ return Inst(Opcode::UnpackHalf2x16, value);
+}
+
+F64 IREmitter::PackDouble2x32(const Value& vector) {
+ return Inst<F64>(Opcode::PackDouble2x32, vector);
+}
+
+Value IREmitter::UnpackDouble2x32(const F64& value) {
+ return Inst<Value>(Opcode::UnpackDouble2x32, value);
+}
+
+F16F32F64 IREmitter::FPMul(const F16F32F64& a, const F16F32F64& b, FpControl control) {
+ if (a.Type() != b.Type()) {
+ throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
+ }
+ switch (a.Type()) {
+ case Type::F16:
+ return Inst<F16>(Opcode::FPMul16, Flags{control}, a, b);
+ case Type::F32:
+ return Inst<F32>(Opcode::FPMul32, Flags{control}, a, b);
+ case Type::F64:
+ return Inst<F64>(Opcode::FPMul64, Flags{control}, a, b);
+ default:
+ ThrowInvalidType(a.Type());
+ }
+}
+
+F16F32F64 IREmitter::FPFma(const F16F32F64& a, const F16F32F64& b, const F16F32F64& c,
+ FpControl control) {
+ if (a.Type() != b.Type() || a.Type() != c.Type()) {
+ throw InvalidArgument("Mismatching types {}, {}, and {}", a.Type(), b.Type(), c.Type());
+ }
+ switch (a.Type()) {
+ case Type::F16:
+ return Inst<F16>(Opcode::FPFma16, Flags{control}, a, b, c);
+ case Type::F32:
+ return Inst<F32>(Opcode::FPFma32, Flags{control}, a, b, c);
+ case Type::F64:
+ return Inst<F64>(Opcode::FPFma64, Flags{control}, a, b, c);
+ default:
+ ThrowInvalidType(a.Type());
+ }
+}
+
+F16F32F64 IREmitter::FPAbs(const F16F32F64& value) {
+ switch (value.Type()) {
+ case Type::F16:
+ return Inst<F16>(Opcode::FPAbs16, value);
+ case Type::F32:
+ return Inst<F32>(Opcode::FPAbs32, value);
+ case Type::F64:
+ return Inst<F64>(Opcode::FPAbs64, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+}
+
+F16F32F64 IREmitter::FPNeg(const F16F32F64& value) {
+ switch (value.Type()) {
+ case Type::F16:
+ return Inst<F16>(Opcode::FPNeg16, value);
+ case Type::F32:
+ return Inst<F32>(Opcode::FPNeg32, value);
+ case Type::F64:
+ return Inst<F64>(Opcode::FPNeg64, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+}
+
+F16F32F64 IREmitter::FPAbsNeg(const F16F32F64& value, bool abs, bool neg) {
+ F16F32F64 result{value};
+ if (abs) {
+ result = FPAbs(result);
+ }
+ if (neg) {
+ result = FPNeg(result);
+ }
+ return result;
+}
+
+F32 IREmitter::FPCos(const F32& value) {
+ return Inst<F32>(Opcode::FPCos, value);
+}
+
+F32 IREmitter::FPSin(const F32& value) {
+ return Inst<F32>(Opcode::FPSin, value);
+}
+
+F32 IREmitter::FPExp2(const F32& value) {
+ return Inst<F32>(Opcode::FPExp2, value);
+}
+
+F32 IREmitter::FPLog2(const F32& value) {
+ return Inst<F32>(Opcode::FPLog2, value);
+}
+
+F32F64 IREmitter::FPRecip(const F32F64& value) {
+ switch (value.Type()) {
+ case Type::F32:
+ return Inst<F32>(Opcode::FPRecip32, value);
+ case Type::F64:
+ return Inst<F64>(Opcode::FPRecip64, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+}
+
+F32F64 IREmitter::FPRecipSqrt(const F32F64& value) {
+ switch (value.Type()) {
+ case Type::F32:
+ return Inst<F32>(Opcode::FPRecipSqrt32, value);
+ case Type::F64:
+ return Inst<F64>(Opcode::FPRecipSqrt64, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+}
+
+F32 IREmitter::FPSqrt(const F32& value) {
+ return Inst<F32>(Opcode::FPSqrt, value);
+}
+
+F16F32F64 IREmitter::FPSaturate(const F16F32F64& value) {
+ switch (value.Type()) {
+ case Type::F16:
+ return Inst<F16>(Opcode::FPSaturate16, value);
+ case Type::F32:
+ return Inst<F32>(Opcode::FPSaturate32, value);
+ case Type::F64:
+ return Inst<F64>(Opcode::FPSaturate64, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+}
+
+F16F32F64 IREmitter::FPClamp(const F16F32F64& value, const F16F32F64& min_value,
+ const F16F32F64& max_value) {
+ if (value.Type() != min_value.Type() || value.Type() != max_value.Type()) {
+ throw InvalidArgument("Mismatching types {}, {}, and {}", value.Type(), min_value.Type(),
+ max_value.Type());
+ }
+ switch (value.Type()) {
+ case Type::F16:
+ return Inst<F16>(Opcode::FPClamp16, value, min_value, max_value);
+ case Type::F32:
+ return Inst<F32>(Opcode::FPClamp32, value, min_value, max_value);
+ case Type::F64:
+ return Inst<F64>(Opcode::FPClamp64, value, min_value, max_value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+}
+
+F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value, FpControl control) {
+ switch (value.Type()) {
+ case Type::F16:
+ return Inst<F16>(Opcode::FPRoundEven16, Flags{control}, value);
+ case Type::F32:
+ return Inst<F32>(Opcode::FPRoundEven32, Flags{control}, value);
+ case Type::F64:
+ return Inst<F64>(Opcode::FPRoundEven64, Flags{control}, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+}
+
+F16F32F64 IREmitter::FPFloor(const F16F32F64& value, FpControl control) {
+ switch (value.Type()) {
+ case Type::F16:
+ return Inst<F16>(Opcode::FPFloor16, Flags{control}, value);
+ case Type::F32:
+ return Inst<F32>(Opcode::FPFloor32, Flags{control}, value);
+ case Type::F64:
+ return Inst<F64>(Opcode::FPFloor64, Flags{control}, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+}
+
+F16F32F64 IREmitter::FPCeil(const F16F32F64& value, FpControl control) {
+ switch (value.Type()) {
+ case Type::F16:
+ return Inst<F16>(Opcode::FPCeil16, Flags{control}, value);
+ case Type::F32:
+ return Inst<F32>(Opcode::FPCeil32, Flags{control}, value);
+ case Type::F64:
+ return Inst<F64>(Opcode::FPCeil64, Flags{control}, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+}
+
+F16F32F64 IREmitter::FPTrunc(const F16F32F64& value, FpControl control) {
+ switch (value.Type()) {
+ case Type::F16:
+ return Inst<F16>(Opcode::FPTrunc16, Flags{control}, value);
+ case Type::F32:
+ return Inst<F32>(Opcode::FPTrunc32, Flags{control}, value);
+ case Type::F64:
+ return Inst<F64>(Opcode::FPTrunc64, Flags{control}, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+}
+
+U1 IREmitter::FPEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control, bool ordered) {
+ if (lhs.Type() != rhs.Type()) {
+ throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
+ }
+ switch (lhs.Type()) {
+ case Type::F16:
+ return Inst<U1>(ordered ? Opcode::FPOrdEqual16 : Opcode::FPUnordEqual16, Flags{control},
+ lhs, rhs);
+ case Type::F32:
+ return Inst<U1>(ordered ? Opcode::FPOrdEqual32 : Opcode::FPUnordEqual32, Flags{control},
+ lhs, rhs);
+ case Type::F64:
+ return Inst<U1>(ordered ? Opcode::FPOrdEqual64 : Opcode::FPUnordEqual64, Flags{control},
+ lhs, rhs);
+ default:
+ ThrowInvalidType(lhs.Type());
+ }
+}
+
+U1 IREmitter::FPNotEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control,
+ bool ordered) {
+ if (lhs.Type() != rhs.Type()) {
+ throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
+ }
+ switch (lhs.Type()) {
+ case Type::F16:
+ return Inst<U1>(ordered ? Opcode::FPOrdNotEqual16 : Opcode::FPUnordNotEqual16,
+ Flags{control}, lhs, rhs);
+ case Type::F32:
+ return Inst<U1>(ordered ? Opcode::FPOrdNotEqual32 : Opcode::FPUnordNotEqual32,
+ Flags{control}, lhs, rhs);
+ case Type::F64:
+ return Inst<U1>(ordered ? Opcode::FPOrdNotEqual64 : Opcode::FPUnordNotEqual64,
+ Flags{control}, lhs, rhs);
+ default:
+ ThrowInvalidType(lhs.Type());
+ }
+}
+
+U1 IREmitter::FPLessThan(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control,
+ bool ordered) {
+ if (lhs.Type() != rhs.Type()) {
+ throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
+ }
+ switch (lhs.Type()) {
+ case Type::F16:
+ return Inst<U1>(ordered ? Opcode::FPOrdLessThan16 : Opcode::FPUnordLessThan16,
+ Flags{control}, lhs, rhs);
+ case Type::F32:
+ return Inst<U1>(ordered ? Opcode::FPOrdLessThan32 : Opcode::FPUnordLessThan32,
+ Flags{control}, lhs, rhs);
+ case Type::F64:
+ return Inst<U1>(ordered ? Opcode::FPOrdLessThan64 : Opcode::FPUnordLessThan64,
+ Flags{control}, lhs, rhs);
+ default:
+ ThrowInvalidType(lhs.Type());
+ }
+}
+
+U1 IREmitter::FPGreaterThan(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control,
+ bool ordered) {
+ if (lhs.Type() != rhs.Type()) {
+ throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
+ }
+ switch (lhs.Type()) {
+ case Type::F16:
+ return Inst<U1>(ordered ? Opcode::FPOrdGreaterThan16 : Opcode::FPUnordGreaterThan16,
+ Flags{control}, lhs, rhs);
+ case Type::F32:
+ return Inst<U1>(ordered ? Opcode::FPOrdGreaterThan32 : Opcode::FPUnordGreaterThan32,
+ Flags{control}, lhs, rhs);
+ case Type::F64:
+ return Inst<U1>(ordered ? Opcode::FPOrdGreaterThan64 : Opcode::FPUnordGreaterThan64,
+ Flags{control}, lhs, rhs);
+ default:
+ ThrowInvalidType(lhs.Type());
+ }
+}
+
+U1 IREmitter::FPLessThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control,
+ bool ordered) {
+ if (lhs.Type() != rhs.Type()) {
+ throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
+ }
+ switch (lhs.Type()) {
+ case Type::F16:
+ return Inst<U1>(ordered ? Opcode::FPOrdLessThanEqual16 : Opcode::FPUnordLessThanEqual16,
+ Flags{control}, lhs, rhs);
+ case Type::F32:
+ return Inst<U1>(ordered ? Opcode::FPOrdLessThanEqual32 : Opcode::FPUnordLessThanEqual32,
+ Flags{control}, lhs, rhs);
+ case Type::F64:
+ return Inst<U1>(ordered ? Opcode::FPOrdLessThanEqual64 : Opcode::FPUnordLessThanEqual64,
+ Flags{control}, lhs, rhs);
+ default:
+ ThrowInvalidType(lhs.Type());
+ }
+}
+
+U1 IREmitter::FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control,
+ bool ordered) {
+ if (lhs.Type() != rhs.Type()) {
+ throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
+ }
+ switch (lhs.Type()) {
+ case Type::F16:
+ return Inst<U1>(ordered ? Opcode::FPOrdGreaterThanEqual16
+ : Opcode::FPUnordGreaterThanEqual16,
+ Flags{control}, lhs, rhs);
+ case Type::F32:
+ return Inst<U1>(ordered ? Opcode::FPOrdGreaterThanEqual32
+ : Opcode::FPUnordGreaterThanEqual32,
+ Flags{control}, lhs, rhs);
+ case Type::F64:
+ return Inst<U1>(ordered ? Opcode::FPOrdGreaterThanEqual64
+ : Opcode::FPUnordGreaterThanEqual64,
+ Flags{control}, lhs, rhs);
+ default:
+ ThrowInvalidType(lhs.Type());
+ }
+}
+
+U1 IREmitter::FPIsNan(const F16F32F64& value) {
+ switch (value.Type()) {
+ case Type::F16:
+ return Inst<U1>(Opcode::FPIsNan16, value);
+ case Type::F32:
+ return Inst<U1>(Opcode::FPIsNan32, value);
+ case Type::F64:
+ return Inst<U1>(Opcode::FPIsNan64, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+}
+
+U1 IREmitter::FPOrdered(const F16F32F64& lhs, const F16F32F64& rhs) {
+ if (lhs.Type() != rhs.Type()) {
+ throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
+ }
+ return LogicalAnd(LogicalNot(FPIsNan(lhs)), LogicalNot(FPIsNan(rhs)));
+}
+
+U1 IREmitter::FPUnordered(const F16F32F64& lhs, const F16F32F64& rhs) {
+ if (lhs.Type() != rhs.Type()) {
+ throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
+ }
+ return LogicalOr(FPIsNan(lhs), FPIsNan(rhs));
+}
+
+F32F64 IREmitter::FPMax(const F32F64& lhs, const F32F64& rhs, FpControl control) {
+ if (lhs.Type() != rhs.Type()) {
+ throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
+ }
+ switch (lhs.Type()) {
+ case Type::F32:
+ return Inst<F32>(Opcode::FPMax32, Flags{control}, lhs, rhs);
+ case Type::F64:
+ return Inst<F64>(Opcode::FPMax64, Flags{control}, lhs, rhs);
+ default:
+ ThrowInvalidType(lhs.Type());
+ }
+}
+
+F32F64 IREmitter::FPMin(const F32F64& lhs, const F32F64& rhs, FpControl control) {
+ if (lhs.Type() != rhs.Type()) {
+ throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
+ }
+ switch (lhs.Type()) {
+ case Type::F32:
+ return Inst<F32>(Opcode::FPMin32, Flags{control}, lhs, rhs);
+ case Type::F64:
+ return Inst<F64>(Opcode::FPMin64, Flags{control}, lhs, rhs);
+ default:
+ ThrowInvalidType(lhs.Type());
+ }
+}
+
+U32U64 IREmitter::IAdd(const U32U64& a, const U32U64& b) {
+ if (a.Type() != b.Type()) {
+ throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
+ }
+ switch (a.Type()) {
+ case Type::U32:
+ return Inst<U32>(Opcode::IAdd32, a, b);
+ case Type::U64:
+ return Inst<U64>(Opcode::IAdd64, a, b);
+ default:
+ ThrowInvalidType(a.Type());
+ }
+}
+
+U32U64 IREmitter::ISub(const U32U64& a, const U32U64& b) {
+ if (a.Type() != b.Type()) {
+ throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
+ }
+ switch (a.Type()) {
+ case Type::U32:
+ return Inst<U32>(Opcode::ISub32, a, b);
+ case Type::U64:
+ return Inst<U64>(Opcode::ISub64, a, b);
+ default:
+ ThrowInvalidType(a.Type());
+ }
+}
+
+U32 IREmitter::IMul(const U32& a, const U32& b) {
+ return Inst<U32>(Opcode::IMul32, a, b);
+}
+
+U32U64 IREmitter::INeg(const U32U64& value) {
+ switch (value.Type()) {
+ case Type::U32:
+ return Inst<U32>(Opcode::INeg32, value);
+ case Type::U64:
+ return Inst<U64>(Opcode::INeg64, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+}
+
+U32 IREmitter::IAbs(const U32& value) {
+ return Inst<U32>(Opcode::IAbs32, value);
+}
+
+U32U64 IREmitter::ShiftLeftLogical(const U32U64& base, const U32& shift) {
+ switch (base.Type()) {
+ case Type::U32:
+ return Inst<U32>(Opcode::ShiftLeftLogical32, base, shift);
+ case Type::U64:
+ return Inst<U64>(Opcode::ShiftLeftLogical64, base, shift);
+ default:
+ ThrowInvalidType(base.Type());
+ }
+}
+
+U32U64 IREmitter::ShiftRightLogical(const U32U64& base, const U32& shift) {
+ switch (base.Type()) {
+ case Type::U32:
+ return Inst<U32>(Opcode::ShiftRightLogical32, base, shift);
+ case Type::U64:
+ return Inst<U64>(Opcode::ShiftRightLogical64, base, shift);
+ default:
+ ThrowInvalidType(base.Type());
+ }
+}
+
+U32U64 IREmitter::ShiftRightArithmetic(const U32U64& base, const U32& shift) {
+ switch (base.Type()) {
+ case Type::U32:
+ return Inst<U32>(Opcode::ShiftRightArithmetic32, base, shift);
+ case Type::U64:
+ return Inst<U64>(Opcode::ShiftRightArithmetic64, base, shift);
+ default:
+ ThrowInvalidType(base.Type());
+ }
+}
+
+U32 IREmitter::BitwiseAnd(const U32& a, const U32& b) {
+ return Inst<U32>(Opcode::BitwiseAnd32, a, b);
+}
+
+U32 IREmitter::BitwiseOr(const U32& a, const U32& b) {
+ return Inst<U32>(Opcode::BitwiseOr32, a, b);
+}
+
+U32 IREmitter::BitwiseXor(const U32& a, const U32& b) {
+ return Inst<U32>(Opcode::BitwiseXor32, a, b);
+}
+
+U32 IREmitter::BitFieldInsert(const U32& base, const U32& insert, const U32& offset,
+ const U32& count) {
+ return Inst<U32>(Opcode::BitFieldInsert, base, insert, offset, count);
+}
+
+U32 IREmitter::BitFieldExtract(const U32& base, const U32& offset, const U32& count,
+ bool is_signed) {
+ return Inst<U32>(is_signed ? Opcode::BitFieldSExtract : Opcode::BitFieldUExtract, base, offset,
+ count);
+}
+
+U32 IREmitter::BitReverse(const U32& value) {
+ return Inst<U32>(Opcode::BitReverse32, value);
+}
+
+U32 IREmitter::BitCount(const U32& value) {
+ return Inst<U32>(Opcode::BitCount32, value);
+}
+
+U32 IREmitter::BitwiseNot(const U32& value) {
+ return Inst<U32>(Opcode::BitwiseNot32, value);
+}
+
+U32 IREmitter::FindSMsb(const U32& value) {
+ return Inst<U32>(Opcode::FindSMsb32, value);
+}
+
+U32 IREmitter::FindUMsb(const U32& value) {
+ return Inst<U32>(Opcode::FindUMsb32, value);
+}
+
+U32 IREmitter::SMin(const U32& a, const U32& b) {
+ return Inst<U32>(Opcode::SMin32, a, b);
+}
+
+U32 IREmitter::UMin(const U32& a, const U32& b) {
+ return Inst<U32>(Opcode::UMin32, a, b);
+}
+
+U32 IREmitter::IMin(const U32& a, const U32& b, bool is_signed) {
+ return is_signed ? SMin(a, b) : UMin(a, b);
+}
+
+U32 IREmitter::SMax(const U32& a, const U32& b) {
+ return Inst<U32>(Opcode::SMax32, a, b);
+}
+
+U32 IREmitter::UMax(const U32& a, const U32& b) {
+ return Inst<U32>(Opcode::UMax32, a, b);
+}
+
+U32 IREmitter::IMax(const U32& a, const U32& b, bool is_signed) {
+ return is_signed ? SMax(a, b) : UMax(a, b);
+}
+
+U32 IREmitter::SClamp(const U32& value, const U32& min, const U32& max) {
+ return Inst<U32>(Opcode::SClamp32, value, min, max);
+}
+
+U32 IREmitter::UClamp(const U32& value, const U32& min, const U32& max) {
+ return Inst<U32>(Opcode::UClamp32, value, min, max);
+}
+
+U1 IREmitter::ILessThan(const U32& lhs, const U32& rhs, bool is_signed) {
+ return Inst<U1>(is_signed ? Opcode::SLessThan : Opcode::ULessThan, lhs, rhs);
+}
+
+U1 IREmitter::IEqual(const U32U64& lhs, const U32U64& rhs) {
+ if (lhs.Type() != rhs.Type()) {
+ throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
+ }
+ switch (lhs.Type()) {
+ case Type::U32:
+ return Inst<U1>(Opcode::IEqual, lhs, rhs);
+ case Type::U64: {
+ // Manually compare the unpacked values
+ const Value lhs_vector{UnpackUint2x32(lhs)};
+ const Value rhs_vector{UnpackUint2x32(rhs)};
+ return LogicalAnd(IEqual(IR::U32{CompositeExtract(lhs_vector, 0)},
+ IR::U32{CompositeExtract(rhs_vector, 0)}),
+ IEqual(IR::U32{CompositeExtract(lhs_vector, 1)},
+ IR::U32{CompositeExtract(rhs_vector, 1)}));
+ }
+ default:
+ ThrowInvalidType(lhs.Type());
+ }
+}
+
+U1 IREmitter::ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed) {
+ return Inst<U1>(is_signed ? Opcode::SLessThanEqual : Opcode::ULessThanEqual, lhs, rhs);
+}
+
+U1 IREmitter::IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed) {
+ return Inst<U1>(is_signed ? Opcode::SGreaterThan : Opcode::UGreaterThan, lhs, rhs);
+}
+
+U1 IREmitter::INotEqual(const U32& lhs, const U32& rhs) {
+ return Inst<U1>(Opcode::INotEqual, lhs, rhs);
+}
+
+U1 IREmitter::IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed) {
+ return Inst<U1>(is_signed ? Opcode::SGreaterThanEqual : Opcode::UGreaterThanEqual, lhs, rhs);
+}
+
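+// Shared-memory atomics (32-bit, plus a 64-bit exchange); the IMin/IMax
+// helpers select the signed or unsigned opcode from is_signed.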
+U32 IREmitter::SharedAtomicIAdd(const U32& pointer_offset, const U32& value) {
+ return Inst<U32>(Opcode::SharedAtomicIAdd32, pointer_offset, value);
+}
+
+U32 IREmitter::SharedAtomicSMin(const U32& pointer_offset, const U32& value) {
+ return Inst<U32>(Opcode::SharedAtomicSMin32, pointer_offset, value);
+}
+
+U32 IREmitter::SharedAtomicUMin(const U32& pointer_offset, const U32& value) {
+ return Inst<U32>(Opcode::SharedAtomicUMin32, pointer_offset, value);
+}
+
+U32 IREmitter::SharedAtomicIMin(const U32& pointer_offset, const U32& value, bool is_signed) {
+ return is_signed ? SharedAtomicSMin(pointer_offset, value)
+ : SharedAtomicUMin(pointer_offset, value);
+}
+
+U32 IREmitter::SharedAtomicSMax(const U32& pointer_offset, const U32& value) {
+ return Inst<U32>(Opcode::SharedAtomicSMax32, pointer_offset, value);
+}
+
+U32 IREmitter::SharedAtomicUMax(const U32& pointer_offset, const U32& value) {
+ return Inst<U32>(Opcode::SharedAtomicUMax32, pointer_offset, value);
+}
+
+U32 IREmitter::SharedAtomicIMax(const U32& pointer_offset, const U32& value, bool is_signed) {
+ return is_signed ? SharedAtomicSMax(pointer_offset, value)
+ : SharedAtomicUMax(pointer_offset, value);
+}
+
+U32 IREmitter::SharedAtomicInc(const U32& pointer_offset, const U32& value) {
+ return Inst<U32>(Opcode::SharedAtomicInc32, pointer_offset, value);
+}
+
+U32 IREmitter::SharedAtomicDec(const U32& pointer_offset, const U32& value) {
+ return Inst<U32>(Opcode::SharedAtomicDec32, pointer_offset, value);
+}
+
+U32 IREmitter::SharedAtomicAnd(const U32& pointer_offset, const U32& value) {
+ return Inst<U32>(Opcode::SharedAtomicAnd32, pointer_offset, value);
+}
+
+U32 IREmitter::SharedAtomicOr(const U32& pointer_offset, const U32& value) {
+ return Inst<U32>(Opcode::SharedAtomicOr32, pointer_offset, value);
+}
+
+U32 IREmitter::SharedAtomicXor(const U32& pointer_offset, const U32& value) {
+ return Inst<U32>(Opcode::SharedAtomicXor32, pointer_offset, value);
+}
+
+U32U64 IREmitter::SharedAtomicExchange(const U32& pointer_offset, const U32U64& value) {
+ switch (value.Type()) {
+ case Type::U32:
+ return Inst<U32>(Opcode::SharedAtomicExchange32, pointer_offset, value);
+ case Type::U64:
+ return Inst<U64>(Opcode::SharedAtomicExchange64, pointer_offset, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+}
+
+U32U64 IREmitter::GlobalAtomicIAdd(const U64& pointer_offset, const U32U64& value) {
+ switch (value.Type()) {
+ case Type::U32:
+ return Inst<U32>(Opcode::GlobalAtomicIAdd32, pointer_offset, value);
+ case Type::U64:
+ return Inst<U64>(Opcode::GlobalAtomicIAdd64, pointer_offset, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+}
+
+U32U64 IREmitter::GlobalAtomicSMin(const U64& pointer_offset, const U32U64& value) {
+ switch (value.Type()) {
+ case Type::U32:
+ return Inst<U32>(Opcode::GlobalAtomicSMin32, pointer_offset, value);
+ case Type::U64:
+ return Inst<U64>(Opcode::GlobalAtomicSMin64, pointer_offset, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+}
+
+U32U64 IREmitter::GlobalAtomicUMin(const U64& pointer_offset, const U32U64& value) {
+ switch (value.Type()) {
+ case Type::U32:
+ return Inst<U32>(Opcode::GlobalAtomicUMin32, pointer_offset, value);
+ case Type::U64:
+ return Inst<U64>(Opcode::GlobalAtomicUMin64, pointer_offset, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+}
+
+U32U64 IREmitter::GlobalAtomicIMin(const U64& pointer_offset, const U32U64& value, bool is_signed) {
+ return is_signed ? GlobalAtomicSMin(pointer_offset, value)
+ : GlobalAtomicUMin(pointer_offset, value);
+}
+
+U32U64 IREmitter::GlobalAtomicSMax(const U64& pointer_offset, const U32U64& value) {
+ switch (value.Type()) {
+ case Type::U32:
+ return Inst<U32>(Opcode::GlobalAtomicSMax32, pointer_offset, value);
+ case Type::U64:
+ return Inst<U64>(Opcode::GlobalAtomicSMax64, pointer_offset, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+}
+
+U32U64 IREmitter::GlobalAtomicUMax(const U64& pointer_offset, const U32U64& value) {
+ switch (value.Type()) {
+ case Type::U32:
+ return Inst<U32>(Opcode::GlobalAtomicUMax32, pointer_offset, value);
+ case Type::U64:
+ return Inst<U64>(Opcode::GlobalAtomicUMax64, pointer_offset, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+}
+
+U32U64 IREmitter::GlobalAtomicIMax(const U64& pointer_offset, const U32U64& value, bool is_signed) {
+ return is_signed ? GlobalAtomicSMax(pointer_offset, value)
+ : GlobalAtomicUMax(pointer_offset, value);
+}
+
+U32 IREmitter::GlobalAtomicInc(const U64& pointer_offset, const U32& value) {
+ return Inst<U32>(Opcode::GlobalAtomicInc32, pointer_offset, value);
+}
+
+U32 IREmitter::GlobalAtomicDec(const U64& pointer_offset, const U32& value) {
+ return Inst<U32>(Opcode::GlobalAtomicDec32, pointer_offset, value);
+}
+
+U32U64 IREmitter::GlobalAtomicAnd(const U64& pointer_offset, const U32U64& value) {
+ switch (value.Type()) {
+ case Type::U32:
+ return Inst<U32>(Opcode::GlobalAtomicAnd32, pointer_offset, value);
+ case Type::U64:
+ return Inst<U64>(Opcode::GlobalAtomicAnd64, pointer_offset, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+}
+
+U32U64 IREmitter::GlobalAtomicOr(const U64& pointer_offset, const U32U64& value) {
+ switch (value.Type()) {
+ case Type::U32:
+ return Inst<U32>(Opcode::GlobalAtomicOr32, pointer_offset, value);
+ case Type::U64:
+ return Inst<U64>(Opcode::GlobalAtomicOr64, pointer_offset, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+}
+
+U32U64 IREmitter::GlobalAtomicXor(const U64& pointer_offset, const U32U64& value) {
+ switch (value.Type()) {
+ case Type::U32:
+ return Inst<U32>(Opcode::GlobalAtomicXor32, pointer_offset, value);
+ case Type::U64:
+ return Inst<U64>(Opcode::GlobalAtomicXor64, pointer_offset, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+}
+
+U32U64 IREmitter::GlobalAtomicExchange(const U64& pointer_offset, const U32U64& value) {
+ switch (value.Type()) {
+ case Type::U32:
+ return Inst<U32>(Opcode::GlobalAtomicExchange32, pointer_offset, value);
+ case Type::U64:
+ return Inst<U64>(Opcode::GlobalAtomicExchange64, pointer_offset, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+}
+
+F32 IREmitter::GlobalAtomicF32Add(const U64& pointer_offset, const Value& value,
+ const FpControl control) {
+ return Inst<F32>(Opcode::GlobalAtomicAddF32, Flags{control}, pointer_offset, value);
+}
+
+Value IREmitter::GlobalAtomicF16x2Add(const U64& pointer_offset, const Value& value,
+ const FpControl control) {
+ return Inst(Opcode::GlobalAtomicAddF16x2, Flags{control}, pointer_offset, value);
+}
+
+Value IREmitter::GlobalAtomicF16x2Min(const U64& pointer_offset, const Value& value,
+ const FpControl control) {
+ return Inst(Opcode::GlobalAtomicMinF16x2, Flags{control}, pointer_offset, value);
+}
+
+Value IREmitter::GlobalAtomicF16x2Max(const U64& pointer_offset, const Value& value,
+ const FpControl control) {
+ return Inst(Opcode::GlobalAtomicMaxF16x2, Flags{control}, pointer_offset, value);
+}
+
+U1 IREmitter::LogicalOr(const U1& a, const U1& b) {
+ return Inst<U1>(Opcode::LogicalOr, a, b);
+}
+
+U1 IREmitter::LogicalAnd(const U1& a, const U1& b) {
+ return Inst<U1>(Opcode::LogicalAnd, a, b);
+}
+
+U1 IREmitter::LogicalXor(const U1& a, const U1& b) {
+ return Inst<U1>(Opcode::LogicalXor, a, b);
+}
+
+U1 IREmitter::LogicalNot(const U1& value) {
+ return Inst<U1>(Opcode::LogicalNot, value);
+}
+
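+// Float-to-integer conversions: 16- and 32-bit results are both produced as
+// U32 values; only 64-bit results use U64.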
+U32U64 IREmitter::ConvertFToS(size_t bitsize, const F16F32F64& value) {
+ switch (bitsize) {
+ case 16:
+ switch (value.Type()) {
+ case Type::F16:
+ return Inst<U32>(Opcode::ConvertS16F16, value);
+ case Type::F32:
+ return Inst<U32>(Opcode::ConvertS16F32, value);
+ case Type::F64:
+ return Inst<U32>(Opcode::ConvertS16F64, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+ case 32:
+ switch (value.Type()) {
+ case Type::F16:
+ return Inst<U32>(Opcode::ConvertS32F16, value);
+ case Type::F32:
+ return Inst<U32>(Opcode::ConvertS32F32, value);
+ case Type::F64:
+ return Inst<U32>(Opcode::ConvertS32F64, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+ case 64:
+ switch (value.Type()) {
+ case Type::F16:
+ return Inst<U64>(Opcode::ConvertS64F16, value);
+ case Type::F32:
+ return Inst<U64>(Opcode::ConvertS64F32, value);
+ case Type::F64:
+ return Inst<U64>(Opcode::ConvertS64F64, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+ default:
+ throw InvalidArgument("Invalid destination bitsize {}", bitsize);
+ }
+}
+
+U32U64 IREmitter::ConvertFToU(size_t bitsize, const F16F32F64& value) {
+ switch (bitsize) {
+ case 16:
+ switch (value.Type()) {
+ case Type::F16:
+ return Inst<U32>(Opcode::ConvertU16F16, value);
+ case Type::F32:
+ return Inst<U32>(Opcode::ConvertU16F32, value);
+ case Type::F64:
+ return Inst<U32>(Opcode::ConvertU16F64, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+ case 32:
+ switch (value.Type()) {
+ case Type::F16:
+ return Inst<U32>(Opcode::ConvertU32F16, value);
+ case Type::F32:
+ return Inst<U32>(Opcode::ConvertU32F32, value);
+ case Type::F64:
+ return Inst<U32>(Opcode::ConvertU32F64, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+ case 64:
+ switch (value.Type()) {
+ case Type::F16:
+ return Inst<U64>(Opcode::ConvertU64F16, value);
+ case Type::F32:
+ return Inst<U64>(Opcode::ConvertU64F32, value);
+ case Type::F64:
+ return Inst<U64>(Opcode::ConvertU64F64, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
+ default:
+ throw InvalidArgument("Invalid destination bitsize {}", bitsize);
+ }
+}
+
+U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value) {
+ return is_signed ? ConvertFToS(bitsize, value) : ConvertFToU(bitsize, value);
+}
+
+F16F32F64 IREmitter::ConvertSToF(size_t dest_bitsize, size_t src_bitsize, const Value& value,
+ FpControl control) {
+ switch (dest_bitsize) {
+ case 16:
+ switch (src_bitsize) {
+ case 8:
+ return Inst<F16>(Opcode::ConvertF16S8, Flags{control}, value);
+ case 16:
+ return Inst<F16>(Opcode::ConvertF16S16, Flags{control}, value);
+ case 32:
+ return Inst<F16>(Opcode::ConvertF16S32, Flags{control}, value);
+ case 64:
+ return Inst<F16>(Opcode::ConvertF16S64, Flags{control}, value);
+ }
+ break;
+ case 32:
+ switch (src_bitsize) {
+ case 8:
+ return Inst<F32>(Opcode::ConvertF32S8, Flags{control}, value);
+ case 16:
+ return Inst<F32>(Opcode::ConvertF32S16, Flags{control}, value);
+ case 32:
+ return Inst<F32>(Opcode::ConvertF32S32, Flags{control}, value);
+ case 64:
+ return Inst<F32>(Opcode::ConvertF32S64, Flags{control}, value);
+ }
+ break;
+ case 64:
+ switch (src_bitsize) {
+ case 8:
+ return Inst<F64>(Opcode::ConvertF64S8, Flags{control}, value);
+ case 16:
+ return Inst<F64>(Opcode::ConvertF64S16, Flags{control}, value);
+ case 32:
+ return Inst<F64>(Opcode::ConvertF64S32, Flags{control}, value);
+ case 64:
+ return Inst<F64>(Opcode::ConvertF64S64, Flags{control}, value);
+ }
+ break;
+ }
+ throw InvalidArgument("Invalid bit size combination dst={} src={}", dest_bitsize, src_bitsize);
+}
+
+F16F32F64 IREmitter::ConvertUToF(size_t dest_bitsize, size_t src_bitsize, const Value& value,
+ FpControl control) {
+ switch (dest_bitsize) {
+ case 16:
+ switch (src_bitsize) {
+ case 8:
+ return Inst<F16>(Opcode::ConvertF16U8, Flags{control}, value);
+ case 16:
+ return Inst<F16>(Opcode::ConvertF16U16, Flags{control}, value);
+ case 32:
+ return Inst<F16>(Opcode::ConvertF16U32, Flags{control}, value);
+ case 64:
+ return Inst<F16>(Opcode::ConvertF16U64, Flags{control}, value);
+ }
+ break;
+ case 32:
+ switch (src_bitsize) {
+ case 8:
+ return Inst<F32>(Opcode::ConvertF32U8, Flags{control}, value);
+ case 16:
+ return Inst<F32>(Opcode::ConvertF32U16, Flags{control}, value);
+ case 32:
+ return Inst<F32>(Opcode::ConvertF32U32, Flags{control}, value);
+ case 64:
+ return Inst<F32>(Opcode::ConvertF32U64, Flags{control}, value);
+ }
+ break;
+ case 64:
+ switch (src_bitsize) {
+ case 8:
+ return Inst<F64>(Opcode::ConvertF64U8, Flags{control}, value);
+ case 16:
+ return Inst<F64>(Opcode::ConvertF64U16, Flags{control}, value);
+ case 32:
+ return Inst<F64>(Opcode::ConvertF64U32, Flags{control}, value);
+ case 64:
+ return Inst<F64>(Opcode::ConvertF64U64, Flags{control}, value);
+ }
+ break;
+ }
+ throw InvalidArgument("Invalid bit size combination dst={} src={}", dest_bitsize, src_bitsize);
+}
+
+F16F32F64 IREmitter::ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_signed,
+ const Value& value, FpControl control) {
+ return is_signed ? ConvertSToF(dest_bitsize, src_bitsize, value, control)
+ : ConvertUToF(dest_bitsize, src_bitsize, value, control);
+}
+
+U32U64 IREmitter::UConvert(size_t result_bitsize, const U32U64& value) {
+ switch (result_bitsize) {
+ case 32:
+ switch (value.Type()) {
+ case Type::U32:
+ // Nothing to do
+ return value;
+ case Type::U64:
+ return Inst<U32>(Opcode::ConvertU32U64, value);
+ default:
+ break;
+ }
+ break;
+ case 64:
+ switch (value.Type()) {
+ case Type::U32:
+ return Inst<U64>(Opcode::ConvertU64U32, value);
+ case Type::U64:
+ // Nothing to do
+ return value;
+ default:
+ break;
+ }
+ }
+ throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
+}
+
+F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value, FpControl control) {
+ switch (result_bitsize) {
+ case 16:
+ switch (value.Type()) {
+ case Type::F16:
+ // Nothing to do
+ return value;
+ case Type::F32:
+ return Inst<F16>(Opcode::ConvertF16F32, Flags{control}, value);
+ case Type::F64:
+ throw LogicError("Illegal conversion from F64 to F16");
+ default:
+ break;
+ }
+ break;
+ case 32:
+ switch (value.Type()) {
+ case Type::F16:
+ return Inst<F32>(Opcode::ConvertF32F16, Flags{control}, value);
+ case Type::F32:
+ // Nothing to do
+ return value;
+ case Type::F64:
+ return Inst<F32>(Opcode::ConvertF32F64, Flags{control}, value);
+ default:
+ break;
+ }
+ break;
+ case 64:
+ switch (value.Type()) {
+ case Type::F16:
+ throw LogicError("Illegal conversion from F16 to F64");
+ case Type::F32:
+ return Inst<F64>(Opcode::ConvertF64F32, Flags{control}, value);
+ case Type::F64:
+ // Nothing to do
+ return value;
+ default:
+ break;
+ }
+ break;
+ }
+ throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
+}
+
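+// Image operations select the Bound* opcode when the handle is an immediate
+// (presumably a bound texture slot) and the Bindless* opcode otherwise.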
+Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& coords, const F32& bias,
+ const Value& offset, const F32& lod_clamp,
+ TextureInstInfo info) {
+ const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)};
+ const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleImplicitLod
+ : Opcode::BindlessImageSampleImplicitLod};
+ return Inst(op, Flags{info}, handle, coords, bias_lc, offset);
+}
+
+Value IREmitter::ImageSampleExplicitLod(const Value& handle, const Value& coords, const F32& lod,
+ const Value& offset, TextureInstInfo info) {
+ const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleExplicitLod
+ : Opcode::BindlessImageSampleExplicitLod};
+ return Inst(op, Flags{info}, handle, coords, lod, offset);
+}
+
+F32 IREmitter::ImageSampleDrefImplicitLod(const Value& handle, const Value& coords, const F32& dref,
+ const F32& bias, const Value& offset,
+ const F32& lod_clamp, TextureInstInfo info) {
+ const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)};
+ const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleDrefImplicitLod
+ : Opcode::BindlessImageSampleDrefImplicitLod};
+ return Inst<F32>(op, Flags{info}, handle, coords, dref, bias_lc, offset);
+}
+
+F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, const F32& dref,
+ const F32& lod, const Value& offset,
+ TextureInstInfo info) {
+ const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleDrefExplicitLod
+ : Opcode::BindlessImageSampleDrefExplicitLod};
+ return Inst<F32>(op, Flags{info}, handle, coords, dref, lod, offset);
+}
+
+Value IREmitter::ImageGather(const Value& handle, const Value& coords, const Value& offset,
+ const Value& offset2, TextureInstInfo info) {
+ const Opcode op{handle.IsImmediate() ? Opcode::BoundImageGather : Opcode::BindlessImageGather};
+ return Inst(op, Flags{info}, handle, coords, offset, offset2);
+}
+
+Value IREmitter::ImageGatherDref(const Value& handle, const Value& coords, const Value& offset,
+ const Value& offset2, const F32& dref, TextureInstInfo info) {
+ const Opcode op{handle.IsImmediate() ? Opcode::BoundImageGatherDref
+ : Opcode::BindlessImageGatherDref};
+ return Inst(op, Flags{info}, handle, coords, offset, offset2, dref);
+}
+
+Value IREmitter::ImageFetch(const Value& handle, const Value& coords, const Value& offset,
+ const U32& lod, const U32& multisampling, TextureInstInfo info) {
+ const Opcode op{handle.IsImmediate() ? Opcode::BoundImageFetch : Opcode::BindlessImageFetch};
+ return Inst(op, Flags{info}, handle, coords, offset, lod, multisampling);
+}
+
+Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod) {
+ const Opcode op{handle.IsImmediate() ? Opcode::BoundImageQueryDimensions
+ : Opcode::BindlessImageQueryDimensions};
+ return Inst(op, handle, lod);
+}
+
+Value IREmitter::ImageQueryLod(const Value& handle, const Value& coords, TextureInstInfo info) {
+ const Opcode op{handle.IsImmediate() ? Opcode::BoundImageQueryLod
+ : Opcode::BindlessImageQueryLod};
+ return Inst(op, Flags{info}, handle, coords);
+}
+
+Value IREmitter::ImageGradient(const Value& handle, const Value& coords, const Value& derivates,
+ const Value& offset, const F32& lod_clamp, TextureInstInfo info) {
+ const Opcode op{handle.IsImmediate() ? Opcode::BoundImageGradient
+ : Opcode::BindlessImageGradient};
+ return Inst(op, Flags{info}, handle, coords, derivates, offset, lod_clamp);
+}
+
+Value IREmitter::ImageRead(const Value& handle, const Value& coords, TextureInstInfo info) {
+ const Opcode op{handle.IsImmediate() ? Opcode::BoundImageRead : Opcode::BindlessImageRead};
+ return Inst(op, Flags{info}, handle, coords);
+}
+
+void IREmitter::ImageWrite(const Value& handle, const Value& coords, const Value& color,
+ TextureInstInfo info) {
+ const Opcode op{handle.IsImmediate() ? Opcode::BoundImageWrite : Opcode::BindlessImageWrite};
+ Inst(op, Flags{info}, handle, coords, color);
+}
+
+Value IREmitter::ImageAtomicIAdd(const Value& handle, const Value& coords, const Value& value,
+ TextureInstInfo info) {
+ const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicIAdd32
+ : Opcode::BindlessImageAtomicIAdd32};
+ return Inst(op, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicSMin(const Value& handle, const Value& coords, const Value& value,
+ TextureInstInfo info) {
+ const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicSMin32
+ : Opcode::BindlessImageAtomicSMin32};
+ return Inst(op, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicUMin(const Value& handle, const Value& coords, const Value& value,
+ TextureInstInfo info) {
+ const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicUMin32
+ : Opcode::BindlessImageAtomicUMin32};
+ return Inst(op, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicIMin(const Value& handle, const Value& coords, const Value& value,
+ bool is_signed, TextureInstInfo info) {
+ return is_signed ? ImageAtomicSMin(handle, coords, value, info)
+ : ImageAtomicUMin(handle, coords, value, info);
+}
+
+Value IREmitter::ImageAtomicSMax(const Value& handle, const Value& coords, const Value& value,
+ TextureInstInfo info) {
+ const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicSMax32
+ : Opcode::BindlessImageAtomicSMax32};
+ return Inst(op, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicUMax(const Value& handle, const Value& coords, const Value& value,
+ TextureInstInfo info) {
+ const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicUMax32
+ : Opcode::BindlessImageAtomicUMax32};
+ return Inst(op, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicIMax(const Value& handle, const Value& coords, const Value& value,
+ bool is_signed, TextureInstInfo info) {
+ return is_signed ? ImageAtomicSMax(handle, coords, value, info)
+ : ImageAtomicUMax(handle, coords, value, info);
+}
+
+Value IREmitter::ImageAtomicInc(const Value& handle, const Value& coords, const Value& value,
+ TextureInstInfo info) {
+ const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicInc32
+ : Opcode::BindlessImageAtomicInc32};
+ return Inst(op, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicDec(const Value& handle, const Value& coords, const Value& value,
+ TextureInstInfo info) {
+ const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicDec32
+ : Opcode::BindlessImageAtomicDec32};
+ return Inst(op, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicAnd(const Value& handle, const Value& coords, const Value& value,
+ TextureInstInfo info) {
+ const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicAnd32
+ : Opcode::BindlessImageAtomicAnd32};
+ return Inst(op, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicOr(const Value& handle, const Value& coords, const Value& value,
+ TextureInstInfo info) {
+ const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicOr32
+ : Opcode::BindlessImageAtomicOr32};
+ return Inst(op, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicXor(const Value& handle, const Value& coords, const Value& value,
+ TextureInstInfo info) {
+ const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicXor32
+ : Opcode::BindlessImageAtomicXor32};
+ return Inst(op, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicExchange(const Value& handle, const Value& coords, const Value& value,
+ TextureInstInfo info) {
+ const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicExchange32
+ : Opcode::BindlessImageAtomicExchange32};
+ return Inst(op, Flags{info}, handle, coords, value);
+}
+
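+// Subgroup (warp) vote, ballot and lane-mask operations.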
+U1 IREmitter::VoteAll(const U1& value) {
+ return Inst<U1>(Opcode::VoteAll, value);
+}
+
+U1 IREmitter::VoteAny(const U1& value) {
+ return Inst<U1>(Opcode::VoteAny, value);
+}
+
+U1 IREmitter::VoteEqual(const U1& value) {
+ return Inst<U1>(Opcode::VoteEqual, value);
+}
+
+U32 IREmitter::SubgroupBallot(const U1& value) {
+ return Inst<U32>(Opcode::SubgroupBallot, value);
+}
+
+U32 IREmitter::SubgroupEqMask() {
+ return Inst<U32>(Opcode::SubgroupEqMask);
+}
+
+U32 IREmitter::SubgroupLtMask() {
+ return Inst<U32>(Opcode::SubgroupLtMask);
+}
+
+U32 IREmitter::SubgroupLeMask() {
+ return Inst<U32>(Opcode::SubgroupLeMask);
+}
+
+U32 IREmitter::SubgroupGtMask() {
+ return Inst<U32>(Opcode::SubgroupGtMask);
+}
+
+U32 IREmitter::SubgroupGeMask() {
+ return Inst<U32>(Opcode::SubgroupGeMask);
+}
+
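+// Warp shuffle operations (indexed, up, down, butterfly); clamp and seg_mask bound which source
+// lanes may be selected, mirroring the hardware SHFL modes.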
+U32 IREmitter::ShuffleIndex(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
+ const IR::U32& seg_mask) {
+ return Inst<U32>(Opcode::ShuffleIndex, value, index, clamp, seg_mask);
+}
+
+U32 IREmitter::ShuffleUp(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
+ const IR::U32& seg_mask) {
+ return Inst<U32>(Opcode::ShuffleUp, value, index, clamp, seg_mask);
+}
+
+U32 IREmitter::ShuffleDown(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
+ const IR::U32& seg_mask) {
+ return Inst<U32>(Opcode::ShuffleDown, value, index, clamp, seg_mask);
+}
+
+U32 IREmitter::ShuffleButterfly(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
+ const IR::U32& seg_mask) {
+ return Inst<U32>(Opcode::ShuffleButterfly, value, index, clamp, seg_mask);
+}
+
+F32 IREmitter::FSwizzleAdd(const F32& a, const F32& b, const U32& swizzle, FpControl control) {
+ return Inst<F32>(Opcode::FSwizzleAdd, Flags{control}, a, b, swizzle);
+}
+
+F32 IREmitter::DPdxFine(const F32& a) {
+ return Inst<F32>(Opcode::DPdxFine, a);
+}
+
+F32 IREmitter::DPdyFine(const F32& a) {
+ return Inst<F32>(Opcode::DPdyFine, a);
+}
+
+F32 IREmitter::DPdxCoarse(const F32& a) {
+ return Inst<F32>(Opcode::DPdxCoarse, a);
+}
+
+F32 IREmitter::DPdyCoarse(const F32& a) {
+ return Inst<F32>(Opcode::DPdyCoarse, a);
+}
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
new file mode 100644
index 000000000..1b89ca5a0
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -0,0 +1,413 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <cstring>
+#include <type_traits>
+
+#include "shader_recompiler/frontend/ir/attribute.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::IR {
+
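+// Convenience builder that appends IR microinstructions into a basic block, inserting before the
+// given insertion point (the end of the block by default). Minimal illustrative sketch:
+//
+//     IR::IREmitter ir{block};
+//     const IR::U32 sum{ir.IAdd(ir.Imm32(1), ir.Imm32(2))};
+//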
+class IREmitter {
+public:
+ explicit IREmitter(Block& block_) : block{&block_}, insertion_point{block->end()} {}
+ explicit IREmitter(Block& block_, Block::iterator insertion_point_)
+ : block{&block_}, insertion_point{insertion_point_} {}
+
+ Block* block;
+
+ [[nodiscard]] U1 Imm1(bool value) const;
+ [[nodiscard]] U8 Imm8(u8 value) const;
+ [[nodiscard]] U16 Imm16(u16 value) const;
+ [[nodiscard]] U32 Imm32(u32 value) const;
+ [[nodiscard]] U32 Imm32(s32 value) const;
+ [[nodiscard]] F32 Imm32(f32 value) const;
+ [[nodiscard]] U64 Imm64(u64 value) const;
+ [[nodiscard]] U64 Imm64(s64 value) const;
+ [[nodiscard]] F64 Imm64(f64 value) const;
+
+ U1 ConditionRef(const U1& value);
+ void Reference(const Value& value);
+
+ void PhiMove(IR::Inst& phi, const Value& value);
+
+ void Prologue();
+ void Epilogue();
+ void DemoteToHelperInvocation();
+ void EmitVertex(const U32& stream);
+ void EndPrimitive(const U32& stream);
+
+ [[nodiscard]] U32 GetReg(IR::Reg reg);
+ void SetReg(IR::Reg reg, const U32& value);
+
+ [[nodiscard]] U1 GetPred(IR::Pred pred, bool is_negated = false);
+ void SetPred(IR::Pred pred, const U1& value);
+
+ [[nodiscard]] U1 GetGotoVariable(u32 id);
+ void SetGotoVariable(u32 id, const U1& value);
+
+ [[nodiscard]] U32 GetIndirectBranchVariable();
+ void SetIndirectBranchVariable(const U32& value);
+
+ [[nodiscard]] U32 GetCbuf(const U32& binding, const U32& byte_offset);
+ [[nodiscard]] Value GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize,
+ bool is_signed);
+ [[nodiscard]] F32 GetFloatCbuf(const U32& binding, const U32& byte_offset);
+
+ [[nodiscard]] U1 GetZFlag();
+ [[nodiscard]] U1 GetSFlag();
+ [[nodiscard]] U1 GetCFlag();
+ [[nodiscard]] U1 GetOFlag();
+
+ void SetZFlag(const U1& value);
+ void SetSFlag(const U1& value);
+ void SetCFlag(const U1& value);
+ void SetOFlag(const U1& value);
+
+ [[nodiscard]] U1 Condition(IR::Condition cond);
+ [[nodiscard]] U1 GetFlowTestResult(FlowTest test);
+
+ [[nodiscard]] F32 GetAttribute(IR::Attribute attribute);
+ [[nodiscard]] F32 GetAttribute(IR::Attribute attribute, const U32& vertex);
+ void SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex);
+
+ [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address);
+ [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address, const U32& vertex);
+ void SetAttributeIndexed(const U32& phys_address, const F32& value, const U32& vertex);
+
+ [[nodiscard]] F32 GetPatch(Patch patch);
+ void SetPatch(Patch patch, const F32& value);
+
+ void SetFragColor(u32 index, u32 component, const F32& value);
+ void SetSampleMask(const U32& value);
+ void SetFragDepth(const F32& value);
+
+ [[nodiscard]] U32 WorkgroupIdX();
+ [[nodiscard]] U32 WorkgroupIdY();
+ [[nodiscard]] U32 WorkgroupIdZ();
+
+ [[nodiscard]] Value LocalInvocationId();
+ [[nodiscard]] U32 LocalInvocationIdX();
+ [[nodiscard]] U32 LocalInvocationIdY();
+ [[nodiscard]] U32 LocalInvocationIdZ();
+
+ [[nodiscard]] U32 InvocationId();
+ [[nodiscard]] U32 SampleId();
+ [[nodiscard]] U1 IsHelperInvocation();
+ [[nodiscard]] F32 YDirection();
+
+ [[nodiscard]] U32 LaneId();
+
+ [[nodiscard]] U32 LoadGlobalU8(const U64& address);
+ [[nodiscard]] U32 LoadGlobalS8(const U64& address);
+ [[nodiscard]] U32 LoadGlobalU16(const U64& address);
+ [[nodiscard]] U32 LoadGlobalS16(const U64& address);
+ [[nodiscard]] U32 LoadGlobal32(const U64& address);
+ [[nodiscard]] Value LoadGlobal64(const U64& address);
+ [[nodiscard]] Value LoadGlobal128(const U64& address);
+
+ void WriteGlobalU8(const U64& address, const U32& value);
+ void WriteGlobalS8(const U64& address, const U32& value);
+ void WriteGlobalU16(const U64& address, const U32& value);
+ void WriteGlobalS16(const U64& address, const U32& value);
+ void WriteGlobal32(const U64& address, const U32& value);
+ void WriteGlobal64(const U64& address, const IR::Value& vector);
+ void WriteGlobal128(const U64& address, const IR::Value& vector);
+
+ [[nodiscard]] U32 LoadLocal(const U32& word_offset);
+ void WriteLocal(const U32& word_offset, const U32& value);
+
+ [[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset);
+ void WriteShared(int bit_size, const U32& offset, const Value& value);
+
+ [[nodiscard]] U1 GetZeroFromOp(const Value& op);
+ [[nodiscard]] U1 GetSignFromOp(const Value& op);
+ [[nodiscard]] U1 GetCarryFromOp(const Value& op);
+ [[nodiscard]] U1 GetOverflowFromOp(const Value& op);
+ [[nodiscard]] U1 GetSparseFromOp(const Value& op);
+ [[nodiscard]] U1 GetInBoundsFromOp(const Value& op);
+
+ [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2);
+ [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3);
+ [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3,
+ const Value& e4);
+ [[nodiscard]] Value CompositeExtract(const Value& vector, size_t element);
+ [[nodiscard]] Value CompositeInsert(const Value& vector, const Value& object, size_t element);
+
+ [[nodiscard]] Value Select(const U1& condition, const Value& true_value,
+ const Value& false_value);
+
+ void Barrier();
+ void WorkgroupMemoryBarrier();
+ void DeviceMemoryBarrier();
+
+ template <typename Dest, typename Source>
+ [[nodiscard]] Dest BitCast(const Source& value);
+
+ [[nodiscard]] U64 PackUint2x32(const Value& vector);
+ [[nodiscard]] Value UnpackUint2x32(const U64& value);
+
+ [[nodiscard]] U32 PackFloat2x16(const Value& vector);
+ [[nodiscard]] Value UnpackFloat2x16(const U32& value);
+
+ [[nodiscard]] U32 PackHalf2x16(const Value& vector);
+ [[nodiscard]] Value UnpackHalf2x16(const U32& value);
+
+ [[nodiscard]] F64 PackDouble2x32(const Value& vector);
+ [[nodiscard]] Value UnpackDouble2x32(const F64& value);
+
+ [[nodiscard]] F16F32F64 FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control = {});
+ [[nodiscard]] F16F32F64 FPMul(const F16F32F64& a, const F16F32F64& b, FpControl control = {});
+ [[nodiscard]] F16F32F64 FPFma(const F16F32F64& a, const F16F32F64& b, const F16F32F64& c,
+ FpControl control = {});
+
+ [[nodiscard]] F16F32F64 FPAbs(const F16F32F64& value);
+ [[nodiscard]] F16F32F64 FPNeg(const F16F32F64& value);
+ [[nodiscard]] F16F32F64 FPAbsNeg(const F16F32F64& value, bool abs, bool neg);
+
+ [[nodiscard]] F32 FPCos(const F32& value);
+ [[nodiscard]] F32 FPSin(const F32& value);
+ [[nodiscard]] F32 FPExp2(const F32& value);
+ [[nodiscard]] F32 FPLog2(const F32& value);
+ [[nodiscard]] F32F64 FPRecip(const F32F64& value);
+ [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value);
+ [[nodiscard]] F32 FPSqrt(const F32& value);
+ [[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value);
+ [[nodiscard]] F16F32F64 FPClamp(const F16F32F64& value, const F16F32F64& min_value,
+ const F16F32F64& max_value);
+ [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value, FpControl control = {});
+ [[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value, FpControl control = {});
+ [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {});
+ [[nodiscard]] F16F32F64 FPTrunc(const F16F32F64& value, FpControl control = {});
+
+ [[nodiscard]] U1 FPEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control = {},
+ bool ordered = true);
+ [[nodiscard]] U1 FPNotEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control = {},
+ bool ordered = true);
+ [[nodiscard]] U1 FPLessThan(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control = {},
+ bool ordered = true);
+ [[nodiscard]] U1 FPGreaterThan(const F16F32F64& lhs, const F16F32F64& rhs,
+ FpControl control = {}, bool ordered = true);
+ [[nodiscard]] U1 FPLessThanEqual(const F16F32F64& lhs, const F16F32F64& rhs,
+ FpControl control = {}, bool ordered = true);
+ [[nodiscard]] U1 FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs,
+ FpControl control = {}, bool ordered = true);
+ [[nodiscard]] U1 FPIsNan(const F16F32F64& value);
+ [[nodiscard]] U1 FPOrdered(const F16F32F64& lhs, const F16F32F64& rhs);
+ [[nodiscard]] U1 FPUnordered(const F16F32F64& lhs, const F16F32F64& rhs);
+ [[nodiscard]] F32F64 FPMax(const F32F64& lhs, const F32F64& rhs, FpControl control = {});
+ [[nodiscard]] F32F64 FPMin(const F32F64& lhs, const F32F64& rhs, FpControl control = {});
+
+ [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b);
+ [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b);
+ [[nodiscard]] U32 IMul(const U32& a, const U32& b);
+ [[nodiscard]] U32U64 INeg(const U32U64& value);
+ [[nodiscard]] U32 IAbs(const U32& value);
+ [[nodiscard]] U32U64 ShiftLeftLogical(const U32U64& base, const U32& shift);
+ [[nodiscard]] U32U64 ShiftRightLogical(const U32U64& base, const U32& shift);
+ [[nodiscard]] U32U64 ShiftRightArithmetic(const U32U64& base, const U32& shift);
+ [[nodiscard]] U32 BitwiseAnd(const U32& a, const U32& b);
+ [[nodiscard]] U32 BitwiseOr(const U32& a, const U32& b);
+ [[nodiscard]] U32 BitwiseXor(const U32& a, const U32& b);
+ [[nodiscard]] U32 BitFieldInsert(const U32& base, const U32& insert, const U32& offset,
+ const U32& count);
+ [[nodiscard]] U32 BitFieldExtract(const U32& base, const U32& offset, const U32& count,
+ bool is_signed = false);
+ [[nodiscard]] U32 BitReverse(const U32& value);
+ [[nodiscard]] U32 BitCount(const U32& value);
+ [[nodiscard]] U32 BitwiseNot(const U32& value);
+
+ [[nodiscard]] U32 FindSMsb(const U32& value);
+ [[nodiscard]] U32 FindUMsb(const U32& value);
+ [[nodiscard]] U32 SMin(const U32& a, const U32& b);
+ [[nodiscard]] U32 UMin(const U32& a, const U32& b);
+ [[nodiscard]] U32 IMin(const U32& a, const U32& b, bool is_signed);
+ [[nodiscard]] U32 SMax(const U32& a, const U32& b);
+ [[nodiscard]] U32 UMax(const U32& a, const U32& b);
+ [[nodiscard]] U32 IMax(const U32& a, const U32& b, bool is_signed);
+ [[nodiscard]] U32 SClamp(const U32& value, const U32& min, const U32& max);
+ [[nodiscard]] U32 UClamp(const U32& value, const U32& min, const U32& max);
+
+ [[nodiscard]] U1 ILessThan(const U32& lhs, const U32& rhs, bool is_signed);
+ [[nodiscard]] U1 IEqual(const U32U64& lhs, const U32U64& rhs);
+ [[nodiscard]] U1 ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed);
+ [[nodiscard]] U1 IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed);
+ [[nodiscard]] U1 INotEqual(const U32& lhs, const U32& rhs);
+ [[nodiscard]] U1 IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed);
+
+ [[nodiscard]] U32 SharedAtomicIAdd(const U32& pointer_offset, const U32& value);
+ [[nodiscard]] U32 SharedAtomicSMin(const U32& pointer_offset, const U32& value);
+ [[nodiscard]] U32 SharedAtomicUMin(const U32& pointer_offset, const U32& value);
+ [[nodiscard]] U32 SharedAtomicIMin(const U32& pointer_offset, const U32& value, bool is_signed);
+ [[nodiscard]] U32 SharedAtomicSMax(const U32& pointer_offset, const U32& value);
+ [[nodiscard]] U32 SharedAtomicUMax(const U32& pointer_offset, const U32& value);
+ [[nodiscard]] U32 SharedAtomicIMax(const U32& pointer_offset, const U32& value, bool is_signed);
+ [[nodiscard]] U32 SharedAtomicInc(const U32& pointer_offset, const U32& value);
+ [[nodiscard]] U32 SharedAtomicDec(const U32& pointer_offset, const U32& value);
+ [[nodiscard]] U32 SharedAtomicAnd(const U32& pointer_offset, const U32& value);
+ [[nodiscard]] U32 SharedAtomicOr(const U32& pointer_offset, const U32& value);
+ [[nodiscard]] U32 SharedAtomicXor(const U32& pointer_offset, const U32& value);
+ [[nodiscard]] U32U64 SharedAtomicExchange(const U32& pointer_offset, const U32U64& value);
+
+ [[nodiscard]] U32U64 GlobalAtomicIAdd(const U64& pointer_offset, const U32U64& value);
+ [[nodiscard]] U32U64 GlobalAtomicSMin(const U64& pointer_offset, const U32U64& value);
+ [[nodiscard]] U32U64 GlobalAtomicUMin(const U64& pointer_offset, const U32U64& value);
+ [[nodiscard]] U32U64 GlobalAtomicIMin(const U64& pointer_offset, const U32U64& value,
+ bool is_signed);
+ [[nodiscard]] U32U64 GlobalAtomicSMax(const U64& pointer_offset, const U32U64& value);
+ [[nodiscard]] U32U64 GlobalAtomicUMax(const U64& pointer_offset, const U32U64& value);
+ [[nodiscard]] U32U64 GlobalAtomicIMax(const U64& pointer_offset, const U32U64& value,
+ bool is_signed);
+ [[nodiscard]] U32 GlobalAtomicInc(const U64& pointer_offset, const U32& value);
+ [[nodiscard]] U32 GlobalAtomicDec(const U64& pointer_offset, const U32& value);
+ [[nodiscard]] U32U64 GlobalAtomicAnd(const U64& pointer_offset, const U32U64& value);
+ [[nodiscard]] U32U64 GlobalAtomicOr(const U64& pointer_offset, const U32U64& value);
+ [[nodiscard]] U32U64 GlobalAtomicXor(const U64& pointer_offset, const U32U64& value);
+ [[nodiscard]] U32U64 GlobalAtomicExchange(const U64& pointer_offset, const U32U64& value);
+
+ [[nodiscard]] F32 GlobalAtomicF32Add(const U64& pointer_offset, const Value& value,
+ const FpControl control = {});
+ [[nodiscard]] Value GlobalAtomicF16x2Add(const U64& pointer_offset, const Value& value,
+ const FpControl control = {});
+ [[nodiscard]] Value GlobalAtomicF16x2Min(const U64& pointer_offset, const Value& value,
+ const FpControl control = {});
+ [[nodiscard]] Value GlobalAtomicF16x2Max(const U64& pointer_offset, const Value& value,
+ const FpControl control = {});
+
+ [[nodiscard]] U1 LogicalOr(const U1& a, const U1& b);
+ [[nodiscard]] U1 LogicalAnd(const U1& a, const U1& b);
+ [[nodiscard]] U1 LogicalXor(const U1& a, const U1& b);
+ [[nodiscard]] U1 LogicalNot(const U1& value);
+
+ [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const F16F32F64& value);
+ [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value);
+ [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value);
+ [[nodiscard]] F16F32F64 ConvertSToF(size_t dest_bitsize, size_t src_bitsize, const Value& value,
+ FpControl control = {});
+ [[nodiscard]] F16F32F64 ConvertUToF(size_t dest_bitsize, size_t src_bitsize, const Value& value,
+ FpControl control = {});
+ [[nodiscard]] F16F32F64 ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_signed,
+ const Value& value, FpControl control = {});
+
+ [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value);
+ [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value,
+ FpControl control = {});
+
+ [[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& coords,
+ const F32& bias, const Value& offset,
+ const F32& lod_clamp, TextureInstInfo info);
+ [[nodiscard]] Value ImageSampleExplicitLod(const Value& handle, const Value& coords,
+ const F32& lod, const Value& offset,
+ TextureInstInfo info);
+ [[nodiscard]] F32 ImageSampleDrefImplicitLod(const Value& handle, const Value& coords,
+ const F32& dref, const F32& bias,
+ const Value& offset, const F32& lod_clamp,
+ TextureInstInfo info);
+ [[nodiscard]] F32 ImageSampleDrefExplicitLod(const Value& handle, const Value& coords,
+ const F32& dref, const F32& lod,
+ const Value& offset, TextureInstInfo info);
+ [[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod);
+
+ [[nodiscard]] Value ImageQueryLod(const Value& handle, const Value& coords,
+ TextureInstInfo info);
+ [[nodiscard]] Value ImageGather(const Value& handle, const Value& coords, const Value& offset,
+ const Value& offset2, TextureInstInfo info);
+ [[nodiscard]] Value ImageGatherDref(const Value& handle, const Value& coords,
+ const Value& offset, const Value& offset2, const F32& dref,
+ TextureInstInfo info);
+ [[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const Value& offset,
+ const U32& lod, const U32& multisampling, TextureInstInfo info);
+ [[nodiscard]] Value ImageGradient(const Value& handle, const Value& coords,
+ const Value& derivates, const Value& offset,
+ const F32& lod_clamp, TextureInstInfo info);
+ [[nodiscard]] Value ImageRead(const Value& handle, const Value& coords, TextureInstInfo info);
+ void ImageWrite(const Value& handle, const Value& coords, const Value& color,
+ TextureInstInfo info);
+
+ [[nodiscard]] Value ImageAtomicIAdd(const Value& handle, const Value& coords,
+ const Value& value, TextureInstInfo info);
+ [[nodiscard]] Value ImageAtomicSMin(const Value& handle, const Value& coords,
+ const Value& value, TextureInstInfo info);
+ [[nodiscard]] Value ImageAtomicUMin(const Value& handle, const Value& coords,
+ const Value& value, TextureInstInfo info);
+ [[nodiscard]] Value ImageAtomicIMin(const Value& handle, const Value& coords,
+ const Value& value, bool is_signed, TextureInstInfo info);
+ [[nodiscard]] Value ImageAtomicSMax(const Value& handle, const Value& coords,
+ const Value& value, TextureInstInfo info);
+ [[nodiscard]] Value ImageAtomicUMax(const Value& handle, const Value& coords,
+ const Value& value, TextureInstInfo info);
+ [[nodiscard]] Value ImageAtomicIMax(const Value& handle, const Value& coords,
+ const Value& value, bool is_signed, TextureInstInfo info);
+ [[nodiscard]] Value ImageAtomicInc(const Value& handle, const Value& coords, const Value& value,
+ TextureInstInfo info);
+ [[nodiscard]] Value ImageAtomicDec(const Value& handle, const Value& coords, const Value& value,
+ TextureInstInfo info);
+ [[nodiscard]] Value ImageAtomicAnd(const Value& handle, const Value& coords, const Value& value,
+ TextureInstInfo info);
+ [[nodiscard]] Value ImageAtomicOr(const Value& handle, const Value& coords, const Value& value,
+ TextureInstInfo info);
+ [[nodiscard]] Value ImageAtomicXor(const Value& handle, const Value& coords, const Value& value,
+ TextureInstInfo info);
+ [[nodiscard]] Value ImageAtomicExchange(const Value& handle, const Value& coords,
+ const Value& value, TextureInstInfo info);
+ [[nodiscard]] U1 VoteAll(const U1& value);
+ [[nodiscard]] U1 VoteAny(const U1& value);
+ [[nodiscard]] U1 VoteEqual(const U1& value);
+ [[nodiscard]] U32 SubgroupBallot(const U1& value);
+ [[nodiscard]] U32 SubgroupEqMask();
+ [[nodiscard]] U32 SubgroupLtMask();
+ [[nodiscard]] U32 SubgroupLeMask();
+ [[nodiscard]] U32 SubgroupGtMask();
+ [[nodiscard]] U32 SubgroupGeMask();
+ [[nodiscard]] U32 ShuffleIndex(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
+ const IR::U32& seg_mask);
+ [[nodiscard]] U32 ShuffleUp(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
+ const IR::U32& seg_mask);
+ [[nodiscard]] U32 ShuffleDown(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
+ const IR::U32& seg_mask);
+ [[nodiscard]] U32 ShuffleButterfly(const IR::U32& value, const IR::U32& index,
+ const IR::U32& clamp, const IR::U32& seg_mask);
+ [[nodiscard]] F32 FSwizzleAdd(const F32& a, const F32& b, const U32& swizzle,
+ FpControl control = {});
+
+ [[nodiscard]] F32 DPdxFine(const F32& a);
+    [[nodiscard]] F32 DPdxFine(const F32& a);
+    [[nodiscard]] F32 DPdyFine(const F32& a);
+    [[nodiscard]] F32 DPdxCoarse(const F32& a);
+    [[nodiscard]] F32 DPdyCoarse(const F32& a);
+
+
+private:
+ IR::Block::iterator insertion_point;
+
+ template <typename T = Value, typename... Args>
+ T Inst(Opcode op, Args... args) {
+ auto it{block->PrependNewInst(insertion_point, op, {Value{args}...})};
+ return T{Value{&*it}};
+ }
+
+    template <typename T>
+    requires(sizeof(T) <= sizeof(u32) && std::is_trivially_copyable_v<T>)
+    struct Flags {
+ Flags() = default;
+ Flags(T proxy_) : proxy{proxy_} {}
+
+ T proxy;
+ };
+
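+    // Packs a small trivially copyable flags struct (e.g. FpControl or TextureInstInfo) into the
+    // instruction's 32-bit flags word via memcpy; consumers later recover it from the
+    // instruction's flags storage the same way.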
+ template <typename T = Value, typename FlagType, typename... Args>
+ T Inst(Opcode op, Flags<FlagType> flags, Args... args) {
+ u32 raw_flags{};
+ std::memcpy(&raw_flags, &flags.proxy, sizeof(flags.proxy));
+ auto it{block->PrependNewInst(insertion_point, op, {Value{args}...}, raw_flags)};
+ return T{Value{&*it}};
+ }
+};
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
new file mode 100644
index 000000000..3dfa5a880
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -0,0 +1,411 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <memory>
+
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/type.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::IR {
+namespace {
+void CheckPseudoInstruction(IR::Inst* inst, IR::Opcode opcode) {
+ if (inst && inst->GetOpcode() != opcode) {
+ throw LogicError("Invalid pseudo-instruction");
+ }
+}
+
+void SetPseudoInstruction(IR::Inst*& dest_inst, IR::Inst* pseudo_inst) {
+ if (dest_inst) {
+ throw LogicError("Only one of each type of pseudo-op allowed");
+ }
+ dest_inst = pseudo_inst;
+}
+
+void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) {
+ if (inst->GetOpcode() != expected_opcode) {
+ throw LogicError("Undoing use of invalid pseudo-op");
+ }
+ inst = nullptr;
+}
+
+void AllocAssociatedInsts(std::unique_ptr<AssociatedInsts>& associated_insts) {
+ if (!associated_insts) {
+ associated_insts = std::make_unique<AssociatedInsts>();
+ }
+}
+} // Anonymous namespace
+
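+// Phi instructions keep a dynamically sized list of (incoming block, value) pairs while every
+// other opcode uses the fixed argument array, so the matching union member is constructed and
+// destroyed based on the opcode.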
+Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} {
+ if (op == Opcode::Phi) {
+ std::construct_at(&phi_args);
+ } else {
+ std::construct_at(&args);
+ }
+}
+
+Inst::~Inst() {
+ if (op == Opcode::Phi) {
+ std::destroy_at(&phi_args);
+ } else {
+ std::destroy_at(&args);
+ }
+}
+
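+// Instructions with observable side effects (stores, barriers, atomics, image writes, ...) must
+// be kept alive even when their results are unused.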
+bool Inst::MayHaveSideEffects() const noexcept {
+ switch (op) {
+ case Opcode::ConditionRef:
+ case Opcode::Reference:
+ case Opcode::PhiMove:
+ case Opcode::Prologue:
+ case Opcode::Epilogue:
+ case Opcode::Join:
+ case Opcode::DemoteToHelperInvocation:
+ case Opcode::Barrier:
+ case Opcode::WorkgroupMemoryBarrier:
+ case Opcode::DeviceMemoryBarrier:
+ case Opcode::EmitVertex:
+ case Opcode::EndPrimitive:
+ case Opcode::SetAttribute:
+ case Opcode::SetAttributeIndexed:
+ case Opcode::SetPatch:
+ case Opcode::SetFragColor:
+ case Opcode::SetSampleMask:
+ case Opcode::SetFragDepth:
+ case Opcode::WriteGlobalU8:
+ case Opcode::WriteGlobalS8:
+ case Opcode::WriteGlobalU16:
+ case Opcode::WriteGlobalS16:
+ case Opcode::WriteGlobal32:
+ case Opcode::WriteGlobal64:
+ case Opcode::WriteGlobal128:
+ case Opcode::WriteStorageU8:
+ case Opcode::WriteStorageS8:
+ case Opcode::WriteStorageU16:
+ case Opcode::WriteStorageS16:
+ case Opcode::WriteStorage32:
+ case Opcode::WriteStorage64:
+ case Opcode::WriteStorage128:
+ case Opcode::WriteLocal:
+ case Opcode::WriteSharedU8:
+ case Opcode::WriteSharedU16:
+ case Opcode::WriteSharedU32:
+ case Opcode::WriteSharedU64:
+ case Opcode::WriteSharedU128:
+ case Opcode::SharedAtomicIAdd32:
+ case Opcode::SharedAtomicSMin32:
+ case Opcode::SharedAtomicUMin32:
+ case Opcode::SharedAtomicSMax32:
+ case Opcode::SharedAtomicUMax32:
+ case Opcode::SharedAtomicInc32:
+ case Opcode::SharedAtomicDec32:
+ case Opcode::SharedAtomicAnd32:
+ case Opcode::SharedAtomicOr32:
+ case Opcode::SharedAtomicXor32:
+ case Opcode::SharedAtomicExchange32:
+ case Opcode::SharedAtomicExchange64:
+ case Opcode::GlobalAtomicIAdd32:
+ case Opcode::GlobalAtomicSMin32:
+ case Opcode::GlobalAtomicUMin32:
+ case Opcode::GlobalAtomicSMax32:
+ case Opcode::GlobalAtomicUMax32:
+ case Opcode::GlobalAtomicInc32:
+ case Opcode::GlobalAtomicDec32:
+ case Opcode::GlobalAtomicAnd32:
+ case Opcode::GlobalAtomicOr32:
+ case Opcode::GlobalAtomicXor32:
+ case Opcode::GlobalAtomicExchange32:
+ case Opcode::GlobalAtomicIAdd64:
+ case Opcode::GlobalAtomicSMin64:
+ case Opcode::GlobalAtomicUMin64:
+ case Opcode::GlobalAtomicSMax64:
+ case Opcode::GlobalAtomicUMax64:
+ case Opcode::GlobalAtomicAnd64:
+ case Opcode::GlobalAtomicOr64:
+ case Opcode::GlobalAtomicXor64:
+ case Opcode::GlobalAtomicExchange64:
+ case Opcode::GlobalAtomicAddF32:
+ case Opcode::GlobalAtomicAddF16x2:
+ case Opcode::GlobalAtomicAddF32x2:
+ case Opcode::GlobalAtomicMinF16x2:
+ case Opcode::GlobalAtomicMinF32x2:
+ case Opcode::GlobalAtomicMaxF16x2:
+ case Opcode::GlobalAtomicMaxF32x2:
+ case Opcode::StorageAtomicIAdd32:
+ case Opcode::StorageAtomicSMin32:
+ case Opcode::StorageAtomicUMin32:
+ case Opcode::StorageAtomicSMax32:
+ case Opcode::StorageAtomicUMax32:
+ case Opcode::StorageAtomicInc32:
+ case Opcode::StorageAtomicDec32:
+ case Opcode::StorageAtomicAnd32:
+ case Opcode::StorageAtomicOr32:
+ case Opcode::StorageAtomicXor32:
+ case Opcode::StorageAtomicExchange32:
+ case Opcode::StorageAtomicIAdd64:
+ case Opcode::StorageAtomicSMin64:
+ case Opcode::StorageAtomicUMin64:
+ case Opcode::StorageAtomicSMax64:
+ case Opcode::StorageAtomicUMax64:
+ case Opcode::StorageAtomicAnd64:
+ case Opcode::StorageAtomicOr64:
+ case Opcode::StorageAtomicXor64:
+ case Opcode::StorageAtomicExchange64:
+ case Opcode::StorageAtomicAddF32:
+ case Opcode::StorageAtomicAddF16x2:
+ case Opcode::StorageAtomicAddF32x2:
+ case Opcode::StorageAtomicMinF16x2:
+ case Opcode::StorageAtomicMinF32x2:
+ case Opcode::StorageAtomicMaxF16x2:
+ case Opcode::StorageAtomicMaxF32x2:
+ case Opcode::BindlessImageWrite:
+ case Opcode::BoundImageWrite:
+ case Opcode::ImageWrite:
+ case IR::Opcode::BindlessImageAtomicIAdd32:
+ case IR::Opcode::BindlessImageAtomicSMin32:
+ case IR::Opcode::BindlessImageAtomicUMin32:
+ case IR::Opcode::BindlessImageAtomicSMax32:
+ case IR::Opcode::BindlessImageAtomicUMax32:
+ case IR::Opcode::BindlessImageAtomicInc32:
+ case IR::Opcode::BindlessImageAtomicDec32:
+ case IR::Opcode::BindlessImageAtomicAnd32:
+ case IR::Opcode::BindlessImageAtomicOr32:
+ case IR::Opcode::BindlessImageAtomicXor32:
+ case IR::Opcode::BindlessImageAtomicExchange32:
+ case IR::Opcode::BoundImageAtomicIAdd32:
+ case IR::Opcode::BoundImageAtomicSMin32:
+ case IR::Opcode::BoundImageAtomicUMin32:
+ case IR::Opcode::BoundImageAtomicSMax32:
+ case IR::Opcode::BoundImageAtomicUMax32:
+ case IR::Opcode::BoundImageAtomicInc32:
+ case IR::Opcode::BoundImageAtomicDec32:
+ case IR::Opcode::BoundImageAtomicAnd32:
+ case IR::Opcode::BoundImageAtomicOr32:
+ case IR::Opcode::BoundImageAtomicXor32:
+ case IR::Opcode::BoundImageAtomicExchange32:
+ case IR::Opcode::ImageAtomicIAdd32:
+ case IR::Opcode::ImageAtomicSMin32:
+ case IR::Opcode::ImageAtomicUMin32:
+ case IR::Opcode::ImageAtomicSMax32:
+ case IR::Opcode::ImageAtomicUMax32:
+ case IR::Opcode::ImageAtomicInc32:
+ case IR::Opcode::ImageAtomicDec32:
+ case IR::Opcode::ImageAtomicAnd32:
+ case IR::Opcode::ImageAtomicOr32:
+ case IR::Opcode::ImageAtomicXor32:
+ case IR::Opcode::ImageAtomicExchange32:
+ return true;
+ default:
+ return false;
+ }
+}
+
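+// Pseudo-instructions extract secondary results (zero, sign, carry, overflow, sparse and
+// in-bounds flags) from another instruction and are handled specially at final emission.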
+bool Inst::IsPseudoInstruction() const noexcept {
+ switch (op) {
+ case Opcode::GetZeroFromOp:
+ case Opcode::GetSignFromOp:
+ case Opcode::GetCarryFromOp:
+ case Opcode::GetOverflowFromOp:
+ case Opcode::GetSparseFromOp:
+ case Opcode::GetInBoundsFromOp:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool Inst::AreAllArgsImmediates() const {
+ if (op == Opcode::Phi) {
+ throw LogicError("Testing for all arguments are immediates on phi instruction");
+ }
+ return std::all_of(args.begin(), args.begin() + NumArgs(),
+ [](const IR::Value& value) { return value.IsImmediate(); });
+}
+
+Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) {
+ if (!associated_insts) {
+ return nullptr;
+ }
+ switch (opcode) {
+ case Opcode::GetZeroFromOp:
+ CheckPseudoInstruction(associated_insts->zero_inst, Opcode::GetZeroFromOp);
+ return associated_insts->zero_inst;
+ case Opcode::GetSignFromOp:
+ CheckPseudoInstruction(associated_insts->sign_inst, Opcode::GetSignFromOp);
+ return associated_insts->sign_inst;
+ case Opcode::GetCarryFromOp:
+ CheckPseudoInstruction(associated_insts->carry_inst, Opcode::GetCarryFromOp);
+ return associated_insts->carry_inst;
+ case Opcode::GetOverflowFromOp:
+ CheckPseudoInstruction(associated_insts->overflow_inst, Opcode::GetOverflowFromOp);
+ return associated_insts->overflow_inst;
+ case Opcode::GetSparseFromOp:
+ CheckPseudoInstruction(associated_insts->sparse_inst, Opcode::GetSparseFromOp);
+ return associated_insts->sparse_inst;
+ case Opcode::GetInBoundsFromOp:
+ CheckPseudoInstruction(associated_insts->in_bounds_inst, Opcode::GetInBoundsFromOp);
+ return associated_insts->in_bounds_inst;
+ default:
+ throw InvalidArgument("{} is not a pseudo-instruction", opcode);
+ }
+}
+
+IR::Type Inst::Type() const {
+ return TypeOf(op);
+}
+
+void Inst::SetArg(size_t index, Value value) {
+ if (index >= NumArgs()) {
+ throw InvalidArgument("Out of bounds argument index {} in opcode {}", index, op);
+ }
+ const IR::Value arg{Arg(index)};
+ if (!arg.IsImmediate()) {
+ UndoUse(arg);
+ }
+ if (!value.IsImmediate()) {
+ Use(value);
+ }
+ if (op == Opcode::Phi) {
+ phi_args[index].second = value;
+ } else {
+ args[index] = value;
+ }
+}
+
+Block* Inst::PhiBlock(size_t index) const {
+ if (op != Opcode::Phi) {
+ throw LogicError("{} is not a Phi instruction", op);
+ }
+ if (index >= phi_args.size()) {
+ throw InvalidArgument("Out of bounds argument index {} in phi instruction");
+ }
+ return phi_args[index].first;
+}
+
+void Inst::AddPhiOperand(Block* predecessor, const Value& value) {
+ if (!value.IsImmediate()) {
+ Use(value);
+ }
+ phi_args.emplace_back(predecessor, value);
+}
+
+void Inst::Invalidate() {
+ ClearArgs();
+ ReplaceOpcode(Opcode::Void);
+}
+
+void Inst::ClearArgs() {
+ if (op == Opcode::Phi) {
+ for (auto& pair : phi_args) {
+ IR::Value& value{pair.second};
+ if (!value.IsImmediate()) {
+ UndoUse(value);
+ }
+ }
+ phi_args.clear();
+ } else {
+ for (auto& value : args) {
+ if (!value.IsImmediate()) {
+ UndoUse(value);
+ }
+ }
+ // Reset arguments to null
+        // std::memset was measured to be faster on MSVC than std::ranges::fill
+ std::memset(reinterpret_cast<char*>(&args), 0, sizeof(args));
+ }
+}
+
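+// Rewrites this instruction into an Identity so that every existing use is forwarded to the
+// replacement value.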
+void Inst::ReplaceUsesWith(Value replacement) {
+ Invalidate();
+ ReplaceOpcode(Opcode::Identity);
+ if (!replacement.IsImmediate()) {
+ Use(replacement);
+ }
+ args[0] = replacement;
+}
+
+void Inst::ReplaceOpcode(IR::Opcode opcode) {
+ if (opcode == IR::Opcode::Phi) {
+ throw LogicError("Cannot transition into Phi");
+ }
+ if (op == Opcode::Phi) {
+ // Transition out of phi arguments into non-phi
+ std::destroy_at(&phi_args);
+ std::construct_at(&args);
+ }
+ op = opcode;
+}
+
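+// Use/UndoUse maintain the use count of the consumed value and, when this instruction is a
+// pseudo-instruction, register or unregister it on the producing instruction so that
+// GetAssociatedPseudoOperation can find it.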
+void Inst::Use(const Value& value) {
+ Inst* const inst{value.Inst()};
+ ++inst->use_count;
+
+ std::unique_ptr<AssociatedInsts>& assoc_inst{inst->associated_insts};
+ switch (op) {
+ case Opcode::GetZeroFromOp:
+ AllocAssociatedInsts(assoc_inst);
+ SetPseudoInstruction(assoc_inst->zero_inst, this);
+ break;
+ case Opcode::GetSignFromOp:
+ AllocAssociatedInsts(assoc_inst);
+ SetPseudoInstruction(assoc_inst->sign_inst, this);
+ break;
+ case Opcode::GetCarryFromOp:
+ AllocAssociatedInsts(assoc_inst);
+ SetPseudoInstruction(assoc_inst->carry_inst, this);
+ break;
+ case Opcode::GetOverflowFromOp:
+ AllocAssociatedInsts(assoc_inst);
+ SetPseudoInstruction(assoc_inst->overflow_inst, this);
+ break;
+ case Opcode::GetSparseFromOp:
+ AllocAssociatedInsts(assoc_inst);
+ SetPseudoInstruction(assoc_inst->sparse_inst, this);
+ break;
+ case Opcode::GetInBoundsFromOp:
+ AllocAssociatedInsts(assoc_inst);
+ SetPseudoInstruction(assoc_inst->in_bounds_inst, this);
+ break;
+ default:
+ break;
+ }
+}
+
+void Inst::UndoUse(const Value& value) {
+ Inst* const inst{value.Inst()};
+ --inst->use_count;
+
+ std::unique_ptr<AssociatedInsts>& assoc_inst{inst->associated_insts};
+ switch (op) {
+ case Opcode::GetZeroFromOp:
+ AllocAssociatedInsts(assoc_inst);
+ RemovePseudoInstruction(assoc_inst->zero_inst, Opcode::GetZeroFromOp);
+ break;
+ case Opcode::GetSignFromOp:
+ AllocAssociatedInsts(assoc_inst);
+ RemovePseudoInstruction(assoc_inst->sign_inst, Opcode::GetSignFromOp);
+ break;
+ case Opcode::GetCarryFromOp:
+ AllocAssociatedInsts(assoc_inst);
+ RemovePseudoInstruction(assoc_inst->carry_inst, Opcode::GetCarryFromOp);
+ break;
+ case Opcode::GetOverflowFromOp:
+ AllocAssociatedInsts(assoc_inst);
+ RemovePseudoInstruction(assoc_inst->overflow_inst, Opcode::GetOverflowFromOp);
+ break;
+ case Opcode::GetSparseFromOp:
+ AllocAssociatedInsts(assoc_inst);
+ RemovePseudoInstruction(assoc_inst->sparse_inst, Opcode::GetSparseFromOp);
+ break;
+ case Opcode::GetInBoundsFromOp:
+ AllocAssociatedInsts(assoc_inst);
+ RemovePseudoInstruction(assoc_inst->in_bounds_inst, Opcode::GetInBoundsFromOp);
+ break;
+ default:
+ break;
+ }
+}
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h
new file mode 100644
index 000000000..77cda1f8a
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/modifiers.h
@@ -0,0 +1,49 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/shader_info.h"
+
+namespace Shader::IR {
+
+enum class FmzMode : u8 {
+ DontCare, // Not specified for this instruction
+ FTZ, // Flush denorms to zero, NAN is propagated (D3D11, NVN, GL, VK)
+ FMZ, // Flush denorms to zero, x * 0 == 0 (D3D9)
+ None, // Denorms are not flushed, NAN is propagated (nouveau)
+};
+
+enum class FpRounding : u8 {
+ DontCare, // Not specified for this instruction
+    RN,       // Round to nearest even
+ RM, // Round towards negative infinity
+ RP, // Round towards positive infinity
+ RZ, // Round towards zero
+};
+
+struct FpControl {
+ bool no_contraction{false};
+ FpRounding rounding{FpRounding::DontCare};
+ FmzMode fmz_mode{FmzMode::DontCare};
+};
+static_assert(sizeof(FpControl) <= sizeof(u32));
+
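+// Per-instruction texture metadata packed into 32 bits so it fits in the instruction flags word.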
+union TextureInstInfo {
+ u32 raw;
+ BitField<0, 16, u32> descriptor_index;
+ BitField<16, 3, TextureType> type;
+ BitField<19, 1, u32> is_depth;
+ BitField<20, 1, u32> has_bias;
+ BitField<21, 1, u32> has_lod_clamp;
+ BitField<22, 1, u32> relaxed_precision;
+ BitField<23, 2, u32> gather_component;
+ BitField<25, 2, u32> num_derivates;
+ BitField<27, 3, ImageFormat> image_format;
+};
+static_assert(sizeof(TextureInstInfo) <= sizeof(u32));
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/opcodes.cpp b/src/shader_recompiler/frontend/ir/opcodes.cpp
new file mode 100644
index 000000000..24d024ad7
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/opcodes.cpp
@@ -0,0 +1,15 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+
+#include "shader_recompiler/frontend/ir/opcodes.h"
+
+namespace Shader::IR {
+
+std::string_view NameOf(Opcode op) {
+ return Detail::META_TABLE[static_cast<size_t>(op)].name;
+}
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/opcodes.h b/src/shader_recompiler/frontend/ir/opcodes.h
new file mode 100644
index 000000000..9ab108292
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/opcodes.h
@@ -0,0 +1,110 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <algorithm>
+#include <array>
+#include <string_view>
+
+#include <fmt/format.h>
+
+#include "shader_recompiler/frontend/ir/type.h"
+
+namespace Shader::IR {
+
+enum class Opcode {
+#define OPCODE(name, ...) name,
+#include "opcodes.inc"
+#undef OPCODE
+};
+
+namespace Detail {
+struct OpcodeMeta {
+ std::string_view name;
+ Type type;
+ std::array<Type, 5> arg_types;
+};
+
+// Aliases for the Type enumerators used in the tables below, in place of "using enum Type".
+constexpr Type Void{Type::Void};
+constexpr Type Opaque{Type::Opaque};
+constexpr Type Reg{Type::Reg};
+constexpr Type Pred{Type::Pred};
+constexpr Type Attribute{Type::Attribute};
+constexpr Type Patch{Type::Patch};
+constexpr Type U1{Type::U1};
+constexpr Type U8{Type::U8};
+constexpr Type U16{Type::U16};
+constexpr Type U32{Type::U32};
+constexpr Type U64{Type::U64};
+constexpr Type F16{Type::F16};
+constexpr Type F32{Type::F32};
+constexpr Type F64{Type::F64};
+constexpr Type U32x2{Type::U32x2};
+constexpr Type U32x3{Type::U32x3};
+constexpr Type U32x4{Type::U32x4};
+constexpr Type F16x2{Type::F16x2};
+constexpr Type F16x3{Type::F16x3};
+constexpr Type F16x4{Type::F16x4};
+constexpr Type F32x2{Type::F32x2};
+constexpr Type F32x3{Type::F32x3};
+constexpr Type F32x4{Type::F32x4};
+constexpr Type F64x2{Type::F64x2};
+constexpr Type F64x3{Type::F64x3};
+constexpr Type F64x4{Type::F64x4};
+
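+// One entry per opcode, expanded from opcodes.inc through the OPCODE X-macro; Type::Void in
+// arg_types marks the end of the argument list for CalculateNumArgsOf.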
+constexpr OpcodeMeta META_TABLE[]{
+#define OPCODE(name_token, type_token, ...) \
+ { \
+ .name{#name_token}, \
+ .type = type_token, \
+ .arg_types{__VA_ARGS__}, \
+ },
+#include "opcodes.inc"
+#undef OPCODE
+};
+constexpr size_t CalculateNumArgsOf(Opcode op) {
+ const auto& arg_types{META_TABLE[static_cast<size_t>(op)].arg_types};
+ return static_cast<size_t>(
+ std::distance(arg_types.begin(), std::ranges::find(arg_types, Type::Void)));
+}
+
+constexpr u8 NUM_ARGS[]{
+#define OPCODE(name_token, type_token, ...) static_cast<u8>(CalculateNumArgsOf(Opcode::name_token)),
+#include "opcodes.inc"
+#undef OPCODE
+};
+} // namespace Detail
+
+/// Get return type of an opcode
+[[nodiscard]] inline Type TypeOf(Opcode op) noexcept {
+ return Detail::META_TABLE[static_cast<size_t>(op)].type;
+}
+
+/// Get the number of arguments an opcode accepts
+[[nodiscard]] inline size_t NumArgsOf(Opcode op) noexcept {
+ return static_cast<size_t>(Detail::NUM_ARGS[static_cast<size_t>(op)]);
+}
+
+/// Get the required type of an argument of an opcode
+[[nodiscard]] inline Type ArgTypeOf(Opcode op, size_t arg_index) noexcept {
+ return Detail::META_TABLE[static_cast<size_t>(op)].arg_types[arg_index];
+}
+
+/// Get the name of an opcode
+[[nodiscard]] std::string_view NameOf(Opcode op);
+
+} // namespace Shader::IR
+
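+// Allows IR::Opcode to be formatted directly by fmt, e.g. in exception messages.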
+template <>
+struct fmt::formatter<Shader::IR::Opcode> {
+ constexpr auto parse(format_parse_context& ctx) {
+ return ctx.begin();
+ }
+ template <typename FormatContext>
+ auto format(const Shader::IR::Opcode& op, FormatContext& ctx) {
+ return format_to(ctx.out(), "{}", Shader::IR::NameOf(op));
+ }
+};
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
new file mode 100644
index 000000000..d91098c80
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -0,0 +1,550 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+// opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, arg5 type, ...
+OPCODE(Phi, Opaque, )
+OPCODE(Identity, Opaque, Opaque, )
+OPCODE(Void, Void, )
+OPCODE(ConditionRef, U1, U1, )
+OPCODE(Reference, Void, Opaque, )
+OPCODE(PhiMove, Void, Opaque, Opaque, )
+
+// Special operations
+OPCODE(Prologue, Void, )
+OPCODE(Epilogue, Void, )
+OPCODE(Join, Void, )
+OPCODE(DemoteToHelperInvocation, Void, )
+OPCODE(EmitVertex, Void, U32, )
+OPCODE(EndPrimitive, Void, U32, )
+
+// Barriers
+OPCODE(Barrier, Void, )
+OPCODE(WorkgroupMemoryBarrier, Void, )
+OPCODE(DeviceMemoryBarrier, Void, )
+
+// Context getters/setters
+OPCODE(GetRegister, U32, Reg, )
+OPCODE(SetRegister, Void, Reg, U32, )
+OPCODE(GetPred, U1, Pred, )
+OPCODE(SetPred, Void, Pred, U1, )
+OPCODE(GetGotoVariable, U1, U32, )
+OPCODE(SetGotoVariable, Void, U32, U1, )
+OPCODE(GetIndirectBranchVariable, U32, )
+OPCODE(SetIndirectBranchVariable, Void, U32, )
+OPCODE(GetCbufU8, U32, U32, U32, )
+OPCODE(GetCbufS8, U32, U32, U32, )
+OPCODE(GetCbufU16, U32, U32, U32, )
+OPCODE(GetCbufS16, U32, U32, U32, )
+OPCODE(GetCbufU32, U32, U32, U32, )
+OPCODE(GetCbufF32, F32, U32, U32, )
+OPCODE(GetCbufU32x2, U32x2, U32, U32, )
+OPCODE(GetAttribute, F32, Attribute, U32, )
+OPCODE(SetAttribute, Void, Attribute, F32, U32, )
+OPCODE(GetAttributeIndexed, F32, U32, U32, )
+OPCODE(SetAttributeIndexed, Void, U32, F32, U32, )
+OPCODE(GetPatch, F32, Patch, )
+OPCODE(SetPatch, Void, Patch, F32, )
+OPCODE(SetFragColor, Void, U32, U32, F32, )
+OPCODE(SetSampleMask, Void, U32, )
+OPCODE(SetFragDepth, Void, F32, )
+OPCODE(GetZFlag, U1, Void, )
+OPCODE(GetSFlag, U1, Void, )
+OPCODE(GetCFlag, U1, Void, )
+OPCODE(GetOFlag, U1, Void, )
+OPCODE(SetZFlag, Void, U1, )
+OPCODE(SetSFlag, Void, U1, )
+OPCODE(SetCFlag, Void, U1, )
+OPCODE(SetOFlag, Void, U1, )
+OPCODE(WorkgroupId, U32x3, )
+OPCODE(LocalInvocationId, U32x3, )
+OPCODE(InvocationId, U32, )
+OPCODE(SampleId, U32, )
+OPCODE(IsHelperInvocation, U1, )
+OPCODE(YDirection, F32, )
+
+// Undefined
+OPCODE(UndefU1, U1, )
+OPCODE(UndefU8, U8, )
+OPCODE(UndefU16, U16, )
+OPCODE(UndefU32, U32, )
+OPCODE(UndefU64, U64, )
+
+// Memory operations
+OPCODE(LoadGlobalU8, U32, Opaque, )
+OPCODE(LoadGlobalS8, U32, Opaque, )
+OPCODE(LoadGlobalU16, U32, Opaque, )
+OPCODE(LoadGlobalS16, U32, Opaque, )
+OPCODE(LoadGlobal32, U32, Opaque, )
+OPCODE(LoadGlobal64, U32x2, Opaque, )
+OPCODE(LoadGlobal128, U32x4, Opaque, )
+OPCODE(WriteGlobalU8, Void, Opaque, U32, )
+OPCODE(WriteGlobalS8, Void, Opaque, U32, )
+OPCODE(WriteGlobalU16, Void, Opaque, U32, )
+OPCODE(WriteGlobalS16, Void, Opaque, U32, )
+OPCODE(WriteGlobal32, Void, Opaque, U32, )
+OPCODE(WriteGlobal64, Void, Opaque, U32x2, )
+OPCODE(WriteGlobal128, Void, Opaque, U32x4, )
+
+// Storage buffer operations
+OPCODE(LoadStorageU8, U32, U32, U32, )
+OPCODE(LoadStorageS8, U32, U32, U32, )
+OPCODE(LoadStorageU16, U32, U32, U32, )
+OPCODE(LoadStorageS16, U32, U32, U32, )
+OPCODE(LoadStorage32, U32, U32, U32, )
+OPCODE(LoadStorage64, U32x2, U32, U32, )
+OPCODE(LoadStorage128, U32x4, U32, U32, )
+OPCODE(WriteStorageU8, Void, U32, U32, U32, )
+OPCODE(WriteStorageS8, Void, U32, U32, U32, )
+OPCODE(WriteStorageU16, Void, U32, U32, U32, )
+OPCODE(WriteStorageS16, Void, U32, U32, U32, )
+OPCODE(WriteStorage32, Void, U32, U32, U32, )
+OPCODE(WriteStorage64, Void, U32, U32, U32x2, )
+OPCODE(WriteStorage128, Void, U32, U32, U32x4, )
+
+// Local memory operations
+OPCODE(LoadLocal, U32, U32, )
+OPCODE(WriteLocal, Void, U32, U32, )
+
+// Shared memory operations
+OPCODE(LoadSharedU8, U32, U32, )
+OPCODE(LoadSharedS8, U32, U32, )
+OPCODE(LoadSharedU16, U32, U32, )
+OPCODE(LoadSharedS16, U32, U32, )
+OPCODE(LoadSharedU32, U32, U32, )
+OPCODE(LoadSharedU64, U32x2, U32, )
+OPCODE(LoadSharedU128, U32x4, U32, )
+OPCODE(WriteSharedU8, Void, U32, U32, )
+OPCODE(WriteSharedU16, Void, U32, U32, )
+OPCODE(WriteSharedU32, Void, U32, U32, )
+OPCODE(WriteSharedU64, Void, U32, U32x2, )
+OPCODE(WriteSharedU128, Void, U32, U32x4, )
+
+// Vector utility
+OPCODE(CompositeConstructU32x2, U32x2, U32, U32, )
+OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, )
+OPCODE(CompositeConstructU32x4, U32x4, U32, U32, U32, U32, )
+OPCODE(CompositeExtractU32x2, U32, U32x2, U32, )
+OPCODE(CompositeExtractU32x3, U32, U32x3, U32, )
+OPCODE(CompositeExtractU32x4, U32, U32x4, U32, )
+OPCODE(CompositeInsertU32x2, U32x2, U32x2, U32, U32, )
+OPCODE(CompositeInsertU32x3, U32x3, U32x3, U32, U32, )
+OPCODE(CompositeInsertU32x4, U32x4, U32x4, U32, U32, )
+OPCODE(CompositeConstructF16x2, F16x2, F16, F16, )
+OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, )
+OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, )
+OPCODE(CompositeExtractF16x2, F16, F16x2, U32, )
+OPCODE(CompositeExtractF16x3, F16, F16x3, U32, )
+OPCODE(CompositeExtractF16x4, F16, F16x4, U32, )
+OPCODE(CompositeInsertF16x2, F16x2, F16x2, F16, U32, )
+OPCODE(CompositeInsertF16x3, F16x3, F16x3, F16, U32, )
+OPCODE(CompositeInsertF16x4, F16x4, F16x4, F16, U32, )
+OPCODE(CompositeConstructF32x2, F32x2, F32, F32, )
+OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, )
+OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, )
+OPCODE(CompositeExtractF32x2, F32, F32x2, U32, )
+OPCODE(CompositeExtractF32x3, F32, F32x3, U32, )
+OPCODE(CompositeExtractF32x4, F32, F32x4, U32, )
+OPCODE(CompositeInsertF32x2, F32x2, F32x2, F32, U32, )
+OPCODE(CompositeInsertF32x3, F32x3, F32x3, F32, U32, )
+OPCODE(CompositeInsertF32x4, F32x4, F32x4, F32, U32, )
+OPCODE(CompositeConstructF64x2, F64x2, F64, F64, )
+OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, )
+OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, )
+OPCODE(CompositeExtractF64x2, F64, F64x2, U32, )
+OPCODE(CompositeExtractF64x3, F64, F64x3, U32, )
+OPCODE(CompositeExtractF64x4, F64, F64x4, U32, )
+OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, )
+OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, )
+OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, )
+
+// Select operations
+OPCODE(SelectU1, U1, U1, U1, U1, )
+OPCODE(SelectU8, U8, U1, U8, U8, )
+OPCODE(SelectU16, U16, U1, U16, U16, )
+OPCODE(SelectU32, U32, U1, U32, U32, )
+OPCODE(SelectU64, U64, U1, U64, U64, )
+OPCODE(SelectF16, F16, U1, F16, F16, )
+OPCODE(SelectF32, F32, U1, F32, F32, )
+OPCODE(SelectF64, F64, U1, F64, F64, )
+
+// Bitwise conversions
+OPCODE(BitCastU16F16, U16, F16, )
+OPCODE(BitCastU32F32, U32, F32, )
+OPCODE(BitCastU64F64, U64, F64, )
+OPCODE(BitCastF16U16, F16, U16, )
+OPCODE(BitCastF32U32, F32, U32, )
+OPCODE(BitCastF64U64, F64, U64, )
+OPCODE(PackUint2x32, U64, U32x2, )
+OPCODE(UnpackUint2x32, U32x2, U64, )
+OPCODE(PackFloat2x16, U32, F16x2, )
+OPCODE(UnpackFloat2x16, F16x2, U32, )
+OPCODE(PackHalf2x16, U32, F32x2, )
+OPCODE(UnpackHalf2x16, F32x2, U32, )
+OPCODE(PackDouble2x32, F64, U32x2, )
+OPCODE(UnpackDouble2x32, U32x2, F64, )
+
+// Pseudo-operation, handled specially at final emit
+OPCODE(GetZeroFromOp, U1, Opaque, )
+OPCODE(GetSignFromOp, U1, Opaque, )
+OPCODE(GetCarryFromOp, U1, Opaque, )
+OPCODE(GetOverflowFromOp, U1, Opaque, )
+OPCODE(GetSparseFromOp, U1, Opaque, )
+OPCODE(GetInBoundsFromOp, U1, Opaque, )
+
+// Floating-point operations
+OPCODE(FPAbs16, F16, F16, )
+OPCODE(FPAbs32, F32, F32, )
+OPCODE(FPAbs64, F64, F64, )
+OPCODE(FPAdd16, F16, F16, F16, )
+OPCODE(FPAdd32, F32, F32, F32, )
+OPCODE(FPAdd64, F64, F64, F64, )
+OPCODE(FPFma16, F16, F16, F16, F16, )
+OPCODE(FPFma32, F32, F32, F32, F32, )
+OPCODE(FPFma64, F64, F64, F64, F64, )
+OPCODE(FPMax32, F32, F32, F32, )
+OPCODE(FPMax64, F64, F64, F64, )
+OPCODE(FPMin32, F32, F32, F32, )
+OPCODE(FPMin64, F64, F64, F64, )
+OPCODE(FPMul16, F16, F16, F16, )
+OPCODE(FPMul32, F32, F32, F32, )
+OPCODE(FPMul64, F64, F64, F64, )
+OPCODE(FPNeg16, F16, F16, )
+OPCODE(FPNeg32, F32, F32, )
+OPCODE(FPNeg64, F64, F64, )
+OPCODE(FPRecip32, F32, F32, )
+OPCODE(FPRecip64, F64, F64, )
+OPCODE(FPRecipSqrt32, F32, F32, )
+OPCODE(FPRecipSqrt64, F64, F64, )
+OPCODE(FPSqrt, F32, F32, )
+OPCODE(FPSin, F32, F32, )
+OPCODE(FPExp2, F32, F32, )
+OPCODE(FPCos, F32, F32, )
+OPCODE(FPLog2, F32, F32, )
+OPCODE(FPSaturate16, F16, F16, )
+OPCODE(FPSaturate32, F32, F32, )
+OPCODE(FPSaturate64, F64, F64, )
+OPCODE(FPClamp16, F16, F16, F16, F16, )
+OPCODE(FPClamp32, F32, F32, F32, F32, )
+OPCODE(FPClamp64, F64, F64, F64, F64, )
+OPCODE(FPRoundEven16, F16, F16, )
+OPCODE(FPRoundEven32, F32, F32, )
+OPCODE(FPRoundEven64, F64, F64, )
+OPCODE(FPFloor16, F16, F16, )
+OPCODE(FPFloor32, F32, F32, )
+OPCODE(FPFloor64, F64, F64, )
+OPCODE(FPCeil16, F16, F16, )
+OPCODE(FPCeil32, F32, F32, )
+OPCODE(FPCeil64, F64, F64, )
+OPCODE(FPTrunc16, F16, F16, )
+OPCODE(FPTrunc32, F32, F32, )
+OPCODE(FPTrunc64, F64, F64, )
+
+OPCODE(FPOrdEqual16, U1, F16, F16, )
+OPCODE(FPOrdEqual32, U1, F32, F32, )
+OPCODE(FPOrdEqual64, U1, F64, F64, )
+OPCODE(FPUnordEqual16, U1, F16, F16, )
+OPCODE(FPUnordEqual32, U1, F32, F32, )
+OPCODE(FPUnordEqual64, U1, F64, F64, )
+OPCODE(FPOrdNotEqual16, U1, F16, F16, )
+OPCODE(FPOrdNotEqual32, U1, F32, F32, )
+OPCODE(FPOrdNotEqual64, U1, F64, F64, )
+OPCODE(FPUnordNotEqual16, U1, F16, F16, )
+OPCODE(FPUnordNotEqual32, U1, F32, F32, )
+OPCODE(FPUnordNotEqual64, U1, F64, F64, )
+OPCODE(FPOrdLessThan16, U1, F16, F16, )
+OPCODE(FPOrdLessThan32, U1, F32, F32, )
+OPCODE(FPOrdLessThan64, U1, F64, F64, )
+OPCODE(FPUnordLessThan16, U1, F16, F16, )
+OPCODE(FPUnordLessThan32, U1, F32, F32, )
+OPCODE(FPUnordLessThan64, U1, F64, F64, )
+OPCODE(FPOrdGreaterThan16, U1, F16, F16, )
+OPCODE(FPOrdGreaterThan32, U1, F32, F32, )
+OPCODE(FPOrdGreaterThan64, U1, F64, F64, )
+OPCODE(FPUnordGreaterThan16, U1, F16, F16, )
+OPCODE(FPUnordGreaterThan32, U1, F32, F32, )
+OPCODE(FPUnordGreaterThan64, U1, F64, F64, )
+OPCODE(FPOrdLessThanEqual16, U1, F16, F16, )
+OPCODE(FPOrdLessThanEqual32, U1, F32, F32, )
+OPCODE(FPOrdLessThanEqual64, U1, F64, F64, )
+OPCODE(FPUnordLessThanEqual16, U1, F16, F16, )
+OPCODE(FPUnordLessThanEqual32, U1, F32, F32, )
+OPCODE(FPUnordLessThanEqual64, U1, F64, F64, )
+OPCODE(FPOrdGreaterThanEqual16, U1, F16, F16, )
+OPCODE(FPOrdGreaterThanEqual32, U1, F32, F32, )
+OPCODE(FPOrdGreaterThanEqual64, U1, F64, F64, )
+OPCODE(FPUnordGreaterThanEqual16, U1, F16, F16, )
+OPCODE(FPUnordGreaterThanEqual32, U1, F32, F32, )
+OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, )
+OPCODE(FPIsNan16, U1, F16, )
+OPCODE(FPIsNan32, U1, F32, )
+OPCODE(FPIsNan64, U1, F64, )
+
+// Integer operations
+OPCODE(IAdd32, U32, U32, U32, )
+OPCODE(IAdd64, U64, U64, U64, )
+OPCODE(ISub32, U32, U32, U32, )
+OPCODE(ISub64, U64, U64, U64, )
+OPCODE(IMul32, U32, U32, U32, )
+OPCODE(INeg32, U32, U32, )
+OPCODE(INeg64, U64, U64, )
+OPCODE(IAbs32, U32, U32, )
+OPCODE(ShiftLeftLogical32, U32, U32, U32, )
+OPCODE(ShiftLeftLogical64, U64, U64, U32, )
+OPCODE(ShiftRightLogical32, U32, U32, U32, )
+OPCODE(ShiftRightLogical64, U64, U64, U32, )
+OPCODE(ShiftRightArithmetic32, U32, U32, U32, )
+OPCODE(ShiftRightArithmetic64, U64, U64, U32, )
+OPCODE(BitwiseAnd32, U32, U32, U32, )
+OPCODE(BitwiseOr32, U32, U32, U32, )
+OPCODE(BitwiseXor32, U32, U32, U32, )
+OPCODE(BitFieldInsert, U32, U32, U32, U32, U32, )
+OPCODE(BitFieldSExtract, U32, U32, U32, U32, )
+OPCODE(BitFieldUExtract, U32, U32, U32, U32, )
+OPCODE(BitReverse32, U32, U32, )
+OPCODE(BitCount32, U32, U32, )
+OPCODE(BitwiseNot32, U32, U32, )
+
+OPCODE(FindSMsb32, U32, U32, )
+OPCODE(FindUMsb32, U32, U32, )
+OPCODE(SMin32, U32, U32, U32, )
+OPCODE(UMin32, U32, U32, U32, )
+OPCODE(SMax32, U32, U32, U32, )
+OPCODE(UMax32, U32, U32, U32, )
+OPCODE(SClamp32, U32, U32, U32, U32, )
+OPCODE(UClamp32, U32, U32, U32, U32, )
+OPCODE(SLessThan, U1, U32, U32, )
+OPCODE(ULessThan, U1, U32, U32, )
+OPCODE(IEqual, U1, U32, U32, )
+OPCODE(SLessThanEqual, U1, U32, U32, )
+OPCODE(ULessThanEqual, U1, U32, U32, )
+OPCODE(SGreaterThan, U1, U32, U32, )
+OPCODE(UGreaterThan, U1, U32, U32, )
+OPCODE(INotEqual, U1, U32, U32, )
+OPCODE(SGreaterThanEqual, U1, U32, U32, )
+OPCODE(UGreaterThanEqual, U1, U32, U32, )
+
+// Atomic operations
+OPCODE(SharedAtomicIAdd32, U32, U32, U32, )
+OPCODE(SharedAtomicSMin32, U32, U32, U32, )
+OPCODE(SharedAtomicUMin32, U32, U32, U32, )
+OPCODE(SharedAtomicSMax32, U32, U32, U32, )
+OPCODE(SharedAtomicUMax32, U32, U32, U32, )
+OPCODE(SharedAtomicInc32, U32, U32, U32, )
+OPCODE(SharedAtomicDec32, U32, U32, U32, )
+OPCODE(SharedAtomicAnd32, U32, U32, U32, )
+OPCODE(SharedAtomicOr32, U32, U32, U32, )
+OPCODE(SharedAtomicXor32, U32, U32, U32, )
+OPCODE(SharedAtomicExchange32, U32, U32, U32, )
+OPCODE(SharedAtomicExchange64, U64, U32, U64, )
+
+OPCODE(GlobalAtomicIAdd32, U32, U64, U32, )
+OPCODE(GlobalAtomicSMin32, U32, U64, U32, )
+OPCODE(GlobalAtomicUMin32, U32, U64, U32, )
+OPCODE(GlobalAtomicSMax32, U32, U64, U32, )
+OPCODE(GlobalAtomicUMax32, U32, U64, U32, )
+OPCODE(GlobalAtomicInc32, U32, U64, U32, )
+OPCODE(GlobalAtomicDec32, U32, U64, U32, )
+OPCODE(GlobalAtomicAnd32, U32, U64, U32, )
+OPCODE(GlobalAtomicOr32, U32, U64, U32, )
+OPCODE(GlobalAtomicXor32, U32, U64, U32, )
+OPCODE(GlobalAtomicExchange32, U32, U64, U32, )
+OPCODE(GlobalAtomicIAdd64, U64, U64, U64, )
+OPCODE(GlobalAtomicSMin64, U64, U64, U64, )
+OPCODE(GlobalAtomicUMin64, U64, U64, U64, )
+OPCODE(GlobalAtomicSMax64, U64, U64, U64, )
+OPCODE(GlobalAtomicUMax64, U64, U64, U64, )
+OPCODE(GlobalAtomicAnd64, U64, U64, U64, )
+OPCODE(GlobalAtomicOr64, U64, U64, U64, )
+OPCODE(GlobalAtomicXor64, U64, U64, U64, )
+OPCODE(GlobalAtomicExchange64, U64, U64, U64, )
+OPCODE(GlobalAtomicAddF32, F32, U64, F32, )
+OPCODE(GlobalAtomicAddF16x2, U32, U64, F16x2, )
+OPCODE(GlobalAtomicAddF32x2, U32, U64, F32x2, )
+OPCODE(GlobalAtomicMinF16x2, U32, U64, F16x2, )
+OPCODE(GlobalAtomicMinF32x2, U32, U64, F32x2, )
+OPCODE(GlobalAtomicMaxF16x2, U32, U64, F16x2, )
+OPCODE(GlobalAtomicMaxF32x2, U32, U64, F32x2, )
+
+OPCODE(StorageAtomicIAdd32, U32, U32, U32, U32, )
+OPCODE(StorageAtomicSMin32, U32, U32, U32, U32, )
+OPCODE(StorageAtomicUMin32, U32, U32, U32, U32, )
+OPCODE(StorageAtomicSMax32, U32, U32, U32, U32, )
+OPCODE(StorageAtomicUMax32, U32, U32, U32, U32, )
+OPCODE(StorageAtomicInc32, U32, U32, U32, U32, )
+OPCODE(StorageAtomicDec32, U32, U32, U32, U32, )
+OPCODE(StorageAtomicAnd32, U32, U32, U32, U32, )
+OPCODE(StorageAtomicOr32, U32, U32, U32, U32, )
+OPCODE(StorageAtomicXor32, U32, U32, U32, U32, )
+OPCODE(StorageAtomicExchange32, U32, U32, U32, U32, )
+OPCODE(StorageAtomicIAdd64, U64, U32, U32, U64, )
+OPCODE(StorageAtomicSMin64, U64, U32, U32, U64, )
+OPCODE(StorageAtomicUMin64, U64, U32, U32, U64, )
+OPCODE(StorageAtomicSMax64, U64, U32, U32, U64, )
+OPCODE(StorageAtomicUMax64, U64, U32, U32, U64, )
+OPCODE(StorageAtomicAnd64, U64, U32, U32, U64, )
+OPCODE(StorageAtomicOr64, U64, U32, U32, U64, )
+OPCODE(StorageAtomicXor64, U64, U32, U32, U64, )
+OPCODE(StorageAtomicExchange64, U64, U32, U32, U64, )
+OPCODE(StorageAtomicAddF32, F32, U32, U32, F32, )
+OPCODE(StorageAtomicAddF16x2, U32, U32, U32, F16x2, )
+OPCODE(StorageAtomicAddF32x2, U32, U32, U32, F32x2, )
+OPCODE(StorageAtomicMinF16x2, U32, U32, U32, F16x2, )
+OPCODE(StorageAtomicMinF32x2, U32, U32, U32, F32x2, )
+OPCODE(StorageAtomicMaxF16x2, U32, U32, U32, F16x2, )
+OPCODE(StorageAtomicMaxF32x2, U32, U32, U32, F32x2, )
+
+// Logical operations
+OPCODE(LogicalOr, U1, U1, U1, )
+OPCODE(LogicalAnd, U1, U1, U1, )
+OPCODE(LogicalXor, U1, U1, U1, )
+OPCODE(LogicalNot, U1, U1, )
+
+// Conversion operations
+OPCODE(ConvertS16F16, U32, F16, )
+OPCODE(ConvertS16F32, U32, F32, )
+OPCODE(ConvertS16F64, U32, F64, )
+OPCODE(ConvertS32F16, U32, F16, )
+OPCODE(ConvertS32F32, U32, F32, )
+OPCODE(ConvertS32F64, U32, F64, )
+OPCODE(ConvertS64F16, U64, F16, )
+OPCODE(ConvertS64F32, U64, F32, )
+OPCODE(ConvertS64F64, U64, F64, )
+OPCODE(ConvertU16F16, U32, F16, )
+OPCODE(ConvertU16F32, U32, F32, )
+OPCODE(ConvertU16F64, U32, F64, )
+OPCODE(ConvertU32F16, U32, F16, )
+OPCODE(ConvertU32F32, U32, F32, )
+OPCODE(ConvertU32F64, U32, F64, )
+OPCODE(ConvertU64F16, U64, F16, )
+OPCODE(ConvertU64F32, U64, F32, )
+OPCODE(ConvertU64F64, U64, F64, )
+OPCODE(ConvertU64U32, U64, U32, )
+OPCODE(ConvertU32U64, U32, U64, )
+OPCODE(ConvertF16F32, F16, F32, )
+OPCODE(ConvertF32F16, F32, F16, )
+OPCODE(ConvertF32F64, F32, F64, )
+OPCODE(ConvertF64F32, F64, F32, )
+OPCODE(ConvertF16S8, F16, U32, )
+OPCODE(ConvertF16S16, F16, U32, )
+OPCODE(ConvertF16S32, F16, U32, )
+OPCODE(ConvertF16S64, F16, U64, )
+OPCODE(ConvertF16U8, F16, U32, )
+OPCODE(ConvertF16U16, F16, U32, )
+OPCODE(ConvertF16U32, F16, U32, )
+OPCODE(ConvertF16U64, F16, U64, )
+OPCODE(ConvertF32S8, F32, U32, )
+OPCODE(ConvertF32S16, F32, U32, )
+OPCODE(ConvertF32S32, F32, U32, )
+OPCODE(ConvertF32S64, F32, U64, )
+OPCODE(ConvertF32U8, F32, U32, )
+OPCODE(ConvertF32U16, F32, U32, )
+OPCODE(ConvertF32U32, F32, U32, )
+OPCODE(ConvertF32U64, F32, U64, )
+OPCODE(ConvertF64S8, F64, U32, )
+OPCODE(ConvertF64S16, F64, U32, )
+OPCODE(ConvertF64S32, F64, U32, )
+OPCODE(ConvertF64S64, F64, U64, )
+OPCODE(ConvertF64U8, F64, U32, )
+OPCODE(ConvertF64U16, F64, U32, )
+OPCODE(ConvertF64U32, F64, U32, )
+OPCODE(ConvertF64U64, F64, U64, )
+
+// Image operations
+OPCODE(BindlessImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, )
+OPCODE(BindlessImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, )
+OPCODE(BindlessImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, )
+OPCODE(BindlessImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, )
+OPCODE(BindlessImageGather, F32x4, U32, Opaque, Opaque, Opaque, )
+OPCODE(BindlessImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, )
+OPCODE(BindlessImageFetch, F32x4, U32, Opaque, Opaque, U32, Opaque, )
+OPCODE(BindlessImageQueryDimensions, U32x4, U32, U32, )
+OPCODE(BindlessImageQueryLod, F32x4, U32, Opaque, )
+OPCODE(BindlessImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, )
+OPCODE(BindlessImageRead, U32x4, U32, Opaque, )
+OPCODE(BindlessImageWrite, Void, U32, Opaque, U32x4, )
+
+OPCODE(BoundImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, )
+OPCODE(BoundImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, )
+OPCODE(BoundImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, )
+OPCODE(BoundImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, )
+OPCODE(BoundImageGather, F32x4, U32, Opaque, Opaque, Opaque, )
+OPCODE(BoundImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, )
+OPCODE(BoundImageFetch, F32x4, U32, Opaque, Opaque, U32, Opaque, )
+OPCODE(BoundImageQueryDimensions, U32x4, U32, U32, )
+OPCODE(BoundImageQueryLod, F32x4, U32, Opaque, )
+OPCODE(BoundImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, )
+OPCODE(BoundImageRead, U32x4, U32, Opaque, )
+OPCODE(BoundImageWrite, Void, U32, Opaque, U32x4, )
+
+OPCODE(ImageSampleImplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, )
+OPCODE(ImageSampleExplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, )
+OPCODE(ImageSampleDrefImplicitLod, F32, Opaque, Opaque, F32, Opaque, Opaque, )
+OPCODE(ImageSampleDrefExplicitLod, F32, Opaque, Opaque, F32, Opaque, Opaque, )
+OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, Opaque, )
+OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, Opaque, F32, )
+OPCODE(ImageFetch, F32x4, Opaque, Opaque, Opaque, U32, Opaque, )
+OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, )
+OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, )
+OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, )
+OPCODE(ImageRead, U32x4, Opaque, Opaque, )
+OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, )
+
+// Atomic Image operations
+
+OPCODE(BindlessImageAtomicIAdd32, U32, U32, Opaque, U32, )
+OPCODE(BindlessImageAtomicSMin32, U32, U32, Opaque, U32, )
+OPCODE(BindlessImageAtomicUMin32, U32, U32, Opaque, U32, )
+OPCODE(BindlessImageAtomicSMax32, U32, U32, Opaque, U32, )
+OPCODE(BindlessImageAtomicUMax32, U32, U32, Opaque, U32, )
+OPCODE(BindlessImageAtomicInc32, U32, U32, Opaque, U32, )
+OPCODE(BindlessImageAtomicDec32, U32, U32, Opaque, U32, )
+OPCODE(BindlessImageAtomicAnd32, U32, U32, Opaque, U32, )
+OPCODE(BindlessImageAtomicOr32, U32, U32, Opaque, U32, )
+OPCODE(BindlessImageAtomicXor32, U32, U32, Opaque, U32, )
+OPCODE(BindlessImageAtomicExchange32, U32, U32, Opaque, U32, )
+
+OPCODE(BoundImageAtomicIAdd32, U32, U32, Opaque, U32, )
+OPCODE(BoundImageAtomicSMin32, U32, U32, Opaque, U32, )
+OPCODE(BoundImageAtomicUMin32, U32, U32, Opaque, U32, )
+OPCODE(BoundImageAtomicSMax32, U32, U32, Opaque, U32, )
+OPCODE(BoundImageAtomicUMax32, U32, U32, Opaque, U32, )
+OPCODE(BoundImageAtomicInc32, U32, U32, Opaque, U32, )
+OPCODE(BoundImageAtomicDec32, U32, U32, Opaque, U32, )
+OPCODE(BoundImageAtomicAnd32, U32, U32, Opaque, U32, )
+OPCODE(BoundImageAtomicOr32, U32, U32, Opaque, U32, )
+OPCODE(BoundImageAtomicXor32, U32, U32, Opaque, U32, )
+OPCODE(BoundImageAtomicExchange32, U32, U32, Opaque, U32, )
+
+OPCODE(ImageAtomicIAdd32, U32, Opaque, Opaque, U32, )
+OPCODE(ImageAtomicSMin32, U32, Opaque, Opaque, U32, )
+OPCODE(ImageAtomicUMin32, U32, Opaque, Opaque, U32, )
+OPCODE(ImageAtomicSMax32, U32, Opaque, Opaque, U32, )
+OPCODE(ImageAtomicUMax32, U32, Opaque, Opaque, U32, )
+OPCODE(ImageAtomicInc32, U32, Opaque, Opaque, U32, )
+OPCODE(ImageAtomicDec32, U32, Opaque, Opaque, U32, )
+OPCODE(ImageAtomicAnd32, U32, Opaque, Opaque, U32, )
+OPCODE(ImageAtomicOr32, U32, Opaque, Opaque, U32, )
+OPCODE(ImageAtomicXor32, U32, Opaque, Opaque, U32, )
+OPCODE(ImageAtomicExchange32, U32, Opaque, Opaque, U32, )
+
+// Warp operations
+OPCODE(LaneId, U32, )
+OPCODE(VoteAll, U1, U1, )
+OPCODE(VoteAny, U1, U1, )
+OPCODE(VoteEqual, U1, U1, )
+OPCODE(SubgroupBallot, U32, U1, )
+OPCODE(SubgroupEqMask, U32, )
+OPCODE(SubgroupLtMask, U32, )
+OPCODE(SubgroupLeMask, U32, )
+OPCODE(SubgroupGtMask, U32, )
+OPCODE(SubgroupGeMask, U32, )
+OPCODE(ShuffleIndex, U32, U32, U32, U32, U32, )
+OPCODE(ShuffleUp, U32, U32, U32, U32, U32, )
+OPCODE(ShuffleDown, U32, U32, U32, U32, U32, )
+OPCODE(ShuffleButterfly, U32, U32, U32, U32, U32, )
+OPCODE(FSwizzleAdd, F32, F32, F32, U32, )
+OPCODE(DPdxFine, F32, F32, )
+OPCODE(DPdyFine, F32, F32, )
+OPCODE(DPdxCoarse, F32, F32, )
+OPCODE(DPdyCoarse, F32, F32, )
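
Each OPCODE entry above lists the opcode name, its result type, and then the types of its arguments; the trailing empty argument keeps the variadic macro form uniform. As a minimal sketch of how an X-macro table like this is typically consumed (the consuming header is not part of this excerpt, so the include name and macro parameters below are assumptions):

    // Hypothetical consumer of the OPCODE() list above; parameter names are illustrative.
    enum class Opcode {
    #define OPCODE(name, result_type, ...) name,
    #include "opcodes.inc" // file name assumed
    #undef OPCODE
    };
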
diff --git a/src/shader_recompiler/frontend/ir/patch.cpp b/src/shader_recompiler/frontend/ir/patch.cpp
new file mode 100644
index 000000000..4c956a970
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/patch.cpp
@@ -0,0 +1,28 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/patch.h"
+
+namespace Shader::IR {
+
+bool IsGeneric(Patch patch) noexcept {
+ return patch >= Patch::Component0 && patch <= Patch::Component119;
+}
+
+u32 GenericPatchIndex(Patch patch) {
+ if (!IsGeneric(patch)) {
+ throw InvalidArgument("Patch {} is not generic", patch);
+ }
+ return (static_cast<u32>(patch) - static_cast<u32>(Patch::Component0)) / 4;
+}
+
+u32 GenericPatchElement(Patch patch) {
+ if (!IsGeneric(patch)) {
+ throw InvalidArgument("Patch {} is not generic", patch);
+ }
+ return (static_cast<u32>(patch) - static_cast<u32>(Patch::Component0)) % 4;
+}
+
+} // namespace Shader::IR
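
A small worked example of the helpers above (illustrative, not part of the patch): generic patch components start at Patch::Component0 and are grouped four to a patch attribute.

    using namespace Shader::IR;
    const u32 index{GenericPatchIndex(Patch::Component5)};      // 5 / 4 == 1: second generic patch attribute
    const u32 element{GenericPatchElement(Patch::Component5)};  // 5 % 4 == 1: its second (.y) component
    // IsGeneric(Patch::TessellationLodLeft) is false, so both helpers would throw for it.
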
diff --git a/src/shader_recompiler/frontend/ir/patch.h b/src/shader_recompiler/frontend/ir/patch.h
new file mode 100644
index 000000000..6d66ff0d6
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/patch.h
@@ -0,0 +1,149 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+
+namespace Shader::IR {
+
+enum class Patch : u64 {
+ TessellationLodLeft,
+ TessellationLodTop,
+ TessellationLodRight,
+ TessellationLodBottom,
+ TessellationLodInteriorU,
+ TessellationLodInteriorV,
+ ComponentPadding0,
+ ComponentPadding1,
+ Component0,
+ Component1,
+ Component2,
+ Component3,
+ Component4,
+ Component5,
+ Component6,
+ Component7,
+ Component8,
+ Component9,
+ Component10,
+ Component11,
+ Component12,
+ Component13,
+ Component14,
+ Component15,
+ Component16,
+ Component17,
+ Component18,
+ Component19,
+ Component20,
+ Component21,
+ Component22,
+ Component23,
+ Component24,
+ Component25,
+ Component26,
+ Component27,
+ Component28,
+ Component29,
+ Component30,
+ Component31,
+ Component32,
+ Component33,
+ Component34,
+ Component35,
+ Component36,
+ Component37,
+ Component38,
+ Component39,
+ Component40,
+ Component41,
+ Component42,
+ Component43,
+ Component44,
+ Component45,
+ Component46,
+ Component47,
+ Component48,
+ Component49,
+ Component50,
+ Component51,
+ Component52,
+ Component53,
+ Component54,
+ Component55,
+ Component56,
+ Component57,
+ Component58,
+ Component59,
+ Component60,
+ Component61,
+ Component62,
+ Component63,
+ Component64,
+ Component65,
+ Component66,
+ Component67,
+ Component68,
+ Component69,
+ Component70,
+ Component71,
+ Component72,
+ Component73,
+ Component74,
+ Component75,
+ Component76,
+ Component77,
+ Component78,
+ Component79,
+ Component80,
+ Component81,
+ Component82,
+ Component83,
+ Component84,
+ Component85,
+ Component86,
+ Component87,
+ Component88,
+ Component89,
+ Component90,
+ Component91,
+ Component92,
+ Component93,
+ Component94,
+ Component95,
+ Component96,
+ Component97,
+ Component98,
+ Component99,
+ Component100,
+ Component101,
+ Component102,
+ Component103,
+ Component104,
+ Component105,
+ Component106,
+ Component107,
+ Component108,
+ Component109,
+ Component110,
+ Component111,
+ Component112,
+ Component113,
+ Component114,
+ Component115,
+ Component116,
+ Component117,
+ Component118,
+ Component119,
+};
+static_assert(static_cast<u64>(Patch::Component119) == 127);
+
+[[nodiscard]] bool IsGeneric(Patch patch) noexcept;
+
+[[nodiscard]] u32 GenericPatchIndex(Patch patch);
+
+[[nodiscard]] u32 GenericPatchElement(Patch patch);
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/post_order.cpp b/src/shader_recompiler/frontend/ir/post_order.cpp
new file mode 100644
index 000000000..16bc44101
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/post_order.cpp
@@ -0,0 +1,46 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+
+#include <boost/container/flat_set.hpp>
+#include <boost/container/small_vector.hpp>
+
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/post_order.h"
+
+namespace Shader::IR {
+
+BlockList PostOrder(const AbstractSyntaxNode& root) {
+ boost::container::small_vector<Block*, 16> block_stack;
+ boost::container::flat_set<Block*> visited;
+ BlockList post_order_blocks;
+
+ if (root.type != AbstractSyntaxNode::Type::Block) {
+ throw LogicError("First node in abstract syntax list root is not a block");
+ }
+ Block* const first_block{root.data.block};
+ visited.insert(first_block);
+ block_stack.push_back(first_block);
+
+ while (!block_stack.empty()) {
+ Block* const block{block_stack.back()};
+ const auto visit{[&](Block* branch) {
+ if (!visited.insert(branch).second) {
+ return false;
+ }
+ // Calling push_back twice is faster than insert on MSVC
+ block_stack.push_back(block);
+ block_stack.push_back(branch);
+ return true;
+ }};
+ block_stack.pop_back();
+ if (std::ranges::none_of(block->ImmSuccessors(), visit)) {
+ post_order_blocks.push_back(block);
+ }
+ }
+ return post_order_blocks;
+}
+
+} // namespace Shader::IR
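
PostOrder above runs an iterative depth-first traversal from the entry block and appends a block only after all of its successors have been visited, which is the order backward passes want. A usage sketch (assuming AbstractSyntaxList is a standard container whose first node is the entry block, as the root-type check requires):

    // Sketch only: populate the post-order block list of an IR program.
    Shader::IR::Program program{/* filled in by the Maxwell translator */};
    program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front());
    // Iterating post_order_blocks in reverse yields a reverse post-order traversal.
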
diff --git a/src/shader_recompiler/frontend/ir/post_order.h b/src/shader_recompiler/frontend/ir/post_order.h
new file mode 100644
index 000000000..07bfbadc3
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/post_order.h
@@ -0,0 +1,14 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "shader_recompiler/frontend/ir/abstract_syntax_list.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+
+namespace Shader::IR {
+
+BlockList PostOrder(const AbstractSyntaxNode& root);
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/pred.h b/src/shader_recompiler/frontend/ir/pred.h
new file mode 100644
index 000000000..4e7f32423
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/pred.h
@@ -0,0 +1,44 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <fmt/format.h>
+
+namespace Shader::IR {
+
+enum class Pred : u64 {
+ P0,
+ P1,
+ P2,
+ P3,
+ P4,
+ P5,
+ P6,
+ PT,
+};
+
+constexpr size_t NUM_USER_PREDS = 7;
+constexpr size_t NUM_PREDS = 8;
+
+[[nodiscard]] constexpr size_t PredIndex(Pred pred) noexcept {
+ return static_cast<size_t>(pred);
+}
+
+} // namespace Shader::IR
+
+template <>
+struct fmt::formatter<Shader::IR::Pred> {
+ constexpr auto parse(format_parse_context& ctx) {
+ return ctx.begin();
+ }
+ template <typename FormatContext>
+ auto format(const Shader::IR::Pred& pred, FormatContext& ctx) {
+ if (pred == Shader::IR::Pred::PT) {
+ return fmt::format_to(ctx.out(), "PT");
+ } else {
+ return fmt::format_to(ctx.out(), "P{}", static_cast<int>(pred));
+ }
+ }
+};
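
For reference, the formatter specialization above prints user predicates as P0 through P6 and the always-true predicate as PT; a minimal sketch:

    const std::string a{fmt::format("{}", Shader::IR::Pred::P3)}; // "P3"
    const std::string b{fmt::format("{}", Shader::IR::Pred::PT)}; // "PT"
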
diff --git a/src/shader_recompiler/frontend/ir/program.cpp b/src/shader_recompiler/frontend/ir/program.cpp
new file mode 100644
index 000000000..3fc06f855
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/program.cpp
@@ -0,0 +1,32 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <map>
+#include <string>
+
+#include <fmt/format.h>
+
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::IR {
+
+std::string DumpProgram(const Program& program) {
+ size_t index{0};
+ std::map<const IR::Inst*, size_t> inst_to_index;
+ std::map<const IR::Block*, size_t> block_to_index;
+
+ for (const IR::Block* const block : program.blocks) {
+ block_to_index.emplace(block, index);
+ ++index;
+ }
+ std::string ret;
+ for (const auto& block : program.blocks) {
+ ret += IR::DumpBlock(*block, block_to_index, inst_to_index, index) + '\n';
+ }
+ return ret;
+}
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h
new file mode 100644
index 000000000..ebcaa8bc2
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/program.h
@@ -0,0 +1,35 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <string>
+
+#include "shader_recompiler/frontend/ir/abstract_syntax_list.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/program_header.h"
+#include "shader_recompiler/shader_info.h"
+#include "shader_recompiler/stage.h"
+
+namespace Shader::IR {
+
+struct Program {
+ AbstractSyntaxList syntax_list;
+ BlockList blocks;
+ BlockList post_order_blocks;
+ Info info;
+ Stage stage{};
+ std::array<u32, 3> workgroup_size{};
+ OutputTopology output_topology{};
+ u32 output_vertices{};
+ u32 invocations{};
+ u32 local_memory_size{};
+ u32 shared_memory_size{};
+ bool is_geometry_passthrough{};
+};
+
+[[nodiscard]] std::string DumpProgram(const Program& program);
+
+} // namespace Shader::IR
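
DumpProgram is purely diagnostic; a sketch of typical use (the output sink is an assumption, not taken from the patch):

    const std::string text{Shader::IR::DumpProgram(program)};
    fmt::print("{}\n", text); // or route through the project's logging macros
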
diff --git a/src/shader_recompiler/frontend/ir/reg.h b/src/shader_recompiler/frontend/ir/reg.h
new file mode 100644
index 000000000..a4b635792
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/reg.h
@@ -0,0 +1,332 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <fmt/format.h>
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+
+namespace Shader::IR {
+
+enum class Reg : u64 {
+ R0,
+ R1,
+ R2,
+ R3,
+ R4,
+ R5,
+ R6,
+ R7,
+ R8,
+ R9,
+ R10,
+ R11,
+ R12,
+ R13,
+ R14,
+ R15,
+ R16,
+ R17,
+ R18,
+ R19,
+ R20,
+ R21,
+ R22,
+ R23,
+ R24,
+ R25,
+ R26,
+ R27,
+ R28,
+ R29,
+ R30,
+ R31,
+ R32,
+ R33,
+ R34,
+ R35,
+ R36,
+ R37,
+ R38,
+ R39,
+ R40,
+ R41,
+ R42,
+ R43,
+ R44,
+ R45,
+ R46,
+ R47,
+ R48,
+ R49,
+ R50,
+ R51,
+ R52,
+ R53,
+ R54,
+ R55,
+ R56,
+ R57,
+ R58,
+ R59,
+ R60,
+ R61,
+ R62,
+ R63,
+ R64,
+ R65,
+ R66,
+ R67,
+ R68,
+ R69,
+ R70,
+ R71,
+ R72,
+ R73,
+ R74,
+ R75,
+ R76,
+ R77,
+ R78,
+ R79,
+ R80,
+ R81,
+ R82,
+ R83,
+ R84,
+ R85,
+ R86,
+ R87,
+ R88,
+ R89,
+ R90,
+ R91,
+ R92,
+ R93,
+ R94,
+ R95,
+ R96,
+ R97,
+ R98,
+ R99,
+ R100,
+ R101,
+ R102,
+ R103,
+ R104,
+ R105,
+ R106,
+ R107,
+ R108,
+ R109,
+ R110,
+ R111,
+ R112,
+ R113,
+ R114,
+ R115,
+ R116,
+ R117,
+ R118,
+ R119,
+ R120,
+ R121,
+ R122,
+ R123,
+ R124,
+ R125,
+ R126,
+ R127,
+ R128,
+ R129,
+ R130,
+ R131,
+ R132,
+ R133,
+ R134,
+ R135,
+ R136,
+ R137,
+ R138,
+ R139,
+ R140,
+ R141,
+ R142,
+ R143,
+ R144,
+ R145,
+ R146,
+ R147,
+ R148,
+ R149,
+ R150,
+ R151,
+ R152,
+ R153,
+ R154,
+ R155,
+ R156,
+ R157,
+ R158,
+ R159,
+ R160,
+ R161,
+ R162,
+ R163,
+ R164,
+ R165,
+ R166,
+ R167,
+ R168,
+ R169,
+ R170,
+ R171,
+ R172,
+ R173,
+ R174,
+ R175,
+ R176,
+ R177,
+ R178,
+ R179,
+ R180,
+ R181,
+ R182,
+ R183,
+ R184,
+ R185,
+ R186,
+ R187,
+ R188,
+ R189,
+ R190,
+ R191,
+ R192,
+ R193,
+ R194,
+ R195,
+ R196,
+ R197,
+ R198,
+ R199,
+ R200,
+ R201,
+ R202,
+ R203,
+ R204,
+ R205,
+ R206,
+ R207,
+ R208,
+ R209,
+ R210,
+ R211,
+ R212,
+ R213,
+ R214,
+ R215,
+ R216,
+ R217,
+ R218,
+ R219,
+ R220,
+ R221,
+ R222,
+ R223,
+ R224,
+ R225,
+ R226,
+ R227,
+ R228,
+ R229,
+ R230,
+ R231,
+ R232,
+ R233,
+ R234,
+ R235,
+ R236,
+ R237,
+ R238,
+ R239,
+ R240,
+ R241,
+ R242,
+ R243,
+ R244,
+ R245,
+ R246,
+ R247,
+ R248,
+ R249,
+ R250,
+ R251,
+ R252,
+ R253,
+ R254,
+ RZ,
+};
+static_assert(static_cast<int>(Reg::RZ) == 255);
+
+constexpr size_t NUM_USER_REGS = 255;
+constexpr size_t NUM_REGS = 256;
+
+[[nodiscard]] constexpr Reg operator+(Reg reg, int num) {
+ if (reg == Reg::RZ) {
+        // Adding to or subtracting from RZ always yields RZ
+ return Reg::RZ;
+ }
+ const int result{static_cast<int>(reg) + num};
+ if (result >= static_cast<int>(Reg::RZ)) {
+ throw LogicError("Overflow on register arithmetic");
+ }
+ if (result < 0) {
+ throw LogicError("Underflow on register arithmetic");
+ }
+ return static_cast<Reg>(result);
+}
+
+[[nodiscard]] constexpr Reg operator-(Reg reg, int num) {
+ return reg + (-num);
+}
+
+constexpr Reg operator++(Reg& reg) {
+ reg = reg + 1;
+ return reg;
+}
+
+constexpr Reg operator++(Reg& reg, int) {
+ const Reg copy{reg};
+ reg = reg + 1;
+ return copy;
+}
+
+[[nodiscard]] constexpr size_t RegIndex(Reg reg) noexcept {
+ return static_cast<size_t>(reg);
+}
+
+[[nodiscard]] constexpr bool IsAligned(Reg reg, size_t align) {
+ return RegIndex(reg) % align == 0 || reg == Reg::RZ;
+}
+
+} // namespace Shader::IR
+
+template <>
+struct fmt::formatter<Shader::IR::Reg> {
+ constexpr auto parse(format_parse_context& ctx) {
+ return ctx.begin();
+ }
+ template <typename FormatContext>
+ auto format(const Shader::IR::Reg& reg, FormatContext& ctx) {
+ if (reg == Shader::IR::Reg::RZ) {
+ return fmt::format_to(ctx.out(), "RZ");
+ } else if (static_cast<int>(reg) >= 0 && static_cast<int>(reg) < 255) {
+ return fmt::format_to(ctx.out(), "R{}", static_cast<int>(reg));
+ } else {
+ throw Shader::LogicError("Invalid register with raw value {}", static_cast<int>(reg));
+ }
+ }
+};
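
The register arithmetic above deliberately saturates on RZ and bounds-checks everything else; a short sketch of the intended behaviour (expected results shown in comments):

    using Shader::IR::Reg;
    constexpr Reg r6{Reg::R4 + 2};   // R6
    constexpr Reg rz{Reg::RZ + 100}; // still RZ: arithmetic on RZ yields RZ
    static_assert(Shader::IR::IsAligned(Reg::R8, 4)); // index 8 is 4-register aligned
    // Reg::R254 + 2 exceeds RZ and throws LogicError at runtime, so it cannot
    // appear in a constant expression.
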
diff --git a/src/shader_recompiler/frontend/ir/type.cpp b/src/shader_recompiler/frontend/ir/type.cpp
new file mode 100644
index 000000000..f28341bfe
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/type.cpp
@@ -0,0 +1,38 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <array>
+#include <string>
+
+#include "shader_recompiler/frontend/ir/type.h"
+
+namespace Shader::IR {
+
+std::string NameOf(Type type) {
+ static constexpr std::array names{
+ "Opaque", "Label", "Reg", "Pred", "Attribute", "U1", "U8", "U16", "U32",
+ "U64", "F16", "F32", "F64", "U32x2", "U32x3", "U32x4", "F16x2", "F16x3",
+ "F16x4", "F32x2", "F32x3", "F32x4", "F64x2", "F64x3", "F64x4",
+ };
+ const size_t bits{static_cast<size_t>(type)};
+ if (bits == 0) {
+ return "Void";
+ }
+ std::string result;
+ for (size_t i = 0; i < names.size(); i++) {
+ if ((bits & (size_t{1} << i)) != 0) {
+ if (!result.empty()) {
+ result += '|';
+ }
+ result += names[i];
+ }
+ }
+ return result;
+}
+
+bool AreTypesCompatible(Type lhs, Type rhs) noexcept {
+ return lhs == rhs || lhs == Type::Opaque || rhs == Type::Opaque;
+}
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/type.h b/src/shader_recompiler/frontend/ir/type.h
new file mode 100644
index 000000000..294b230c4
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/type.h
@@ -0,0 +1,61 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <string>
+
+#include <fmt/format.h>
+
+#include "common/common_funcs.h"
+#include "shader_recompiler/exception.h"
+
+namespace Shader::IR {
+
+enum class Type {
+ Void = 0,
+ Opaque = 1 << 0,
+ Reg = 1 << 1,
+ Pred = 1 << 2,
+ Attribute = 1 << 3,
+ Patch = 1 << 4,
+ U1 = 1 << 5,
+ U8 = 1 << 6,
+ U16 = 1 << 7,
+ U32 = 1 << 8,
+ U64 = 1 << 9,
+ F16 = 1 << 10,
+ F32 = 1 << 11,
+ F64 = 1 << 12,
+ U32x2 = 1 << 13,
+ U32x3 = 1 << 14,
+ U32x4 = 1 << 15,
+ F16x2 = 1 << 16,
+ F16x3 = 1 << 17,
+ F16x4 = 1 << 18,
+ F32x2 = 1 << 19,
+ F32x3 = 1 << 20,
+ F32x4 = 1 << 21,
+ F64x2 = 1 << 22,
+ F64x3 = 1 << 23,
+ F64x4 = 1 << 24,
+};
+DECLARE_ENUM_FLAG_OPERATORS(Type)
+
+[[nodiscard]] std::string NameOf(Type type);
+
+[[nodiscard]] bool AreTypesCompatible(Type lhs, Type rhs) noexcept;
+
+} // namespace Shader::IR
+
+template <>
+struct fmt::formatter<Shader::IR::Type> {
+ constexpr auto parse(format_parse_context& ctx) {
+ return ctx.begin();
+ }
+ template <typename FormatContext>
+ auto format(const Shader::IR::Type& type, FormatContext& ctx) {
+ return fmt::format_to(ctx.out(), "{}", NameOf(type));
+ }
+};
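
Because Type is a flag set, NameOf joins the names of every set bit and AreTypesCompatible treats Opaque as a wildcard; a brief sketch:

    using Shader::IR::Type;
    const Type mixed{Type::U32 | Type::F32};            // flag union via DECLARE_ENUM_FLAG_OPERATORS
    const std::string name{Shader::IR::NameOf(mixed)};  // "U32|F32"
    const bool a{Shader::IR::AreTypesCompatible(Type::U32, Type::Opaque)}; // true: Opaque matches anything
    const bool b{Shader::IR::AreTypesCompatible(Type::U32, Type::F32)};    // false
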
diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp
new file mode 100644
index 000000000..d365ea1bc
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/value.cpp
@@ -0,0 +1,99 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/frontend/ir/opcodes.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::IR {
+
+Value::Value(IR::Inst* value) noexcept : type{Type::Opaque}, inst{value} {}
+
+Value::Value(IR::Reg value) noexcept : type{Type::Reg}, reg{value} {}
+
+Value::Value(IR::Pred value) noexcept : type{Type::Pred}, pred{value} {}
+
+Value::Value(IR::Attribute value) noexcept : type{Type::Attribute}, attribute{value} {}
+
+Value::Value(IR::Patch value) noexcept : type{Type::Patch}, patch{value} {}
+
+Value::Value(bool value) noexcept : type{Type::U1}, imm_u1{value} {}
+
+Value::Value(u8 value) noexcept : type{Type::U8}, imm_u8{value} {}
+
+Value::Value(u16 value) noexcept : type{Type::U16}, imm_u16{value} {}
+
+Value::Value(u32 value) noexcept : type{Type::U32}, imm_u32{value} {}
+
+Value::Value(f32 value) noexcept : type{Type::F32}, imm_f32{value} {}
+
+Value::Value(u64 value) noexcept : type{Type::U64}, imm_u64{value} {}
+
+Value::Value(f64 value) noexcept : type{Type::F64}, imm_f64{value} {}
+
+IR::Type Value::Type() const noexcept {
+ if (IsPhi()) {
+ // The type of a phi node is stored in its flags
+ return inst->Flags<IR::Type>();
+ }
+ if (IsIdentity()) {
+ return inst->Arg(0).Type();
+ }
+ if (type == Type::Opaque) {
+ return inst->Type();
+ }
+ return type;
+}
+
+bool Value::operator==(const Value& other) const {
+ if (type != other.type) {
+ return false;
+ }
+ switch (type) {
+ case Type::Void:
+ return true;
+ case Type::Opaque:
+ return inst == other.inst;
+ case Type::Reg:
+ return reg == other.reg;
+ case Type::Pred:
+ return pred == other.pred;
+ case Type::Attribute:
+ return attribute == other.attribute;
+ case Type::Patch:
+ return patch == other.patch;
+ case Type::U1:
+ return imm_u1 == other.imm_u1;
+ case Type::U8:
+ return imm_u8 == other.imm_u8;
+ case Type::U16:
+ case Type::F16:
+ return imm_u16 == other.imm_u16;
+ case Type::U32:
+ case Type::F32:
+ return imm_u32 == other.imm_u32;
+ case Type::U64:
+ case Type::F64:
+ return imm_u64 == other.imm_u64;
+ case Type::U32x2:
+ case Type::U32x3:
+ case Type::U32x4:
+ case Type::F16x2:
+ case Type::F16x3:
+ case Type::F16x4:
+ case Type::F32x2:
+ case Type::F32x3:
+ case Type::F32x4:
+ case Type::F64x2:
+ case Type::F64x3:
+ case Type::F64x4:
+ break;
+ }
+ throw LogicError("Invalid type {}", type);
+}
+
+bool Value::operator!=(const Value& other) const {
+ return !operator==(other);
+}
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h
new file mode 100644
index 000000000..795194d41
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/value.h
@@ -0,0 +1,398 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <cstring>
+#include <memory>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include <boost/container/small_vector.hpp>
+#include <boost/intrusive/list.hpp>
+
+#include "common/assert.h"
+#include "common/bit_cast.h"
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/attribute.h"
+#include "shader_recompiler/frontend/ir/opcodes.h"
+#include "shader_recompiler/frontend/ir/patch.h"
+#include "shader_recompiler/frontend/ir/pred.h"
+#include "shader_recompiler/frontend/ir/reg.h"
+#include "shader_recompiler/frontend/ir/type.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::IR {
+
+class Block;
+class Inst;
+
+struct AssociatedInsts;
+
+class Value {
+public:
+ Value() noexcept = default;
+ explicit Value(IR::Inst* value) noexcept;
+ explicit Value(IR::Reg value) noexcept;
+ explicit Value(IR::Pred value) noexcept;
+ explicit Value(IR::Attribute value) noexcept;
+ explicit Value(IR::Patch value) noexcept;
+ explicit Value(bool value) noexcept;
+ explicit Value(u8 value) noexcept;
+ explicit Value(u16 value) noexcept;
+ explicit Value(u32 value) noexcept;
+ explicit Value(f32 value) noexcept;
+ explicit Value(u64 value) noexcept;
+ explicit Value(f64 value) noexcept;
+
+ [[nodiscard]] bool IsIdentity() const noexcept;
+ [[nodiscard]] bool IsPhi() const noexcept;
+ [[nodiscard]] bool IsEmpty() const noexcept;
+ [[nodiscard]] bool IsImmediate() const noexcept;
+ [[nodiscard]] IR::Type Type() const noexcept;
+
+ [[nodiscard]] IR::Inst* Inst() const;
+ [[nodiscard]] IR::Inst* InstRecursive() const;
+ [[nodiscard]] IR::Value Resolve() const;
+ [[nodiscard]] IR::Reg Reg() const;
+ [[nodiscard]] IR::Pred Pred() const;
+ [[nodiscard]] IR::Attribute Attribute() const;
+ [[nodiscard]] IR::Patch Patch() const;
+ [[nodiscard]] bool U1() const;
+ [[nodiscard]] u8 U8() const;
+ [[nodiscard]] u16 U16() const;
+ [[nodiscard]] u32 U32() const;
+ [[nodiscard]] f32 F32() const;
+ [[nodiscard]] u64 U64() const;
+ [[nodiscard]] f64 F64() const;
+
+ [[nodiscard]] bool operator==(const Value& other) const;
+ [[nodiscard]] bool operator!=(const Value& other) const;
+
+private:
+ IR::Type type{};
+ union {
+ IR::Inst* inst{};
+ IR::Reg reg;
+ IR::Pred pred;
+ IR::Attribute attribute;
+ IR::Patch patch;
+ bool imm_u1;
+ u8 imm_u8;
+ u16 imm_u16;
+ u32 imm_u32;
+ f32 imm_f32;
+ u64 imm_u64;
+ f64 imm_f64;
+ };
+};
+static_assert(static_cast<u32>(IR::Type::Void) == 0, "memset relies on IR::Type being zero");
+static_assert(std::is_trivially_copyable_v<Value>);
+
+template <IR::Type type_>
+class TypedValue : public Value {
+public:
+ TypedValue() = default;
+
+ template <IR::Type other_type>
+ requires((other_type & type_) != IR::Type::Void) explicit(false)
+ TypedValue(const TypedValue<other_type>& value)
+ : Value(value) {}
+
+ explicit TypedValue(const Value& value) : Value(value) {
+ if ((value.Type() & type_) == IR::Type::Void) {
+ throw InvalidArgument("Incompatible types {} and {}", type_, value.Type());
+ }
+ }
+
+ explicit TypedValue(IR::Inst* inst_) : TypedValue(Value(inst_)) {}
+};
+
+class Inst : public boost::intrusive::list_base_hook<> {
+public:
+ explicit Inst(IR::Opcode op_, u32 flags_) noexcept;
+ ~Inst();
+
+ Inst& operator=(const Inst&) = delete;
+ Inst(const Inst&) = delete;
+
+ Inst& operator=(Inst&&) = delete;
+ Inst(Inst&&) = delete;
+
+ /// Get the number of uses this instruction has.
+ [[nodiscard]] int UseCount() const noexcept {
+ return use_count;
+ }
+
+ /// Determines whether this instruction has uses or not.
+ [[nodiscard]] bool HasUses() const noexcept {
+ return use_count > 0;
+ }
+
+ /// Get the opcode this microinstruction represents.
+ [[nodiscard]] IR::Opcode GetOpcode() const noexcept {
+ return op;
+ }
+
+ /// Determines if there is a pseudo-operation associated with this instruction.
+ [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept {
+ return associated_insts != nullptr;
+ }
+
+ /// Determines whether or not this instruction may have side effects.
+ [[nodiscard]] bool MayHaveSideEffects() const noexcept;
+
+ /// Determines whether or not this instruction is a pseudo-instruction.
+ /// Pseudo-instructions depend on their parent instructions for their semantics.
+ [[nodiscard]] bool IsPseudoInstruction() const noexcept;
+
+ /// Determines if all arguments of this instruction are immediates.
+ [[nodiscard]] bool AreAllArgsImmediates() const;
+
+ /// Gets a pseudo-operation associated with this instruction
+ [[nodiscard]] Inst* GetAssociatedPseudoOperation(IR::Opcode opcode);
+
+ /// Get the type this instruction returns.
+ [[nodiscard]] IR::Type Type() const;
+
+ /// Get the number of arguments this instruction has.
+ [[nodiscard]] size_t NumArgs() const {
+ return op == IR::Opcode::Phi ? phi_args.size() : NumArgsOf(op);
+ }
+
+ /// Get the value of a given argument index.
+ [[nodiscard]] Value Arg(size_t index) const noexcept {
+ if (op == IR::Opcode::Phi) {
+ return phi_args[index].second;
+ } else {
+ return args[index];
+ }
+ }
+
+ /// Set the value of a given argument index.
+ void SetArg(size_t index, Value value);
+
+ /// Get a pointer to the block of a phi argument.
+ [[nodiscard]] Block* PhiBlock(size_t index) const;
+ /// Add phi operand to a phi instruction.
+ void AddPhiOperand(Block* predecessor, const Value& value);
+
+ void Invalidate();
+ void ClearArgs();
+
+ void ReplaceUsesWith(Value replacement);
+
+ void ReplaceOpcode(IR::Opcode opcode);
+
+ template <typename FlagsType>
+ requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>)
+ [[nodiscard]] FlagsType Flags() const noexcept {
+ FlagsType ret;
+ std::memcpy(reinterpret_cast<char*>(&ret), &flags, sizeof(ret));
+ return ret;
+ }
+
+ template <typename FlagsType>
+ requires(sizeof(FlagsType) <= sizeof(u32) &&
+ std::is_trivially_copyable_v<FlagsType>) void SetFlags(FlagsType value) noexcept {
+ std::memcpy(&flags, &value, sizeof(value));
+ }
+
+ /// Intrusively store the host definition of this instruction.
+ template <typename DefinitionType>
+ void SetDefinition(DefinitionType def) {
+ definition = Common::BitCast<u32>(def);
+ }
+
+ /// Return the intrusively stored host definition of this instruction.
+ template <typename DefinitionType>
+ [[nodiscard]] DefinitionType Definition() const noexcept {
+ return Common::BitCast<DefinitionType>(definition);
+ }
+
+ /// Destructively remove one reference count from the instruction
+ /// Useful for register allocation
+ void DestructiveRemoveUsage() {
+ --use_count;
+ }
+
+ /// Destructively add usages to the instruction
+ /// Useful for register allocation
+ void DestructiveAddUsage(int count) {
+ use_count += count;
+ }
+
+private:
+ struct NonTriviallyDummy {
+ NonTriviallyDummy() noexcept {}
+ };
+
+ void Use(const Value& value);
+ void UndoUse(const Value& value);
+
+ IR::Opcode op{};
+ int use_count{};
+ u32 flags{};
+ u32 definition{};
+ union {
+ NonTriviallyDummy dummy{};
+ boost::container::small_vector<std::pair<Block*, Value>, 2> phi_args;
+ std::array<Value, 5> args;
+ };
+ std::unique_ptr<AssociatedInsts> associated_insts;
+};
+static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased");
+
+struct AssociatedInsts {
+ union {
+ Inst* in_bounds_inst;
+ Inst* sparse_inst;
+ Inst* zero_inst{};
+ };
+ Inst* sign_inst{};
+ Inst* carry_inst{};
+ Inst* overflow_inst{};
+};
+
+using U1 = TypedValue<Type::U1>;
+using U8 = TypedValue<Type::U8>;
+using U16 = TypedValue<Type::U16>;
+using U32 = TypedValue<Type::U32>;
+using U64 = TypedValue<Type::U64>;
+using F16 = TypedValue<Type::F16>;
+using F32 = TypedValue<Type::F32>;
+using F64 = TypedValue<Type::F64>;
+using U32U64 = TypedValue<Type::U32 | Type::U64>;
+using F32F64 = TypedValue<Type::F32 | Type::F64>;
+using U16U32U64 = TypedValue<Type::U16 | Type::U32 | Type::U64>;
+using F16F32F64 = TypedValue<Type::F16 | Type::F32 | Type::F64>;
+using UAny = TypedValue<Type::U8 | Type::U16 | Type::U32 | Type::U64>;
+
+inline bool Value::IsIdentity() const noexcept {
+ return type == Type::Opaque && inst->GetOpcode() == Opcode::Identity;
+}
+
+inline bool Value::IsPhi() const noexcept {
+ return type == Type::Opaque && inst->GetOpcode() == Opcode::Phi;
+}
+
+inline bool Value::IsEmpty() const noexcept {
+ return type == Type::Void;
+}
+
+inline bool Value::IsImmediate() const noexcept {
+ IR::Type current_type{type};
+ const IR::Inst* current_inst{inst};
+ while (current_type == Type::Opaque && current_inst->GetOpcode() == Opcode::Identity) {
+ const Value& arg{current_inst->Arg(0)};
+ current_type = arg.type;
+ current_inst = arg.inst;
+ }
+ return current_type != Type::Opaque;
+}
+
+inline IR::Inst* Value::Inst() const {
+ DEBUG_ASSERT(type == Type::Opaque);
+ return inst;
+}
+
+inline IR::Inst* Value::InstRecursive() const {
+ DEBUG_ASSERT(type == Type::Opaque);
+ if (IsIdentity()) {
+ return inst->Arg(0).InstRecursive();
+ }
+ return inst;
+}
+
+inline IR::Value Value::Resolve() const {
+ if (IsIdentity()) {
+ return inst->Arg(0).Resolve();
+ }
+ return *this;
+}
+
+inline IR::Reg Value::Reg() const {
+ DEBUG_ASSERT(type == Type::Reg);
+ return reg;
+}
+
+inline IR::Pred Value::Pred() const {
+ DEBUG_ASSERT(type == Type::Pred);
+ return pred;
+}
+
+inline IR::Attribute Value::Attribute() const {
+ DEBUG_ASSERT(type == Type::Attribute);
+ return attribute;
+}
+
+inline IR::Patch Value::Patch() const {
+ DEBUG_ASSERT(type == Type::Patch);
+ return patch;
+}
+
+inline bool Value::U1() const {
+ if (IsIdentity()) {
+ return inst->Arg(0).U1();
+ }
+ DEBUG_ASSERT(type == Type::U1);
+ return imm_u1;
+}
+
+inline u8 Value::U8() const {
+ if (IsIdentity()) {
+ return inst->Arg(0).U8();
+ }
+ DEBUG_ASSERT(type == Type::U8);
+ return imm_u8;
+}
+
+inline u16 Value::U16() const {
+ if (IsIdentity()) {
+ return inst->Arg(0).U16();
+ }
+ DEBUG_ASSERT(type == Type::U16);
+ return imm_u16;
+}
+
+inline u32 Value::U32() const {
+ if (IsIdentity()) {
+ return inst->Arg(0).U32();
+ }
+ DEBUG_ASSERT(type == Type::U32);
+ return imm_u32;
+}
+
+inline f32 Value::F32() const {
+ if (IsIdentity()) {
+ return inst->Arg(0).F32();
+ }
+ DEBUG_ASSERT(type == Type::F32);
+ return imm_f32;
+}
+
+inline u64 Value::U64() const {
+ if (IsIdentity()) {
+ return inst->Arg(0).U64();
+ }
+ DEBUG_ASSERT(type == Type::U64);
+ return imm_u64;
+}
+
+inline f64 Value::F64() const {
+ if (IsIdentity()) {
+ return inst->Arg(0).F64();
+ }
+ DEBUG_ASSERT(type == Type::F64);
+ return imm_f64;
+}
+
+[[nodiscard]] inline bool IsPhi(const Inst& inst) {
+ return inst.GetOpcode() == Opcode::Phi;
+}
+
+} // namespace Shader::IR
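
A short usage sketch of Value and TypedValue (illustrative, not from the patch): immediates carry their payload inline, while Opaque values point at the defining instruction and are resolved through Identity chains.

    using namespace Shader::IR;
    const Value imm{u32{42}};
    // imm.Type() == Type::U32, imm.IsImmediate() == true, imm.U32() == 42
    const U32 typed{imm}; // TypedValue<Type::U32>; throws InvalidArgument on a type mismatch
    // For a value produced by an instruction:
    //   Value v{some_inst};  // Type::Opaque
    //   v.Resolve()          // walks Identity instructions back to the real producer
    //   v.IsImmediate()      // true only if that chain ends in an inline constant
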
diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp
new file mode 100644
index 000000000..efe457baa
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp
@@ -0,0 +1,642 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <optional>
+#include <string>
+#include <utility>
+
+#include <fmt/format.h>
+
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/control_flow.h"
+#include "shader_recompiler/frontend/maxwell/decode.h"
+#include "shader_recompiler/frontend/maxwell/indirect_branch_table_track.h"
+#include "shader_recompiler/frontend/maxwell/location.h"
+
+namespace Shader::Maxwell::Flow {
+namespace {
+struct Compare {
+ bool operator()(const Block& lhs, Location rhs) const noexcept {
+ return lhs.begin < rhs;
+ }
+
+ bool operator()(Location lhs, const Block& rhs) const noexcept {
+ return lhs < rhs.begin;
+ }
+
+ bool operator()(const Block& lhs, const Block& rhs) const noexcept {
+ return lhs.begin < rhs.begin;
+ }
+};
+
+u32 BranchOffset(Location pc, Instruction inst) {
+ return pc.Offset() + static_cast<u32>(inst.branch.Offset()) + 8u;
+}
+
+void Split(Block* old_block, Block* new_block, Location pc) {
+ if (pc <= old_block->begin || pc >= old_block->end) {
+ throw InvalidArgument("Invalid address to split={}", pc);
+ }
+ *new_block = Block{};
+ new_block->begin = pc;
+ new_block->end = old_block->end;
+ new_block->end_class = old_block->end_class;
+ new_block->cond = old_block->cond;
+ new_block->stack = old_block->stack;
+ new_block->branch_true = old_block->branch_true;
+ new_block->branch_false = old_block->branch_false;
+ new_block->function_call = old_block->function_call;
+ new_block->return_block = old_block->return_block;
+ new_block->branch_reg = old_block->branch_reg;
+ new_block->branch_offset = old_block->branch_offset;
+ new_block->indirect_branches = std::move(old_block->indirect_branches);
+
+ const Location old_begin{old_block->begin};
+ Stack old_stack{std::move(old_block->stack)};
+ *old_block = Block{};
+ old_block->begin = old_begin;
+ old_block->end = pc;
+ old_block->end_class = EndClass::Branch;
+ old_block->cond = IR::Condition(true);
+ old_block->stack = old_stack;
+ old_block->branch_true = new_block;
+ old_block->branch_false = nullptr;
+}
+
+Token OpcodeToken(Opcode opcode) {
+ switch (opcode) {
+ case Opcode::PBK:
+ case Opcode::BRK:
+ return Token::PBK;
+ case Opcode::PCNT:
+ case Opcode::CONT:
+ return Token::PCNT;
+ case Opcode::PEXIT:
+ case Opcode::EXIT:
+ return Token::PEXIT;
+ case Opcode::PLONGJMP:
+ case Opcode::LONGJMP:
+ return Token::PLONGJMP;
+ case Opcode::PRET:
+ case Opcode::RET:
+ case Opcode::CAL:
+ return Token::PRET;
+ case Opcode::SSY:
+ case Opcode::SYNC:
+ return Token::SSY;
+ default:
+ throw InvalidArgument("{}", opcode);
+ }
+}
+
+bool IsAbsoluteJump(Opcode opcode) {
+ switch (opcode) {
+ case Opcode::JCAL:
+ case Opcode::JMP:
+ case Opcode::JMX:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool HasFlowTest(Opcode opcode) {
+ switch (opcode) {
+ case Opcode::BRA:
+ case Opcode::BRX:
+ case Opcode::EXIT:
+ case Opcode::JMP:
+ case Opcode::JMX:
+ case Opcode::KIL:
+ case Opcode::BRK:
+ case Opcode::CONT:
+ case Opcode::LONGJMP:
+ case Opcode::RET:
+ case Opcode::SYNC:
+ return true;
+ case Opcode::CAL:
+ case Opcode::JCAL:
+ return false;
+ default:
+ throw InvalidArgument("Invalid branch {}", opcode);
+ }
+}
+
+std::string NameOf(const Block& block) {
+ if (block.begin.IsVirtual()) {
+ return fmt::format("\"Virtual {}\"", block.begin);
+ } else {
+ return fmt::format("\"{}\"", block.begin);
+ }
+}
+} // Anonymous namespace
+
+void Stack::Push(Token token, Location target) {
+ entries.push_back({
+ .token = token,
+ .target{target},
+ });
+}
+
+std::pair<Location, Stack> Stack::Pop(Token token) const {
+ const std::optional<Location> pc{Peek(token)};
+ if (!pc) {
+ throw LogicError("Token could not be found");
+ }
+ return {*pc, Remove(token)};
+}
+
+std::optional<Location> Stack::Peek(Token token) const {
+ const auto it{std::find_if(entries.rbegin(), entries.rend(),
+ [token](const auto& entry) { return entry.token == token; })};
+ if (it == entries.rend()) {
+ return std::nullopt;
+ }
+ return it->target;
+}
+
+Stack Stack::Remove(Token token) const {
+ const auto it{std::find_if(entries.rbegin(), entries.rend(),
+ [token](const auto& entry) { return entry.token == token; })};
+ const auto pos{std::distance(entries.rbegin(), it)};
+ Stack result;
+ result.entries.insert(result.entries.end(), entries.begin(), entries.end() - pos - 1);
+ return result;
+}
+
+bool Block::Contains(Location pc) const noexcept {
+ return pc >= begin && pc < end;
+}
+
+Function::Function(ObjectPool<Block>& block_pool, Location start_address)
+ : entrypoint{start_address} {
+ Label& label{labels.emplace_back()};
+ label.address = start_address;
+ label.block = block_pool.Create(Block{});
+ label.block->begin = start_address;
+ label.block->end = start_address;
+ label.block->end_class = EndClass::Branch;
+ label.block->cond = IR::Condition(true);
+ label.block->branch_true = nullptr;
+ label.block->branch_false = nullptr;
+}
+
+CFG::CFG(Environment& env_, ObjectPool<Block>& block_pool_, Location start_address,
+ bool exits_to_dispatcher_)
+ : env{env_}, block_pool{block_pool_}, program_start{start_address}, exits_to_dispatcher{
+ exits_to_dispatcher_} {
+ if (exits_to_dispatcher) {
+ dispatch_block = block_pool.Create(Block{});
+ dispatch_block->begin = {};
+ dispatch_block->end = {};
+ dispatch_block->end_class = EndClass::Exit;
+ dispatch_block->cond = IR::Condition(true);
+ dispatch_block->stack = {};
+ dispatch_block->branch_true = nullptr;
+ dispatch_block->branch_false = nullptr;
+ }
+ functions.emplace_back(block_pool, start_address);
+ for (FunctionId function_id = 0; function_id < functions.size(); ++function_id) {
+ while (!functions[function_id].labels.empty()) {
+ Function& function{functions[function_id]};
+ Label label{function.labels.back()};
+ function.labels.pop_back();
+ AnalyzeLabel(function_id, label);
+ }
+ }
+ if (exits_to_dispatcher) {
+ const auto last_block{functions[0].blocks.rbegin()};
+ dispatch_block->begin = last_block->end + 1;
+ dispatch_block->end = last_block->end + 1;
+ functions[0].blocks.insert(*dispatch_block);
+ }
+}
+
+void CFG::AnalyzeLabel(FunctionId function_id, Label& label) {
+ if (InspectVisitedBlocks(function_id, label)) {
+ // Label address has been visited
+ return;
+ }
+ // Try to find the next block
+ Function* const function{&functions[function_id]};
+ Location pc{label.address};
+ const auto next_it{function->blocks.upper_bound(pc, Compare{})};
+ const bool is_last{next_it == function->blocks.end()};
+ Block* const next{is_last ? nullptr : &*next_it};
+ // Insert before the next block
+ Block* const block{label.block};
+ // Analyze instructions until it reaches an already visited block or there's a branch
+ bool is_branch{false};
+ while (!next || pc < next->begin) {
+ is_branch = AnalyzeInst(block, function_id, pc) == AnalysisState::Branch;
+ if (is_branch) {
+ break;
+ }
+ ++pc;
+ }
+ if (!is_branch) {
+        // If the block ended without a branch, the next instruction belongs to an
+        // already-visited block, so fall through to it
+ block->end = pc;
+ block->cond = IR::Condition{true};
+ block->branch_true = next;
+ block->branch_false = nullptr;
+ }
+    // The Function pointer may have been invalidated by a reallocation of functions; index it again
+ // Insert the new block
+ functions[function_id].blocks.insert(*block);
+}
+
+bool CFG::InspectVisitedBlocks(FunctionId function_id, const Label& label) {
+ const Location pc{label.address};
+ Function& function{functions[function_id]};
+ const auto it{
+ std::ranges::find_if(function.blocks, [pc](auto& block) { return block.Contains(pc); })};
+ if (it == function.blocks.end()) {
+ // Address has not been visited
+ return false;
+ }
+ Block* const visited_block{&*it};
+ if (visited_block->begin == pc) {
+ throw LogicError("Dangling block");
+ }
+ Block* const new_block{label.block};
+ Split(visited_block, new_block, pc);
+ function.blocks.insert(it, *new_block);
+ return true;
+}
+
+CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Location pc) {
+ const Instruction inst{env.ReadInstruction(pc.Offset())};
+ const Opcode opcode{Decode(inst.raw)};
+ switch (opcode) {
+ case Opcode::BRA:
+ case Opcode::JMP:
+ case Opcode::RET:
+ if (!AnalyzeBranch(block, function_id, pc, inst, opcode)) {
+ return AnalysisState::Continue;
+ }
+ switch (opcode) {
+ case Opcode::BRA:
+ case Opcode::JMP:
+ AnalyzeBRA(block, function_id, pc, inst, IsAbsoluteJump(opcode));
+ break;
+ case Opcode::RET:
+ block->end_class = EndClass::Return;
+ break;
+ default:
+ break;
+ }
+ block->end = pc;
+ return AnalysisState::Branch;
+ case Opcode::BRK:
+ case Opcode::CONT:
+ case Opcode::LONGJMP:
+ case Opcode::SYNC: {
+ if (!AnalyzeBranch(block, function_id, pc, inst, opcode)) {
+ return AnalysisState::Continue;
+ }
+ const auto [stack_pc, new_stack]{block->stack.Pop(OpcodeToken(opcode))};
+ block->branch_true = AddLabel(block, new_stack, stack_pc, function_id);
+ block->end = pc;
+ return AnalysisState::Branch;
+ }
+ case Opcode::KIL: {
+ const Predicate pred{inst.Pred()};
+ const auto ir_pred{static_cast<IR::Pred>(pred.index)};
+ const IR::Condition cond{inst.branch.flow_test, ir_pred, pred.negated};
+ AnalyzeCondInst(block, function_id, pc, EndClass::Kill, cond);
+ return AnalysisState::Branch;
+ }
+ case Opcode::PBK:
+ case Opcode::PCNT:
+ case Opcode::PEXIT:
+ case Opcode::PLONGJMP:
+ case Opcode::SSY:
+ block->stack.Push(OpcodeToken(opcode), BranchOffset(pc, inst));
+ return AnalysisState::Continue;
+ case Opcode::BRX:
+ case Opcode::JMX:
+ return AnalyzeBRX(block, pc, inst, IsAbsoluteJump(opcode), function_id);
+ case Opcode::EXIT:
+ return AnalyzeEXIT(block, function_id, pc, inst);
+ case Opcode::PRET:
+ throw NotImplementedException("PRET flow analysis");
+ case Opcode::CAL:
+ case Opcode::JCAL: {
+ const bool is_absolute{IsAbsoluteJump(opcode)};
+ const Location cal_pc{is_absolute ? inst.branch.Absolute() : BranchOffset(pc, inst)};
+ // Technically CAL pushes into PRET, but that's implicit in the function call for us
+ // Insert the function into the list if it doesn't exist
+ const auto it{std::ranges::find(functions, cal_pc, &Function::entrypoint)};
+ const bool exists{it != functions.end()};
+ const FunctionId call_id{exists ? static_cast<size_t>(std::distance(functions.begin(), it))
+ : functions.size()};
+ if (!exists) {
+ functions.emplace_back(block_pool, cal_pc);
+ }
+ block->end_class = EndClass::Call;
+ block->function_call = call_id;
+ block->return_block = AddLabel(block, block->stack, pc + 1, function_id);
+ block->end = pc;
+ return AnalysisState::Branch;
+ }
+ default:
+ break;
+ }
+ const Predicate pred{inst.Pred()};
+ if (pred == Predicate{true} || pred == Predicate{false}) {
+ return AnalysisState::Continue;
+ }
+ const IR::Condition cond{static_cast<IR::Pred>(pred.index), pred.negated};
+ AnalyzeCondInst(block, function_id, pc, EndClass::Branch, cond);
+ return AnalysisState::Branch;
+}
+
+void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc,
+ EndClass insn_end_class, IR::Condition cond) {
+ if (block->begin != pc) {
+        // If the block doesn't start at the conditional instruction,
+        // mark the instruction's address as a label and visit it as its own block later
+ block->end = pc;
+ block->cond = IR::Condition{true};
+ block->branch_true = AddLabel(block, block->stack, pc, function_id);
+ block->branch_false = nullptr;
+ return;
+ }
+ // Create a virtual block and a conditional block
+ Block* const conditional_block{block_pool.Create()};
+ Block virtual_block{};
+ virtual_block.begin = block->begin.Virtual();
+ virtual_block.end = block->begin.Virtual();
+ virtual_block.end_class = EndClass::Branch;
+ virtual_block.stack = block->stack;
+ virtual_block.cond = cond;
+ virtual_block.branch_true = conditional_block;
+ virtual_block.branch_false = nullptr;
+ // Save the contents of the visited block in the conditional block
+ *conditional_block = std::move(*block);
+ // Impersonate the visited block with a virtual block
+ *block = std::move(virtual_block);
+ // Set the end properties of the conditional instruction
+ conditional_block->end = pc + 1;
+ conditional_block->end_class = insn_end_class;
+ // Add a label to the instruction after the conditional instruction
+ Block* const endif_block{AddLabel(conditional_block, block->stack, pc + 1, function_id)};
+ // Branch to the next instruction from the virtual block
+ block->branch_false = endif_block;
+ // And branch to it from the conditional instruction if it is a branch or a kill instruction
+ // Kill instructions are considered a branch because they demote to a helper invocation and
+ // execution may continue.
+ if (insn_end_class == EndClass::Branch || insn_end_class == EndClass::Kill) {
+ conditional_block->cond = IR::Condition{true};
+ conditional_block->branch_true = endif_block;
+ conditional_block->branch_false = nullptr;
+ }
+ // Finally insert the condition block into the list of blocks
+ functions[function_id].blocks.insert(*conditional_block);
+}
+
+bool CFG::AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst,
+ Opcode opcode) {
+ if (inst.branch.is_cbuf) {
+ throw NotImplementedException("Branch with constant buffer offset");
+ }
+ const Predicate pred{inst.Pred()};
+ if (pred == Predicate{false}) {
+ return false;
+ }
+ const bool has_flow_test{HasFlowTest(opcode)};
+ const IR::FlowTest flow_test{has_flow_test ? inst.branch.flow_test.Value() : IR::FlowTest::T};
+ if (pred != Predicate{true} || flow_test != IR::FlowTest::T) {
+ block->cond = IR::Condition(flow_test, static_cast<IR::Pred>(pred.index), pred.negated);
+ block->branch_false = AddLabel(block, block->stack, pc + 1, function_id);
+ } else {
+ block->cond = IR::Condition{true};
+ }
+ return true;
+}
+
+void CFG::AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruction inst,
+ bool is_absolute) {
+ const Location bra_pc{is_absolute ? inst.branch.Absolute() : BranchOffset(pc, inst)};
+ block->branch_true = AddLabel(block, block->stack, bra_pc, function_id);
+}
+
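+// BRX/JMX jump through a table of targets stored in a constant buffer. TrackIndirectBranchTable
+// walks backwards from the branch to recover which constant buffer holds the table and how many
+// entries it has; each entry is then read and materialized as a label, turning the indirect
+// branch into a finite set of direct edges.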
+CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute,
+ FunctionId function_id) {
+ const std::optional brx_table{TrackIndirectBranchTable(env, pc, program_start)};
+ if (!brx_table) {
+ TrackIndirectBranchTable(env, pc, program_start);
+ throw NotImplementedException("Failed to track indirect branch");
+ }
+ const IR::FlowTest flow_test{inst.branch.flow_test};
+ const Predicate pred{inst.Pred()};
+ if (flow_test != IR::FlowTest::T || pred != Predicate{true}) {
+ throw NotImplementedException("Conditional indirect branch");
+ }
+ std::vector<u32> targets;
+ targets.reserve(brx_table->num_entries);
+ for (u32 i = 0; i < brx_table->num_entries; ++i) {
+ u32 target{env.ReadCbufValue(brx_table->cbuf_index, brx_table->cbuf_offset + i * 4)};
+ if (!is_absolute) {
+ target += pc.Offset();
+ }
+ target += static_cast<u32>(brx_table->branch_offset);
+ target += 8;
+ targets.push_back(target);
+ }
+ std::ranges::sort(targets);
+ targets.erase(std::unique(targets.begin(), targets.end()), targets.end());
+
+ block->indirect_branches.reserve(targets.size());
+ for (const u32 target : targets) {
+ Block* const branch{AddLabel(block, block->stack, target, function_id)};
+ block->indirect_branches.push_back({
+ .block = branch,
+ .address = target,
+ });
+ }
+ block->cond = IR::Condition{true};
+ block->end = pc + 1;
+ block->end_class = EndClass::IndirectBranch;
+ block->branch_reg = brx_table->branch_reg;
+ block->branch_offset = brx_table->branch_offset + 8;
+ if (!is_absolute) {
+ block->branch_offset += pc.Offset();
+ }
+ return AnalysisState::Branch;
+}
+
+CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Location pc,
+ Instruction inst) {
+ const IR::FlowTest flow_test{inst.branch.flow_test};
+ const Predicate pred{inst.Pred()};
+ if (pred == Predicate{false} || flow_test == IR::FlowTest::F) {
+ // EXIT will never be taken
+ return AnalysisState::Continue;
+ }
+ if (exits_to_dispatcher && function_id != 0) {
+ throw NotImplementedException("Dispatch EXIT on external function");
+ }
+ if (pred != Predicate{true} || flow_test != IR::FlowTest::T) {
+ if (block->stack.Peek(Token::PEXIT).has_value()) {
+ throw NotImplementedException("Conditional EXIT with PEXIT token");
+ }
+ const IR::Condition cond{flow_test, static_cast<IR::Pred>(pred.index), pred.negated};
+ if (exits_to_dispatcher) {
+ block->end = pc;
+ block->end_class = EndClass::Branch;
+ block->cond = cond;
+ block->branch_true = dispatch_block;
+ block->branch_false = AddLabel(block, block->stack, pc + 1, function_id);
+ return AnalysisState::Branch;
+ }
+ AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond);
+ return AnalysisState::Branch;
+ }
+ if (const std::optional<Location> exit_pc{block->stack.Peek(Token::PEXIT)}) {
+ const Stack popped_stack{block->stack.Remove(Token::PEXIT)};
+ block->cond = IR::Condition{true};
+ block->branch_true = AddLabel(block, popped_stack, *exit_pc, function_id);
+ block->branch_false = nullptr;
+ return AnalysisState::Branch;
+ }
+ if (exits_to_dispatcher) {
+ block->cond = IR::Condition{true};
+ block->end = pc;
+ block->end_class = EndClass::Branch;
+ block->branch_true = dispatch_block;
+ block->branch_false = nullptr;
+ return AnalysisState::Branch;
+ }
+ block->end = pc + 1;
+ block->end_class = EndClass::Exit;
+ return AnalysisState::Branch;
+}
+
+Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function_id) {
+ Function& function{functions[function_id]};
+ if (block->begin == pc) {
+ // Jumps to itself
+ return block;
+ }
+ if (const auto it{function.blocks.find(pc, Compare{})}; it != function.blocks.end()) {
+ // Block already exists and it has been visited
+ if (function.blocks.begin() != it) {
+ // Check if the previous node is the virtual variant of the label
+ // This won't exist if a virtual node is not needed or it hasn't been visited
+            // If it hasn't been visited and a virtual node is needed, this still behaves as
+            // expected because the node will be impersonated by its virtual variant.
+ const auto prev{std::prev(it)};
+ if (it->begin.Virtual() == prev->begin) {
+ return &*prev;
+ }
+ }
+ return &*it;
+ }
+    // Make sure we don't insert the same label twice
+ const auto label_it{std::ranges::find(function.labels, pc, &Label::address)};
+ if (label_it != function.labels.end()) {
+ return label_it->block;
+ }
+ Block* const new_block{block_pool.Create()};
+ new_block->begin = pc;
+ new_block->end = pc;
+ new_block->end_class = EndClass::Branch;
+ new_block->cond = IR::Condition(true);
+ new_block->stack = stack;
+ new_block->branch_true = nullptr;
+ new_block->branch_false = nullptr;
+ function.labels.push_back(Label{
+ .address{pc},
+ .block = new_block,
+ .stack{std::move(stack)},
+ });
+ return new_block;
+}
+
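+// Builds a Graphviz description of the control flow graph for debugging purposes; each function
+// becomes a cluster and each block a node. The output can be rendered with the standard dot tool.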
+std::string CFG::Dot() const {
+ int node_uid{0};
+
+ std::string dot{"digraph shader {\n"};
+ for (const Function& function : functions) {
+ dot += fmt::format("\tsubgraph cluster_{} {{\n", function.entrypoint);
+ dot += fmt::format("\t\tnode [style=filled];\n");
+ for (const Block& block : function.blocks) {
+ const std::string name{NameOf(block)};
+ const auto add_branch = [&](Block* branch, bool add_label) {
+ dot += fmt::format("\t\t{}->{}", name, NameOf(*branch));
+ if (add_label && block.cond != IR::Condition{true} &&
+ block.cond != IR::Condition{false}) {
+ dot += fmt::format(" [label=\"{}\"]", block.cond);
+ }
+ dot += '\n';
+ };
+ dot += fmt::format("\t\t{};\n", name);
+ switch (block.end_class) {
+ case EndClass::Branch:
+ if (block.cond != IR::Condition{false}) {
+ add_branch(block.branch_true, true);
+ }
+ if (block.cond != IR::Condition{true}) {
+ add_branch(block.branch_false, false);
+ }
+ break;
+ case EndClass::IndirectBranch:
+ for (const IndirectBranch& branch : block.indirect_branches) {
+ add_branch(branch.block, false);
+ }
+ break;
+ case EndClass::Call:
+ dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
+ dot += fmt::format("\t\tN{}->{};\n", node_uid, NameOf(*block.return_block));
+ dot += fmt::format("\t\tN{} [label=\"Call {}\"][shape=square][style=stripped];\n",
+ node_uid, block.function_call);
+ dot += '\n';
+ ++node_uid;
+ break;
+ case EndClass::Exit:
+ dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
+ dot += fmt::format("\t\tN{} [label=\"Exit\"][shape=square][style=stripped];\n",
+ node_uid);
+ ++node_uid;
+ break;
+ case EndClass::Return:
+ dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
+ dot += fmt::format("\t\tN{} [label=\"Return\"][shape=square][style=stripped];\n",
+ node_uid);
+ ++node_uid;
+ break;
+ case EndClass::Kill:
+ dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
+ dot += fmt::format("\t\tN{} [label=\"Kill\"][shape=square][style=stripped];\n",
+ node_uid);
+ ++node_uid;
+ break;
+ }
+ }
+ if (function.entrypoint == 8) {
+ dot += fmt::format("\t\tlabel = \"main\";\n");
+ } else {
+ dot += fmt::format("\t\tlabel = \"Function {}\";\n", function.entrypoint);
+ }
+ dot += "\t}\n";
+ }
+ if (!functions.empty()) {
+ auto& function{functions.front()};
+ if (function.blocks.empty()) {
+ dot += "Start;\n";
+ } else {
+ dot += fmt::format("\tStart -> {};\n", NameOf(*function.blocks.begin()));
+ }
+ dot += fmt::format("\tStart [shape=diamond];\n");
+ }
+ dot += "}\n";
+ return dot;
+}
+
+} // namespace Shader::Maxwell::Flow
diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h
new file mode 100644
index 000000000..a6bd3e196
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/control_flow.h
@@ -0,0 +1,169 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <compare>
+#include <optional>
+#include <span>
+#include <string>
+#include <vector>
+
+#include <boost/container/small_vector.hpp>
+#include <boost/intrusive/set.hpp>
+
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/ir/condition.h"
+#include "shader_recompiler/frontend/maxwell/instruction.h"
+#include "shader_recompiler/frontend/maxwell/location.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+#include "shader_recompiler/object_pool.h"
+
+namespace Shader::Maxwell::Flow {
+
+struct Block;
+
+using FunctionId = size_t;
+
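+// How a block terminates: a (possibly conditional) direct branch, an indirect BRX/JMX branch,
+// a CAL subroutine call, a program EXIT, a subroutine RET, or a KIL thread kill.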
+enum class EndClass {
+ Branch,
+ IndirectBranch,
+ Call,
+ Exit,
+ Return,
+ Kill,
+};
+
+enum class Token {
+ SSY,
+ PBK,
+ PEXIT,
+ PRET,
+ PCNT,
+ PLONGJMP,
+};
+
+struct StackEntry {
+ auto operator<=>(const StackEntry&) const noexcept = default;
+
+ Token token;
+ Location target;
+};
+
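+// Value-semantics stack of control flow tokens, pushed by SSY, PBK, PEXIT, PRET, PCNT and
+// PLONGJMP and consumed by their matching SYNC, BRK, EXIT, RET, CONT and LONGJMP instructions.
+// Backed by a small_vector because the stack rarely holds more than a few entries.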
+class Stack {
+public:
+ void Push(Token token, Location target);
+ [[nodiscard]] std::pair<Location, Stack> Pop(Token token) const;
+ [[nodiscard]] std::optional<Location> Peek(Token token) const;
+ [[nodiscard]] Stack Remove(Token token) const;
+
+private:
+ boost::container::small_vector<StackEntry, 3> entries;
+};
+
+struct IndirectBranch {
+ Block* block;
+ u32 address;
+};
+
+struct Block : boost::intrusive::set_base_hook<
+ // Normal link is ~2.5% faster compared to safe link
+ boost::intrusive::link_mode<boost::intrusive::normal_link>> {
+ [[nodiscard]] bool Contains(Location pc) const noexcept;
+
+ bool operator<(const Block& rhs) const noexcept {
+ return begin < rhs.begin;
+ }
+
+ Location begin;
+ Location end;
+ EndClass end_class{};
+ IR::Condition cond{};
+ Stack stack;
+ Block* branch_true{};
+ Block* branch_false{};
+ FunctionId function_call{};
+ Block* return_block{};
+ IR::Reg branch_reg{};
+ s32 branch_offset{};
+ std::vector<IndirectBranch> indirect_branches;
+};
+
+struct Label {
+ Location address;
+ Block* block;
+ Stack stack;
+};
+
+struct Function {
+ explicit Function(ObjectPool<Block>& block_pool, Location start_address);
+
+ Location entrypoint;
+ boost::container::small_vector<Label, 16> labels;
+ boost::intrusive::set<Block> blocks;
+};
+
+class CFG {
+ enum class AnalysisState {
+ Branch,
+ Continue,
+ };
+
+public:
+ explicit CFG(Environment& env, ObjectPool<Block>& block_pool, Location start_address,
+ bool exits_to_dispatcher = false);
+
+ CFG& operator=(const CFG&) = delete;
+ CFG(const CFG&) = delete;
+
+ CFG& operator=(CFG&&) = delete;
+ CFG(CFG&&) = delete;
+
+ [[nodiscard]] std::string Dot() const;
+
+ [[nodiscard]] std::span<const Function> Functions() const noexcept {
+ return std::span(functions.data(), functions.size());
+ }
+ [[nodiscard]] std::span<Function> Functions() noexcept {
+ return std::span(functions.data(), functions.size());
+ }
+
+ [[nodiscard]] bool ExitsToDispatcher() const {
+ return exits_to_dispatcher;
+ }
+
+private:
+ void AnalyzeLabel(FunctionId function_id, Label& label);
+
+ /// Inspect already visited blocks.
+ /// Return true when the block has already been visited
+ bool InspectVisitedBlocks(FunctionId function_id, const Label& label);
+
+ AnalysisState AnalyzeInst(Block* block, FunctionId function_id, Location pc);
+
+ void AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, EndClass insn_end_class,
+ IR::Condition cond);
+
+    /// Return true when the branch instruction can actually be taken
+ bool AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst,
+ Opcode opcode);
+
+ void AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruction inst,
+ bool is_absolute);
+ AnalysisState AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute,
+ FunctionId function_id);
+ AnalysisState AnalyzeEXIT(Block* block, FunctionId function_id, Location pc, Instruction inst);
+
+    /// Return the branch target block
+ Block* AddLabel(Block* block, Stack stack, Location pc, FunctionId function_id);
+
+ Environment& env;
+ ObjectPool<Block>& block_pool;
+ boost::container::small_vector<Function, 1> functions;
+ Location program_start;
+ bool exits_to_dispatcher{};
+ Block* dispatch_block{};
+};
+
+} // namespace Shader::Maxwell::Flow
diff --git a/src/shader_recompiler/frontend/maxwell/decode.cpp b/src/shader_recompiler/frontend/maxwell/decode.cpp
new file mode 100644
index 000000000..972f677dc
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/decode.cpp
@@ -0,0 +1,149 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <bit>
+#include <memory>
+#include <string_view>
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/decode.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+
+namespace Shader::Maxwell {
+namespace {
+struct MaskValue {
+ u64 mask;
+ u64 value;
+};
+
+constexpr MaskValue MaskValueFromEncoding(const char* encoding) {
+ u64 mask{};
+ u64 value{};
+ u64 bit{u64(1) << 63};
+ while (*encoding) {
+ switch (*encoding) {
+ case '0':
+ mask |= bit;
+ break;
+ case '1':
+ mask |= bit;
+ value |= bit;
+ break;
+ case '-':
+ break;
+ case ' ':
+ break;
+ default:
+ throw LogicError("Invalid encoding character '{}'", *encoding);
+ }
+ ++encoding;
+ if (*encoding != ' ') {
+ bit >>= 1;
+ }
+ }
+ return MaskValue{.mask = mask, .value = value};
+}
+
+struct InstEncoding {
+ MaskValue mask_value;
+ Opcode opcode;
+};
+constexpr std::array UNORDERED_ENCODINGS{
+#define INST(name, cute, encode) \
+ InstEncoding{ \
+ .mask_value{MaskValueFromEncoding(encode)}, \
+ .opcode = Opcode::name, \
+ },
+#include "maxwell.inc"
+#undef INST
+};
+
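+// Sort encodings by descending mask population count so that more specific encodings are tried
+// before less specific ones when several candidates land in the same fast lookup slot.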
+constexpr auto SortedEncodings() {
+ std::array encodings{UNORDERED_ENCODINGS};
+ std::ranges::sort(encodings, [](const InstEncoding& lhs, const InstEncoding& rhs) {
+ return std::popcount(lhs.mask_value.mask) > std::popcount(rhs.mask_value.mask);
+ });
+ return encodings;
+}
+constexpr auto ENCODINGS{SortedEncodings()};
+
+constexpr int WidestLeftBits() {
+ int bits{64};
+ for (const InstEncoding& encoding : ENCODINGS) {
+ bits = std::min(bits, std::countr_zero(encoding.mask_value.mask));
+ }
+ return 64 - bits;
+}
+constexpr int WIDEST_LEFT_BITS{WidestLeftBits()};
+constexpr int MASK_SHIFT{64 - WIDEST_LEFT_BITS};
+
+constexpr size_t ToFastLookupIndex(u64 value) {
+ return static_cast<size_t>(value >> MASK_SHIFT);
+}
+
+constexpr size_t FastLookupSize() {
+ size_t max_width{};
+ for (const InstEncoding& encoding : ENCODINGS) {
+ max_width = std::max(max_width, ToFastLookupIndex(encoding.mask_value.mask));
+ }
+ return max_width + 1;
+}
+constexpr size_t FAST_LOOKUP_SIZE{FastLookupSize()};
+
+struct InstInfo {
+ [[nodiscard]] u64 Mask() const noexcept {
+ return static_cast<u64>(high_mask) << MASK_SHIFT;
+ }
+
+ [[nodiscard]] u64 Value() const noexcept {
+ return static_cast<u64>(high_value) << MASK_SHIFT;
+ }
+
+ u16 high_mask;
+ u16 high_value;
+ Opcode opcode;
+};
+
+constexpr auto MakeFastLookupTableIndex(size_t index) {
+ std::array<InstInfo, 2> encodings{};
+ size_t element{};
+ for (const auto& encoding : ENCODINGS) {
+ const size_t mask{ToFastLookupIndex(encoding.mask_value.mask)};
+ const size_t value{ToFastLookupIndex(encoding.mask_value.value)};
+ if ((index & mask) == value) {
+ encodings.at(element) = InstInfo{
+ .high_mask = static_cast<u16>(encoding.mask_value.mask >> MASK_SHIFT),
+ .high_value = static_cast<u16>(encoding.mask_value.value >> MASK_SHIFT),
+ .opcode = encoding.opcode,
+ };
+ ++element;
+ }
+ }
+ return encodings;
+}
+
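+// Not constexpr, presumably because the heap-allocated table cannot outlive constant evaluation;
+// it is built once at static initialization time instead.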
+/*constexpr*/ auto MakeFastLookupTable() {
+ auto encodings{std::make_unique<std::array<std::array<InstInfo, 2>, FAST_LOOKUP_SIZE>>()};
+ for (size_t index = 0; index < FAST_LOOKUP_SIZE; ++index) {
+ (*encodings)[index] = MakeFastLookupTableIndex(index);
+ }
+ return encodings;
+}
+const auto FAST_LOOKUP_TABLE{MakeFastLookupTable()};
+} // Anonymous namespace
+
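+// Two-step decode: the topmost bits of the instruction index FAST_LOOKUP_TABLE, which holds up
+// to two candidate encodings, and the full mask/value pair of each candidate disambiguates.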
+Opcode Decode(u64 insn) {
+ const auto& table{(*FAST_LOOKUP_TABLE)[ToFastLookupIndex(insn)]};
+ const auto it{std::ranges::find_if(
+ table, [insn](const InstInfo& info) { return (insn & info.Mask()) == info.Value(); })};
+ if (it == table.end()) {
+ throw NotImplementedException("Instruction 0x{:016x} is unknown / unimplemented", insn);
+ }
+ return it->opcode;
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/decode.h b/src/shader_recompiler/frontend/maxwell/decode.h
new file mode 100644
index 000000000..b4f080fd7
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/decode.h
@@ -0,0 +1,14 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+
+namespace Shader::Maxwell {
+
+[[nodiscard]] Opcode Decode(u64 insn);
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp
new file mode 100644
index 000000000..008625cb3
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp
@@ -0,0 +1,108 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <optional>
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/decode.h"
+#include "shader_recompiler/frontend/maxwell/indirect_branch_table_track.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/load_constant.h"
+
+namespace Shader::Maxwell {
+namespace {
+union Encoding {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<20, 19, u64> immediate;
+ BitField<56, 1, u64> is_negative;
+ BitField<20, 24, s64> brx_offset;
+};
+
+template <typename Callable>
+std::optional<u64> Track(Environment& env, Location block_begin, Location& pos, Callable&& func) {
+ while (pos >= block_begin) {
+ const u64 insn{env.ReadInstruction(pos.Offset())};
+ --pos;
+ if (func(insn, Decode(insn))) {
+ return insn;
+ }
+ }
+ return std::nullopt;
+}
+
+std::optional<u64> TrackLDC(Environment& env, Location block_begin, Location& pos,
+ IR::Reg brx_reg) {
+ return Track(env, block_begin, pos, [brx_reg](u64 insn, Opcode opcode) {
+ const LDC::Encoding ldc{insn};
+ return opcode == Opcode::LDC && ldc.dest_reg == brx_reg && ldc.size == LDC::Size::B32 &&
+ ldc.mode == LDC::Mode::Default;
+ });
+}
+
+std::optional<u64> TrackSHL(Environment& env, Location block_begin, Location& pos,
+ IR::Reg ldc_reg) {
+ return Track(env, block_begin, pos, [ldc_reg](u64 insn, Opcode opcode) {
+ const Encoding shl{insn};
+ return opcode == Opcode::SHL_imm && shl.dest_reg == ldc_reg;
+ });
+}
+
+std::optional<u64> TrackIMNMX(Environment& env, Location block_begin, Location& pos,
+ IR::Reg shl_reg) {
+ return Track(env, block_begin, pos, [shl_reg](u64 insn, Opcode opcode) {
+ const Encoding imnmx{insn};
+ return opcode == Opcode::IMNMX_imm && imnmx.dest_reg == shl_reg;
+ });
+}
+} // Anonymous namespace
+
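+// Recovers the table that feeds a BRX/JMX branch by matching the instruction sequence typically
+// emitted for jump tables: IMNMX clamps the index, SHL scales it to a byte offset, LDC loads the
+// target from a constant buffer, and BRX/JMX jumps to it. Walking backwards from the branch
+// yields the constant buffer slot, offset and entry count (the IMNMX immediate plus one).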
+std::optional<IndirectBranchTableInfo> TrackIndirectBranchTable(Environment& env, Location brx_pos,
+ Location block_begin) {
+ const u64 brx_insn{env.ReadInstruction(brx_pos.Offset())};
+ const Opcode brx_opcode{Decode(brx_insn)};
+ if (brx_opcode != Opcode::BRX && brx_opcode != Opcode::JMX) {
+ throw LogicError("Tracked instruction is not BRX or JMX");
+ }
+ const IR::Reg brx_reg{Encoding{brx_insn}.src_reg};
+ const s32 brx_offset{static_cast<s32>(Encoding{brx_insn}.brx_offset)};
+
+ Location pos{brx_pos};
+ const std::optional<u64> ldc_insn{TrackLDC(env, block_begin, pos, brx_reg)};
+ if (!ldc_insn) {
+ return std::nullopt;
+ }
+ const LDC::Encoding ldc{*ldc_insn};
+ const u32 cbuf_index{static_cast<u32>(ldc.index)};
+ const u32 cbuf_offset{static_cast<u32>(static_cast<s32>(ldc.offset.Value()))};
+ const IR::Reg ldc_reg{ldc.src_reg};
+
+ const std::optional<u64> shl_insn{TrackSHL(env, block_begin, pos, ldc_reg)};
+ if (!shl_insn) {
+ return std::nullopt;
+ }
+ const Encoding shl{*shl_insn};
+ const IR::Reg shl_reg{shl.src_reg};
+
+ const std::optional<u64> imnmx_insn{TrackIMNMX(env, block_begin, pos, shl_reg)};
+ if (!imnmx_insn) {
+ return std::nullopt;
+ }
+ const Encoding imnmx{*imnmx_insn};
+ if (imnmx.is_negative != 0) {
+ return std::nullopt;
+ }
+ const u32 imnmx_immediate{static_cast<u32>(imnmx.immediate.Value())};
+ return IndirectBranchTableInfo{
+ .cbuf_index = cbuf_index,
+ .cbuf_offset = cbuf_offset,
+ .num_entries = imnmx_immediate + 1,
+ .branch_offset = brx_offset,
+ .branch_reg = brx_reg,
+ };
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h
new file mode 100644
index 000000000..eee5102fa
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h
@@ -0,0 +1,28 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <optional>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/ir/reg.h"
+#include "shader_recompiler/frontend/maxwell/location.h"
+
+namespace Shader::Maxwell {
+
+struct IndirectBranchTableInfo {
+ u32 cbuf_index{};
+ u32 cbuf_offset{};
+ u32 num_entries{};
+ s32 branch_offset{};
+ IR::Reg branch_reg{};
+};
+
+std::optional<IndirectBranchTableInfo> TrackIndirectBranchTable(Environment& env, Location brx_pos,
+ Location block_begin);
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/instruction.h b/src/shader_recompiler/frontend/maxwell/instruction.h
new file mode 100644
index 000000000..743d68d61
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/instruction.h
@@ -0,0 +1,63 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/flow_test.h"
+#include "shader_recompiler/frontend/ir/reg.h"
+
+namespace Shader::Maxwell {
+
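+// Reference to a predicate register. Index 7 is PT (always true), so Predicate{true} maps to PT
+// and Predicate{false} to !PT.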
+struct Predicate {
+ Predicate() = default;
+ Predicate(unsigned index_, bool negated_ = false) : index{index_}, negated{negated_} {}
+ Predicate(bool value) : index{7}, negated{!value} {}
+ Predicate(u64 raw) : index{static_cast<unsigned>(raw & 7)}, negated{(raw & 8) != 0} {}
+
+ unsigned index;
+ bool negated;
+};
+
+inline bool operator==(const Predicate& lhs, const Predicate& rhs) noexcept {
+ return lhs.index == rhs.index && lhs.negated == rhs.negated;
+}
+
+inline bool operator!=(const Predicate& lhs, const Predicate& rhs) noexcept {
+ return !(lhs == rhs);
+}
+
+union Instruction {
+ Instruction(u64 raw_) : raw{raw_} {}
+
+ u64 raw;
+
+ union {
+ BitField<5, 1, u64> is_cbuf;
+ BitField<0, 5, IR::FlowTest> flow_test;
+
+ [[nodiscard]] u32 Absolute() const noexcept {
+ return static_cast<u32>(absolute);
+ }
+
+ [[nodiscard]] s32 Offset() const noexcept {
+ return static_cast<s32>(offset);
+ }
+
+ private:
+ BitField<20, 24, s64> offset;
+ BitField<20, 32, u64> absolute;
+ } branch;
+
+ [[nodiscard]] Predicate Pred() const noexcept {
+ return Predicate{pred};
+ }
+
+private:
+ BitField<16, 4, u64> pred;
+};
+static_assert(std::is_trivially_copyable_v<Instruction>);
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/location.h b/src/shader_recompiler/frontend/maxwell/location.h
new file mode 100644
index 000000000..26d29eae2
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/location.h
@@ -0,0 +1,112 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <compare>
+#include <iterator>
+
+#include <fmt/format.h>
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+
+namespace Shader::Maxwell {
+
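+// Program counter within a Maxwell shader. Code is laid out in 32-byte bundles whose first
+// 8-byte word is a scheduling/control word followed by three instruction words, so Align(),
+// Step() and Back() skip over the control words. A location biased down by VIRTUAL_BIAS never
+// aliases a real instruction and is used for the virtual blocks created by the CFG pass.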
+class Location {
+ static constexpr u32 VIRTUAL_BIAS{4};
+
+public:
+ constexpr Location() = default;
+
+ constexpr Location(u32 initial_offset) : offset{initial_offset} {
+ if (initial_offset % 8 != 0) {
+ throw InvalidArgument("initial_offset={} is not a multiple of 8", initial_offset);
+ }
+ Align();
+ }
+
+ constexpr Location Virtual() const noexcept {
+ Location virtual_location;
+ virtual_location.offset = offset - VIRTUAL_BIAS;
+ return virtual_location;
+ }
+
+ [[nodiscard]] constexpr u32 Offset() const noexcept {
+ return offset;
+ }
+
+ [[nodiscard]] constexpr bool IsVirtual() const {
+ return offset % 8 == VIRTUAL_BIAS;
+ }
+
+ constexpr auto operator<=>(const Location&) const noexcept = default;
+
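+    // Note: the prefix forms return the previous location and the postfix forms return the
+    // updated one, inverted from the usual C++ convention; the callers in this frontend appear
+    // to ignore the returned values, so behavior is unaffected.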
+ constexpr Location operator++() noexcept {
+ const Location copy{*this};
+ Step();
+ return copy;
+ }
+
+ constexpr Location operator++(int) noexcept {
+ Step();
+ return *this;
+ }
+
+ constexpr Location operator--() noexcept {
+ const Location copy{*this};
+ Back();
+ return copy;
+ }
+
+ constexpr Location operator--(int) noexcept {
+ Back();
+ return *this;
+ }
+
+ constexpr Location operator+(int number) const {
+ Location new_pc{*this};
+ while (number > 0) {
+ --number;
+ ++new_pc;
+ }
+ while (number < 0) {
+ ++number;
+ --new_pc;
+ }
+ return new_pc;
+ }
+
+ constexpr Location operator-(int number) const {
+ return operator+(-number);
+ }
+
+private:
+ constexpr void Align() {
+ offset += offset % 32 == 0 ? 8 : 0;
+ }
+
+ constexpr void Step() {
+ offset += 8 + (offset % 32 == 24 ? 8 : 0);
+ }
+
+ constexpr void Back() {
+ offset -= 8 + (offset % 32 == 8 ? 8 : 0);
+ }
+
+ u32 offset{0xcccccccc};
+};
+
+} // namespace Shader::Maxwell
+
+template <>
+struct fmt::formatter<Shader::Maxwell::Location> {
+ constexpr auto parse(format_parse_context& ctx) {
+ return ctx.begin();
+ }
+ template <typename FormatContext>
+ auto format(const Shader::Maxwell::Location& location, FormatContext& ctx) {
+ return fmt::format_to(ctx.out(), "{:04x}", location.Offset());
+ }
+};
diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc
new file mode 100644
index 000000000..2fee591bb
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc
@@ -0,0 +1,286 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
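+// Encoding table consumed through the INST(name, display name, encoding) X-macro. Each encoding
+// string describes the 16 most significant bits of the 64-bit instruction, most significant bit
+// first: '1'/'0' are fixed bits, '-' is a don't-care bit and spaces are ignored.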
+INST(AL2P, "AL2P", "1110 1111 1010 0---")
+INST(ALD, "ALD", "1110 1111 1101 1---")
+INST(AST, "AST", "1110 1111 1111 0---")
+INST(ATOM_cas, "ATOM (cas)", "1110 1110 1111 ----")
+INST(ATOM, "ATOM", "1110 1101 ---- ----")
+INST(ATOMS_cas, "ATOMS (cas)", "1110 1110 ---- ----")
+INST(ATOMS, "ATOMS", "1110 1100 ---- ----")
+INST(B2R, "B2R", "1111 0000 1011 1---")
+INST(BAR, "BAR", "1111 0000 1010 1---")
+INST(BFE_reg, "BFE (reg)", "0101 1100 0000 0---")
+INST(BFE_cbuf, "BFE (cbuf)", "0100 1100 0000 0---")
+INST(BFE_imm, "BFE (imm)", "0011 100- 0000 0---")
+INST(BFI_reg, "BFI (reg)", "0101 1011 1111 0---")
+INST(BFI_rc, "BFI (rc)", "0101 0011 1111 0---")
+INST(BFI_cr, "BFI (cr)", "0100 1011 1111 0---")
+INST(BFI_imm, "BFI (imm)", "0011 011- 1111 0---")
+INST(BPT, "BPT", "1110 0011 1010 ----")
+INST(BRA, "BRA", "1110 0010 0100 ----")
+INST(BRK, "BRK", "1110 0011 0100 ----")
+INST(BRX, "BRX", "1110 0010 0101 ----")
+INST(CAL, "CAL", "1110 0010 0110 ----")
+INST(CCTL, "CCTL", "1110 1111 011- ----")
+INST(CCTLL, "CCTLL", "1110 1111 100- ----")
+INST(CONT, "CONT", "1110 0011 0101 ----")
+INST(CS2R, "CS2R", "0101 0000 1100 1---")
+INST(CSET, "CSET", "0101 0000 1001 1---")
+INST(CSETP, "CSETP", "0101 0000 1010 0---")
+INST(DADD_reg, "DADD (reg)", "0101 1100 0111 0---")
+INST(DADD_cbuf, "DADD (cbuf)", "0100 1100 0111 0---")
+INST(DADD_imm, "DADD (imm)", "0011 100- 0111 0---")
+INST(DEPBAR, "DEPBAR", "1111 0000 1111 0---")
+INST(DFMA_reg, "DFMA (reg)", "0101 1011 0111 ----")
+INST(DFMA_rc, "DFMA (rc)", "0101 0011 0111 ----")
+INST(DFMA_cr, "DFMA (cr)", "0100 1011 0111 ----")
+INST(DFMA_imm, "DFMA (imm)", "0011 011- 0111 ----")
+INST(DMNMX_reg, "DMNMX (reg)", "0101 1100 0101 0---")
+INST(DMNMX_cbuf, "DMNMX (cbuf)", "0100 1100 0101 0---")
+INST(DMNMX_imm, "DMNMX (imm)", "0011 100- 0101 0---")
+INST(DMUL_reg, "DMUL (reg)", "0101 1100 1000 0---")
+INST(DMUL_cbuf, "DMUL (cbuf)", "0100 1100 1000 0---")
+INST(DMUL_imm, "DMUL (imm)", "0011 100- 1000 0---")
+INST(DSET_reg, "DSET (reg)", "0101 1001 0--- ----")
+INST(DSET_cbuf, "DSET (cbuf)", "0100 1001 0--- ----")
+INST(DSET_imm, "DSET (imm)", "0011 001- 0--- ----")
+INST(DSETP_reg, "DSETP (reg)", "0101 1011 1000 ----")
+INST(DSETP_cbuf, "DSETP (cbuf)", "0100 1011 1000 ----")
+INST(DSETP_imm, "DSETP (imm)", "0011 011- 1000 ----")
+INST(EXIT, "EXIT", "1110 0011 0000 ----")
+INST(F2F_reg, "F2F (reg)", "0101 1100 1010 1---")
+INST(F2F_cbuf, "F2F (cbuf)", "0100 1100 1010 1---")
+INST(F2F_imm, "F2F (imm)", "0011 100- 1010 1---")
+INST(F2I_reg, "F2I (reg)", "0101 1100 1011 0---")
+INST(F2I_cbuf, "F2I (cbuf)", "0100 1100 1011 0---")
+INST(F2I_imm, "F2I (imm)", "0011 100- 1011 0---")
+INST(FADD_reg, "FADD (reg)", "0101 1100 0101 1---")
+INST(FADD_cbuf, "FADD (cbuf)", "0100 1100 0101 1---")
+INST(FADD_imm, "FADD (imm)", "0011 100- 0101 1---")
+INST(FADD32I, "FADD32I", "0000 10-- ---- ----")
+INST(FCHK_reg, "FCHK (reg)", "0101 1100 1000 1---")
+INST(FCHK_cbuf, "FCHK (cbuf)", "0100 1100 1000 1---")
+INST(FCHK_imm, "FCHK (imm)", "0011 100- 1000 1---")
+INST(FCMP_reg, "FCMP (reg)", "0101 1011 1010 ----")
+INST(FCMP_rc, "FCMP (rc)", "0101 0011 1010 ----")
+INST(FCMP_cr, "FCMP (cr)", "0100 1011 1010 ----")
+INST(FCMP_imm, "FCMP (imm)", "0011 011- 1010 ----")
+INST(FFMA_reg, "FFMA (reg)", "0101 1001 1--- ----")
+INST(FFMA_rc, "FFMA (rc)", "0101 0001 1--- ----")
+INST(FFMA_cr, "FFMA (cr)", "0100 1001 1--- ----")
+INST(FFMA_imm, "FFMA (imm)", "0011 001- 1--- ----")
+INST(FFMA32I, "FFMA32I", "0000 11-- ---- ----")
+INST(FLO_reg, "FLO (reg)", "0101 1100 0011 0---")
+INST(FLO_cbuf, "FLO (cbuf)", "0100 1100 0011 0---")
+INST(FLO_imm, "FLO (imm)", "0011 100- 0011 0---")
+INST(FMNMX_reg, "FMNMX (reg)", "0101 1100 0110 0---")
+INST(FMNMX_cbuf, "FMNMX (cbuf)", "0100 1100 0110 0---")
+INST(FMNMX_imm, "FMNMX (imm)", "0011 100- 0110 0---")
+INST(FMUL_reg, "FMUL (reg)", "0101 1100 0110 1---")
+INST(FMUL_cbuf, "FMUL (cbuf)", "0100 1100 0110 1---")
+INST(FMUL_imm, "FMUL (imm)", "0011 100- 0110 1---")
+INST(FMUL32I, "FMUL32I", "0001 1110 ---- ----")
+INST(FSET_reg, "FSET (reg)", "0101 1000 ---- ----")
+INST(FSET_cbuf, "FSET (cbuf)", "0100 1000 ---- ----")
+INST(FSET_imm, "FSET (imm)", "0011 000- ---- ----")
+INST(FSETP_reg, "FSETP (reg)", "0101 1011 1011 ----")
+INST(FSETP_cbuf, "FSETP (cbuf)", "0100 1011 1011 ----")
+INST(FSETP_imm, "FSETP (imm)", "0011 011- 1011 ----")
+INST(FSWZADD, "FSWZADD", "0101 0000 1111 1---")
+INST(GETCRSPTR, "GETCRSPTR", "1110 0010 1100 ----")
+INST(GETLMEMBASE, "GETLMEMBASE", "1110 0010 1101 ----")
+INST(HADD2_reg, "HADD2 (reg)", "0101 1101 0001 0---")
+INST(HADD2_cbuf, "HADD2 (cbuf)", "0111 101- 1--- ----")
+INST(HADD2_imm, "HADD2 (imm)", "0111 101- 0--- ----")
+INST(HADD2_32I, "HADD2_32I", "0010 110- ---- ----")
+INST(HFMA2_reg, "HFMA2 (reg)", "0101 1101 0000 0---")
+INST(HFMA2_rc, "HFMA2 (rc)", "0110 0--- 1--- ----")
+INST(HFMA2_cr, "HFMA2 (cr)", "0111 0--- 1--- ----")
+INST(HFMA2_imm, "HFMA2 (imm)", "0111 0--- 0--- ----")
+INST(HFMA2_32I, "HFMA2_32I", "0010 100- ---- ----")
+INST(HMUL2_reg, "HMUL2 (reg)", "0101 1101 0000 1---")
+INST(HMUL2_cbuf, "HMUL2 (cbuf)", "0111 100- 1--- ----")
+INST(HMUL2_imm, "HMUL2 (imm)", "0111 100- 0--- ----")
+INST(HMUL2_32I, "HMUL2_32I", "0010 101- ---- ----")
+INST(HSET2_reg, "HSET2 (reg)", "0101 1101 0001 1---")
+INST(HSET2_cbuf, "HSET2 (cbuf)", "0111 110- 1--- ----")
+INST(HSET2_imm, "HSET2 (imm)", "0111 110- 0--- ----")
+INST(HSETP2_reg, "HSETP2 (reg)", "0101 1101 0010 0---")
+INST(HSETP2_cbuf, "HSETP2 (cbuf)", "0111 111- 1--- ----")
+INST(HSETP2_imm, "HSETP2 (imm)", "0111 111- 0--- ----")
+INST(I2F_reg, "I2F (reg)", "0101 1100 1011 1---")
+INST(I2F_cbuf, "I2F (cbuf)", "0100 1100 1011 1---")
+INST(I2F_imm, "I2F (imm)", "0011 100- 1011 1---")
+INST(I2I_reg, "I2I (reg)", "0101 1100 1110 0---")
+INST(I2I_cbuf, "I2I (cbuf)", "0100 1100 1110 0---")
+INST(I2I_imm, "I2I (imm)", "0011 100- 1110 0---")
+INST(IADD_reg, "IADD (reg)", "0101 1100 0001 0---")
+INST(IADD_cbuf, "IADD (cbuf)", "0100 1100 0001 0---")
+INST(IADD_imm, "IADD (imm)", "0011 100- 0001 0---")
+INST(IADD3_reg, "IADD3 (reg)", "0101 1100 1100 ----")
+INST(IADD3_cbuf, "IADD3 (cbuf)", "0100 1100 1100 ----")
+INST(IADD3_imm, "IADD3 (imm)", "0011 100- 1100 ----")
+INST(IADD32I, "IADD32I", "0001 110- ---- ----")
+INST(ICMP_reg, "ICMP (reg)", "0101 1011 0100 ----")
+INST(ICMP_rc, "ICMP (rc)", "0101 0011 0100 ----")
+INST(ICMP_cr, "ICMP (cr)", "0100 1011 0100 ----")
+INST(ICMP_imm, "ICMP (imm)", "0011 011- 0100 ----")
+INST(IDE, "IDE", "1110 0011 1001 ----")
+INST(IDP_reg, "IDP (reg)", "0101 0011 1111 1---")
+INST(IDP_imm, "IDP (imm)", "0101 0011 1101 1---")
+INST(IMAD_reg, "IMAD (reg)", "0101 1010 0--- ----")
+INST(IMAD_rc, "IMAD (rc)", "0101 0010 0--- ----")
+INST(IMAD_cr, "IMAD (cr)", "0100 1010 0--- ----")
+INST(IMAD_imm, "IMAD (imm)", "0011 010- 0--- ----")
+INST(IMAD32I, "IMAD32I", "1000 00-- ---- ----")
+INST(IMADSP_reg, "IMADSP (reg)", "0101 1010 1--- ----")
+INST(IMADSP_rc, "IMADSP (rc)", "0101 0010 1--- ----")
+INST(IMADSP_cr, "IMADSP (cr)", "0100 1010 1--- ----")
+INST(IMADSP_imm, "IMADSP (imm)", "0011 010- 1--- ----")
+INST(IMNMX_reg, "IMNMX (reg)", "0101 1100 0010 0---")
+INST(IMNMX_cbuf, "IMNMX (cbuf)", "0100 1100 0010 0---")
+INST(IMNMX_imm, "IMNMX (imm)", "0011 100- 0010 0---")
+INST(IMUL_reg, "IMUL (reg)", "0101 1100 0011 1---")
+INST(IMUL_cbuf, "IMUL (cbuf)", "0100 1100 0011 1---")
+INST(IMUL_imm, "IMUL (imm)", "0011 100- 0011 1---")
+INST(IMUL32I, "IMUL32I", "0001 1111 ---- ----")
+INST(IPA, "IPA", "1110 0000 ---- ----")
+INST(ISBERD, "ISBERD", "1110 1111 1101 0---")
+INST(ISCADD_reg, "ISCADD (reg)", "0101 1100 0001 1---")
+INST(ISCADD_cbuf, "ISCADD (cbuf)", "0100 1100 0001 1---")
+INST(ISCADD_imm, "ISCADD (imm)", "0011 100- 0001 1---")
+INST(ISCADD32I, "ISCADD32I", "0001 01-- ---- ----")
+INST(ISET_reg, "ISET (reg)", "0101 1011 0101 ----")
+INST(ISET_cbuf, "ISET (cbuf)", "0100 1011 0101 ----")
+INST(ISET_imm, "ISET (imm)", "0011 011- 0101 ----")
+INST(ISETP_reg, "ISETP (reg)", "0101 1011 0110 ----")
+INST(ISETP_cbuf, "ISETP (cbuf)", "0100 1011 0110 ----")
+INST(ISETP_imm, "ISETP (imm)", "0011 011- 0110 ----")
+INST(JCAL, "JCAL", "1110 0010 0010 ----")
+INST(JMP, "JMP", "1110 0010 0001 ----")
+INST(JMX, "JMX", "1110 0010 0000 ----")
+INST(KIL, "KIL", "1110 0011 0011 ----")
+INST(LD, "LD", "100- ---- ---- ----")
+INST(LDC, "LDC", "1110 1111 1001 0---")
+INST(LDG, "LDG", "1110 1110 1101 0---")
+INST(LDL, "LDL", "1110 1111 0100 0---")
+INST(LDS, "LDS", "1110 1111 0100 1---")
+INST(LEA_hi_reg, "LEA (hi reg)", "0101 1011 1101 1---")
+INST(LEA_hi_cbuf, "LEA (hi cbuf)", "0001 10-- ---- ----")
+INST(LEA_lo_reg, "LEA (lo reg)", "0101 1011 1101 0---")
+INST(LEA_lo_cbuf, "LEA (lo cbuf)", "0100 1011 1101 ----")
+INST(LEA_lo_imm, "LEA (lo imm)", "0011 011- 1101 0---")
+INST(LEPC, "LEPC", "0101 0000 1101 0---")
+INST(LONGJMP, "LONGJMP", "1110 0011 0001 ----")
+INST(LOP_reg, "LOP (reg)", "0101 1100 0100 0---")
+INST(LOP_cbuf, "LOP (cbuf)", "0100 1100 0100 0---")
+INST(LOP_imm, "LOP (imm)", "0011 100- 0100 0---")
+INST(LOP3_reg, "LOP3 (reg)", "0101 1011 1110 0---")
+INST(LOP3_cbuf, "LOP3 (cbuf)", "0000 001- ---- ----")
+INST(LOP3_imm, "LOP3 (imm)", "0011 11-- ---- ----")
+INST(LOP32I, "LOP32I", "0000 01-- ---- ----")
+INST(MEMBAR, "MEMBAR", "1110 1111 1001 1---")
+INST(MOV_reg, "MOV (reg)", "0101 1100 1001 1---")
+INST(MOV_cbuf, "MOV (cbuf)", "0100 1100 1001 1---")
+INST(MOV_imm, "MOV (imm)", "0011 100- 1001 1---")
+INST(MOV32I, "MOV32I", "0000 0001 0000 ----")
+INST(MUFU, "MUFU", "0101 0000 1000 0---")
+INST(NOP, "NOP", "0101 0000 1011 0---")
+INST(OUT_reg, "OUT (reg)", "1111 1011 1110 0---")
+INST(OUT_cbuf, "OUT (cbuf)", "1110 1011 1110 0---")
+INST(OUT_imm, "OUT (imm)", "1111 011- 1110 0---")
+INST(P2R_reg, "P2R (reg)", "0101 1100 1110 1---")
+INST(P2R_cbuf, "P2R (cbuf)", "0100 1100 1110 1---")
+INST(P2R_imm, "P2R (imm)", "0011 1000 1110 1---")
+INST(PBK, "PBK", "1110 0010 1010 ----")
+INST(PCNT, "PCNT", "1110 0010 1011 ----")
+INST(PEXIT, "PEXIT", "1110 0010 0011 ----")
+INST(PIXLD, "PIXLD", "1110 1111 1110 1---")
+INST(PLONGJMP, "PLONGJMP", "1110 0010 1000 ----")
+INST(POPC_reg, "POPC (reg)", "0101 1100 0000 1---")
+INST(POPC_cbuf, "POPC (cbuf)", "0100 1100 0000 1---")
+INST(POPC_imm, "POPC (imm)", "0011 100- 0000 1---")
+INST(PRET, "PRET", "1110 0010 0111 ----")
+INST(PRMT_reg, "PRMT (reg)", "0101 1011 1100 ----")
+INST(PRMT_rc, "PRMT (rc)", "0101 0011 1100 ----")
+INST(PRMT_cr, "PRMT (cr)", "0100 1011 1100 ----")
+INST(PRMT_imm, "PRMT (imm)", "0011 011- 1100 ----")
+INST(PSET, "PSET", "0101 0000 1000 1---")
+INST(PSETP, "PSETP", "0101 0000 1001 0---")
+INST(R2B, "R2B", "1111 0000 1100 0---")
+INST(R2P_reg, "R2P (reg)", "0101 1100 1111 0---")
+INST(R2P_cbuf, "R2P (cbuf)", "0100 1100 1111 0---")
+INST(R2P_imm, "R2P (imm)", "0011 100- 1111 0---")
+INST(RAM, "RAM", "1110 0011 1000 ----")
+INST(RED, "RED", "1110 1011 1111 1---")
+INST(RET, "RET", "1110 0011 0010 ----")
+INST(RRO_reg, "RRO (reg)", "0101 1100 1001 0---")
+INST(RRO_cbuf, "RRO (cbuf)", "0100 1100 1001 0---")
+INST(RRO_imm, "RRO (imm)", "0011 100- 1001 0---")
+INST(RTT, "RTT", "1110 0011 0110 ----")
+INST(S2R, "S2R", "1111 0000 1100 1---")
+INST(SAM, "SAM", "1110 0011 0111 ----")
+INST(SEL_reg, "SEL (reg)", "0101 1100 1010 0---")
+INST(SEL_cbuf, "SEL (cbuf)", "0100 1100 1010 0---")
+INST(SEL_imm, "SEL (imm)", "0011 100- 1010 0---")
+INST(SETCRSPTR, "SETCRSPTR", "1110 0010 1110 ----")
+INST(SETLMEMBASE, "SETLMEMBASE", "1110 0010 1111 ----")
+INST(SHF_l_reg, "SHF (l reg)", "0101 1011 1111 1---")
+INST(SHF_l_imm, "SHF (l imm)", "0011 011- 1111 1---")
+INST(SHF_r_reg, "SHF (r reg)", "0101 1100 1111 1---")
+INST(SHF_r_imm, "SHF (r imm)", "0011 100- 1111 1---")
+INST(SHFL, "SHFL", "1110 1111 0001 0---")
+INST(SHL_reg, "SHL (reg)", "0101 1100 0100 1---")
+INST(SHL_cbuf, "SHL (cbuf)", "0100 1100 0100 1---")
+INST(SHL_imm, "SHL (imm)", "0011 100- 0100 1---")
+INST(SHR_reg, "SHR (reg)", "0101 1100 0010 1---")
+INST(SHR_cbuf, "SHR (cbuf)", "0100 1100 0010 1---")
+INST(SHR_imm, "SHR (imm)", "0011 100- 0010 1---")
+INST(SSY, "SSY", "1110 0010 1001 ----")
+INST(ST, "ST", "101- ---- ---- ----")
+INST(STG, "STG", "1110 1110 1101 1---")
+INST(STL, "STL", "1110 1111 0101 0---")
+INST(STP, "STP", "1110 1110 1010 0---")
+INST(STS, "STS", "1110 1111 0101 1---")
+INST(SUATOM, "SUATOM", "1110 1010 0--- ----")
+INST(SUATOM_cas, "SUATOM_cas", "1110 1010 1--- ----")
+INST(SULD, "SULD", "1110 1011 000- ----")
+INST(SURED, "SURED", "1110 1011 010- ----")
+INST(SUST, "SUST", "1110 1011 001- ----")
+INST(SYNC, "SYNC", "1111 0000 1111 1---")
+INST(TEX, "TEX", "1100 0--- ---- ----")
+INST(TEX_b, "TEX (b)", "1101 1110 10-- ----")
+INST(TEXS, "TEXS", "1101 -00- ---- ----")
+INST(TLD, "TLD", "1101 1100 ---- ----")
+INST(TLD_b, "TLD (b)", "1101 1101 ---- ----")
+INST(TLD4, "TLD4", "1100 10-- ---- ----")
+INST(TLD4_b, "TLD4 (b)", "1101 1110 11-- ----")
+INST(TLD4S, "TLD4S", "1101 1111 -0-- ----")
+INST(TLDS, "TLDS", "1101 -01- ---- ----")
+INST(TMML, "TMML", "1101 1111 0101 1---")
+INST(TMML_b, "TMML (b)", "1101 1111 0110 0---")
+INST(TXA, "TXA", "1101 1111 0100 0---")
+INST(TXD, "TXD", "1101 1110 00-- ----")
+INST(TXD_b, "TXD (b)", "1101 1110 01-- ----")
+INST(TXQ, "TXQ", "1101 1111 0100 1---")
+INST(TXQ_b, "TXQ (b)", "1101 1111 0101 0---")
+INST(VABSDIFF, "VABSDIFF", "0101 0100 ---- ----")
+INST(VABSDIFF4, "VABSDIFF4", "0101 0000 0--- ----")
+INST(VADD, "VADD", "0010 00-- ---- ----")
+INST(VMAD, "VMAD", "0101 1111 ---- ----")
+INST(VMNMX, "VMNMX", "0011 101- ---- ----")
+INST(VOTE, "VOTE", "0101 0000 1101 1---")
+INST(VOTE_vtg, "VOTE (vtg)", "0101 0000 1110 0---")
+INST(VSET, "VSET", "0100 000- ---- ----")
+INST(VSETP, "VSETP", "0101 0000 1111 0---")
+INST(VSHL, "VSHL", "0101 0111 ---- ----")
+INST(VSHR, "VSHR", "0101 0110 ---- ----")
+INST(XMAD_reg, "XMAD (reg)", "0101 1011 00-- ----")
+INST(XMAD_rc, "XMAD (rc)", "0101 0001 0--- ----")
+INST(XMAD_cr, "XMAD (cr)", "0100 111- ---- ----")
+INST(XMAD_imm, "XMAD (imm)", "0011 011- 00-- ----")
+
+// Removed because its encoding fixes an unusually low bit, which would make the fast lookup
+// tables larger
+// INST(CCTLT, "CCTLT", "1110 1011 1111 0--0")
diff --git a/src/shader_recompiler/frontend/maxwell/opcodes.cpp b/src/shader_recompiler/frontend/maxwell/opcodes.cpp
new file mode 100644
index 000000000..ccc40c20c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/opcodes.cpp
@@ -0,0 +1,26 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <array>
+
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+
+namespace Shader::Maxwell {
+namespace {
+constexpr std::array NAME_TABLE{
+#define INST(name, cute, encode) cute,
+#include "maxwell.inc"
+#undef INST
+};
+} // Anonymous namespace
+
+const char* NameOf(Opcode opcode) {
+ if (static_cast<size_t>(opcode) >= NAME_TABLE.size()) {
+ throw InvalidArgument("Invalid opcode with raw value {}", static_cast<int>(opcode));
+ }
+ return NAME_TABLE[static_cast<size_t>(opcode)];
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/opcodes.h b/src/shader_recompiler/frontend/maxwell/opcodes.h
new file mode 100644
index 000000000..cd574f29d
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/opcodes.h
@@ -0,0 +1,30 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <fmt/format.h>
+
+namespace Shader::Maxwell {
+
+enum class Opcode {
+#define INST(name, cute, encode) name,
+#include "maxwell.inc"
+#undef INST
+};
+
+const char* NameOf(Opcode opcode);
+
+} // namespace Shader::Maxwell
+
+template <>
+struct fmt::formatter<Shader::Maxwell::Opcode> {
+ constexpr auto parse(format_parse_context& ctx) {
+ return ctx.begin();
+ }
+ template <typename FormatContext>
+ auto format(const Shader::Maxwell::Opcode& opcode, FormatContext& ctx) {
+ return format_to(ctx.out(), "{}", NameOf(opcode));
+ }
+};
diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
new file mode 100644
index 000000000..8b3e0a15c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
@@ -0,0 +1,883 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+#include <version>
+
+#include <fmt/format.h>
+
+#include <boost/intrusive/list.hpp>
+
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/maxwell/decode.h"
+#include "shader_recompiler/frontend/maxwell/structured_control_flow.h"
+#include "shader_recompiler/frontend/maxwell/translate/translate.h"
+#include "shader_recompiler/object_pool.h"
+
+namespace Shader::Maxwell {
+namespace {
+struct Statement;
+
+// Use normal_link because we are not guaranteed to destroy the tree in order
+using ListBaseHook =
+ boost::intrusive::list_base_hook<boost::intrusive::link_mode<boost::intrusive::normal_link>>;
+
+using Tree = boost::intrusive::list<Statement,
+ // Allow using Statement without a definition
+ boost::intrusive::base_hook<ListBaseHook>,
+ // Avoid linear complexity on splice, size is never called
+ boost::intrusive::constant_time_size<false>>;
+using Node = Tree::iterator;
+
+enum class StatementType {
+ Code,
+ Goto,
+ Label,
+ If,
+ Loop,
+ Break,
+ Return,
+ Kill,
+ Unreachable,
+ Function,
+ Identity,
+ Not,
+ Or,
+ SetVariable,
+ SetIndirectBranchVariable,
+ Variable,
+ IndirectBranchCond,
+};
+
+bool HasChildren(StatementType type) {
+ switch (type) {
+ case StatementType::If:
+ case StatementType::Loop:
+ case StatementType::Function:
+ return true;
+ default:
+ return false;
+ }
+}
+
+struct Goto {};
+struct Label {};
+struct If {};
+struct Loop {};
+struct Break {};
+struct Return {};
+struct Kill {};
+struct Unreachable {};
+struct FunctionTag {};
+struct Identity {};
+struct Not {};
+struct Or {};
+struct SetVariable {};
+struct SetIndirectBranchVariable {};
+struct Variable {};
+struct IndirectBranchCond {};
+
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 26495) // Always initialize a member variable, expected in Statement
+#endif
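+// Node of the statement tree built by GotoPass. Effectively a tagged union: which members of the
+// two anonymous unions are active depends on `type`, hence the disabled MSVC initialization
+// warning above.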
+struct Statement : ListBaseHook {
+ Statement(const Flow::Block* block_, Statement* up_)
+ : block{block_}, up{up_}, type{StatementType::Code} {}
+ Statement(Goto, Statement* cond_, Node label_, Statement* up_)
+ : label{label_}, cond{cond_}, up{up_}, type{StatementType::Goto} {}
+ Statement(Label, u32 id_, Statement* up_) : id{id_}, up{up_}, type{StatementType::Label} {}
+ Statement(If, Statement* cond_, Tree&& children_, Statement* up_)
+ : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::If} {}
+ Statement(Loop, Statement* cond_, Tree&& children_, Statement* up_)
+ : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::Loop} {}
+ Statement(Break, Statement* cond_, Statement* up_)
+ : cond{cond_}, up{up_}, type{StatementType::Break} {}
+ Statement(Return, Statement* up_) : up{up_}, type{StatementType::Return} {}
+ Statement(Kill, Statement* up_) : up{up_}, type{StatementType::Kill} {}
+ Statement(Unreachable, Statement* up_) : up{up_}, type{StatementType::Unreachable} {}
+ Statement(FunctionTag) : children{}, type{StatementType::Function} {}
+ Statement(Identity, IR::Condition cond_, Statement* up_)
+ : guest_cond{cond_}, up{up_}, type{StatementType::Identity} {}
+ Statement(Not, Statement* op_, Statement* up_) : op{op_}, up{up_}, type{StatementType::Not} {}
+ Statement(Or, Statement* op_a_, Statement* op_b_, Statement* up_)
+ : op_a{op_a_}, op_b{op_b_}, up{up_}, type{StatementType::Or} {}
+ Statement(SetVariable, u32 id_, Statement* op_, Statement* up_)
+ : op{op_}, id{id_}, up{up_}, type{StatementType::SetVariable} {}
+ Statement(SetIndirectBranchVariable, IR::Reg branch_reg_, s32 branch_offset_, Statement* up_)
+ : branch_offset{branch_offset_},
+ branch_reg{branch_reg_}, up{up_}, type{StatementType::SetIndirectBranchVariable} {}
+ Statement(Variable, u32 id_, Statement* up_)
+ : id{id_}, up{up_}, type{StatementType::Variable} {}
+ Statement(IndirectBranchCond, u32 location_, Statement* up_)
+ : location{location_}, up{up_}, type{StatementType::IndirectBranchCond} {}
+
+ ~Statement() {
+ if (HasChildren(type)) {
+ std::destroy_at(&children);
+ }
+ }
+
+ union {
+ const Flow::Block* block;
+ Node label;
+ Tree children;
+ IR::Condition guest_cond;
+ Statement* op;
+ Statement* op_a;
+ u32 location;
+ s32 branch_offset;
+ };
+ union {
+ Statement* cond;
+ Statement* op_b;
+ u32 id;
+ IR::Reg branch_reg;
+ };
+ Statement* up{};
+ StatementType type;
+};
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+std::string DumpExpr(const Statement* stmt) {
+ switch (stmt->type) {
+ case StatementType::Identity:
+ return fmt::format("{}", stmt->guest_cond);
+ case StatementType::Not:
+ return fmt::format("!{}", DumpExpr(stmt->op));
+ case StatementType::Or:
+ return fmt::format("{} || {}", DumpExpr(stmt->op_a), DumpExpr(stmt->op_b));
+ case StatementType::Variable:
+ return fmt::format("goto_L{}", stmt->id);
+ case StatementType::IndirectBranchCond:
+ return fmt::format("(indirect_branch == {:x})", stmt->location);
+ default:
+ return "<invalid type>";
+ }
+}
+
+[[maybe_unused]] std::string DumpTree(const Tree& tree, u32 indentation = 0) {
+ std::string ret;
+ std::string indent(indentation, ' ');
+ for (auto stmt = tree.begin(); stmt != tree.end(); ++stmt) {
+ switch (stmt->type) {
+ case StatementType::Code:
+ ret += fmt::format("{} Block {:04x} -> {:04x} (0x{:016x});\n", indent,
+ stmt->block->begin.Offset(), stmt->block->end.Offset(),
+ reinterpret_cast<uintptr_t>(stmt->block));
+ break;
+ case StatementType::Goto:
+ ret += fmt::format("{} if ({}) goto L{};\n", indent, DumpExpr(stmt->cond),
+ stmt->label->id);
+ break;
+ case StatementType::Label:
+ ret += fmt::format("{}L{}:\n", indent, stmt->id);
+ break;
+ case StatementType::If:
+ ret += fmt::format("{} if ({}) {{\n", indent, DumpExpr(stmt->cond));
+ ret += DumpTree(stmt->children, indentation + 4);
+ ret += fmt::format("{} }}\n", indent);
+ break;
+ case StatementType::Loop:
+ ret += fmt::format("{} do {{\n", indent);
+ ret += DumpTree(stmt->children, indentation + 4);
+ ret += fmt::format("{} }} while ({});\n", indent, DumpExpr(stmt->cond));
+ break;
+ case StatementType::Break:
+ ret += fmt::format("{} if ({}) break;\n", indent, DumpExpr(stmt->cond));
+ break;
+ case StatementType::Return:
+ ret += fmt::format("{} return;\n", indent);
+ break;
+ case StatementType::Kill:
+ ret += fmt::format("{} kill;\n", indent);
+ break;
+ case StatementType::Unreachable:
+ ret += fmt::format("{} unreachable;\n", indent);
+ break;
+ case StatementType::SetVariable:
+ ret += fmt::format("{} goto_L{} = {};\n", indent, stmt->id, DumpExpr(stmt->op));
+ break;
+ case StatementType::SetIndirectBranchVariable:
+ ret += fmt::format("{} indirect_branch = {} + {};\n", indent, stmt->branch_reg,
+ stmt->branch_offset);
+ break;
+ case StatementType::Function:
+ case StatementType::Identity:
+ case StatementType::Not:
+ case StatementType::Or:
+ case StatementType::Variable:
+ case StatementType::IndirectBranchCond:
+ throw LogicError("Statement can't be printed");
+ }
+ }
+ return ret;
+}
+
+void SanitizeNoBreaks(const Tree& tree) {
+ if (std::ranges::find(tree, StatementType::Break, &Statement::type) != tree.end()) {
+ throw NotImplementedException("Capturing statement with break nodes");
+ }
+}
+
+size_t Level(Node stmt) {
+ size_t level{0};
+ Statement* node{stmt->up};
+ while (node) {
+ ++level;
+ node = node->up;
+ }
+ return level;
+}
+
+bool IsDirectlyRelated(Node goto_stmt, Node label_stmt) {
+ const size_t goto_level{Level(goto_stmt)};
+ const size_t label_level{Level(label_stmt)};
+ size_t min_level;
+ size_t max_level;
+ Node min;
+ Node max;
+ if (label_level < goto_level) {
+ min_level = label_level;
+ max_level = goto_level;
+ min = label_stmt;
+ max = goto_stmt;
+ } else { // goto_level < label_level
+ min_level = goto_level;
+ max_level = label_level;
+ min = goto_stmt;
+ max = label_stmt;
+ }
+ while (max_level > min_level) {
+ --max_level;
+ max = max->up;
+ }
+ return min->up == max->up;
+}
+
+bool IsIndirectlyRelated(Node goto_stmt, Node label_stmt) {
+ return goto_stmt->up != label_stmt->up && !IsDirectlyRelated(goto_stmt, label_stmt);
+}
+
+[[maybe_unused]] bool AreSiblings(Node goto_stmt, Node label_stmt) noexcept {
+ Node it{goto_stmt};
+ do {
+ if (it == label_stmt) {
+ return true;
+ }
+ --it;
+ } while (it != goto_stmt->up->children.begin());
+ while (it != goto_stmt->up->children.end()) {
+ if (it == label_stmt) {
+ return true;
+ }
+ ++it;
+ }
+ return false;
+}
+
+Node SiblingFromNephew(Node uncle, Node nephew) noexcept {
+ Statement* const parent{uncle->up};
+ Statement* it{&*nephew};
+ while (it->up != parent) {
+ it = it->up;
+ }
+ return Tree::s_iterator_to(*it);
+}
+
+bool AreOrdered(Node left_sibling, Node right_sibling) noexcept {
+ const Node end{right_sibling->up->children.end()};
+ for (auto it = right_sibling; it != end; ++it) {
+ if (it == left_sibling) {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool NeedsLift(Node goto_stmt, Node label_stmt) noexcept {
+ const Node sibling{SiblingFromNephew(goto_stmt, label_stmt)};
+ return AreOrdered(sibling, goto_stmt);
+}
+
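+// Flattens the CFG into a statement tree of code blocks, labels and gotos, then removes every
+// goto with outward/inward movement and lifting transformations until only structured constructs
+// (ifs, do-while loops and breaks) remain. The terminology matches the classic goto-elimination
+// algorithm of Erosa and Hendren (1994), which this pass appears to follow.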
+class GotoPass {
+public:
+ explicit GotoPass(Flow::CFG& cfg, ObjectPool<Statement>& stmt_pool) : pool{stmt_pool} {
+ std::vector gotos{BuildTree(cfg)};
+ const auto end{gotos.rend()};
+ for (auto goto_stmt = gotos.rbegin(); goto_stmt != end; ++goto_stmt) {
+ RemoveGoto(*goto_stmt);
+ }
+ }
+
+ Statement& RootStatement() noexcept {
+ return root_stmt;
+ }
+
+private:
+ void RemoveGoto(Node goto_stmt) {
+ // Force goto_stmt and label_stmt to be directly related
+ const Node label_stmt{goto_stmt->label};
+ if (IsIndirectlyRelated(goto_stmt, label_stmt)) {
+ // Move goto_stmt out using outward-movement transformation until it becomes
+ // directly related to label_stmt
+ while (!IsDirectlyRelated(goto_stmt, label_stmt)) {
+ goto_stmt = MoveOutward(goto_stmt);
+ }
+ }
+ // Force goto_stmt and label_stmt to be siblings
+ if (IsDirectlyRelated(goto_stmt, label_stmt)) {
+ const size_t label_level{Level(label_stmt)};
+ size_t goto_level{Level(goto_stmt)};
+ if (goto_level > label_level) {
+ // Move goto_stmt out of its level using outward-movement transformations
+ while (goto_level > label_level) {
+ goto_stmt = MoveOutward(goto_stmt);
+ --goto_level;
+ }
+ } else { // Level(goto_stmt) < Level(label_stmt)
+ if (NeedsLift(goto_stmt, label_stmt)) {
+ // Lift goto_stmt to above stmt containing label_stmt using goto-lifting
+ // transformations
+ goto_stmt = Lift(goto_stmt);
+ }
+ // Move goto_stmt into label_stmt's level using inward-movement transformation
+ while (goto_level < label_level) {
+ goto_stmt = MoveInward(goto_stmt);
+ ++goto_level;
+ }
+ }
+ }
+ // Expensive operation:
+ // if (!AreSiblings(goto_stmt, label_stmt)) {
+ // throw LogicError("Goto is not a sibling with the label");
+ // }
+ // goto_stmt and label_stmt are guaranteed to be siblings, eliminate
+ if (std::next(goto_stmt) == label_stmt) {
+ // Simply eliminate the goto if the label is next to it
+ goto_stmt->up->children.erase(goto_stmt);
+ } else if (AreOrdered(goto_stmt, label_stmt)) {
+ // Eliminate goto_stmt with a conditional
+ EliminateAsConditional(goto_stmt, label_stmt);
+ } else {
+ // Eliminate goto_stmt with a loop
+ EliminateAsLoop(goto_stmt, label_stmt);
+ }
+ }
+
+ std::vector<Node> BuildTree(Flow::CFG& cfg) {
+ u32 label_id{0};
+ std::vector<Node> gotos;
+ Flow::Function& first_function{cfg.Functions().front()};
+ BuildTree(cfg, first_function, label_id, gotos, root_stmt.children.end(), std::nullopt);
+ return gotos;
+ }
+
+ void BuildTree(Flow::CFG& cfg, Flow::Function& function, u32& label_id,
+ std::vector<Node>& gotos, Node function_insert_point,
+ std::optional<Node> return_label) {
+ Statement* const false_stmt{pool.Create(Identity{}, IR::Condition{false}, &root_stmt)};
+ Tree& root{root_stmt.children};
+ std::unordered_map<Flow::Block*, Node> local_labels;
+ local_labels.reserve(function.blocks.size());
+
+ for (Flow::Block& block : function.blocks) {
+ Statement* const label{pool.Create(Label{}, label_id, &root_stmt)};
+ const Node label_it{root.insert(function_insert_point, *label)};
+ local_labels.emplace(&block, label_it);
+ ++label_id;
+ }
+ for (Flow::Block& block : function.blocks) {
+ const Node label{local_labels.at(&block)};
+ // Insertion point
+ const Node ip{std::next(label)};
+
+            // Reset this block's goto variable at the start of the tree and right after its label
+ const auto make_reset_variable{[&]() -> Statement& {
+ return *pool.Create(SetVariable{}, label->id, false_stmt, &root_stmt);
+ }};
+ root.push_front(make_reset_variable());
+ root.insert(ip, make_reset_variable());
+ root.insert(ip, *pool.Create(&block, &root_stmt));
+
+ switch (block.end_class) {
+ case Flow::EndClass::Branch: {
+ Statement* const always_cond{
+ pool.Create(Identity{}, IR::Condition{true}, &root_stmt)};
+ if (block.cond == IR::Condition{true}) {
+ const Node true_label{local_labels.at(block.branch_true)};
+ gotos.push_back(
+ root.insert(ip, *pool.Create(Goto{}, always_cond, true_label, &root_stmt)));
+ } else if (block.cond == IR::Condition{false}) {
+ const Node false_label{local_labels.at(block.branch_false)};
+ gotos.push_back(root.insert(
+ ip, *pool.Create(Goto{}, always_cond, false_label, &root_stmt)));
+ } else {
+ const Node true_label{local_labels.at(block.branch_true)};
+ const Node false_label{local_labels.at(block.branch_false)};
+ Statement* const true_cond{pool.Create(Identity{}, block.cond, &root_stmt)};
+ gotos.push_back(
+ root.insert(ip, *pool.Create(Goto{}, true_cond, true_label, &root_stmt)));
+ gotos.push_back(root.insert(
+ ip, *pool.Create(Goto{}, always_cond, false_label, &root_stmt)));
+ }
+ break;
+ }
+ case Flow::EndClass::IndirectBranch:
+ root.insert(ip, *pool.Create(SetIndirectBranchVariable{}, block.branch_reg,
+ block.branch_offset, &root_stmt));
+ for (const Flow::IndirectBranch& indirect : block.indirect_branches) {
+ const Node indirect_label{local_labels.at(indirect.block)};
+ Statement* cond{
+ pool.Create(IndirectBranchCond{}, indirect.address, &root_stmt)};
+ Statement* goto_stmt{pool.Create(Goto{}, cond, indirect_label, &root_stmt)};
+ gotos.push_back(root.insert(ip, *goto_stmt));
+ }
+ root.insert(ip, *pool.Create(Unreachable{}, &root_stmt));
+ break;
+ case Flow::EndClass::Call: {
+ Flow::Function& call{cfg.Functions()[block.function_call]};
+ const Node call_return_label{local_labels.at(block.return_block)};
+ BuildTree(cfg, call, label_id, gotos, ip, call_return_label);
+ break;
+ }
+ case Flow::EndClass::Exit:
+ root.insert(ip, *pool.Create(Return{}, &root_stmt));
+ break;
+ case Flow::EndClass::Return: {
+ Statement* const always_cond{pool.Create(Identity{}, block.cond, &root_stmt)};
+ auto goto_stmt{pool.Create(Goto{}, always_cond, return_label.value(), &root_stmt)};
+ gotos.push_back(root.insert(ip, *goto_stmt));
+ break;
+ }
+ case Flow::EndClass::Kill:
+ root.insert(ip, *pool.Create(Kill{}, &root_stmt));
+ break;
+ }
+ }
+ }
+
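+    // Reparent the direct children of tree so they point back at it after a splice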
+ void UpdateTreeUp(Statement* tree) {
+ for (Statement& stmt : tree->children) {
+ stmt.up = tree;
+ }
+ }
+
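+    // Eliminate a forward goto by wrapping the statements between the goto and its label in an
+    // if statement guarded by the negated goto condition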
+ void EliminateAsConditional(Node goto_stmt, Node label_stmt) {
+ Tree& body{goto_stmt->up->children};
+ Tree if_body;
+ if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_stmt);
+ Statement* const cond{pool.Create(Not{}, goto_stmt->cond, &root_stmt)};
+ Statement* const if_stmt{pool.Create(If{}, cond, std::move(if_body), goto_stmt->up)};
+ UpdateTreeUp(if_stmt);
+ body.insert(goto_stmt, *if_stmt);
+ body.erase(goto_stmt);
+ }
+
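+    // Eliminate a backward goto by moving the statements from the label up to the goto into a
+    // loop that repeats while the goto condition holds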
+ void EliminateAsLoop(Node goto_stmt, Node label_stmt) {
+ Tree& body{goto_stmt->up->children};
+ Tree loop_body;
+ loop_body.splice(loop_body.begin(), body, label_stmt, goto_stmt);
+ Statement* const cond{goto_stmt->cond};
+ Statement* const loop{pool.Create(Loop{}, cond, std::move(loop_body), goto_stmt->up)};
+ UpdateTreeUp(loop);
+ body.insert(goto_stmt, *loop);
+ body.erase(goto_stmt);
+ }
+
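+    // Move a goto one level up in the tree, out of its enclosing if or loop statement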
+ [[nodiscard]] Node MoveOutward(Node goto_stmt) {
+ switch (goto_stmt->up->type) {
+ case StatementType::If:
+ return MoveOutwardIf(goto_stmt);
+ case StatementType::Loop:
+ return MoveOutwardLoop(goto_stmt);
+ default:
+ throw LogicError("Invalid outward movement");
+ }
+ }
+
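+    // Move a goto one level down, into the sibling statement containing its label, carrying the
+    // goto condition across the boundary in a goto variable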
+ [[nodiscard]] Node MoveInward(Node goto_stmt) {
+ Statement* const parent{goto_stmt->up};
+ Tree& body{parent->children};
+ const Node label{goto_stmt->label};
+ const Node label_nested_stmt{SiblingFromNephew(goto_stmt, label)};
+ const u32 label_id{label->id};
+
+ Statement* const goto_cond{goto_stmt->cond};
+ Statement* const set_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)};
+ body.insert(goto_stmt, *set_var);
+
+ Tree if_body;
+ if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_nested_stmt);
+ Statement* const variable{pool.Create(Variable{}, label_id, &root_stmt)};
+ Statement* const neg_var{pool.Create(Not{}, variable, &root_stmt)};
+ if (!if_body.empty()) {
+ Statement* const if_stmt{pool.Create(If{}, neg_var, std::move(if_body), parent)};
+ UpdateTreeUp(if_stmt);
+ body.insert(goto_stmt, *if_stmt);
+ }
+ body.erase(goto_stmt);
+
+ switch (label_nested_stmt->type) {
+ case StatementType::If:
+ // Update nested if condition
+ label_nested_stmt->cond =
+ pool.Create(Or{}, variable, label_nested_stmt->cond, &root_stmt);
+ break;
+ case StatementType::Loop:
+ break;
+ default:
+ throw LogicError("Invalid inward movement");
+ }
+ Tree& nested_tree{label_nested_stmt->children};
+ Statement* const new_goto{pool.Create(Goto{}, variable, label, &*label_nested_stmt)};
+ return nested_tree.insert(nested_tree.begin(), *new_goto);
+ }
+
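+    // Move a backward goto in front of the statement containing its label by wrapping the
+    // intervening statements in a loop driven by a goto variable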
+ [[nodiscard]] Node Lift(Node goto_stmt) {
+ Statement* const parent{goto_stmt->up};
+ Tree& body{parent->children};
+ const Node label{goto_stmt->label};
+ const u32 label_id{label->id};
+ const Node label_nested_stmt{SiblingFromNephew(goto_stmt, label)};
+
+ Tree loop_body;
+ loop_body.splice(loop_body.begin(), body, label_nested_stmt, goto_stmt);
+ SanitizeNoBreaks(loop_body);
+ Statement* const variable{pool.Create(Variable{}, label_id, &root_stmt)};
+ Statement* const loop_stmt{pool.Create(Loop{}, variable, std::move(loop_body), parent)};
+ UpdateTreeUp(loop_stmt);
+ body.insert(goto_stmt, *loop_stmt);
+
+ Statement* const new_goto{pool.Create(Goto{}, variable, label, loop_stmt)};
+ loop_stmt->children.push_front(*new_goto);
+ const Node new_goto_node{loop_stmt->children.begin()};
+
+ Statement* const set_var{pool.Create(SetVariable{}, label_id, goto_stmt->cond, loop_stmt)};
+ loop_stmt->children.push_back(*set_var);
+
+ body.erase(goto_stmt);
+ return new_goto_node;
+ }
+
+ Node MoveOutwardIf(Node goto_stmt) {
+ const Node parent{Tree::s_iterator_to(*goto_stmt->up)};
+ Tree& body{parent->children};
+ const u32 label_id{goto_stmt->label->id};
+ Statement* const goto_cond{goto_stmt->cond};
+ Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, &*parent)};
+ body.insert(goto_stmt, *set_goto_var);
+
+ Tree if_body;
+ if_body.splice(if_body.begin(), body, std::next(goto_stmt), body.end());
+ if_body.pop_front();
+ Statement* const cond{pool.Create(Variable{}, label_id, &root_stmt)};
+ Statement* const neg_cond{pool.Create(Not{}, cond, &root_stmt)};
+ Statement* const if_stmt{pool.Create(If{}, neg_cond, std::move(if_body), &*parent)};
+ UpdateTreeUp(if_stmt);
+ body.insert(goto_stmt, *if_stmt);
+
+ body.erase(goto_stmt);
+
+ Statement* const new_cond{pool.Create(Variable{}, label_id, &root_stmt)};
+ Statement* const new_goto{pool.Create(Goto{}, new_cond, goto_stmt->label, parent->up)};
+ Tree& parent_tree{parent->up->children};
+ return parent_tree.insert(std::next(parent), *new_goto);
+ }
+
+ Node MoveOutwardLoop(Node goto_stmt) {
+ Statement* const parent{goto_stmt->up};
+ Tree& body{parent->children};
+ const u32 label_id{goto_stmt->label->id};
+ Statement* const goto_cond{goto_stmt->cond};
+ Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)};
+ Statement* const cond{pool.Create(Variable{}, label_id, &root_stmt)};
+ Statement* const break_stmt{pool.Create(Break{}, cond, parent)};
+ body.insert(goto_stmt, *set_goto_var);
+ body.insert(goto_stmt, *break_stmt);
+ body.erase(goto_stmt);
+
+ const Node loop{Tree::s_iterator_to(*goto_stmt->up)};
+ Statement* const new_goto_cond{pool.Create(Variable{}, label_id, &root_stmt)};
+ Statement* const new_goto{pool.Create(Goto{}, new_goto_cond, goto_stmt->label, loop->up)};
+ Tree& parent_tree{loop->up->children};
+ return parent_tree.insert(std::next(loop), *new_goto);
+ }
+
+ ObjectPool<Statement>& pool;
+ Statement root_stmt{FunctionTag{}};
+};
+
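+// Search forward from stmt for the next sibling code statement, stopping at the first statement
+// with children or at the end of the list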
+[[nodiscard]] Statement* TryFindForwardBlock(Statement& stmt) {
+ Tree& tree{stmt.up->children};
+ const Node end{tree.end()};
+ Node forward_node{std::next(Tree::s_iterator_to(stmt))};
+ while (forward_node != end && !HasChildren(forward_node->type)) {
+ if (forward_node->type == StatementType::Code) {
+ return &*forward_node;
+ }
+ ++forward_node;
+ }
+ return nullptr;
+}
+
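+// Lower a condition statement tree into an IR boolean value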
+[[nodiscard]] IR::U1 VisitExpr(IR::IREmitter& ir, const Statement& stmt) {
+ switch (stmt.type) {
+ case StatementType::Identity:
+ return ir.Condition(stmt.guest_cond);
+ case StatementType::Not:
+ return ir.LogicalNot(IR::U1{VisitExpr(ir, *stmt.op)});
+ case StatementType::Or:
+ return ir.LogicalOr(VisitExpr(ir, *stmt.op_a), VisitExpr(ir, *stmt.op_b));
+ case StatementType::Variable:
+ return ir.GetGotoVariable(stmt.id);
+ case StatementType::IndirectBranchCond:
+ return ir.IEqual(ir.GetIndirectBranchVariable(), ir.Imm32(stmt.location));
+ default:
+ throw NotImplementedException("Statement type {}", stmt.type);
+ }
+}
+
+class TranslatePass {
+public:
+ TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_,
+ ObjectPool<Statement>& stmt_pool_, Environment& env_, Statement& root_stmt,
+ IR::AbstractSyntaxList& syntax_list_)
+ : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_},
+ syntax_list{syntax_list_} {
+ Visit(root_stmt, nullptr, nullptr);
+
+ IR::Block& first_block{*syntax_list.front().data.block};
+ IR::IREmitter ir(first_block, first_block.begin());
+ ir.Prologue();
+ }
+
+private:
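+    // Emit IR blocks and abstract syntax nodes for every child statement of parent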
+ void Visit(Statement& parent, IR::Block* break_block, IR::Block* fallthrough_block) {
+ IR::Block* current_block{};
+ const auto ensure_block{[&] {
+ if (current_block) {
+ return;
+ }
+ current_block = block_pool.Create(inst_pool);
+ auto& node{syntax_list.emplace_back()};
+ node.type = IR::AbstractSyntaxNode::Type::Block;
+ node.data.block = current_block;
+ }};
+ Tree& tree{parent.children};
+ for (auto it = tree.begin(); it != tree.end(); ++it) {
+ Statement& stmt{*it};
+ switch (stmt.type) {
+ case StatementType::Label:
+ // Labels can be ignored
+ break;
+ case StatementType::Code: {
+ ensure_block();
+ Translate(env, current_block, stmt.block->begin.Offset(), stmt.block->end.Offset());
+ break;
+ }
+ case StatementType::SetVariable: {
+ ensure_block();
+ IR::IREmitter ir{*current_block};
+ ir.SetGotoVariable(stmt.id, VisitExpr(ir, *stmt.op));
+ break;
+ }
+ case StatementType::SetIndirectBranchVariable: {
+ ensure_block();
+ IR::IREmitter ir{*current_block};
+ IR::U32 address{ir.IAdd(ir.GetReg(stmt.branch_reg), ir.Imm32(stmt.branch_offset))};
+ ir.SetIndirectBranchVariable(address);
+ break;
+ }
+ case StatementType::If: {
+ ensure_block();
+ IR::Block* const merge_block{MergeBlock(parent, stmt)};
+
+ // Implement if header block
+ IR::IREmitter ir{*current_block};
+ const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))};
+
+ const size_t if_node_index{syntax_list.size()};
+ syntax_list.emplace_back();
+
+ // Visit children
+ const size_t then_block_index{syntax_list.size()};
+ Visit(stmt, break_block, merge_block);
+
+ IR::Block* const then_block{syntax_list.at(then_block_index).data.block};
+ current_block->AddBranch(then_block);
+ current_block->AddBranch(merge_block);
+ current_block = merge_block;
+
+ auto& if_node{syntax_list[if_node_index]};
+ if_node.type = IR::AbstractSyntaxNode::Type::If;
+ if_node.data.if_node.cond = cond;
+ if_node.data.if_node.body = then_block;
+ if_node.data.if_node.merge = merge_block;
+
+ auto& endif_node{syntax_list.emplace_back()};
+ endif_node.type = IR::AbstractSyntaxNode::Type::EndIf;
+ endif_node.data.end_if.merge = merge_block;
+
+ auto& merge{syntax_list.emplace_back()};
+ merge.type = IR::AbstractSyntaxNode::Type::Block;
+ merge.data.block = merge_block;
+ break;
+ }
+ case StatementType::Loop: {
+ IR::Block* const loop_header_block{block_pool.Create(inst_pool)};
+ if (current_block) {
+ current_block->AddBranch(loop_header_block);
+ }
+ auto& header_node{syntax_list.emplace_back()};
+ header_node.type = IR::AbstractSyntaxNode::Type::Block;
+ header_node.data.block = loop_header_block;
+
+ IR::Block* const continue_block{block_pool.Create(inst_pool)};
+ IR::Block* const merge_block{MergeBlock(parent, stmt)};
+
+ const size_t loop_node_index{syntax_list.size()};
+ syntax_list.emplace_back();
+
+ // Visit children
+ const size_t body_block_index{syntax_list.size()};
+ Visit(stmt, merge_block, continue_block);
+
+ // The continue block is located at the end of the loop
+ IR::IREmitter ir{*continue_block};
+ const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))};
+
+ IR::Block* const body_block{syntax_list.at(body_block_index).data.block};
+ loop_header_block->AddBranch(body_block);
+
+ continue_block->AddBranch(loop_header_block);
+ continue_block->AddBranch(merge_block);
+
+ current_block = merge_block;
+
+ auto& loop{syntax_list[loop_node_index]};
+ loop.type = IR::AbstractSyntaxNode::Type::Loop;
+ loop.data.loop.body = body_block;
+ loop.data.loop.continue_block = continue_block;
+ loop.data.loop.merge = merge_block;
+
+ auto& continue_block_node{syntax_list.emplace_back()};
+ continue_block_node.type = IR::AbstractSyntaxNode::Type::Block;
+ continue_block_node.data.block = continue_block;
+
+ auto& repeat{syntax_list.emplace_back()};
+ repeat.type = IR::AbstractSyntaxNode::Type::Repeat;
+ repeat.data.repeat.cond = cond;
+ repeat.data.repeat.loop_header = loop_header_block;
+ repeat.data.repeat.merge = merge_block;
+
+ auto& merge{syntax_list.emplace_back()};
+ merge.type = IR::AbstractSyntaxNode::Type::Block;
+ merge.data.block = merge_block;
+ break;
+ }
+ case StatementType::Break: {
+ ensure_block();
+ IR::Block* const skip_block{MergeBlock(parent, stmt)};
+
+ IR::IREmitter ir{*current_block};
+ const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))};
+ current_block->AddBranch(break_block);
+ current_block->AddBranch(skip_block);
+ current_block = skip_block;
+
+ auto& break_node{syntax_list.emplace_back()};
+ break_node.type = IR::AbstractSyntaxNode::Type::Break;
+ break_node.data.break_node.cond = cond;
+ break_node.data.break_node.merge = break_block;
+ break_node.data.break_node.skip = skip_block;
+
+ auto& merge{syntax_list.emplace_back()};
+ merge.type = IR::AbstractSyntaxNode::Type::Block;
+ merge.data.block = skip_block;
+ break;
+ }
+ case StatementType::Return: {
+ ensure_block();
+ IR::IREmitter{*current_block}.Epilogue();
+ current_block = nullptr;
+ syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return;
+ break;
+ }
+ case StatementType::Kill: {
+ ensure_block();
+ IR::Block* demote_block{MergeBlock(parent, stmt)};
+ IR::IREmitter{*current_block}.DemoteToHelperInvocation();
+ current_block->AddBranch(demote_block);
+ current_block = demote_block;
+
+ auto& merge{syntax_list.emplace_back()};
+ merge.type = IR::AbstractSyntaxNode::Type::Block;
+ merge.data.block = demote_block;
+ break;
+ }
+ case StatementType::Unreachable: {
+ ensure_block();
+ current_block = nullptr;
+ syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Unreachable;
+ break;
+ }
+ default:
+ throw NotImplementedException("Statement type {}", stmt.type);
+ }
+ }
+ if (current_block) {
+ if (fallthrough_block) {
+ current_block->AddBranch(fallthrough_block);
+ } else {
+ syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Unreachable;
+ }
+ }
+ }
+
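+    // Return a fresh IR block for control flow to merge into after stmt, inserting a placeholder
+    // code statement when no forward block exists so the merge point is visited later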
+ IR::Block* MergeBlock(Statement& parent, Statement& stmt) {
+ Statement* merge_stmt{TryFindForwardBlock(stmt)};
+ if (!merge_stmt) {
+ // Create a merge block we can visit later
+ merge_stmt = stmt_pool.Create(&dummy_flow_block, &parent);
+ parent.children.insert(std::next(Tree::s_iterator_to(stmt)), *merge_stmt);
+ }
+ return block_pool.Create(inst_pool);
+ }
+
+ ObjectPool<Statement>& stmt_pool;
+ ObjectPool<IR::Inst>& inst_pool;
+ ObjectPool<IR::Block>& block_pool;
+ Environment& env;
+ IR::AbstractSyntaxList& syntax_list;
+
+// TODO: Remove this when all compilers support constexpr std::vector (C++20)
+#if __cpp_lib_constexpr_vector >= 201907
+ static constexpr Flow::Block dummy_flow_block;
+#else
+ const Flow::Block dummy_flow_block;
+#endif
+};
+} // Anonymous namespace
+
+IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
+ Environment& env, Flow::CFG& cfg) {
+ ObjectPool<Statement> stmt_pool{64};
+ GotoPass goto_pass{cfg, stmt_pool};
+ Statement& root{goto_pass.RootStatement()};
+ IR::AbstractSyntaxList syntax_list;
+ TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list};
+ return syntax_list;
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h
new file mode 100644
index 000000000..88b083649
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h
@@ -0,0 +1,20 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/ir/abstract_syntax_list.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/frontend/maxwell/control_flow.h"
+#include "shader_recompiler/object_pool.h"
+
+namespace Shader::Maxwell {
+
+[[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool,
+ ObjectPool<IR::Block>& block_pool, Environment& env,
+ Flow::CFG& cfg);
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp
new file mode 100644
index 000000000..d9f999e05
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp
@@ -0,0 +1,214 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class AtomOp : u64 {
+ ADD,
+ MIN,
+ MAX,
+ INC,
+ DEC,
+ AND,
+ OR,
+ XOR,
+ EXCH,
+ SAFEADD,
+};
+
+enum class AtomSize : u64 {
+ U32,
+ S32,
+ U64,
+ F32,
+ F16x2,
+ S64,
+};
+
+IR::U32U64 ApplyIntegerAtomOp(IR::IREmitter& ir, const IR::U32U64& offset, const IR::U32U64& op_b,
+ AtomOp op, bool is_signed) {
+ switch (op) {
+ case AtomOp::ADD:
+ return ir.GlobalAtomicIAdd(offset, op_b);
+ case AtomOp::MIN:
+ return ir.GlobalAtomicIMin(offset, op_b, is_signed);
+ case AtomOp::MAX:
+ return ir.GlobalAtomicIMax(offset, op_b, is_signed);
+ case AtomOp::INC:
+ return ir.GlobalAtomicInc(offset, op_b);
+ case AtomOp::DEC:
+ return ir.GlobalAtomicDec(offset, op_b);
+ case AtomOp::AND:
+ return ir.GlobalAtomicAnd(offset, op_b);
+ case AtomOp::OR:
+ return ir.GlobalAtomicOr(offset, op_b);
+ case AtomOp::XOR:
+ return ir.GlobalAtomicXor(offset, op_b);
+ case AtomOp::EXCH:
+ return ir.GlobalAtomicExchange(offset, op_b);
+ default:
+ throw NotImplementedException("Integer Atom Operation {}", op);
+ }
+}
+
+IR::Value ApplyFpAtomOp(IR::IREmitter& ir, const IR::U64& offset, const IR::Value& op_b, AtomOp op,
+ AtomSize size) {
+ static constexpr IR::FpControl f16_control{
+ .no_contraction = false,
+ .rounding = IR::FpRounding::RN,
+ .fmz_mode = IR::FmzMode::DontCare,
+ };
+ static constexpr IR::FpControl f32_control{
+ .no_contraction = false,
+ .rounding = IR::FpRounding::RN,
+ .fmz_mode = IR::FmzMode::FTZ,
+ };
+ switch (op) {
+ case AtomOp::ADD:
+ return size == AtomSize::F32 ? ir.GlobalAtomicF32Add(offset, op_b, f32_control)
+ : ir.GlobalAtomicF16x2Add(offset, op_b, f16_control);
+ case AtomOp::MIN:
+ return ir.GlobalAtomicF16x2Min(offset, op_b, f16_control);
+ case AtomOp::MAX:
+ return ir.GlobalAtomicF16x2Max(offset, op_b, f16_control);
+ default:
+ throw NotImplementedException("FP Atom Operation {}", op);
+ }
+}
+
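+// Build the 64-bit global memory address from the address register and the encoded immediate
+// offset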
+IR::U64 AtomOffset(TranslatorVisitor& v, u64 insn) {
+ union {
+ u64 raw;
+ BitField<8, 8, IR::Reg> addr_reg;
+ BitField<28, 20, s64> addr_offset;
+ BitField<28, 20, u64> rz_addr_offset;
+ BitField<48, 1, u64> e;
+ } const mem{insn};
+
+ const IR::U64 address{[&]() -> IR::U64 {
+ if (mem.e == 0) {
+ return v.ir.UConvert(64, v.X(mem.addr_reg));
+ }
+ return v.L(mem.addr_reg);
+ }()};
+ const u64 addr_offset{[&]() -> u64 {
+ if (mem.addr_reg == IR::Reg::RZ) {
+ // When RZ is used, the address is an absolute address
+ return static_cast<u64>(mem.rz_addr_offset.Value());
+ } else {
+ return static_cast<u64>(mem.addr_offset.Value());
+ }
+ }()};
+ return v.ir.IAdd(address, v.ir.Imm64(addr_offset));
+}
+
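+// Return true when the operation has no valid lowering for the given size, in which case the
+// caller falls back to a plain load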
+bool AtomOpNotApplicable(AtomSize size, AtomOp op) {
+ // TODO: SAFEADD
+ switch (size) {
+ case AtomSize::S32:
+ case AtomSize::U64:
+ return (op == AtomOp::INC || op == AtomOp::DEC);
+ case AtomSize::S64:
+ return !(op == AtomOp::MIN || op == AtomOp::MAX);
+ case AtomSize::F32:
+ return op != AtomOp::ADD;
+ case AtomSize::F16x2:
+ return !(op == AtomOp::ADD || op == AtomOp::MIN || op == AtomOp::MAX);
+ default:
+ return false;
+ }
+}
+
+IR::U32U64 LoadGlobal(IR::IREmitter& ir, const IR::U64& offset, AtomSize size) {
+ switch (size) {
+ case AtomSize::U32:
+ case AtomSize::S32:
+ case AtomSize::F32:
+ case AtomSize::F16x2:
+ return ir.LoadGlobal32(offset);
+ case AtomSize::U64:
+ case AtomSize::S64:
+ return ir.PackUint2x32(ir.LoadGlobal64(offset));
+ default:
+ throw NotImplementedException("Atom Size {}", size);
+ }
+}
+
+void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomSize size) {
+ switch (size) {
+ case AtomSize::U32:
+ case AtomSize::S32:
+ case AtomSize::F16x2:
+ return v.X(dest_reg, IR::U32{result});
+ case AtomSize::U64:
+ case AtomSize::S64:
+ return v.L(dest_reg, IR::U64{result});
+ case AtomSize::F32:
+ return v.F(dest_reg, IR::F32{result});
+ default:
+ break;
+ }
+}
+
+IR::Value ApplyAtomOp(TranslatorVisitor& v, IR::Reg operand_reg, const IR::U64& offset,
+ AtomSize size, AtomOp op) {
+ switch (size) {
+ case AtomSize::U32:
+ case AtomSize::S32:
+ return ApplyIntegerAtomOp(v.ir, offset, v.X(operand_reg), op, size == AtomSize::S32);
+ case AtomSize::U64:
+ case AtomSize::S64:
+ return ApplyIntegerAtomOp(v.ir, offset, v.L(operand_reg), op, size == AtomSize::S64);
+ case AtomSize::F32:
+ return ApplyFpAtomOp(v.ir, offset, v.F(operand_reg), op, size);
+ case AtomSize::F16x2: {
+ return ApplyFpAtomOp(v.ir, offset, v.ir.UnpackFloat2x16(v.X(operand_reg)), op, size);
+ }
+ default:
+ throw NotImplementedException("Atom Size {}", size);
+ }
+}
+
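+// Apply the atomic operation (or load the current value when the operation is not applicable)
+// and optionally store the result in dest_reg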
+void GlobalAtomic(TranslatorVisitor& v, IR::Reg dest_reg, IR::Reg operand_reg,
+ const IR::U64& offset, AtomSize size, AtomOp op, bool write_dest) {
+ IR::Value result;
+ if (AtomOpNotApplicable(size, op)) {
+ result = LoadGlobal(v.ir, offset, size);
+ } else {
+ result = ApplyAtomOp(v, operand_reg, offset, size, op);
+ }
+ if (write_dest) {
+ StoreResult(v, dest_reg, result, size);
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::ATOM(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<20, 8, IR::Reg> operand_reg;
+ BitField<49, 3, AtomSize> size;
+ BitField<52, 4, AtomOp> op;
+ } const atom{insn};
+ const IR::U64 offset{AtomOffset(*this, insn)};
+ GlobalAtomic(*this, atom.dest_reg, atom.operand_reg, offset, atom.size, atom.op, true);
+}
+
+void TranslatorVisitor::RED(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> operand_reg;
+ BitField<20, 3, AtomSize> size;
+ BitField<23, 3, AtomOp> op;
+ } const red{insn};
+ const IR::U64 offset{AtomOffset(*this, insn)};
+ GlobalAtomic(*this, IR::Reg::RZ, red.operand_reg, offset, red.size, red.op, true);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp
new file mode 100644
index 000000000..8b974621e
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp
@@ -0,0 +1,110 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class AtomOp : u64 {
+ ADD,
+ MIN,
+ MAX,
+ INC,
+ DEC,
+ AND,
+ OR,
+ XOR,
+ EXCH,
+};
+
+enum class AtomsSize : u64 {
+ U32,
+ S32,
+ U64,
+};
+
+IR::U32U64 ApplyAtomsOp(IR::IREmitter& ir, const IR::U32& offset, const IR::U32U64& op_b, AtomOp op,
+ bool is_signed) {
+ switch (op) {
+ case AtomOp::ADD:
+ return ir.SharedAtomicIAdd(offset, op_b);
+ case AtomOp::MIN:
+ return ir.SharedAtomicIMin(offset, op_b, is_signed);
+ case AtomOp::MAX:
+ return ir.SharedAtomicIMax(offset, op_b, is_signed);
+ case AtomOp::INC:
+ return ir.SharedAtomicInc(offset, op_b);
+ case AtomOp::DEC:
+ return ir.SharedAtomicDec(offset, op_b);
+ case AtomOp::AND:
+ return ir.SharedAtomicAnd(offset, op_b);
+ case AtomOp::OR:
+ return ir.SharedAtomicOr(offset, op_b);
+ case AtomOp::XOR:
+ return ir.SharedAtomicXor(offset, op_b);
+ case AtomOp::EXCH:
+ return ir.SharedAtomicExchange(offset, op_b);
+ default:
+ throw NotImplementedException("Integer Atoms Operation {}", op);
+ }
+}
+
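+// Build the shared memory offset, either from the absolute immediate when RZ is addressed or
+// relative to the offset register; the encoded offset is scaled by four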
+IR::U32 AtomsOffset(TranslatorVisitor& v, u64 insn) {
+ union {
+ u64 raw;
+ BitField<8, 8, IR::Reg> offset_reg;
+ BitField<30, 22, u64> absolute_offset;
+ BitField<30, 22, s64> relative_offset;
+ } const encoding{insn};
+
+ if (encoding.offset_reg == IR::Reg::RZ) {
+ return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset << 2));
+ } else {
+ const s32 relative{static_cast<s32>(encoding.relative_offset << 2)};
+ return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative));
+ }
+}
+
+void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomsSize size) {
+ switch (size) {
+ case AtomsSize::U32:
+ case AtomsSize::S32:
+ return v.X(dest_reg, IR::U32{result});
+ case AtomsSize::U64:
+ return v.L(dest_reg, IR::U64{result});
+ default:
+ break;
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::ATOMS(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> addr_reg;
+ BitField<20, 8, IR::Reg> src_reg_b;
+ BitField<28, 2, AtomsSize> size;
+ BitField<52, 4, AtomOp> op;
+ } const atoms{insn};
+
+ const bool size_64{atoms.size == AtomsSize::U64};
+ if (size_64 && atoms.op != AtomOp::EXCH) {
+ throw NotImplementedException("64-bit Atoms Operation {}", atoms.op.Value());
+ }
+ const bool is_signed{atoms.size == AtomsSize::S32};
+ const IR::U32 offset{AtomsOffset(*this, insn)};
+
+ IR::Value result;
+ if (size_64) {
+ result = ApplyAtomsOp(ir, offset, L(atoms.src_reg_b), atoms.op, is_signed);
+ } else {
+ result = ApplyAtomsOp(ir, offset, X(atoms.src_reg_b), atoms.op, is_signed);
+ }
+ StoreResult(*this, atoms.dest_reg, result, atoms.size);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp
new file mode 100644
index 000000000..fb3f00d3f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp
@@ -0,0 +1,35 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+
+enum class BitSize : u64 {
+ B32,
+ B64,
+ B96,
+ B128,
+};
+
+void TranslatorVisitor::AL2P(u64 inst) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> result_register;
+ BitField<8, 8, IR::Reg> indexing_register;
+ BitField<20, 11, s64> offset;
+ BitField<47, 2, BitSize> bitsize;
+ } al2p{inst};
+ if (al2p.bitsize != BitSize::B32) {
+ throw NotImplementedException("BitSize {}", al2p.bitsize.Value());
+ }
+ const IR::U32 converted_offset{ir.Imm32(static_cast<u32>(al2p.offset.Value()))};
+ const IR::U32 result{ir.IAdd(X(al2p.indexing_register), converted_offset)};
+ X(al2p.result_register, result);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp
new file mode 100644
index 000000000..86e433e41
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp
@@ -0,0 +1,96 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+// The scope names appear to follow CUDA terminology.
+enum class LocalScope : u64 {
+ CTA,
+ GL,
+ SYS,
+ VC,
+};
+} // Anonymous namespace
+
+void TranslatorVisitor::MEMBAR(u64 inst) {
+ union {
+ u64 raw;
+ BitField<8, 2, LocalScope> scope;
+ } const membar{inst};
+
+ if (membar.scope == LocalScope::CTA) {
+ ir.WorkgroupMemoryBarrier();
+ } else {
+ ir.DeviceMemoryBarrier();
+ }
+}
+
+void TranslatorVisitor::DEPBAR() {
+ // DEPBAR is a no-op
+}
+
+void TranslatorVisitor::BAR(u64 insn) {
+ enum class Mode {
+ RedPopc,
+ Scan,
+ RedAnd,
+ RedOr,
+ Sync,
+ Arrive,
+ };
+ union {
+ u64 raw;
+ BitField<43, 1, u64> is_a_imm;
+ BitField<44, 1, u64> is_b_imm;
+ BitField<8, 8, u64> imm_a;
+ BitField<20, 12, u64> imm_b;
+ BitField<42, 1, u64> neg_pred;
+ BitField<39, 3, IR::Pred> pred;
+ } const bar{insn};
+
+ const Mode mode{[insn] {
+ switch (insn & 0x0000009B00000000ULL) {
+ case 0x0000000200000000ULL:
+ return Mode::RedPopc;
+ case 0x0000000300000000ULL:
+ return Mode::Scan;
+ case 0x0000000A00000000ULL:
+ return Mode::RedAnd;
+ case 0x0000001200000000ULL:
+ return Mode::RedOr;
+ case 0x0000008000000000ULL:
+ return Mode::Sync;
+ case 0x0000008100000000ULL:
+ return Mode::Arrive;
+ }
+ throw NotImplementedException("Invalid encoding");
+ }()};
+ if (mode != Mode::Sync) {
+ throw NotImplementedException("BAR mode {}", mode);
+ }
+ if (bar.is_a_imm == 0) {
+ throw NotImplementedException("Non-immediate input A");
+ }
+ if (bar.imm_a != 0) {
+ throw NotImplementedException("Non-zero input A");
+ }
+ if (bar.is_b_imm == 0) {
+ throw NotImplementedException("Non-immediate input B");
+ }
+ if (bar.imm_b != 0) {
+ throw NotImplementedException("Non-zero input B");
+ }
+ if (bar.pred != IR::Pred::PT && bar.neg_pred != 0) {
+ throw NotImplementedException("Non-true input predicate");
+ }
+ ir.Barrier();
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp
new file mode 100644
index 000000000..9d5a87e52
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp
@@ -0,0 +1,74 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void BFE(TranslatorVisitor& v, u64 insn, const IR::U32& src) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> offset_reg;
+ BitField<40, 1, u64> brev;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> is_signed;
+ } const bfe{insn};
+
+ const IR::U32 offset{v.ir.BitFieldExtract(src, v.ir.Imm32(0), v.ir.Imm32(8), false)};
+ const IR::U32 count{v.ir.BitFieldExtract(src, v.ir.Imm32(8), v.ir.Imm32(8), false)};
+
+ // Common constants
+ const IR::U32 zero{v.ir.Imm32(0)};
+ const IR::U32 one{v.ir.Imm32(1)};
+ const IR::U32 max_size{v.ir.Imm32(32)};
+ // Edge case conditions
+ const IR::U1 zero_count{v.ir.IEqual(count, zero)};
+ const IR::U1 exceed_count{v.ir.IGreaterThanEqual(v.ir.IAdd(offset, count), max_size, false)};
+ const IR::U1 replicate{v.ir.IGreaterThanEqual(offset, max_size, false)};
+
+ IR::U32 base{v.X(bfe.offset_reg)};
+ if (bfe.brev != 0) {
+ base = v.ir.BitReverse(base);
+ }
+ IR::U32 result{v.ir.BitFieldExtract(base, offset, count, bfe.is_signed != 0)};
+ if (bfe.is_signed != 0) {
+ const IR::U1 is_negative{v.ir.ILessThan(base, zero, true)};
+ const IR::U32 replicated_bit{v.ir.Select(is_negative, v.ir.Imm32(-1), zero)};
+ const IR::U32 exceed_bit{v.ir.BitFieldExtract(base, v.ir.Imm32(31), one, false)};
+ // Replicate condition
+ result = IR::U32{v.ir.Select(replicate, replicated_bit, result)};
+ // Exceeding condition
+ const IR::U32 exceed_result{v.ir.BitFieldInsert(result, exceed_bit, v.ir.Imm32(31), one)};
+ result = IR::U32{v.ir.Select(exceed_count, exceed_result, result)};
+ }
+ // Zero count condition
+ result = IR::U32{v.ir.Select(zero_count, zero, result)};
+
+ v.X(bfe.dest_reg, result);
+
+ if (bfe.cc != 0) {
+ v.SetZFlag(v.ir.IEqual(result, zero));
+ v.SetSFlag(v.ir.ILessThan(result, zero, true));
+ v.ResetCFlag();
+ v.ResetOFlag();
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::BFE_reg(u64 insn) {
+ BFE(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::BFE_cbuf(u64 insn) {
+ BFE(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::BFE_imm(u64 insn) {
+ BFE(*this, insn, GetImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp
new file mode 100644
index 000000000..1e1ec2119
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp
@@ -0,0 +1,62 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void BFI(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& base) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> insert_reg;
+ BitField<47, 1, u64> cc;
+ } const bfi{insn};
+
+ const IR::U32 zero{v.ir.Imm32(0)};
+ const IR::U32 offset{v.ir.BitFieldExtract(src_a, zero, v.ir.Imm32(8), false)};
+ const IR::U32 unsafe_count{v.ir.BitFieldExtract(src_a, v.ir.Imm32(8), v.ir.Imm32(8), false)};
+ const IR::U32 max_size{v.ir.Imm32(32)};
+
+ // Edge case conditions
+ const IR::U1 exceed_offset{v.ir.IGreaterThanEqual(offset, max_size, false)};
+ const IR::U1 exceed_count{v.ir.IGreaterThan(unsafe_count, max_size, false)};
+
+ const IR::U32 remaining_size{v.ir.ISub(max_size, offset)};
+ const IR::U32 safe_count{v.ir.Select(exceed_count, remaining_size, unsafe_count)};
+
+ const IR::U32 insert{v.X(bfi.insert_reg)};
+ IR::U32 result{v.ir.BitFieldInsert(base, insert, offset, safe_count)};
+
+ result = IR::U32{v.ir.Select(exceed_offset, base, result)};
+
+ v.X(bfi.dest_reg, result);
+ if (bfi.cc != 0) {
+ v.SetZFlag(v.ir.IEqual(result, zero));
+ v.SetSFlag(v.ir.ILessThan(result, zero, true));
+ v.ResetCFlag();
+ v.ResetOFlag();
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::BFI_reg(u64 insn) {
+ BFI(*this, insn, GetReg20(insn), GetReg39(insn));
+}
+
+void TranslatorVisitor::BFI_rc(u64 insn) {
+ BFI(*this, insn, GetReg39(insn), GetCbuf(insn));
+}
+
+void TranslatorVisitor::BFI_cr(u64 insn) {
+ BFI(*this, insn, GetCbuf(insn), GetReg39(insn));
+}
+
+void TranslatorVisitor::BFI_imm(u64 insn) {
+ BFI(*this, insn, GetImm20(insn), GetReg39(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp
new file mode 100644
index 000000000..371c0e0f7
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp
@@ -0,0 +1,36 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void Check(u64 insn) {
+ union {
+ u64 raw;
+ BitField<5, 1, u64> cbuf_mode;
+ BitField<6, 1, u64> lmt;
+ } const encoding{insn};
+
+ if (encoding.cbuf_mode != 0) {
+ throw NotImplementedException("Constant buffer mode");
+ }
+ if (encoding.lmt != 0) {
+ throw NotImplementedException("LMT");
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::BRX(u64 insn) {
+ Check(insn);
+}
+
+void TranslatorVisitor::JMX(u64 insn) {
+ Check(insn);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h
new file mode 100644
index 000000000..fd73f656c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h
@@ -0,0 +1,57 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+
+namespace Shader::Maxwell {
+
+enum class FpRounding : u64 {
+ RN,
+ RM,
+ RP,
+ RZ,
+};
+
+enum class FmzMode : u64 {
+ None,
+ FTZ,
+ FMZ,
+ INVALIDFMZ3,
+};
+
+inline IR::FpRounding CastFpRounding(FpRounding fp_rounding) {
+ switch (fp_rounding) {
+ case FpRounding::RN:
+ return IR::FpRounding::RN;
+ case FpRounding::RM:
+ return IR::FpRounding::RM;
+ case FpRounding::RP:
+ return IR::FpRounding::RP;
+ case FpRounding::RZ:
+ return IR::FpRounding::RZ;
+ }
+ throw NotImplementedException("Invalid floating-point rounding {}", fp_rounding);
+}
+
+inline IR::FmzMode CastFmzMode(FmzMode fmz_mode) {
+ switch (fmz_mode) {
+ case FmzMode::None:
+ return IR::FmzMode::None;
+ case FmzMode::FTZ:
+ return IR::FmzMode::FTZ;
+ case FmzMode::FMZ:
+ // FMZ is manually handled in the instruction
+ return IR::FmzMode::FTZ;
+ case FmzMode::INVALIDFMZ3:
+ break;
+ }
+ throw NotImplementedException("Invalid FMZ mode {}", fmz_mode);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp
new file mode 100644
index 000000000..20458d2ad
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp
@@ -0,0 +1,153 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+
+namespace Shader::Maxwell {
+IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
+ CompareOp compare_op, bool is_signed) {
+ switch (compare_op) {
+ case CompareOp::False:
+ return ir.Imm1(false);
+ case CompareOp::LessThan:
+ return ir.ILessThan(operand_1, operand_2, is_signed);
+ case CompareOp::Equal:
+ return ir.IEqual(operand_1, operand_2);
+ case CompareOp::LessThanEqual:
+ return ir.ILessThanEqual(operand_1, operand_2, is_signed);
+ case CompareOp::GreaterThan:
+ return ir.IGreaterThan(operand_1, operand_2, is_signed);
+ case CompareOp::NotEqual:
+ return ir.INotEqual(operand_1, operand_2);
+ case CompareOp::GreaterThanEqual:
+ return ir.IGreaterThanEqual(operand_1, operand_2, is_signed);
+ case CompareOp::True:
+ return ir.Imm1(true);
+ default:
+ throw NotImplementedException("Invalid compare op {}", compare_op);
+ }
+}
+
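+// Extended-precision integer compare that folds the carry and zero flags from a previous
+// operation into the comparison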
+IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
+ CompareOp compare_op, bool is_signed) {
+ const IR::U32 zero{ir.Imm32(0)};
+ const IR::U32 carry{ir.Select(ir.GetCFlag(), ir.Imm32(1), zero)};
+ const IR::U1 z_flag{ir.GetZFlag()};
+ const IR::U32 intermediate{ir.IAdd(ir.IAdd(operand_1, ir.BitwiseNot(operand_2)), carry)};
+ const IR::U1 flip_logic{is_signed ? ir.Imm1(false)
+ : ir.LogicalXor(ir.ILessThan(operand_1, zero, true),
+ ir.ILessThan(operand_2, zero, true))};
+ switch (compare_op) {
+ case CompareOp::False:
+ return ir.Imm1(false);
+ case CompareOp::LessThan:
+ return IR::U1{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true),
+ ir.ILessThan(intermediate, zero, true))};
+ case CompareOp::Equal:
+ return ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag);
+ case CompareOp::LessThanEqual: {
+ const IR::U1 base_cmp{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true),
+ ir.ILessThan(intermediate, zero, true))};
+ return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag));
+ }
+ case CompareOp::GreaterThan: {
+ const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThanEqual(intermediate, zero, true),
+ ir.IGreaterThan(intermediate, zero, true))};
+ const IR::U1 not_z{ir.LogicalNot(z_flag)};
+ return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), not_z));
+ }
+ case CompareOp::NotEqual:
+ return ir.LogicalOr(ir.INotEqual(intermediate, zero),
+ ir.LogicalAnd(ir.IEqual(intermediate, zero), ir.LogicalNot(z_flag)));
+ case CompareOp::GreaterThanEqual: {
+ const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThan(intermediate, zero, true),
+ ir.IGreaterThanEqual(intermediate, zero, true))};
+ return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag));
+ }
+ case CompareOp::True:
+ return ir.Imm1(true);
+ default:
+ throw NotImplementedException("Invalid compare op {}", compare_op);
+ }
+}
+
+IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, const IR::U1& predicate_2,
+ BooleanOp bop) {
+ switch (bop) {
+ case BooleanOp::AND:
+ return ir.LogicalAnd(predicate_1, predicate_2);
+ case BooleanOp::OR:
+ return ir.LogicalOr(predicate_1, predicate_2);
+ case BooleanOp::XOR:
+ return ir.LogicalXor(predicate_1, predicate_2);
+ default:
+ throw NotImplementedException("Invalid bop {}", bop);
+ }
+}
+
+IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op) {
+ switch (op) {
+ case PredicateOp::False:
+ return ir.Imm1(false);
+ case PredicateOp::True:
+ return ir.Imm1(true);
+ case PredicateOp::Zero:
+ return ir.IEqual(result, ir.Imm32(0));
+ case PredicateOp::NonZero:
+ return ir.INotEqual(result, ir.Imm32(0));
+ default:
+ throw NotImplementedException("Invalid Predicate operation {}", op);
+ }
+}
+
+bool IsCompareOpOrdered(FPCompareOp op) {
+ switch (op) {
+ case FPCompareOp::LTU:
+ case FPCompareOp::EQU:
+ case FPCompareOp::LEU:
+ case FPCompareOp::GTU:
+ case FPCompareOp::NEU:
+ case FPCompareOp::GEU:
+ return false;
+ default:
+ return true;
+ }
+}
+
+IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1,
+ const IR::F16F32F64& operand_2, FPCompareOp compare_op,
+ IR::FpControl control) {
+ const bool ordered{IsCompareOpOrdered(compare_op)};
+ switch (compare_op) {
+ case FPCompareOp::F:
+ return ir.Imm1(false);
+ case FPCompareOp::LT:
+ case FPCompareOp::LTU:
+ return ir.FPLessThan(operand_1, operand_2, control, ordered);
+ case FPCompareOp::EQ:
+ case FPCompareOp::EQU:
+ return ir.FPEqual(operand_1, operand_2, control, ordered);
+ case FPCompareOp::LE:
+ case FPCompareOp::LEU:
+ return ir.FPLessThanEqual(operand_1, operand_2, control, ordered);
+ case FPCompareOp::GT:
+ case FPCompareOp::GTU:
+ return ir.FPGreaterThan(operand_1, operand_2, control, ordered);
+ case FPCompareOp::NE:
+ case FPCompareOp::NEU:
+ return ir.FPNotEqual(operand_1, operand_2, control, ordered);
+ case FPCompareOp::GE:
+ case FPCompareOp::GEU:
+ return ir.FPGreaterThanEqual(operand_1, operand_2, control, ordered);
+ case FPCompareOp::NUM:
+ return ir.FPOrdered(operand_1, operand_2);
+ case FPCompareOp::Nan:
+ return ir.FPUnordered(operand_1, operand_2);
+ case FPCompareOp::T:
+ return ir.Imm1(true);
+ default:
+ throw NotImplementedException("Invalid FP compare op {}", compare_op);
+ }
+}
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h
new file mode 100644
index 000000000..214d0af3c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h
@@ -0,0 +1,28 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+[[nodiscard]] IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1,
+ const IR::U32& operand_2, CompareOp compare_op, bool is_signed);
+
+[[nodiscard]] IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1,
+ const IR::U32& operand_2, CompareOp compare_op,
+ bool is_signed);
+
+[[nodiscard]] IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1,
+ const IR::U1& predicate_2, BooleanOp bop);
+
+[[nodiscard]] IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op);
+
+[[nodiscard]] bool IsCompareOpOrdered(FPCompareOp op);
+
+[[nodiscard]] IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1,
+ const IR::F16F32F64& operand_2, FPCompareOp compare_op,
+ IR::FpControl control = {});
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp
new file mode 100644
index 000000000..420f2fb94
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp
@@ -0,0 +1,66 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+
+void TranslatorVisitor::CSET(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 5, IR::FlowTest> cc_test;
+ BitField<39, 3, IR::Pred> bop_pred;
+ BitField<42, 1, u64> neg_bop_pred;
+ BitField<44, 1, u64> bf;
+ BitField<45, 2, BooleanOp> bop;
+ BitField<47, 1, u64> cc;
+ } const cset{insn};
+
+ const IR::U32 one_mask{ir.Imm32(-1)};
+ const IR::U32 fp_one{ir.Imm32(0x3f800000)};
+ const IR::U32 zero{ir.Imm32(0)};
+ const IR::U32 pass_result{cset.bf == 0 ? one_mask : fp_one};
+ const IR::U1 cc_test_result{ir.GetFlowTestResult(cset.cc_test)};
+ const IR::U1 bop_pred{ir.GetPred(cset.bop_pred, cset.neg_bop_pred != 0)};
+ const IR::U1 pred_result{PredicateCombine(ir, cc_test_result, bop_pred, cset.bop)};
+ const IR::U32 result{ir.Select(pred_result, pass_result, zero)};
+ X(cset.dest_reg, result);
+ if (cset.cc != 0) {
+ const IR::U1 is_zero{ir.IEqual(result, zero)};
+ SetZFlag(is_zero);
+ if (cset.bf != 0) {
+ ResetSFlag();
+ } else {
+ SetSFlag(ir.LogicalNot(is_zero));
+ }
+ ResetOFlag();
+ ResetCFlag();
+ }
+}
+
+void TranslatorVisitor::CSETP(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 3, IR::Pred> dest_pred_b;
+ BitField<3, 3, IR::Pred> dest_pred_a;
+ BitField<8, 5, IR::FlowTest> cc_test;
+ BitField<39, 3, IR::Pred> bop_pred;
+ BitField<42, 1, u64> neg_bop_pred;
+ BitField<45, 2, BooleanOp> bop;
+ } const csetp{insn};
+
+ const BooleanOp bop{csetp.bop};
+ const IR::U1 bop_pred{ir.GetPred(csetp.bop_pred, csetp.neg_bop_pred != 0)};
+ const IR::U1 cc_test_result{ir.GetFlowTestResult(csetp.cc_test)};
+ const IR::U1 result_a{PredicateCombine(ir, cc_test_result, bop_pred, bop)};
+ const IR::U1 result_b{PredicateCombine(ir, ir.LogicalNot(cc_test_result), bop_pred, bop)};
+ ir.SetPred(csetp.dest_pred_a, result_a);
+ ir.SetPred(csetp.dest_pred_b, result_b);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp
new file mode 100644
index 000000000..5a1b3a8fc
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp
@@ -0,0 +1,55 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+
+void DADD(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a_reg;
+ BitField<39, 2, FpRounding> fp_rounding;
+ BitField<45, 1, u64> neg_b;
+ BitField<46, 1, u64> abs_a;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> neg_a;
+ BitField<49, 1, u64> abs_b;
+ } const dadd{insn};
+ if (dadd.cc != 0) {
+ throw NotImplementedException("DADD CC");
+ }
+
+ const IR::F64 src_a{v.D(dadd.src_a_reg)};
+ const IR::F64 op_a{v.ir.FPAbsNeg(src_a, dadd.abs_a != 0, dadd.neg_a != 0)};
+ const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dadd.abs_b != 0, dadd.neg_b != 0)};
+
+ const IR::FpControl control{
+ .no_contraction = true,
+ .rounding = CastFpRounding(dadd.fp_rounding),
+ .fmz_mode = IR::FmzMode::None,
+ };
+
+ v.D(dadd.dest_reg, v.ir.FPAdd(op_a, op_b, control));
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::DADD_reg(u64 insn) {
+ DADD(*this, insn, GetDoubleReg20(insn));
+}
+
+void TranslatorVisitor::DADD_cbuf(u64 insn) {
+ DADD(*this, insn, GetDoubleCbuf(insn));
+}
+
+void TranslatorVisitor::DADD_imm(u64 insn) {
+ DADD(*this, insn, GetDoubleImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp
new file mode 100644
index 000000000..1173192e4
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp
@@ -0,0 +1,72 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void DSET(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a_reg;
+ BitField<39, 3, IR::Pred> pred;
+ BitField<42, 1, u64> neg_pred;
+ BitField<43, 1, u64> negate_a;
+ BitField<44, 1, u64> abs_b;
+ BitField<45, 2, BooleanOp> bop;
+ BitField<47, 1, u64> cc;
+ BitField<48, 4, FPCompareOp> compare_op;
+ BitField<52, 1, u64> bf;
+ BitField<53, 1, u64> negate_b;
+ BitField<54, 1, u64> abs_a;
+ } const dset{insn};
+
+ const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dset.src_a_reg), dset.abs_a != 0, dset.negate_a != 0)};
+ const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dset.abs_b != 0, dset.negate_b != 0)};
+
+ IR::U1 pred{v.ir.GetPred(dset.pred)};
+ if (dset.neg_pred != 0) {
+ pred = v.ir.LogicalNot(pred);
+ }
+ const IR::U1 cmp_result{FloatingPointCompare(v.ir, op_a, op_b, dset.compare_op)};
+ const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, dset.bop)};
+
+ const IR::U32 one_mask{v.ir.Imm32(-1)};
+ const IR::U32 fp_one{v.ir.Imm32(0x3f800000)};
+ const IR::U32 zero{v.ir.Imm32(0)};
+ const IR::U32 pass_result{dset.bf == 0 ? one_mask : fp_one};
+ const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)};
+
+ v.X(dset.dest_reg, result);
+ if (dset.cc != 0) {
+ const IR::U1 is_zero{v.ir.IEqual(result, zero)};
+ v.SetZFlag(is_zero);
+ if (dset.bf != 0) {
+ v.ResetSFlag();
+ } else {
+ v.SetSFlag(v.ir.LogicalNot(is_zero));
+ }
+ v.ResetCFlag();
+ v.ResetOFlag();
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::DSET_reg(u64 insn) {
+ DSET(*this, insn, GetDoubleReg20(insn));
+}
+
+void TranslatorVisitor::DSET_cbuf(u64 insn) {
+ DSET(*this, insn, GetDoubleCbuf(insn));
+}
+
+void TranslatorVisitor::DSET_imm(u64 insn) {
+ DSET(*this, insn, GetDoubleImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp
new file mode 100644
index 000000000..f66097014
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp
@@ -0,0 +1,58 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+
+void DFMA(TranslatorVisitor& v, u64 insn, const IR::F64& src_b, const IR::F64& src_c) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a_reg;
+ BitField<50, 2, FpRounding> fp_rounding;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> neg_b;
+ BitField<49, 1, u64> neg_c;
+ } const dfma{insn};
+
+ if (dfma.cc != 0) {
+ throw NotImplementedException("DFMA CC");
+ }
+
+ const IR::F64 src_a{v.D(dfma.src_a_reg)};
+ const IR::F64 op_b{v.ir.FPAbsNeg(src_b, false, dfma.neg_b != 0)};
+ const IR::F64 op_c{v.ir.FPAbsNeg(src_c, false, dfma.neg_c != 0)};
+
+ const IR::FpControl control{
+ .no_contraction = true,
+ .rounding = CastFpRounding(dfma.fp_rounding),
+ .fmz_mode = IR::FmzMode::None,
+ };
+
+ v.D(dfma.dest_reg, v.ir.FPFma(src_a, op_b, op_c, control));
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::DFMA_reg(u64 insn) {
+ DFMA(*this, insn, GetDoubleReg20(insn), GetDoubleReg39(insn));
+}
+
+void TranslatorVisitor::DFMA_cr(u64 insn) {
+ DFMA(*this, insn, GetDoubleCbuf(insn), GetDoubleReg39(insn));
+}
+
+void TranslatorVisitor::DFMA_rc(u64 insn) {
+ DFMA(*this, insn, GetDoubleReg39(insn), GetDoubleCbuf(insn));
+}
+
+void TranslatorVisitor::DFMA_imm(u64 insn) {
+ DFMA(*this, insn, GetDoubleImm20(insn), GetDoubleReg39(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp
new file mode 100644
index 000000000..6b551847c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp
@@ -0,0 +1,55 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void DMNMX(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a_reg;
+ BitField<39, 3, IR::Pred> pred;
+ BitField<42, 1, u64> neg_pred;
+ BitField<45, 1, u64> negate_b;
+ BitField<46, 1, u64> abs_a;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> negate_a;
+ BitField<49, 1, u64> abs_b;
+ } const dmnmx{insn};
+
+ if (dmnmx.cc != 0) {
+ throw NotImplementedException("DMNMX CC");
+ }
+
+ const IR::U1 pred{v.ir.GetPred(dmnmx.pred)};
+ const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dmnmx.src_a_reg), dmnmx.abs_a != 0, dmnmx.negate_a != 0)};
+ const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dmnmx.abs_b != 0, dmnmx.negate_b != 0)};
+
+ IR::F64 max{v.ir.FPMax(op_a, op_b)};
+ IR::F64 min{v.ir.FPMin(op_a, op_b)};
+
+ if (dmnmx.neg_pred != 0) {
+ std::swap(min, max);
+ }
+ v.D(dmnmx.dest_reg, IR::F64{v.ir.Select(pred, min, max)});
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::DMNMX_reg(u64 insn) {
+ DMNMX(*this, insn, GetDoubleReg20(insn));
+}
+
+void TranslatorVisitor::DMNMX_cbuf(u64 insn) {
+ DMNMX(*this, insn, GetDoubleCbuf(insn));
+}
+
+void TranslatorVisitor::DMNMX_imm(u64 insn) {
+ DMNMX(*this, insn, GetDoubleImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp
new file mode 100644
index 000000000..c0159fb65
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp
@@ -0,0 +1,50 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+
+void DMUL(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a_reg;
+ BitField<39, 2, FpRounding> fp_rounding;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> neg;
+ } const dmul{insn};
+
+ if (dmul.cc != 0) {
+ throw NotImplementedException("DMUL CC");
+ }
+
+ const IR::F64 src_a{v.ir.FPAbsNeg(v.D(dmul.src_a_reg), false, dmul.neg != 0)};
+ const IR::FpControl control{
+ .no_contraction = true,
+ .rounding = CastFpRounding(dmul.fp_rounding),
+ .fmz_mode = IR::FmzMode::None,
+ };
+
+ v.D(dmul.dest_reg, v.ir.FPMul(src_a, src_b, control));
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::DMUL_reg(u64 insn) {
+ DMUL(*this, insn, GetDoubleReg20(insn));
+}
+
+void TranslatorVisitor::DMUL_cbuf(u64 insn) {
+ DMUL(*this, insn, GetDoubleCbuf(insn));
+}
+
+void TranslatorVisitor::DMUL_imm(u64 insn) {
+ DMUL(*this, insn, GetDoubleImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp
new file mode 100644
index 000000000..b8e74ee44
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp
@@ -0,0 +1,54 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void DSETP(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
+ union {
+ u64 insn;
+ BitField<0, 3, IR::Pred> dest_pred_b;
+ BitField<3, 3, IR::Pred> dest_pred_a;
+ BitField<6, 1, u64> negate_b;
+ BitField<7, 1, u64> abs_a;
+ BitField<8, 8, IR::Reg> src_a_reg;
+ BitField<39, 3, IR::Pred> bop_pred;
+ BitField<42, 1, u64> neg_bop_pred;
+ BitField<43, 1, u64> negate_a;
+ BitField<44, 1, u64> abs_b;
+ BitField<45, 2, BooleanOp> bop;
+ BitField<48, 4, FPCompareOp> compare_op;
+ } const dsetp{insn};
+
+ const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dsetp.src_a_reg), dsetp.abs_a != 0, dsetp.negate_a != 0)};
+ const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dsetp.abs_b != 0, dsetp.negate_b != 0)};
+
+ const BooleanOp bop{dsetp.bop};
+ const FPCompareOp compare_op{dsetp.compare_op};
+ const IR::U1 comparison{FloatingPointCompare(v.ir, op_a, op_b, compare_op)};
+ const IR::U1 bop_pred{v.ir.GetPred(dsetp.bop_pred, dsetp.neg_bop_pred != 0)};
+ const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)};
+ const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)};
+ v.ir.SetPred(dsetp.dest_pred_a, result_a);
+ v.ir.SetPred(dsetp.dest_pred_b, result_b);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::DSETP_reg(u64 insn) {
+ DSETP(*this, insn, GetDoubleReg20(insn));
+}
+
+void TranslatorVisitor::DSETP_cbuf(u64 insn) {
+ DSETP(*this, insn, GetDoubleCbuf(insn));
+}
+
+void TranslatorVisitor::DSETP_imm(u64 insn) {
+ DSETP(*this, insn, GetDoubleImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp
new file mode 100644
index 000000000..c2443c886
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp
@@ -0,0 +1,43 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void ExitFragment(TranslatorVisitor& v) {
+ const ProgramHeader sph{v.env.SPH()};
+ IR::Reg src_reg{IR::Reg::R0};
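+    // Color outputs are read from consecutive registers starting at R0, one
+    // register per enabled component of each render target.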
+ for (u32 render_target = 0; render_target < 8; ++render_target) {
+ const std::array<bool, 4> mask{sph.ps.EnabledOutputComponents(render_target)};
+ for (u32 component = 0; component < 4; ++component) {
+ if (!mask[component]) {
+ continue;
+ }
+ v.ir.SetFragColor(render_target, component, v.F(src_reg));
+ ++src_reg;
+ }
+ }
+ if (sph.ps.omap.sample_mask != 0) {
+ v.ir.SetSampleMask(v.X(src_reg));
+ }
+ if (sph.ps.omap.depth != 0) {
+ v.ir.SetFragDepth(v.F(src_reg + 1));
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::EXIT() {
+ switch (env.ShaderStage()) {
+ case Stage::Fragment:
+ ExitFragment(*this);
+ break;
+ default:
+ break;
+ }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp
new file mode 100644
index 000000000..f0cb25d61
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp
@@ -0,0 +1,47 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void FLO(TranslatorVisitor& v, u64 insn, IR::U32 src) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<40, 1, u64> tilde;
+ BitField<41, 1, u64> shift;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> is_signed;
+ } const flo{insn};
+
+ if (flo.cc != 0) {
+ throw NotImplementedException("CC");
+ }
+ if (flo.tilde != 0) {
+ src = v.ir.BitwiseNot(src);
+ }
+ IR::U32 result{flo.is_signed != 0 ? v.ir.FindSMsb(src) : v.ir.FindUMsb(src)};
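+    // With the shift modifier, a found bit index is converted from an
+    // LSB-relative position into a count from bit 31 via the XOR below; the
+    // -1 "not found" result is passed through unchanged by the select.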
+ if (flo.shift != 0) {
+ const IR::U1 not_found{v.ir.IEqual(result, v.ir.Imm32(-1))};
+ result = IR::U32{v.ir.Select(not_found, result, v.ir.BitwiseXor(result, v.ir.Imm32(31)))};
+ }
+ v.X(flo.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::FLO_reg(u64 insn) {
+ FLO(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::FLO_cbuf(u64 insn) {
+ FLO(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::FLO_imm(u64 insn) {
+ FLO(*this, insn, GetImm20(insn));
+}
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp
new file mode 100644
index 000000000..b8c89810c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp
@@ -0,0 +1,82 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRounding fp_rounding,
+ const IR::F32& src_b, bool abs_a, bool neg_a, bool abs_b, bool neg_b) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a;
+ } const fadd{insn};
+
+ if (cc) {
+ throw NotImplementedException("FADD CC");
+ }
+ const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fadd.src_a), abs_a, neg_a)};
+ const IR::F32 op_b{v.ir.FPAbsNeg(src_b, abs_b, neg_b)};
+ IR::FpControl control{
+ .no_contraction = true,
+ .rounding = CastFpRounding(fp_rounding),
+ .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None),
+ };
+ IR::F32 value{v.ir.FPAdd(op_a, op_b, control)};
+ if (sat) {
+ value = v.ir.FPSaturate(value);
+ }
+ v.F(fadd.dest_reg, value);
+}
+
+void FADD(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
+ union {
+ u64 raw;
+ BitField<39, 2, FpRounding> fp_rounding;
+ BitField<44, 1, u64> ftz;
+ BitField<45, 1, u64> neg_b;
+ BitField<46, 1, u64> abs_a;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> neg_a;
+ BitField<49, 1, u64> abs_b;
+ BitField<50, 1, u64> sat;
+ } const fadd{insn};
+
+ FADD(v, insn, fadd.sat != 0, fadd.cc != 0, fadd.ftz != 0, fadd.fp_rounding, src_b,
+ fadd.abs_a != 0, fadd.neg_a != 0, fadd.abs_b != 0, fadd.neg_b != 0);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::FADD_reg(u64 insn) {
+ FADD(*this, insn, GetFloatReg20(insn));
+}
+
+void TranslatorVisitor::FADD_cbuf(u64 insn) {
+ FADD(*this, insn, GetFloatCbuf(insn));
+}
+
+void TranslatorVisitor::FADD_imm(u64 insn) {
+ FADD(*this, insn, GetFloatImm20(insn));
+}
+
+void TranslatorVisitor::FADD32I(u64 insn) {
+ union {
+ u64 raw;
+ BitField<55, 1, u64> ftz;
+ BitField<56, 1, u64> neg_a;
+ BitField<54, 1, u64> abs_a;
+ BitField<52, 1, u64> cc;
+ BitField<53, 1, u64> neg_b;
+ BitField<57, 1, u64> abs_b;
+ } const fadd32i{insn};
+
+ FADD(*this, insn, false, fadd32i.cc != 0, fadd32i.ftz != 0, FpRounding::RN, GetFloatImm32(insn),
+ fadd32i.abs_a != 0, fadd32i.neg_a != 0, fadd32i.abs_b != 0, fadd32i.neg_b != 0);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp
new file mode 100644
index 000000000..7127ebf54
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp
@@ -0,0 +1,55 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void FCMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::F32& operand) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<47, 1, u64> ftz;
+ BitField<48, 4, FPCompareOp> compare_op;
+ } const fcmp{insn};
+
+ const IR::F32 zero{v.ir.Imm32(0.0f)};
+ const IR::FpControl control{.fmz_mode = (fcmp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None)};
+ const IR::U1 cmp_result{FloatingPointCompare(v.ir, operand, zero, fcmp.compare_op, control)};
+ const IR::U32 src_reg{v.X(fcmp.src_reg)};
+ const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)};
+
+ v.X(fcmp.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::FCMP_reg(u64 insn) {
+ FCMP(*this, insn, GetReg20(insn), GetFloatReg39(insn));
+}
+
+void TranslatorVisitor::FCMP_rc(u64 insn) {
+ FCMP(*this, insn, GetReg39(insn), GetFloatCbuf(insn));
+}
+
+void TranslatorVisitor::FCMP_cr(u64 insn) {
+ FCMP(*this, insn, GetCbuf(insn), GetFloatReg39(insn));
+}
+
+void TranslatorVisitor::FCMP_imm(u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 19, u64> value;
+ BitField<56, 1, u64> is_negative;
+ } const fcmp{insn};
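+    // The 19-bit immediate supplies the upper bits of an f32 constant: shift it
+    // into position and OR in the encoded sign bit.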
+ const u32 sign_bit{fcmp.is_negative != 0 ? (1U << 31) : 0};
+ const u32 value{static_cast<u32>(fcmp.value) << 12};
+
+ FCMP(*this, insn, ir.Imm32(value | sign_bit), GetFloatReg39(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp
new file mode 100644
index 000000000..eece4f28f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp
@@ -0,0 +1,78 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void FSET(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a_reg;
+ BitField<39, 3, IR::Pred> pred;
+ BitField<42, 1, u64> neg_pred;
+ BitField<43, 1, u64> negate_a;
+ BitField<44, 1, u64> abs_b;
+ BitField<45, 2, BooleanOp> bop;
+ BitField<47, 1, u64> cc;
+ BitField<48, 4, FPCompareOp> compare_op;
+ BitField<52, 1, u64> bf;
+ BitField<53, 1, u64> negate_b;
+ BitField<54, 1, u64> abs_a;
+ BitField<55, 1, u64> ftz;
+ } const fset{insn};
+
+ const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fset.src_a_reg), fset.abs_a != 0, fset.negate_a != 0)};
+    const IR::F32 op_b{v.ir.FPAbsNeg(src_b, fset.abs_b != 0, fset.negate_b != 0)};
+ const IR::FpControl control{
+ .no_contraction = false,
+ .rounding = IR::FpRounding::DontCare,
+ .fmz_mode = (fset.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
+ };
+
+ IR::U1 pred{v.ir.GetPred(fset.pred)};
+ if (fset.neg_pred != 0) {
+ pred = v.ir.LogicalNot(pred);
+ }
+ const IR::U1 cmp_result{FloatingPointCompare(v.ir, op_a, op_b, fset.compare_op, control)};
+ const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, fset.bop)};
+
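+    // BF selects the pass value: an all-ones integer mask by default, or the
+    // floating-point constant 1.0f when a float result is requested.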
+ const IR::U32 one_mask{v.ir.Imm32(-1)};
+ const IR::U32 fp_one{v.ir.Imm32(0x3f800000)};
+ const IR::U32 zero{v.ir.Imm32(0)};
+ const IR::U32 pass_result{fset.bf == 0 ? one_mask : fp_one};
+ const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)};
+
+ v.X(fset.dest_reg, result);
+ if (fset.cc != 0) {
+ const IR::U1 is_zero{v.ir.IEqual(result, zero)};
+ v.SetZFlag(is_zero);
+ if (fset.bf != 0) {
+ v.ResetSFlag();
+ } else {
+ v.SetSFlag(v.ir.LogicalNot(is_zero));
+ }
+ v.ResetCFlag();
+ v.ResetOFlag();
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::FSET_reg(u64 insn) {
+ FSET(*this, insn, GetFloatReg20(insn));
+}
+
+void TranslatorVisitor::FSET_cbuf(u64 insn) {
+ FSET(*this, insn, GetFloatCbuf(insn));
+}
+
+void TranslatorVisitor::FSET_imm(u64 insn) {
+ FSET(*this, insn, GetFloatImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp
new file mode 100644
index 000000000..02ab023c1
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp
@@ -0,0 +1,214 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class FloatFormat : u64 {
+ F16 = 1,
+ F32 = 2,
+ F64 = 3,
+};
+
+enum class RoundingOp : u64 {
+ None = 0,
+ Pass = 3,
+ Round = 8,
+ Floor = 9,
+ Ceil = 10,
+ Trunc = 11,
+};
+
+[[nodiscard]] u32 WidthSize(FloatFormat width) {
+ switch (width) {
+ case FloatFormat::F16:
+ return 16;
+ case FloatFormat::F32:
+ return 32;
+ case FloatFormat::F64:
+ return 64;
+ default:
+ throw NotImplementedException("Invalid width {}", width);
+ }
+}
+
+void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<44, 1, u64> ftz;
+ BitField<45, 1, u64> neg;
+ BitField<47, 1, u64> cc;
+ BitField<50, 1, u64> sat;
+ BitField<39, 4, u64> rounding_op;
+ BitField<39, 2, FpRounding> rounding;
+ BitField<10, 2, FloatFormat> src_size;
+ BitField<8, 2, FloatFormat> dst_size;
+
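+        // Bits [42:39] hold both the 2-bit FP rounding mode used for size
+        // conversions and a 4-bit rounding op; the 0x0B mask drops bit 2 so the
+        // field maps onto the RoundingOp values above.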
+ [[nodiscard]] RoundingOp RoundingOperation() const {
+ constexpr u64 rounding_mask = 0x0B;
+ return static_cast<RoundingOp>(rounding_op.Value() & rounding_mask);
+ }
+ } const f2f{insn};
+
+ if (f2f.cc != 0) {
+ throw NotImplementedException("F2F CC");
+ }
+
+ IR::F16F32F64 input{v.ir.FPAbsNeg(src_a, abs, f2f.neg != 0)};
+
+ const bool any_fp64{f2f.src_size == FloatFormat::F64 || f2f.dst_size == FloatFormat::F64};
+ IR::FpControl fp_control{
+ .no_contraction = false,
+ .rounding = IR::FpRounding::DontCare,
+ .fmz_mode = (f2f.ftz != 0 && !any_fp64 ? IR::FmzMode::FTZ : IR::FmzMode::None),
+ };
+ if (f2f.src_size != f2f.dst_size) {
+ fp_control.rounding = CastFpRounding(f2f.rounding);
+ input = v.ir.FPConvert(WidthSize(f2f.dst_size), input, fp_control);
+ } else {
+ switch (f2f.RoundingOperation()) {
+ case RoundingOp::None:
+ case RoundingOp::Pass:
+ // Make sure NANs are handled properly
+ switch (f2f.src_size) {
+ case FloatFormat::F16:
+ input = v.ir.FPAdd(input, v.ir.FPConvert(16, v.ir.Imm32(0.0f)), fp_control);
+ break;
+ case FloatFormat::F32:
+ input = v.ir.FPAdd(input, v.ir.Imm32(0.0f), fp_control);
+ break;
+ case FloatFormat::F64:
+ input = v.ir.FPAdd(input, v.ir.Imm64(0.0), fp_control);
+ break;
+ }
+ break;
+ case RoundingOp::Round:
+ input = v.ir.FPRoundEven(input, fp_control);
+ break;
+ case RoundingOp::Floor:
+ input = v.ir.FPFloor(input, fp_control);
+ break;
+ case RoundingOp::Ceil:
+ input = v.ir.FPCeil(input, fp_control);
+ break;
+ case RoundingOp::Trunc:
+ input = v.ir.FPTrunc(input, fp_control);
+ break;
+ default:
+ throw NotImplementedException("Unimplemented rounding mode {}", f2f.rounding.Value());
+ }
+ }
+ if (f2f.sat != 0 && !any_fp64) {
+ input = v.ir.FPSaturate(input);
+ }
+
+ switch (f2f.dst_size) {
+ case FloatFormat::F16: {
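+        // Pack the converted f16 into the low half of the destination register,
+        // with +0.0 in the upper half.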
+ const IR::F16 imm{v.ir.FPConvert(16, v.ir.Imm32(0.0f))};
+ v.X(f2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(input, imm)));
+ break;
+ }
+ case FloatFormat::F32:
+ v.F(f2f.dest_reg, input);
+ break;
+ case FloatFormat::F64:
+ v.D(f2f.dest_reg, input);
+ break;
+ default:
+ throw NotImplementedException("Invalid dest format {}", f2f.dst_size.Value());
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::F2F_reg(u64 insn) {
+ union {
+ u64 insn;
+ BitField<49, 1, u64> abs;
+ BitField<10, 2, FloatFormat> src_size;
+ BitField<41, 1, u64> selector;
+ } const f2f{insn};
+
+ IR::F16F32F64 src_a;
+ switch (f2f.src_size) {
+ case FloatFormat::F16: {
+ auto [lhs_a, rhs_a]{Extract(ir, GetReg20(insn), Swizzle::H1_H0)};
+ src_a = f2f.selector != 0 ? rhs_a : lhs_a;
+ break;
+ }
+ case FloatFormat::F32:
+ src_a = GetFloatReg20(insn);
+ break;
+ case FloatFormat::F64:
+ src_a = GetDoubleReg20(insn);
+ break;
+ default:
+ throw NotImplementedException("Invalid dest format {}", f2f.src_size.Value());
+ }
+ F2F(*this, insn, src_a, f2f.abs != 0);
+}
+
+void TranslatorVisitor::F2F_cbuf(u64 insn) {
+ union {
+ u64 insn;
+ BitField<49, 1, u64> abs;
+ BitField<10, 2, FloatFormat> src_size;
+ BitField<41, 1, u64> selector;
+ } const f2f{insn};
+
+ IR::F16F32F64 src_a;
+ switch (f2f.src_size) {
+ case FloatFormat::F16: {
+ auto [lhs_a, rhs_a]{Extract(ir, GetCbuf(insn), Swizzle::H1_H0)};
+ src_a = f2f.selector != 0 ? rhs_a : lhs_a;
+ break;
+ }
+ case FloatFormat::F32:
+ src_a = GetFloatCbuf(insn);
+ break;
+ case FloatFormat::F64:
+ src_a = GetDoubleCbuf(insn);
+ break;
+ default:
+ throw NotImplementedException("Invalid dest format {}", f2f.src_size.Value());
+ }
+ F2F(*this, insn, src_a, f2f.abs != 0);
+}
+
+void TranslatorVisitor::F2F_imm(u64 insn) {
+ union {
+ u64 insn;
+ BitField<49, 1, u64> abs;
+ BitField<10, 2, FloatFormat> src_size;
+ BitField<41, 1, u64> selector;
+ BitField<20, 19, u64> imm;
+ BitField<56, 1, u64> imm_neg;
+ } const f2f{insn};
+
+ IR::F16F32F64 src_a;
+ switch (f2f.src_size) {
+ case FloatFormat::F16: {
+ const u32 imm{static_cast<u32>(f2f.imm & 0x0000ffff)};
+ const IR::Value vector{ir.UnpackFloat2x16(ir.Imm32(imm | (imm << 16)))};
+ src_a = IR::F16{ir.CompositeExtract(vector, f2f.selector != 0 ? 0 : 1)};
+ if (f2f.imm_neg != 0) {
+ throw NotImplementedException("Neg bit on F16");
+ }
+ break;
+ }
+ case FloatFormat::F32:
+ src_a = GetFloatImm20(insn);
+ break;
+ case FloatFormat::F64:
+ src_a = GetDoubleImm20(insn);
+ break;
+ default:
+ throw NotImplementedException("Invalid dest format {}", f2f.src_size.Value());
+ }
+ F2F(*this, insn, src_a, f2f.abs != 0);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
new file mode 100644
index 000000000..92b1ce015
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
@@ -0,0 +1,253 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <limits>
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class DestFormat : u64 {
+ Invalid,
+ I16,
+ I32,
+ I64,
+};
+enum class SrcFormat : u64 {
+ Invalid,
+ F16,
+ F32,
+ F64,
+};
+enum class Rounding : u64 {
+ Round,
+ Floor,
+ Ceil,
+ Trunc,
+};
+
+union F2I {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 2, DestFormat> dest_format;
+ BitField<10, 2, SrcFormat> src_format;
+ BitField<12, 1, u64> is_signed;
+ BitField<39, 2, Rounding> rounding;
+ BitField<41, 1, u64> half;
+ BitField<44, 1, u64> ftz;
+ BitField<45, 1, u64> abs;
+ BitField<47, 1, u64> cc;
+ BitField<49, 1, u64> neg;
+};
+
+size_t BitSize(DestFormat dest_format) {
+ switch (dest_format) {
+ case DestFormat::I16:
+ return 16;
+ case DestFormat::I32:
+ return 32;
+ case DestFormat::I64:
+ return 64;
+ default:
+ throw NotImplementedException("Invalid destination format {}", dest_format);
+ }
+}
+
+std::pair<f64, f64> ClampBounds(DestFormat format, bool is_signed) {
+ if (is_signed) {
+ switch (format) {
+ case DestFormat::I16:
+ return {static_cast<f64>(std::numeric_limits<s16>::max()),
+ static_cast<f64>(std::numeric_limits<s16>::min())};
+ case DestFormat::I32:
+ return {static_cast<f64>(std::numeric_limits<s32>::max()),
+ static_cast<f64>(std::numeric_limits<s32>::min())};
+ case DestFormat::I64:
+ return {static_cast<f64>(std::numeric_limits<s64>::max()),
+ static_cast<f64>(std::numeric_limits<s64>::min())};
+ default:
+ break;
+ }
+ } else {
+ switch (format) {
+ case DestFormat::I16:
+ return {static_cast<f64>(std::numeric_limits<u16>::max()),
+ static_cast<f64>(std::numeric_limits<u16>::min())};
+ case DestFormat::I32:
+ return {static_cast<f64>(std::numeric_limits<u32>::max()),
+ static_cast<f64>(std::numeric_limits<u32>::min())};
+ case DestFormat::I64:
+ return {static_cast<f64>(std::numeric_limits<u64>::max()),
+ static_cast<f64>(std::numeric_limits<u64>::min())};
+ default:
+ break;
+ }
+ }
+ throw NotImplementedException("Invalid destination format {}", format);
+}
+
+IR::F64 UnpackCbuf(TranslatorVisitor& v, u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 14, s64> offset;
+ BitField<34, 5, u64> binding;
+ } const cbuf{insn};
+ if (cbuf.binding >= 18) {
+ throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding);
+ }
+ if (cbuf.offset >= 0x4'000 || cbuf.offset < 0) {
+ throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset * 4);
+ }
+ if (cbuf.offset % 2 != 0) {
+ throw NotImplementedException("Unaligned F64 constant buffer offset {}", cbuf.offset * 4);
+ }
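+    // Read only the high word of the double at offset + 4 and zero-fill the low
+    // word before packing the pair back into an F64.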
+ const IR::U32 binding{v.ir.Imm32(static_cast<u32>(cbuf.binding))};
+ const IR::U32 byte_offset{v.ir.Imm32(static_cast<u32>(cbuf.offset) * 4 + 4)};
+ const IR::U32 cbuf_data{v.ir.GetCbuf(binding, byte_offset)};
+ const IR::Value vector{v.ir.CompositeConstruct(v.ir.Imm32(0U), cbuf_data)};
+ return v.ir.PackDouble2x32(vector);
+}
+
+void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) {
+ // F2I is used to convert from a floating point value to an integer
+ const F2I f2i{insn};
+
+ const bool denorm_cares{f2i.src_format != SrcFormat::F16 && f2i.src_format != SrcFormat::F64 &&
+ f2i.dest_format != DestFormat::I64};
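+    // The FTZ bit is only honoured for F32 sources with 16/32-bit integer
+    // destinations; every other combination leaves the FMZ mode as DontCare.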
+ IR::FmzMode fmz_mode{IR::FmzMode::DontCare};
+ if (denorm_cares) {
+ fmz_mode = f2i.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None;
+ }
+ const IR::FpControl fp_control{
+ .no_contraction = true,
+ .rounding = IR::FpRounding::DontCare,
+ .fmz_mode = fmz_mode,
+ };
+ const IR::F16F32F64 op_a{v.ir.FPAbsNeg(src_a, f2i.abs != 0, f2i.neg != 0)};
+ const IR::F16F32F64 rounded_value{[&] {
+ switch (f2i.rounding) {
+ case Rounding::Round:
+ return v.ir.FPRoundEven(op_a, fp_control);
+ case Rounding::Floor:
+ return v.ir.FPFloor(op_a, fp_control);
+ case Rounding::Ceil:
+ return v.ir.FPCeil(op_a, fp_control);
+ case Rounding::Trunc:
+ return v.ir.FPTrunc(op_a, fp_control);
+ default:
+ throw NotImplementedException("Invalid F2I rounding {}", f2i.rounding.Value());
+ }
+ }()};
+ const bool is_signed{f2i.is_signed != 0};
+ const auto [max_bound, min_bound] = ClampBounds(f2i.dest_format, is_signed);
+
+ IR::F16F32F64 intermediate;
+ switch (f2i.src_format) {
+ case SrcFormat::F16: {
+ const IR::F16 max_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast<f32>(max_bound)))};
+ const IR::F16 min_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast<f32>(min_bound)))};
+ intermediate = v.ir.FPClamp(rounded_value, min_val, max_val);
+ break;
+ }
+ case SrcFormat::F32: {
+ const IR::F32 max_val{v.ir.Imm32(static_cast<f32>(max_bound))};
+ const IR::F32 min_val{v.ir.Imm32(static_cast<f32>(min_bound))};
+ intermediate = v.ir.FPClamp(rounded_value, min_val, max_val);
+ break;
+ }
+ case SrcFormat::F64: {
+ const IR::F64 max_val{v.ir.Imm64(max_bound)};
+ const IR::F64 min_val{v.ir.Imm64(min_bound)};
+ intermediate = v.ir.FPClamp(rounded_value, min_val, max_val);
+ break;
+ }
+ default:
+ throw NotImplementedException("Invalid destination format {}", f2i.dest_format.Value());
+ }
+
+ const size_t bitsize{std::max<size_t>(32, BitSize(f2i.dest_format))};
+ IR::U16U32U64 result{v.ir.ConvertFToI(bitsize, is_signed, intermediate)};
+
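+    // NaN handling: mixed F64/I64 conversions write a sign-bit sentinel value,
+    // while the remaining signed conversions flush NaN results to zero.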
+ bool handled_special_case = false;
+ const bool special_nan_cases =
+ (f2i.src_format == SrcFormat::F64) != (f2i.dest_format == DestFormat::I64);
+ if (special_nan_cases) {
+ if (f2i.dest_format == DestFormat::I32) {
+ handled_special_case = true;
+ result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0x8000'0000U), result)};
+ } else if (f2i.dest_format == DestFormat::I64) {
+ handled_special_case = true;
+ result = IR::U64{
+ v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0x8000'0000'0000'0000UL), result)};
+ }
+ }
+ if (!handled_special_case && is_signed) {
+ if (bitsize != 64) {
+ result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0U), result)};
+ } else {
+ result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(u64{0}), result)};
+ }
+ }
+
+ if (bitsize == 64) {
+ v.L(f2i.dest_reg, result);
+ } else {
+ v.X(f2i.dest_reg, result);
+ }
+
+ if (f2i.cc != 0) {
+ throw NotImplementedException("F2I CC");
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::F2I_reg(u64 insn) {
+ union {
+ u64 raw;
+ F2I base;
+ BitField<20, 8, IR::Reg> src_reg;
+ } const f2i{insn};
+
+ const IR::F16F32F64 op_a{[&]() -> IR::F16F32F64 {
+ switch (f2i.base.src_format) {
+ case SrcFormat::F16:
+ return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(X(f2i.src_reg)), f2i.base.half)};
+ case SrcFormat::F32:
+ return F(f2i.src_reg);
+ case SrcFormat::F64:
+ return ir.PackDouble2x32(ir.CompositeConstruct(X(f2i.src_reg), X(f2i.src_reg + 1)));
+ default:
+ throw NotImplementedException("Invalid F2I source format {}",
+ f2i.base.src_format.Value());
+ }
+ }()};
+ TranslateF2I(*this, insn, op_a);
+}
+
+void TranslatorVisitor::F2I_cbuf(u64 insn) {
+ const F2I f2i{insn};
+ const IR::F16F32F64 op_a{[&]() -> IR::F16F32F64 {
+ switch (f2i.src_format) {
+ case SrcFormat::F16:
+ return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(GetCbuf(insn)), f2i.half)};
+ case SrcFormat::F32:
+ return GetFloatCbuf(insn);
+ case SrcFormat::F64: {
+ return UnpackCbuf(*this, insn);
+ }
+ default:
+ throw NotImplementedException("Invalid F2I source format {}", f2i.src_format.Value());
+ }
+ }()};
+ TranslateF2I(*this, insn, op_a);
+}
+
+void TranslatorVisitor::F2I_imm(u64) {
+ throw NotImplementedException("{}", Opcode::F2I_imm);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
new file mode 100644
index 000000000..fa2a7807b
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
@@ -0,0 +1,94 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c, bool neg_a,
+ bool neg_b, bool neg_c, bool sat, bool cc, FmzMode fmz_mode, FpRounding fp_rounding) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a;
+ } const ffma{insn};
+
+ if (cc) {
+ throw NotImplementedException("FFMA CC");
+ }
+ const IR::F32 op_a{v.ir.FPAbsNeg(v.F(ffma.src_a), false, neg_a)};
+ const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)};
+ const IR::F32 op_c{v.ir.FPAbsNeg(src_c, false, neg_c)};
+ const IR::FpControl fp_control{
+ .no_contraction = true,
+ .rounding = CastFpRounding(fp_rounding),
+ .fmz_mode = CastFmzMode(fmz_mode),
+ };
+ IR::F32 value{v.ir.FPFma(op_a, op_b, op_c, fp_control)};
+ if (fmz_mode == FmzMode::FMZ && !sat) {
+ // Do not implement FMZ if SAT is enabled, as it does the logic for us.
+ // On D3D9 mode, anything * 0 is zero, even NAN and infinity
+ const IR::F32 zero{v.ir.Imm32(0.0f)};
+ const IR::U1 zero_a{v.ir.FPEqual(op_a, zero)};
+ const IR::U1 zero_b{v.ir.FPEqual(op_b, zero)};
+ const IR::U1 any_zero{v.ir.LogicalOr(zero_a, zero_b)};
+ value = IR::F32{v.ir.Select(any_zero, op_c, value)};
+ }
+ if (sat) {
+ value = v.ir.FPSaturate(value);
+ }
+ v.F(ffma.dest_reg, value);
+}
+
+void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c) {
+ union {
+ u64 raw;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> neg_b;
+ BitField<49, 1, u64> neg_c;
+ BitField<50, 1, u64> sat;
+ BitField<51, 2, FpRounding> fp_rounding;
+ BitField<53, 2, FmzMode> fmz_mode;
+ } const ffma{insn};
+
+ FFMA(v, insn, src_b, src_c, false, ffma.neg_b != 0, ffma.neg_c != 0, ffma.sat != 0,
+ ffma.cc != 0, ffma.fmz_mode, ffma.fp_rounding);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::FFMA_reg(u64 insn) {
+ FFMA(*this, insn, GetFloatReg20(insn), GetFloatReg39(insn));
+}
+
+void TranslatorVisitor::FFMA_rc(u64 insn) {
+ FFMA(*this, insn, GetFloatReg39(insn), GetFloatCbuf(insn));
+}
+
+void TranslatorVisitor::FFMA_cr(u64 insn) {
+ FFMA(*this, insn, GetFloatCbuf(insn), GetFloatReg39(insn));
+}
+
+void TranslatorVisitor::FFMA_imm(u64 insn) {
+ FFMA(*this, insn, GetFloatImm20(insn), GetFloatReg39(insn));
+}
+
+void TranslatorVisitor::FFMA32I(u64 insn) {
+ union {
+ u64 raw;
+        BitField<0, 8, IR::Reg> src_c; // FFMA32I reuses the destination register field as the addend
+ BitField<52, 1, u64> cc;
+ BitField<53, 2, FmzMode> fmz_mode;
+ BitField<55, 1, u64> sat;
+ BitField<56, 1, u64> neg_a;
+ BitField<57, 1, u64> neg_c;
+ } const ffma32i{insn};
+
+ FFMA(*this, insn, GetFloatImm32(insn), F(ffma32i.src_c), ffma32i.neg_a != 0, false,
+ ffma32i.neg_c != 0, ffma32i.sat != 0, ffma32i.cc != 0, ffma32i.fmz_mode, FpRounding::RN);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp
new file mode 100644
index 000000000..c0d6ee5af
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp
@@ -0,0 +1,62 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void FMNMX(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a_reg;
+ BitField<39, 3, IR::Pred> pred;
+ BitField<42, 1, u64> neg_pred;
+ BitField<44, 1, u64> ftz;
+ BitField<45, 1, u64> negate_b;
+ BitField<46, 1, u64> abs_a;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> negate_a;
+ BitField<49, 1, u64> abs_b;
+ } const fmnmx{insn};
+
+ if (fmnmx.cc) {
+ throw NotImplementedException("FMNMX CC");
+ }
+
+ const IR::U1 pred{v.ir.GetPred(fmnmx.pred)};
+ const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fmnmx.src_a_reg), fmnmx.abs_a != 0, fmnmx.negate_a != 0)};
+ const IR::F32 op_b{v.ir.FPAbsNeg(src_b, fmnmx.abs_b != 0, fmnmx.negate_b != 0)};
+
+ const IR::FpControl control{
+ .no_contraction = false,
+ .rounding = IR::FpRounding::DontCare,
+ .fmz_mode = (fmnmx.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
+ };
+ IR::F32 max{v.ir.FPMax(op_a, op_b, control)};
+ IR::F32 min{v.ir.FPMin(op_a, op_b, control)};
+
+ if (fmnmx.neg_pred != 0) {
+ std::swap(min, max);
+ }
+
+ v.F(fmnmx.dest_reg, IR::F32{v.ir.Select(pred, min, max)});
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::FMNMX_reg(u64 insn) {
+ FMNMX(*this, insn, GetFloatReg20(insn));
+}
+
+void TranslatorVisitor::FMNMX_cbuf(u64 insn) {
+ FMNMX(*this, insn, GetFloatCbuf(insn));
+}
+
+void TranslatorVisitor::FMNMX_imm(u64 insn) {
+ FMNMX(*this, insn, GetFloatImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp
new file mode 100644
index 000000000..2f8605619
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp
@@ -0,0 +1,71 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Operation : u64 {
+ Cos = 0,
+ Sin = 1,
+ Ex2 = 2, // Base 2 exponent
+ Lg2 = 3, // Base 2 logarithm
+ Rcp = 4, // Reciprocal
+ Rsq = 5, // Reciprocal square root
+ Rcp64H = 6, // 64-bit reciprocal
+ Rsq64H = 7, // 64-bit reciprocal square root
+ Sqrt = 8,
+};
+} // Anonymous namespace
+
+void TranslatorVisitor::MUFU(u64 insn) {
+ // MUFU is used to implement a bunch of special functions. See Operation.
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<20, 4, Operation> operation;
+ BitField<46, 1, u64> abs;
+ BitField<48, 1, u64> neg;
+ BitField<50, 1, u64> sat;
+ } const mufu{insn};
+
+ const IR::F32 op_a{ir.FPAbsNeg(F(mufu.src_reg), mufu.abs != 0, mufu.neg != 0)};
+ IR::F32 value{[&]() -> IR::F32 {
+ switch (mufu.operation) {
+ case Operation::Cos:
+ return ir.FPCos(op_a);
+ case Operation::Sin:
+ return ir.FPSin(op_a);
+ case Operation::Ex2:
+ return ir.FPExp2(op_a);
+ case Operation::Lg2:
+ return ir.FPLog2(op_a);
+ case Operation::Rcp:
+ return ir.FPRecip(op_a);
+ case Operation::Rsq:
+ return ir.FPRecipSqrt(op_a);
+ case Operation::Rcp64H:
+ throw NotImplementedException("MUFU.RCP64H");
+ case Operation::Rsq64H:
+ throw NotImplementedException("MUFU.RSQ64H");
+ case Operation::Sqrt:
+ return ir.FPSqrt(op_a);
+ default:
+ throw NotImplementedException("Invalid MUFU operation {}", mufu.operation.Value());
+ }
+ }()};
+
+ if (mufu.sat) {
+ value = ir.FPSaturate(value);
+ }
+
+ F(mufu.dest_reg, value);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp
new file mode 100644
index 000000000..06226b7ce
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp
@@ -0,0 +1,127 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Scale : u64 {
+ None,
+ D2,
+ D4,
+ D8,
+ M8,
+ M4,
+ M2,
+ INVALIDSCALE37,
+};
+
+float ScaleFactor(Scale scale) {
+ switch (scale) {
+ case Scale::None:
+ return 1.0f;
+ case Scale::D2:
+ return 1.0f / 2.0f;
+ case Scale::D4:
+ return 1.0f / 4.0f;
+ case Scale::D8:
+ return 1.0f / 8.0f;
+ case Scale::M8:
+ return 8.0f;
+ case Scale::M4:
+ return 4.0f;
+ case Scale::M2:
+ return 2.0f;
+ case Scale::INVALIDSCALE37:
+ break;
+ }
+ throw NotImplementedException("Invalid FMUL scale {}", scale);
+}
+
+void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, FmzMode fmz_mode,
+ FpRounding fp_rounding, Scale scale, bool sat, bool cc, bool neg_b) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a;
+ } const fmul{insn};
+
+ if (cc) {
+ throw NotImplementedException("FMUL CC");
+ }
+ IR::F32 op_a{v.F(fmul.src_a)};
+ if (scale != Scale::None) {
+ if (fmz_mode != FmzMode::FTZ || fp_rounding != FpRounding::RN) {
+ throw NotImplementedException("FMUL scale with non-FMZ or non-RN modifiers");
+ }
+ op_a = v.ir.FPMul(op_a, v.ir.Imm32(ScaleFactor(scale)));
+ }
+ const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)};
+ const IR::FpControl fp_control{
+ .no_contraction = true,
+ .rounding = CastFpRounding(fp_rounding),
+ .fmz_mode = CastFmzMode(fmz_mode),
+ };
+ IR::F32 value{v.ir.FPMul(op_a, op_b, fp_control)};
+ if (fmz_mode == FmzMode::FMZ && !sat) {
+ // Do not implement FMZ if SAT is enabled, as it does the logic for us.
+ // On D3D9 mode, anything * 0 is zero, even NAN and infinity
+ const IR::F32 zero{v.ir.Imm32(0.0f)};
+ const IR::U1 zero_a{v.ir.FPEqual(op_a, zero)};
+ const IR::U1 zero_b{v.ir.FPEqual(op_b, zero)};
+ const IR::U1 any_zero{v.ir.LogicalOr(zero_a, zero_b)};
+ value = IR::F32{v.ir.Select(any_zero, zero, value)};
+ }
+ if (sat) {
+ value = v.ir.FPSaturate(value);
+ }
+ v.F(fmul.dest_reg, value);
+}
+
+void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
+ union {
+ u64 raw;
+ BitField<39, 2, FpRounding> fp_rounding;
+ BitField<41, 3, Scale> scale;
+ BitField<44, 2, FmzMode> fmz;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> neg_b;
+ BitField<50, 1, u64> sat;
+ } const fmul{insn};
+
+ FMUL(v, insn, src_b, fmul.fmz, fmul.fp_rounding, fmul.scale, fmul.sat != 0, fmul.cc != 0,
+ fmul.neg_b != 0);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::FMUL_reg(u64 insn) {
+    FMUL(*this, insn, GetFloatReg20(insn));
+}
+
+void TranslatorVisitor::FMUL_cbuf(u64 insn) {
+    FMUL(*this, insn, GetFloatCbuf(insn));
+}
+
+void TranslatorVisitor::FMUL_imm(u64 insn) {
+    FMUL(*this, insn, GetFloatImm20(insn));
+}
+
+void TranslatorVisitor::FMUL32I(u64 insn) {
+ union {
+ u64 raw;
+ BitField<52, 1, u64> cc;
+ BitField<53, 2, FmzMode> fmz;
+ BitField<55, 1, u64> sat;
+ } const fmul32i{insn};
+
+ FMUL(*this, insn, GetFloatImm32(insn), fmul32i.fmz, FpRounding::RN, Scale::None,
+ fmul32i.sat != 0, fmul32i.cc != 0, false);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp
new file mode 100644
index 000000000..f91b93fad
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp
@@ -0,0 +1,41 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Mode : u64 {
+ SINCOS,
+ EX2,
+};
+
+void RRO(TranslatorVisitor& v, u64 insn, const IR::F32& src) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<39, 1, Mode> mode;
+ BitField<45, 1, u64> neg;
+ BitField<49, 1, u64> abs;
+ } const rro{insn};
+
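+    // RRO is implemented as a pass-through of the operand (abs/neg only); the
+    // mode bit is ignored here.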
+ v.F(rro.dest_reg, v.ir.FPAbsNeg(src, rro.abs != 0, rro.neg != 0));
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::RRO_reg(u64 insn) {
+ RRO(*this, insn, GetFloatReg20(insn));
+}
+
+void TranslatorVisitor::RRO_cbuf(u64 insn) {
+ RRO(*this, insn, GetFloatCbuf(insn));
+}
+
+void TranslatorVisitor::RRO_imm(u64) {
+ throw NotImplementedException("RRO (imm)");
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp
new file mode 100644
index 000000000..5f93a1513
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp
@@ -0,0 +1,60 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void FSETP(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
+ union {
+ u64 insn;
+ BitField<0, 3, IR::Pred> dest_pred_b;
+ BitField<3, 3, IR::Pred> dest_pred_a;
+ BitField<6, 1, u64> negate_b;
+ BitField<7, 1, u64> abs_a;
+ BitField<8, 8, IR::Reg> src_a_reg;
+ BitField<39, 3, IR::Pred> bop_pred;
+ BitField<42, 1, u64> neg_bop_pred;
+ BitField<43, 1, u64> negate_a;
+ BitField<44, 1, u64> abs_b;
+ BitField<45, 2, BooleanOp> bop;
+ BitField<47, 1, u64> ftz;
+ BitField<48, 4, FPCompareOp> compare_op;
+ } const fsetp{insn};
+
+ const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fsetp.src_a_reg), fsetp.abs_a != 0, fsetp.negate_a != 0)};
+    const IR::F32 op_b{v.ir.FPAbsNeg(src_b, fsetp.abs_b != 0, fsetp.negate_b != 0)};
+ const IR::FpControl control{
+ .no_contraction = false,
+ .rounding = IR::FpRounding::DontCare,
+ .fmz_mode = (fsetp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
+ };
+
+ const BooleanOp bop{fsetp.bop};
+ const FPCompareOp compare_op{fsetp.compare_op};
+ const IR::U1 comparison{FloatingPointCompare(v.ir, op_a, op_b, compare_op, control)};
+ const IR::U1 bop_pred{v.ir.GetPred(fsetp.bop_pred, fsetp.neg_bop_pred != 0)};
+ const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)};
+ const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)};
+ v.ir.SetPred(fsetp.dest_pred_a, result_a);
+ v.ir.SetPred(fsetp.dest_pred_b, result_b);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::FSETP_reg(u64 insn) {
+ FSETP(*this, insn, GetFloatReg20(insn));
+}
+
+void TranslatorVisitor::FSETP_cbuf(u64 insn) {
+ FSETP(*this, insn, GetFloatCbuf(insn));
+}
+
+void TranslatorVisitor::FSETP_imm(u64 insn) {
+ FSETP(*this, insn, GetFloatImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp
new file mode 100644
index 000000000..7550a8d4c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp
@@ -0,0 +1,44 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+void TranslatorVisitor::FSWZADD(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<28, 8, u64> swizzle;
+ BitField<38, 1, u64> ndv;
+ BitField<39, 2, FpRounding> round;
+ BitField<44, 1, u64> ftz;
+ BitField<47, 1, u64> cc;
+ } const fswzadd{insn};
+
+ if (fswzadd.ndv != 0) {
+ throw NotImplementedException("FSWZADD NDV");
+ }
+
+ const IR::F32 src_a{GetFloatReg8(insn)};
+ const IR::F32 src_b{GetFloatReg20(insn)};
+ const IR::U32 swizzle{ir.Imm32(static_cast<u32>(fswzadd.swizzle))};
+
+ const IR::FpControl fp_control{
+ .no_contraction = false,
+ .rounding = CastFpRounding(fswzadd.round),
+ .fmz_mode = (fswzadd.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
+ };
+
+ const IR::F32 result{ir.FSwizzleAdd(src_a, src_b, swizzle, fp_control)};
+ F(fswzadd.dest_reg, result);
+
+ if (fswzadd.cc != 0) {
+ throw NotImplementedException("FSWZADD CC");
+ }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp
new file mode 100644
index 000000000..f2738a93b
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp
@@ -0,0 +1,125 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
+
+namespace Shader::Maxwell {
+namespace {
+void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool abs_a, bool neg_a,
+ Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a;
+ } const hadd2{insn};
+
+ auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hadd2.src_a), swizzle_a)};
+ auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
+ const bool promotion{lhs_a.Type() != lhs_b.Type()};
+ if (promotion) {
+ if (lhs_a.Type() == IR::Type::F16) {
+ lhs_a = v.ir.FPConvert(32, lhs_a);
+ rhs_a = v.ir.FPConvert(32, rhs_a);
+ }
+ if (lhs_b.Type() == IR::Type::F16) {
+ lhs_b = v.ir.FPConvert(32, lhs_b);
+ rhs_b = v.ir.FPConvert(32, rhs_b);
+ }
+ }
+ lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a);
+ rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a);
+
+ lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
+ rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
+
+ const IR::FpControl fp_control{
+ .no_contraction = true,
+ .rounding = IR::FpRounding::DontCare,
+ .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None),
+ };
+ IR::F16F32F64 lhs{v.ir.FPAdd(lhs_a, lhs_b, fp_control)};
+ IR::F16F32F64 rhs{v.ir.FPAdd(rhs_a, rhs_b, fp_control)};
+ if (sat) {
+ lhs = v.ir.FPSaturate(lhs);
+ rhs = v.ir.FPSaturate(rhs);
+ }
+ if (promotion) {
+ lhs = v.ir.FPConvert(16, lhs);
+ rhs = v.ir.FPConvert(16, rhs);
+ }
+ v.X(hadd2.dest_reg, MergeResult(v.ir, hadd2.dest_reg, lhs, rhs, merge));
+}
+
+void HADD2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_b, bool neg_b, Swizzle swizzle_b,
+ const IR::U32& src_b) {
+ union {
+ u64 raw;
+ BitField<49, 2, Merge> merge;
+ BitField<39, 1, u64> ftz;
+ BitField<43, 1, u64> neg_a;
+ BitField<44, 1, u64> abs_a;
+ BitField<47, 2, Swizzle> swizzle_a;
+ } const hadd2{insn};
+
+ HADD2(v, insn, hadd2.merge, hadd2.ftz != 0, sat, hadd2.abs_a != 0, hadd2.neg_a != 0,
+ hadd2.swizzle_a, abs_b, neg_b, swizzle_b, src_b);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::HADD2_reg(u64 insn) {
+ union {
+ u64 raw;
+ BitField<32, 1, u64> sat;
+ BitField<31, 1, u64> neg_b;
+ BitField<30, 1, u64> abs_b;
+ BitField<28, 2, Swizzle> swizzle_b;
+ } const hadd2{insn};
+
+ HADD2(*this, insn, hadd2.sat != 0, hadd2.abs_b != 0, hadd2.neg_b != 0, hadd2.swizzle_b,
+ GetReg20(insn));
+}
+
+void TranslatorVisitor::HADD2_cbuf(u64 insn) {
+ union {
+ u64 raw;
+ BitField<52, 1, u64> sat;
+ BitField<56, 1, u64> neg_b;
+ BitField<54, 1, u64> abs_b;
+ } const hadd2{insn};
+
+ HADD2(*this, insn, hadd2.sat != 0, hadd2.abs_b != 0, hadd2.neg_b != 0, Swizzle::F32,
+ GetCbuf(insn));
+}
+
+void TranslatorVisitor::HADD2_imm(u64 insn) {
+ union {
+ u64 raw;
+ BitField<52, 1, u64> sat;
+ BitField<56, 1, u64> neg_high;
+ BitField<30, 9, u64> high;
+ BitField<29, 1, u64> neg_low;
+ BitField<20, 9, u64> low;
+ } const hadd2{insn};
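+    // Each 9-bit immediate provides the upper bits of one f16 half: the low
+    // half lands in bits [14:6] with its sign at bit 15, the high half in bits
+    // [30:22] with its sign at bit 31.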
+
+ const u32 imm{
+ static_cast<u32>(hadd2.low << 6) | static_cast<u32>((hadd2.neg_low != 0 ? 1 : 0) << 15) |
+ static_cast<u32>(hadd2.high << 22) | static_cast<u32>((hadd2.neg_high != 0 ? 1 : 0) << 31)};
+ HADD2(*this, insn, hadd2.sat != 0, false, false, Swizzle::H1_H0, ir.Imm32(imm));
+}
+
+void TranslatorVisitor::HADD2_32I(u64 insn) {
+ union {
+ u64 raw;
+ BitField<55, 1, u64> ftz;
+ BitField<52, 1, u64> sat;
+ BitField<56, 1, u64> neg_a;
+ BitField<53, 2, Swizzle> swizzle_a;
+ BitField<20, 32, u64> imm32;
+ } const hadd2{insn};
+
+ const u32 imm{static_cast<u32>(hadd2.imm32)};
+ HADD2(*this, insn, Merge::H1_H0, hadd2.ftz != 0, hadd2.sat != 0, false, hadd2.neg_a != 0,
+ hadd2.swizzle_a, false, false, Swizzle::H1_H0, ir.Imm32(imm));
+}
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp
new file mode 100644
index 000000000..fd7986701
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp
@@ -0,0 +1,169 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
+
+namespace Shader::Maxwell {
+namespace {
+void HFMA2(TranslatorVisitor& v, u64 insn, Merge merge, Swizzle swizzle_a, bool neg_b, bool neg_c,
+ Swizzle swizzle_b, Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c,
+ bool sat, HalfPrecision precision) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a;
+ } const hfma2{insn};
+
+ auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hfma2.src_a), swizzle_a)};
+ auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
+ auto [lhs_c, rhs_c]{Extract(v.ir, src_c, swizzle_c)};
+ const bool promotion{lhs_a.Type() != lhs_b.Type() || lhs_a.Type() != lhs_c.Type()};
+ if (promotion) {
+ if (lhs_a.Type() == IR::Type::F16) {
+ lhs_a = v.ir.FPConvert(32, lhs_a);
+ rhs_a = v.ir.FPConvert(32, rhs_a);
+ }
+ if (lhs_b.Type() == IR::Type::F16) {
+ lhs_b = v.ir.FPConvert(32, lhs_b);
+ rhs_b = v.ir.FPConvert(32, rhs_b);
+ }
+ if (lhs_c.Type() == IR::Type::F16) {
+ lhs_c = v.ir.FPConvert(32, lhs_c);
+ rhs_c = v.ir.FPConvert(32, rhs_c);
+ }
+ }
+
+ lhs_b = v.ir.FPAbsNeg(lhs_b, false, neg_b);
+ rhs_b = v.ir.FPAbsNeg(rhs_b, false, neg_b);
+
+ lhs_c = v.ir.FPAbsNeg(lhs_c, false, neg_c);
+ rhs_c = v.ir.FPAbsNeg(rhs_c, false, neg_c);
+
+ const IR::FpControl fp_control{
+ .no_contraction = true,
+ .rounding = IR::FpRounding::DontCare,
+ .fmz_mode = HalfPrecision2FmzMode(precision),
+ };
+ IR::F16F32F64 lhs{v.ir.FPFma(lhs_a, lhs_b, lhs_c, fp_control)};
+ IR::F16F32F64 rhs{v.ir.FPFma(rhs_a, rhs_b, rhs_c, fp_control)};
+ if (precision == HalfPrecision::FMZ && !sat) {
+ // Do not implement FMZ if SAT is enabled, as it does the logic for us.
+ // On D3D9 mode, anything * 0 is zero, even NAN and infinity
+ const IR::F32 zero{v.ir.Imm32(0.0f)};
+ const IR::U1 lhs_zero_a{v.ir.FPEqual(lhs_a, zero)};
+ const IR::U1 lhs_zero_b{v.ir.FPEqual(lhs_b, zero)};
+ const IR::U1 lhs_any_zero{v.ir.LogicalOr(lhs_zero_a, lhs_zero_b)};
+ lhs = IR::F16F32F64{v.ir.Select(lhs_any_zero, lhs_c, lhs)};
+
+ const IR::U1 rhs_zero_a{v.ir.FPEqual(rhs_a, zero)};
+ const IR::U1 rhs_zero_b{v.ir.FPEqual(rhs_b, zero)};
+ const IR::U1 rhs_any_zero{v.ir.LogicalOr(rhs_zero_a, rhs_zero_b)};
+ rhs = IR::F16F32F64{v.ir.Select(rhs_any_zero, rhs_c, rhs)};
+ }
+ if (sat) {
+ lhs = v.ir.FPSaturate(lhs);
+ rhs = v.ir.FPSaturate(rhs);
+ }
+ if (promotion) {
+ lhs = v.ir.FPConvert(16, lhs);
+ rhs = v.ir.FPConvert(16, rhs);
+ }
+ v.X(hfma2.dest_reg, MergeResult(v.ir, hfma2.dest_reg, lhs, rhs, merge));
+}
+
+void HFMA2(TranslatorVisitor& v, u64 insn, bool neg_b, bool neg_c, Swizzle swizzle_b,
+ Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c, bool sat,
+ HalfPrecision precision) {
+ union {
+ u64 raw;
+ BitField<47, 2, Swizzle> swizzle_a;
+ BitField<49, 2, Merge> merge;
+ } const hfma2{insn};
+
+ HFMA2(v, insn, hfma2.merge, hfma2.swizzle_a, neg_b, neg_c, swizzle_b, swizzle_c, src_b, src_c,
+ sat, precision);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::HFMA2_reg(u64 insn) {
+ union {
+ u64 raw;
+ BitField<28, 2, Swizzle> swizzle_b;
+ BitField<32, 1, u64> saturate;
+ BitField<31, 1, u64> neg_b;
+ BitField<30, 1, u64> neg_c;
+ BitField<35, 2, Swizzle> swizzle_c;
+ BitField<37, 2, HalfPrecision> precision;
+ } const hfma2{insn};
+
+ HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, hfma2.swizzle_b, hfma2.swizzle_c,
+ GetReg20(insn), GetReg39(insn), hfma2.saturate != 0, hfma2.precision);
+}
+
+void TranslatorVisitor::HFMA2_rc(u64 insn) {
+ union {
+ u64 raw;
+ BitField<51, 1, u64> neg_c;
+ BitField<52, 1, u64> saturate;
+ BitField<53, 2, Swizzle> swizzle_b;
+ BitField<56, 1, u64> neg_b;
+ BitField<57, 2, HalfPrecision> precision;
+ } const hfma2{insn};
+
+ HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, hfma2.swizzle_b, Swizzle::F32,
+ GetReg39(insn), GetCbuf(insn), hfma2.saturate != 0, hfma2.precision);
+}
+
+void TranslatorVisitor::HFMA2_cr(u64 insn) {
+ union {
+ u64 raw;
+ BitField<51, 1, u64> neg_c;
+ BitField<52, 1, u64> saturate;
+ BitField<53, 2, Swizzle> swizzle_c;
+ BitField<56, 1, u64> neg_b;
+ BitField<57, 2, HalfPrecision> precision;
+ } const hfma2{insn};
+
+ HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, Swizzle::F32, hfma2.swizzle_c,
+ GetCbuf(insn), GetReg39(insn), hfma2.saturate != 0, hfma2.precision);
+}
+
+void TranslatorVisitor::HFMA2_imm(u64 insn) {
+ union {
+ u64 raw;
+ BitField<51, 1, u64> neg_c;
+ BitField<52, 1, u64> saturate;
+ BitField<53, 2, Swizzle> swizzle_c;
+
+ BitField<56, 1, u64> neg_high;
+ BitField<30, 9, u64> high;
+ BitField<29, 1, u64> neg_low;
+ BitField<20, 9, u64> low;
+ BitField<57, 2, HalfPrecision> precision;
+ } const hfma2{insn};
+
+ const u32 imm{
+ static_cast<u32>(hfma2.low << 6) | static_cast<u32>((hfma2.neg_low != 0 ? 1 : 0) << 15) |
+ static_cast<u32>(hfma2.high << 22) | static_cast<u32>((hfma2.neg_high != 0 ? 1 : 0) << 31)};
+
+ HFMA2(*this, insn, false, hfma2.neg_c != 0, Swizzle::H1_H0, hfma2.swizzle_c, ir.Imm32(imm),
+ GetReg39(insn), hfma2.saturate != 0, hfma2.precision);
+}
+
+void TranslatorVisitor::HFMA2_32I(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> src_c;
+ BitField<20, 32, u64> imm32;
+ BitField<52, 1, u64> neg_c;
+ BitField<53, 2, Swizzle> swizzle_a;
+ BitField<55, 2, HalfPrecision> precision;
+ } const hfma2{insn};
+
+ const u32 imm{static_cast<u32>(hfma2.imm32)};
+ HFMA2(*this, insn, Merge::H1_H0, hfma2.swizzle_a, false, hfma2.neg_c != 0, Swizzle::H1_H0,
+ Swizzle::H1_H0, ir.Imm32(imm), X(hfma2.src_c), false, hfma2.precision);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp
new file mode 100644
index 000000000..0dbeb7f56
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp
@@ -0,0 +1,62 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
+
+namespace Shader::Maxwell {
+
+IR::FmzMode HalfPrecision2FmzMode(HalfPrecision precision) {
+ switch (precision) {
+ case HalfPrecision::None:
+ return IR::FmzMode::None;
+ case HalfPrecision::FTZ:
+ return IR::FmzMode::FTZ;
+ case HalfPrecision::FMZ:
+ return IR::FmzMode::FMZ;
+ default:
+ return IR::FmzMode::DontCare;
+ }
+}
+
+std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle) {
+ switch (swizzle) {
+ case Swizzle::H1_H0: {
+ const IR::Value vector{ir.UnpackFloat2x16(value)};
+ return {IR::F16{ir.CompositeExtract(vector, 0)}, IR::F16{ir.CompositeExtract(vector, 1)}};
+ }
+ case Swizzle::H0_H0: {
+ const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 0)};
+ return {scalar, scalar};
+ }
+ case Swizzle::H1_H1: {
+ const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 1)};
+ return {scalar, scalar};
+ }
+ case Swizzle::F32: {
+ const IR::F32 scalar{ir.BitCast<IR::F32>(value)};
+ return {scalar, scalar};
+ }
+ }
+ throw InvalidArgument("Invalid swizzle {}", swizzle);
+}
+
+IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs,
+ Merge merge) {
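+    // Combine the per-half results according to the merge mode: pack both halves, widen the low
+    // result to f32, or insert a single converted half into the existing destination value.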
+ switch (merge) {
+ case Merge::H1_H0:
+ return ir.PackFloat2x16(ir.CompositeConstruct(lhs, rhs));
+ case Merge::F32:
+ return ir.BitCast<IR::U32, IR::F32>(ir.FPConvert(32, lhs));
+ case Merge::MRG_H0:
+ case Merge::MRG_H1: {
+ const IR::Value vector{ir.UnpackFloat2x16(ir.GetReg(dest))};
+ const bool is_h0{merge == Merge::MRG_H0};
+ const IR::F16 insert{ir.FPConvert(16, is_h0 ? lhs : rhs)};
+ return ir.PackFloat2x16(ir.CompositeInsert(vector, insert, is_h0 ? 0 : 1));
+ }
+ }
+ throw InvalidArgument("Invalid merge {}", merge);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h
new file mode 100644
index 000000000..59da56a7e
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h
@@ -0,0 +1,42 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+
+enum class Merge : u64 {
+ H1_H0,
+ F32,
+ MRG_H0,
+ MRG_H1,
+};
+
+enum class Swizzle : u64 {
+ H1_H0,
+ F32,
+ H0_H0,
+ H1_H1,
+};
+
+enum class HalfPrecision : u64 {
+ None = 0,
+ FTZ = 1,
+ FMZ = 2,
+};
+
+IR::FmzMode HalfPrecision2FmzMode(HalfPrecision precision);
+
+std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle);
+
+IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs,
+ Merge merge);
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp
new file mode 100644
index 000000000..3f548ce76
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp
@@ -0,0 +1,143 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
+
+namespace Shader::Maxwell {
+namespace {
+void HMUL2(TranslatorVisitor& v, u64 insn, Merge merge, bool sat, bool abs_a, bool neg_a,
+ Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b,
+ HalfPrecision precision) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a;
+ } const hmul2{insn};
+
+ auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hmul2.src_a), swizzle_a)};
+ auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
+ const bool promotion{lhs_a.Type() != lhs_b.Type()};
+ if (promotion) {
+ if (lhs_a.Type() == IR::Type::F16) {
+ lhs_a = v.ir.FPConvert(32, lhs_a);
+ rhs_a = v.ir.FPConvert(32, rhs_a);
+ }
+ if (lhs_b.Type() == IR::Type::F16) {
+ lhs_b = v.ir.FPConvert(32, lhs_b);
+ rhs_b = v.ir.FPConvert(32, rhs_b);
+ }
+ }
+ lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a);
+ rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a);
+
+ lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
+ rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
+
+ const IR::FpControl fp_control{
+ .no_contraction = true,
+ .rounding = IR::FpRounding::DontCare,
+ .fmz_mode = HalfPrecision2FmzMode(precision),
+ };
+ IR::F16F32F64 lhs{v.ir.FPMul(lhs_a, lhs_b, fp_control)};
+ IR::F16F32F64 rhs{v.ir.FPMul(rhs_a, rhs_b, fp_control)};
+ if (precision == HalfPrecision::FMZ && !sat) {
+        // Do not implement FMZ when SAT is enabled, as saturation already handles it.
+        // In D3D9 mode, anything * 0 is zero, even NaN and infinity
+ const IR::F32 zero{v.ir.Imm32(0.0f)};
+ const IR::U1 lhs_zero_a{v.ir.FPEqual(lhs_a, zero)};
+ const IR::U1 lhs_zero_b{v.ir.FPEqual(lhs_b, zero)};
+ const IR::U1 lhs_any_zero{v.ir.LogicalOr(lhs_zero_a, lhs_zero_b)};
+ lhs = IR::F16F32F64{v.ir.Select(lhs_any_zero, zero, lhs)};
+
+ const IR::U1 rhs_zero_a{v.ir.FPEqual(rhs_a, zero)};
+ const IR::U1 rhs_zero_b{v.ir.FPEqual(rhs_b, zero)};
+ const IR::U1 rhs_any_zero{v.ir.LogicalOr(rhs_zero_a, rhs_zero_b)};
+ rhs = IR::F16F32F64{v.ir.Select(rhs_any_zero, zero, rhs)};
+ }
+ if (sat) {
+ lhs = v.ir.FPSaturate(lhs);
+ rhs = v.ir.FPSaturate(rhs);
+ }
+ if (promotion) {
+ lhs = v.ir.FPConvert(16, lhs);
+ rhs = v.ir.FPConvert(16, rhs);
+ }
+ v.X(hmul2.dest_reg, MergeResult(v.ir, hmul2.dest_reg, lhs, rhs, merge));
+}
+
+void HMUL2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_a, bool neg_a, bool abs_b, bool neg_b,
+ Swizzle swizzle_b, const IR::U32& src_b) {
+ union {
+ u64 raw;
+ BitField<49, 2, Merge> merge;
+ BitField<47, 2, Swizzle> swizzle_a;
+ BitField<39, 2, HalfPrecision> precision;
+ } const hmul2{insn};
+
+ HMUL2(v, insn, hmul2.merge, sat, abs_a, neg_a, hmul2.swizzle_a, abs_b, neg_b, swizzle_b, src_b,
+ hmul2.precision);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::HMUL2_reg(u64 insn) {
+ union {
+ u64 raw;
+ BitField<32, 1, u64> sat;
+ BitField<31, 1, u64> neg_b;
+ BitField<30, 1, u64> abs_b;
+ BitField<44, 1, u64> abs_a;
+ BitField<28, 2, Swizzle> swizzle_b;
+ } const hmul2{insn};
+
+ HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, false, hmul2.abs_b != 0, hmul2.neg_b != 0,
+ hmul2.swizzle_b, GetReg20(insn));
+}
+
+void TranslatorVisitor::HMUL2_cbuf(u64 insn) {
+ union {
+ u64 raw;
+ BitField<52, 1, u64> sat;
+ BitField<54, 1, u64> abs_b;
+ BitField<43, 1, u64> neg_a;
+ BitField<44, 1, u64> abs_a;
+ } const hmul2{insn};
+
+ HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, hmul2.abs_b != 0, false,
+ Swizzle::F32, GetCbuf(insn));
+}
+
+void TranslatorVisitor::HMUL2_imm(u64 insn) {
+ union {
+ u64 raw;
+ BitField<52, 1, u64> sat;
+ BitField<56, 1, u64> neg_high;
+ BitField<30, 9, u64> high;
+ BitField<29, 1, u64> neg_low;
+ BitField<20, 9, u64> low;
+ BitField<43, 1, u64> neg_a;
+ BitField<44, 1, u64> abs_a;
+ } const hmul2{insn};
+
+ const u32 imm{
+ static_cast<u32>(hmul2.low << 6) | static_cast<u32>((hmul2.neg_low != 0 ? 1 : 0) << 15) |
+ static_cast<u32>(hmul2.high << 22) | static_cast<u32>((hmul2.neg_high != 0 ? 1 : 0) << 31)};
+ HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, false, false,
+ Swizzle::H1_H0, ir.Imm32(imm));
+}
+
+void TranslatorVisitor::HMUL2_32I(u64 insn) {
+ union {
+ u64 raw;
+ BitField<55, 2, HalfPrecision> precision;
+ BitField<52, 1, u64> sat;
+ BitField<53, 2, Swizzle> swizzle_a;
+ BitField<20, 32, u64> imm32;
+ } const hmul2{insn};
+
+ const u32 imm{static_cast<u32>(hmul2.imm32)};
+ HMUL2(*this, insn, Merge::H1_H0, hmul2.sat != 0, false, false, hmul2.swizzle_a, false, false,
+ Swizzle::H1_H0, ir.Imm32(imm), hmul2.precision);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp
new file mode 100644
index 000000000..cca5b831f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp
@@ -0,0 +1,117 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
+
+namespace Shader::Maxwell {
+namespace {
+void HSET2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool bf, bool ftz, bool neg_b,
+ bool abs_b, FPCompareOp compare_op, Swizzle swizzle_b) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a_reg;
+ BitField<39, 3, IR::Pred> pred;
+ BitField<42, 1, u64> neg_pred;
+ BitField<43, 1, u64> neg_a;
+ BitField<45, 2, BooleanOp> bop;
+ BitField<44, 1, u64> abs_a;
+ BitField<47, 2, Swizzle> swizzle_a;
+ } const hset2{insn};
+
+ auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hset2.src_a_reg), hset2.swizzle_a)};
+ auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
+
+ if (lhs_a.Type() != lhs_b.Type()) {
+ if (lhs_a.Type() == IR::Type::F16) {
+ lhs_a = v.ir.FPConvert(32, lhs_a);
+ rhs_a = v.ir.FPConvert(32, rhs_a);
+ }
+ if (lhs_b.Type() == IR::Type::F16) {
+ lhs_b = v.ir.FPConvert(32, lhs_b);
+ rhs_b = v.ir.FPConvert(32, rhs_b);
+ }
+ }
+
+ lhs_a = v.ir.FPAbsNeg(lhs_a, hset2.abs_a != 0, hset2.neg_a != 0);
+ rhs_a = v.ir.FPAbsNeg(rhs_a, hset2.abs_a != 0, hset2.neg_a != 0);
+
+ lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
+ rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
+
+ const IR::FpControl control{
+ .no_contraction = false,
+ .rounding = IR::FpRounding::DontCare,
+ .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None),
+ };
+
+ IR::U1 pred{v.ir.GetPred(hset2.pred)};
+ if (hset2.neg_pred != 0) {
+ pred = v.ir.LogicalNot(pred);
+ }
+ const IR::U1 cmp_result_lhs{FloatingPointCompare(v.ir, lhs_a, lhs_b, compare_op, control)};
+ const IR::U1 cmp_result_rhs{FloatingPointCompare(v.ir, rhs_a, rhs_b, compare_op, control)};
+ const IR::U1 bop_result_lhs{PredicateCombine(v.ir, cmp_result_lhs, pred, hset2.bop)};
+ const IR::U1 bop_result_rhs{PredicateCombine(v.ir, cmp_result_rhs, pred, hset2.bop)};
+
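+    // BF writes a half-precision 1.0 (0x3c00) on pass instead of an all-ones mask; the upper
+    // half's result is shifted into bits 16-31.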
+ const u32 true_value = bf ? 0x3c00 : 0xffff;
+ const IR::U32 true_val_lhs{v.ir.Imm32(true_value)};
+ const IR::U32 true_val_rhs{v.ir.Imm32(true_value << 16)};
+ const IR::U32 fail_result{v.ir.Imm32(0)};
+ const IR::U32 result_lhs{v.ir.Select(bop_result_lhs, true_val_lhs, fail_result)};
+ const IR::U32 result_rhs{v.ir.Select(bop_result_rhs, true_val_rhs, fail_result)};
+
+ v.X(hset2.dest_reg, IR::U32{v.ir.BitwiseOr(result_lhs, result_rhs)});
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::HSET2_reg(u64 insn) {
+ union {
+ u64 insn;
+ BitField<30, 1, u64> abs_b;
+ BitField<49, 1, u64> bf;
+ BitField<31, 1, u64> neg_b;
+ BitField<50, 1, u64> ftz;
+ BitField<35, 4, FPCompareOp> compare_op;
+ BitField<28, 2, Swizzle> swizzle_b;
+ } const hset2{insn};
+
+ HSET2(*this, insn, GetReg20(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0,
+ hset2.abs_b != 0, hset2.compare_op, hset2.swizzle_b);
+}
+
+void TranslatorVisitor::HSET2_cbuf(u64 insn) {
+ union {
+ u64 insn;
+ BitField<53, 1, u64> bf;
+ BitField<56, 1, u64> neg_b;
+ BitField<54, 1, u64> ftz;
+ BitField<49, 4, FPCompareOp> compare_op;
+ } const hset2{insn};
+
+ HSET2(*this, insn, GetCbuf(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0, false,
+ hset2.compare_op, Swizzle::F32);
+}
+
+void TranslatorVisitor::HSET2_imm(u64 insn) {
+ union {
+ u64 insn;
+ BitField<53, 1, u64> bf;
+ BitField<54, 1, u64> ftz;
+ BitField<49, 4, FPCompareOp> compare_op;
+ BitField<56, 1, u64> neg_high;
+ BitField<30, 9, u64> high;
+ BitField<29, 1, u64> neg_low;
+ BitField<20, 9, u64> low;
+ } const hset2{insn};
+
+ const u32 imm{
+ static_cast<u32>(hset2.low << 6) | static_cast<u32>((hset2.neg_low != 0 ? 1 : 0) << 15) |
+ static_cast<u32>(hset2.high << 22) | static_cast<u32>((hset2.neg_high != 0 ? 1 : 0) << 31)};
+
+ HSET2(*this, insn, ir.Imm32(imm), hset2.bf != 0, hset2.ftz != 0, false, false, hset2.compare_op,
+ Swizzle::H1_H0);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp
new file mode 100644
index 000000000..b3931dae3
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp
@@ -0,0 +1,118 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
+
+namespace Shader::Maxwell {
+namespace {
+void HSETP2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool neg_b, bool abs_b,
+ Swizzle swizzle_b, FPCompareOp compare_op, bool h_and) {
+ union {
+ u64 insn;
+ BitField<8, 8, IR::Reg> src_a_reg;
+ BitField<3, 3, IR::Pred> dest_pred_a;
+ BitField<0, 3, IR::Pred> dest_pred_b;
+ BitField<39, 3, IR::Pred> pred;
+ BitField<42, 1, u64> neg_pred;
+ BitField<43, 1, u64> neg_a;
+ BitField<45, 2, BooleanOp> bop;
+ BitField<44, 1, u64> abs_a;
+ BitField<6, 1, u64> ftz;
+ BitField<47, 2, Swizzle> swizzle_a;
+ } const hsetp2{insn};
+
+ auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hsetp2.src_a_reg), hsetp2.swizzle_a)};
+ auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
+
+ if (lhs_a.Type() != lhs_b.Type()) {
+ if (lhs_a.Type() == IR::Type::F16) {
+ lhs_a = v.ir.FPConvert(32, lhs_a);
+ rhs_a = v.ir.FPConvert(32, rhs_a);
+ }
+ if (lhs_b.Type() == IR::Type::F16) {
+ lhs_b = v.ir.FPConvert(32, lhs_b);
+ rhs_b = v.ir.FPConvert(32, rhs_b);
+ }
+ }
+
+ lhs_a = v.ir.FPAbsNeg(lhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0);
+ rhs_a = v.ir.FPAbsNeg(rhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0);
+
+ lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
+ rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
+
+ const IR::FpControl control{
+ .no_contraction = false,
+ .rounding = IR::FpRounding::DontCare,
+ .fmz_mode = (hsetp2.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
+ };
+
+ IR::U1 pred{v.ir.GetPred(hsetp2.pred)};
+ if (hsetp2.neg_pred != 0) {
+ pred = v.ir.LogicalNot(pred);
+ }
+ const IR::U1 cmp_result_lhs{FloatingPointCompare(v.ir, lhs_a, lhs_b, compare_op, control)};
+ const IR::U1 cmp_result_rhs{FloatingPointCompare(v.ir, rhs_a, rhs_b, compare_op, control)};
+ const IR::U1 bop_result_lhs{PredicateCombine(v.ir, cmp_result_lhs, pred, hsetp2.bop)};
+ const IR::U1 bop_result_rhs{PredicateCombine(v.ir, cmp_result_rhs, pred, hsetp2.bop)};
+
+ if (h_and) {
+ auto result = v.ir.LogicalAnd(bop_result_lhs, bop_result_rhs);
+ v.ir.SetPred(hsetp2.dest_pred_a, result);
+ v.ir.SetPred(hsetp2.dest_pred_b, v.ir.LogicalNot(result));
+ } else {
+ v.ir.SetPred(hsetp2.dest_pred_a, bop_result_lhs);
+ v.ir.SetPred(hsetp2.dest_pred_b, bop_result_rhs);
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::HSETP2_reg(u64 insn) {
+ union {
+ u64 insn;
+ BitField<30, 1, u64> abs_b;
+ BitField<49, 1, u64> h_and;
+ BitField<31, 1, u64> neg_b;
+ BitField<35, 4, FPCompareOp> compare_op;
+ BitField<28, 2, Swizzle> swizzle_b;
+ } const hsetp2{insn};
+ HSETP2(*this, insn, GetReg20(insn), hsetp2.neg_b != 0, hsetp2.abs_b != 0, hsetp2.swizzle_b,
+ hsetp2.compare_op, hsetp2.h_and != 0);
+}
+
+void TranslatorVisitor::HSETP2_cbuf(u64 insn) {
+ union {
+ u64 insn;
+ BitField<53, 1, u64> h_and;
+ BitField<54, 1, u64> abs_b;
+ BitField<56, 1, u64> neg_b;
+ BitField<49, 4, FPCompareOp> compare_op;
+ } const hsetp2{insn};
+
+ HSETP2(*this, insn, GetCbuf(insn), hsetp2.neg_b != 0, hsetp2.abs_b != 0, Swizzle::F32,
+ hsetp2.compare_op, hsetp2.h_and != 0);
+}
+
+void TranslatorVisitor::HSETP2_imm(u64 insn) {
+ union {
+ u64 insn;
+ BitField<53, 1, u64> h_and;
+ BitField<54, 1, u64> ftz;
+ BitField<49, 4, FPCompareOp> compare_op;
+ BitField<56, 1, u64> neg_high;
+ BitField<30, 9, u64> high;
+ BitField<29, 1, u64> neg_low;
+ BitField<20, 9, u64> low;
+ } const hsetp2{insn};
+
+ const u32 imm{static_cast<u32>(hsetp2.low << 6) |
+ static_cast<u32>((hsetp2.neg_low != 0 ? 1 : 0) << 15) |
+ static_cast<u32>(hsetp2.high << 22) |
+ static_cast<u32>((hsetp2.neg_high != 0 ? 1 : 0) << 31)};
+
+ HSETP2(*this, insn, ir.Imm32(imm), false, false, Swizzle::H1_H0, hsetp2.compare_op,
+ hsetp2.h_and != 0);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
new file mode 100644
index 000000000..b446aae0e
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
@@ -0,0 +1,272 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+[[nodiscard]] IR::U32 CbufLowerBits(IR::IREmitter& ir, bool unaligned, const IR::U32& binding,
+ u32 offset) {
+ if (unaligned) {
+ return ir.Imm32(0);
+ }
+ return ir.GetCbuf(binding, IR::U32{IR::Value{offset}});
+}
+} // Anonymous namespace
+
+IR::U32 TranslatorVisitor::X(IR::Reg reg) {
+ return ir.GetReg(reg);
+}
+
+IR::U64 TranslatorVisitor::L(IR::Reg reg) {
+ if (!IR::IsAligned(reg, 2)) {
+ throw NotImplementedException("Unaligned source register {}", reg);
+ }
+ return IR::U64{ir.PackUint2x32(ir.CompositeConstruct(X(reg), X(reg + 1)))};
+}
+
+IR::F32 TranslatorVisitor::F(IR::Reg reg) {
+ return ir.BitCast<IR::F32>(X(reg));
+}
+
+IR::F64 TranslatorVisitor::D(IR::Reg reg) {
+ if (!IR::IsAligned(reg, 2)) {
+ throw NotImplementedException("Unaligned source register {}", reg);
+ }
+ return IR::F64{ir.PackDouble2x32(ir.CompositeConstruct(X(reg), X(reg + 1)))};
+}
+
+void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) {
+ ir.SetReg(dest_reg, value);
+}
+
+void TranslatorVisitor::L(IR::Reg dest_reg, const IR::U64& value) {
+ if (!IR::IsAligned(dest_reg, 2)) {
+ throw NotImplementedException("Unaligned destination register {}", dest_reg);
+ }
+ const IR::Value result{ir.UnpackUint2x32(value)};
+ for (int i = 0; i < 2; i++) {
+ X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))});
+ }
+}
+
+void TranslatorVisitor::F(IR::Reg dest_reg, const IR::F32& value) {
+ X(dest_reg, ir.BitCast<IR::U32>(value));
+}
+
+void TranslatorVisitor::D(IR::Reg dest_reg, const IR::F64& value) {
+ if (!IR::IsAligned(dest_reg, 2)) {
+ throw NotImplementedException("Unaligned destination register {}", dest_reg);
+ }
+ const IR::Value result{ir.UnpackDouble2x32(value)};
+ for (int i = 0; i < 2; i++) {
+ X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))});
+ }
+}
+
+IR::U32 TranslatorVisitor::GetReg8(u64 insn) {
+ union {
+ u64 raw;
+ BitField<8, 8, IR::Reg> index;
+ } const reg{insn};
+ return X(reg.index);
+}
+
+IR::U32 TranslatorVisitor::GetReg20(u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 8, IR::Reg> index;
+ } const reg{insn};
+ return X(reg.index);
+}
+
+IR::U32 TranslatorVisitor::GetReg39(u64 insn) {
+ union {
+ u64 raw;
+ BitField<39, 8, IR::Reg> index;
+ } const reg{insn};
+ return X(reg.index);
+}
+
+IR::F32 TranslatorVisitor::GetFloatReg8(u64 insn) {
+ return ir.BitCast<IR::F32>(GetReg8(insn));
+}
+
+IR::F32 TranslatorVisitor::GetFloatReg20(u64 insn) {
+ return ir.BitCast<IR::F32>(GetReg20(insn));
+}
+
+IR::F32 TranslatorVisitor::GetFloatReg39(u64 insn) {
+ return ir.BitCast<IR::F32>(GetReg39(insn));
+}
+
+IR::F64 TranslatorVisitor::GetDoubleReg20(u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 8, IR::Reg> index;
+ } const reg{insn};
+ return D(reg.index);
+}
+
+IR::F64 TranslatorVisitor::GetDoubleReg39(u64 insn) {
+ union {
+ u64 raw;
+ BitField<39, 8, IR::Reg> index;
+ } const reg{insn};
+ return D(reg.index);
+}
+
+static std::pair<IR::U32, IR::U32> CbufAddr(u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 14, u64> offset;
+ BitField<34, 5, u64> binding;
+ } const cbuf{insn};
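+    // Only 18 constant buffer slots are addressable; the offset field counts 32-bit words and is
+    // converted to a byte offset below.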
+
+ if (cbuf.binding >= 18) {
+ throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding);
+ }
+ if (cbuf.offset >= 0x10'000) {
+ throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset);
+ }
+ const IR::Value binding{static_cast<u32>(cbuf.binding)};
+ const IR::Value byte_offset{static_cast<u32>(cbuf.offset) * 4};
+ return {IR::U32{binding}, IR::U32{byte_offset}};
+}
+
+IR::U32 TranslatorVisitor::GetCbuf(u64 insn) {
+ const auto [binding, byte_offset]{CbufAddr(insn)};
+ return ir.GetCbuf(binding, byte_offset);
+}
+
+IR::F32 TranslatorVisitor::GetFloatCbuf(u64 insn) {
+ const auto [binding, byte_offset]{CbufAddr(insn)};
+ return ir.GetFloatCbuf(binding, byte_offset);
+}
+
+IR::F64 TranslatorVisitor::GetDoubleCbuf(u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 1, u64> unaligned;
+ } const cbuf{insn};
+
+ const auto [binding, offset_value]{CbufAddr(insn)};
+ const bool unaligned{cbuf.unaligned != 0};
+ const u32 offset{offset_value.U32()};
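+    // Aligned accesses read both 32-bit halves of the double; unaligned accesses only provide
+    // the upper half, so the lower bits are zero-filled by CbufLowerBits.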
+ const IR::Value addr{unaligned ? offset | 4u : (offset & ~7u) | 4u};
+
+ const IR::U32 value{ir.GetCbuf(binding, IR::U32{addr})};
+ const IR::U32 lower_bits{CbufLowerBits(ir, unaligned, binding, offset)};
+ return ir.PackDouble2x32(ir.CompositeConstruct(lower_bits, value));
+}
+
+IR::U64 TranslatorVisitor::GetPackedCbuf(u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 1, u64> unaligned;
+ } const cbuf{insn};
+
+ if (cbuf.unaligned != 0) {
+ throw NotImplementedException("Unaligned packed constant buffer read");
+ }
+ const auto [binding, lower_offset]{CbufAddr(insn)};
+ const IR::U32 upper_offset{ir.Imm32(lower_offset.U32() + 4)};
+ const IR::U32 lower_value{ir.GetCbuf(binding, lower_offset)};
+ const IR::U32 upper_value{ir.GetCbuf(binding, upper_offset)};
+ return ir.PackUint2x32(ir.CompositeConstruct(lower_value, upper_value));
+}
+
+IR::U32 TranslatorVisitor::GetImm20(u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 19, u64> value;
+ BitField<56, 1, u64> is_negative;
+ } const imm{insn};
+
+ if (imm.is_negative != 0) {
+ const s64 raw{static_cast<s64>(imm.value)};
+ return ir.Imm32(static_cast<s32>(-(1LL << 19) + raw));
+ } else {
+ return ir.Imm32(static_cast<u32>(imm.value));
+ }
+}
+
+IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 19, u64> value;
+ BitField<56, 1, u64> is_negative;
+ } const imm{insn};
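+    // The 19-bit field supplies the exponent and upper mantissa bits of the f32; the low 12
+    // mantissa bits are implicitly zero.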
+ const u32 sign_bit{static_cast<u32>(imm.is_negative != 0 ? (1ULL << 31) : 0)};
+ const u32 value{static_cast<u32>(imm.value) << 12};
+ return ir.Imm32(Common::BitCast<f32>(value | sign_bit));
+}
+
+IR::F64 TranslatorVisitor::GetDoubleImm20(u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 19, u64> value;
+ BitField<56, 1, u64> is_negative;
+ } const imm{insn};
+ const u64 sign_bit{imm.is_negative != 0 ? (1ULL << 63) : 0};
+ const u64 value{imm.value << 44};
+ return ir.Imm64(Common::BitCast<f64>(value | sign_bit));
+}
+
+IR::U64 TranslatorVisitor::GetPackedImm20(u64 insn) {
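+    // The sign-extended 20-bit immediate occupies the upper 32 bits of the packed value.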
+ const s64 value{GetImm20(insn).U32()};
+ return ir.Imm64(static_cast<u64>(static_cast<s64>(value) << 32));
+}
+
+IR::U32 TranslatorVisitor::GetImm32(u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 32, u64> value;
+ } const imm{insn};
+ return ir.Imm32(static_cast<u32>(imm.value));
+}
+
+IR::F32 TranslatorVisitor::GetFloatImm32(u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 32, u64> value;
+ } const imm{insn};
+ return ir.Imm32(Common::BitCast<f32>(static_cast<u32>(imm.value)));
+}
+
+void TranslatorVisitor::SetZFlag(const IR::U1& value) {
+ ir.SetZFlag(value);
+}
+
+void TranslatorVisitor::SetSFlag(const IR::U1& value) {
+ ir.SetSFlag(value);
+}
+
+void TranslatorVisitor::SetCFlag(const IR::U1& value) {
+ ir.SetCFlag(value);
+}
+
+void TranslatorVisitor::SetOFlag(const IR::U1& value) {
+ ir.SetOFlag(value);
+}
+
+void TranslatorVisitor::ResetZero() {
+ SetZFlag(ir.Imm1(false));
+}
+
+void TranslatorVisitor::ResetSFlag() {
+ SetSFlag(ir.Imm1(false));
+}
+
+void TranslatorVisitor::ResetCFlag() {
+ SetCFlag(ir.Imm1(false));
+}
+
+void TranslatorVisitor::ResetOFlag() {
+ SetOFlag(ir.Imm1(false));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
new file mode 100644
index 000000000..335e4f24f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
@@ -0,0 +1,387 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/maxwell/instruction.h"
+
+namespace Shader::Maxwell {
+
+enum class CompareOp : u64 {
+ False,
+ LessThan,
+ Equal,
+ LessThanEqual,
+ GreaterThan,
+ NotEqual,
+ GreaterThanEqual,
+ True,
+};
+
+enum class BooleanOp : u64 {
+ AND,
+ OR,
+ XOR,
+};
+
+enum class PredicateOp : u64 {
+ False,
+ True,
+ Zero,
+ NonZero,
+};
+
+enum class FPCompareOp : u64 {
+ F,
+ LT,
+ EQ,
+ LE,
+ GT,
+ NE,
+ GE,
+ NUM,
+ Nan,
+ LTU,
+ EQU,
+ LEU,
+ GTU,
+ NEU,
+ GEU,
+ T,
+};
+
+class TranslatorVisitor {
+public:
+ explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_}, ir(block) {}
+
+ Environment& env;
+ IR::IREmitter ir;
+
+ void AL2P(u64 insn);
+ void ALD(u64 insn);
+ void AST(u64 insn);
+ void ATOM_cas(u64 insn);
+ void ATOM(u64 insn);
+ void ATOMS_cas(u64 insn);
+ void ATOMS(u64 insn);
+ void B2R(u64 insn);
+ void BAR(u64 insn);
+ void BFE_reg(u64 insn);
+ void BFE_cbuf(u64 insn);
+ void BFE_imm(u64 insn);
+ void BFI_reg(u64 insn);
+ void BFI_rc(u64 insn);
+ void BFI_cr(u64 insn);
+ void BFI_imm(u64 insn);
+ void BPT(u64 insn);
+ void BRA(u64 insn);
+ void BRK(u64 insn);
+ void BRX(u64 insn);
+ void CAL();
+ void CCTL(u64 insn);
+ void CCTLL(u64 insn);
+ void CONT(u64 insn);
+ void CS2R(u64 insn);
+ void CSET(u64 insn);
+ void CSETP(u64 insn);
+ void DADD_reg(u64 insn);
+ void DADD_cbuf(u64 insn);
+ void DADD_imm(u64 insn);
+ void DEPBAR();
+ void DFMA_reg(u64 insn);
+ void DFMA_rc(u64 insn);
+ void DFMA_cr(u64 insn);
+ void DFMA_imm(u64 insn);
+ void DMNMX_reg(u64 insn);
+ void DMNMX_cbuf(u64 insn);
+ void DMNMX_imm(u64 insn);
+ void DMUL_reg(u64 insn);
+ void DMUL_cbuf(u64 insn);
+ void DMUL_imm(u64 insn);
+ void DSET_reg(u64 insn);
+ void DSET_cbuf(u64 insn);
+ void DSET_imm(u64 insn);
+ void DSETP_reg(u64 insn);
+ void DSETP_cbuf(u64 insn);
+ void DSETP_imm(u64 insn);
+ void EXIT();
+ void F2F_reg(u64 insn);
+ void F2F_cbuf(u64 insn);
+ void F2F_imm(u64 insn);
+ void F2I_reg(u64 insn);
+ void F2I_cbuf(u64 insn);
+ void F2I_imm(u64 insn);
+ void FADD_reg(u64 insn);
+ void FADD_cbuf(u64 insn);
+ void FADD_imm(u64 insn);
+ void FADD32I(u64 insn);
+ void FCHK_reg(u64 insn);
+ void FCHK_cbuf(u64 insn);
+ void FCHK_imm(u64 insn);
+ void FCMP_reg(u64 insn);
+ void FCMP_rc(u64 insn);
+ void FCMP_cr(u64 insn);
+ void FCMP_imm(u64 insn);
+ void FFMA_reg(u64 insn);
+ void FFMA_rc(u64 insn);
+ void FFMA_cr(u64 insn);
+ void FFMA_imm(u64 insn);
+ void FFMA32I(u64 insn);
+ void FLO_reg(u64 insn);
+ void FLO_cbuf(u64 insn);
+ void FLO_imm(u64 insn);
+ void FMNMX_reg(u64 insn);
+ void FMNMX_cbuf(u64 insn);
+ void FMNMX_imm(u64 insn);
+ void FMUL_reg(u64 insn);
+ void FMUL_cbuf(u64 insn);
+ void FMUL_imm(u64 insn);
+ void FMUL32I(u64 insn);
+ void FSET_reg(u64 insn);
+ void FSET_cbuf(u64 insn);
+ void FSET_imm(u64 insn);
+ void FSETP_reg(u64 insn);
+ void FSETP_cbuf(u64 insn);
+ void FSETP_imm(u64 insn);
+ void FSWZADD(u64 insn);
+ void GETCRSPTR(u64 insn);
+ void GETLMEMBASE(u64 insn);
+ void HADD2_reg(u64 insn);
+ void HADD2_cbuf(u64 insn);
+ void HADD2_imm(u64 insn);
+ void HADD2_32I(u64 insn);
+ void HFMA2_reg(u64 insn);
+ void HFMA2_rc(u64 insn);
+ void HFMA2_cr(u64 insn);
+ void HFMA2_imm(u64 insn);
+ void HFMA2_32I(u64 insn);
+ void HMUL2_reg(u64 insn);
+ void HMUL2_cbuf(u64 insn);
+ void HMUL2_imm(u64 insn);
+ void HMUL2_32I(u64 insn);
+ void HSET2_reg(u64 insn);
+ void HSET2_cbuf(u64 insn);
+ void HSET2_imm(u64 insn);
+ void HSETP2_reg(u64 insn);
+ void HSETP2_cbuf(u64 insn);
+ void HSETP2_imm(u64 insn);
+ void I2F_reg(u64 insn);
+ void I2F_cbuf(u64 insn);
+ void I2F_imm(u64 insn);
+ void I2I_reg(u64 insn);
+ void I2I_cbuf(u64 insn);
+ void I2I_imm(u64 insn);
+ void IADD_reg(u64 insn);
+ void IADD_cbuf(u64 insn);
+ void IADD_imm(u64 insn);
+ void IADD3_reg(u64 insn);
+ void IADD3_cbuf(u64 insn);
+ void IADD3_imm(u64 insn);
+ void IADD32I(u64 insn);
+ void ICMP_reg(u64 insn);
+ void ICMP_rc(u64 insn);
+ void ICMP_cr(u64 insn);
+ void ICMP_imm(u64 insn);
+ void IDE(u64 insn);
+ void IDP_reg(u64 insn);
+ void IDP_imm(u64 insn);
+ void IMAD_reg(u64 insn);
+ void IMAD_rc(u64 insn);
+ void IMAD_cr(u64 insn);
+ void IMAD_imm(u64 insn);
+ void IMAD32I(u64 insn);
+ void IMADSP_reg(u64 insn);
+ void IMADSP_rc(u64 insn);
+ void IMADSP_cr(u64 insn);
+ void IMADSP_imm(u64 insn);
+ void IMNMX_reg(u64 insn);
+ void IMNMX_cbuf(u64 insn);
+ void IMNMX_imm(u64 insn);
+ void IMUL_reg(u64 insn);
+ void IMUL_cbuf(u64 insn);
+ void IMUL_imm(u64 insn);
+ void IMUL32I(u64 insn);
+ void IPA(u64 insn);
+ void ISBERD(u64 insn);
+ void ISCADD_reg(u64 insn);
+ void ISCADD_cbuf(u64 insn);
+ void ISCADD_imm(u64 insn);
+ void ISCADD32I(u64 insn);
+ void ISET_reg(u64 insn);
+ void ISET_cbuf(u64 insn);
+ void ISET_imm(u64 insn);
+ void ISETP_reg(u64 insn);
+ void ISETP_cbuf(u64 insn);
+ void ISETP_imm(u64 insn);
+ void JCAL(u64 insn);
+ void JMP(u64 insn);
+ void JMX(u64 insn);
+ void KIL();
+ void LD(u64 insn);
+ void LDC(u64 insn);
+ void LDG(u64 insn);
+ void LDL(u64 insn);
+ void LDS(u64 insn);
+ void LEA_hi_reg(u64 insn);
+ void LEA_hi_cbuf(u64 insn);
+ void LEA_lo_reg(u64 insn);
+ void LEA_lo_cbuf(u64 insn);
+ void LEA_lo_imm(u64 insn);
+ void LEPC(u64 insn);
+ void LONGJMP(u64 insn);
+ void LOP_reg(u64 insn);
+ void LOP_cbuf(u64 insn);
+ void LOP_imm(u64 insn);
+ void LOP3_reg(u64 insn);
+ void LOP3_cbuf(u64 insn);
+ void LOP3_imm(u64 insn);
+ void LOP32I(u64 insn);
+ void MEMBAR(u64 insn);
+ void MOV_reg(u64 insn);
+ void MOV_cbuf(u64 insn);
+ void MOV_imm(u64 insn);
+ void MOV32I(u64 insn);
+ void MUFU(u64 insn);
+ void NOP(u64 insn);
+ void OUT_reg(u64 insn);
+ void OUT_cbuf(u64 insn);
+ void OUT_imm(u64 insn);
+ void P2R_reg(u64 insn);
+ void P2R_cbuf(u64 insn);
+ void P2R_imm(u64 insn);
+ void PBK();
+ void PCNT();
+ void PEXIT(u64 insn);
+ void PIXLD(u64 insn);
+ void PLONGJMP(u64 insn);
+ void POPC_reg(u64 insn);
+ void POPC_cbuf(u64 insn);
+ void POPC_imm(u64 insn);
+ void PRET(u64 insn);
+ void PRMT_reg(u64 insn);
+ void PRMT_rc(u64 insn);
+ void PRMT_cr(u64 insn);
+ void PRMT_imm(u64 insn);
+ void PSET(u64 insn);
+ void PSETP(u64 insn);
+ void R2B(u64 insn);
+ void R2P_reg(u64 insn);
+ void R2P_cbuf(u64 insn);
+ void R2P_imm(u64 insn);
+ void RAM(u64 insn);
+ void RED(u64 insn);
+ void RET(u64 insn);
+ void RRO_reg(u64 insn);
+ void RRO_cbuf(u64 insn);
+ void RRO_imm(u64 insn);
+ void RTT(u64 insn);
+ void S2R(u64 insn);
+ void SAM(u64 insn);
+ void SEL_reg(u64 insn);
+ void SEL_cbuf(u64 insn);
+ void SEL_imm(u64 insn);
+ void SETCRSPTR(u64 insn);
+ void SETLMEMBASE(u64 insn);
+ void SHF_l_reg(u64 insn);
+ void SHF_l_imm(u64 insn);
+ void SHF_r_reg(u64 insn);
+ void SHF_r_imm(u64 insn);
+ void SHFL(u64 insn);
+ void SHL_reg(u64 insn);
+ void SHL_cbuf(u64 insn);
+ void SHL_imm(u64 insn);
+ void SHR_reg(u64 insn);
+ void SHR_cbuf(u64 insn);
+ void SHR_imm(u64 insn);
+ void SSY();
+ void ST(u64 insn);
+ void STG(u64 insn);
+ void STL(u64 insn);
+ void STP(u64 insn);
+ void STS(u64 insn);
+ void SUATOM(u64 insn);
+ void SUATOM_cas(u64 insn);
+ void SULD(u64 insn);
+ void SURED(u64 insn);
+ void SUST(u64 insn);
+ void SYNC(u64 insn);
+ void TEX(u64 insn);
+ void TEX_b(u64 insn);
+ void TEXS(u64 insn);
+ void TLD(u64 insn);
+ void TLD_b(u64 insn);
+ void TLD4(u64 insn);
+ void TLD4_b(u64 insn);
+ void TLD4S(u64 insn);
+ void TLDS(u64 insn);
+ void TMML(u64 insn);
+ void TMML_b(u64 insn);
+ void TXA(u64 insn);
+ void TXD(u64 insn);
+ void TXD_b(u64 insn);
+ void TXQ(u64 insn);
+ void TXQ_b(u64 insn);
+ void VABSDIFF(u64 insn);
+ void VABSDIFF4(u64 insn);
+ void VADD(u64 insn);
+ void VMAD(u64 insn);
+ void VMNMX(u64 insn);
+ void VOTE(u64 insn);
+ void VOTE_vtg(u64 insn);
+ void VSET(u64 insn);
+ void VSETP(u64 insn);
+ void VSHL(u64 insn);
+ void VSHR(u64 insn);
+ void XMAD_reg(u64 insn);
+ void XMAD_rc(u64 insn);
+ void XMAD_cr(u64 insn);
+ void XMAD_imm(u64 insn);
+
+ [[nodiscard]] IR::U32 X(IR::Reg reg);
+ [[nodiscard]] IR::U64 L(IR::Reg reg);
+ [[nodiscard]] IR::F32 F(IR::Reg reg);
+ [[nodiscard]] IR::F64 D(IR::Reg reg);
+
+ void X(IR::Reg dest_reg, const IR::U32& value);
+ void L(IR::Reg dest_reg, const IR::U64& value);
+ void F(IR::Reg dest_reg, const IR::F32& value);
+ void D(IR::Reg dest_reg, const IR::F64& value);
+
+ [[nodiscard]] IR::U32 GetReg8(u64 insn);
+ [[nodiscard]] IR::U32 GetReg20(u64 insn);
+ [[nodiscard]] IR::U32 GetReg39(u64 insn);
+ [[nodiscard]] IR::F32 GetFloatReg8(u64 insn);
+ [[nodiscard]] IR::F32 GetFloatReg20(u64 insn);
+ [[nodiscard]] IR::F32 GetFloatReg39(u64 insn);
+ [[nodiscard]] IR::F64 GetDoubleReg20(u64 insn);
+ [[nodiscard]] IR::F64 GetDoubleReg39(u64 insn);
+
+ [[nodiscard]] IR::U32 GetCbuf(u64 insn);
+ [[nodiscard]] IR::F32 GetFloatCbuf(u64 insn);
+ [[nodiscard]] IR::F64 GetDoubleCbuf(u64 insn);
+ [[nodiscard]] IR::U64 GetPackedCbuf(u64 insn);
+
+ [[nodiscard]] IR::U32 GetImm20(u64 insn);
+ [[nodiscard]] IR::F32 GetFloatImm20(u64 insn);
+ [[nodiscard]] IR::F64 GetDoubleImm20(u64 insn);
+ [[nodiscard]] IR::U64 GetPackedImm20(u64 insn);
+
+ [[nodiscard]] IR::U32 GetImm32(u64 insn);
+ [[nodiscard]] IR::F32 GetFloatImm32(u64 insn);
+
+ void SetZFlag(const IR::U1& value);
+ void SetSFlag(const IR::U1& value);
+ void SetCFlag(const IR::U1& value);
+ void SetOFlag(const IR::U1& value);
+
+ void ResetZero();
+ void ResetSFlag();
+ void ResetCFlag();
+ void ResetOFlag();
+};
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp
new file mode 100644
index 000000000..8ffd84867
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp
@@ -0,0 +1,105 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void IADD(TranslatorVisitor& v, u64 insn, const IR::U32 op_b, bool neg_a, bool po, bool sat, bool x,
+ bool cc) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_a;
+ } const iadd{insn};
+
+ if (sat) {
+ throw NotImplementedException("IADD SAT");
+ }
+ if (x && po) {
+ throw NotImplementedException("IADD X+PO");
+ }
+    // Operand A always comes from a register, negated below if needed
+ IR::U32 op_a{v.X(iadd.src_a)};
+ if (neg_a) {
+ op_a = v.ir.INeg(op_a);
+ }
+ // Add both operands
+ IR::U32 result{v.ir.IAdd(op_a, op_b)};
+ if (x) {
+ const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))};
+ result = v.ir.IAdd(result, carry);
+ }
+ if (po) {
+ // .PO adds one to the result
+ result = v.ir.IAdd(result, v.ir.Imm32(1));
+ }
+ if (cc) {
+ // Store flags
+ // TODO: Does this grab the result pre-PO or after?
+ if (po) {
+ throw NotImplementedException("IADD CC+PO");
+ }
+ // TODO: How does CC behave when X is set?
+ if (x) {
+ throw NotImplementedException("IADD X+CC");
+ }
+ v.SetZFlag(v.ir.GetZeroFromOp(result));
+ v.SetSFlag(v.ir.GetSignFromOp(result));
+ v.SetCFlag(v.ir.GetCarryFromOp(result));
+ v.SetOFlag(v.ir.GetOverflowFromOp(result));
+ }
+ // Store result
+ v.X(iadd.dest_reg, result);
+}
+
+void IADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) {
+ union {
+ u64 insn;
+ BitField<43, 1, u64> x;
+ BitField<47, 1, u64> cc;
+ BitField<48, 2, u64> three_for_po;
+ BitField<48, 1, u64> neg_b;
+ BitField<49, 1, u64> neg_a;
+ BitField<50, 1, u64> sat;
+ } const iadd{insn};
+
+ const bool po{iadd.three_for_po == 3};
+ if (!po && iadd.neg_b != 0) {
+ op_b = v.ir.INeg(op_b);
+ }
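+    // TODO: Should PO also suppress the negation of operand A here, as it does in IADD32I?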
+ IADD(v, insn, op_b, iadd.neg_a != 0, po, iadd.sat != 0, iadd.x != 0, iadd.cc != 0);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::IADD_reg(u64 insn) {
+ IADD(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::IADD_cbuf(u64 insn) {
+ IADD(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::IADD_imm(u64 insn) {
+ IADD(*this, insn, GetImm20(insn));
+}
+
+void TranslatorVisitor::IADD32I(u64 insn) {
+ union {
+ u64 raw;
+ BitField<52, 1, u64> cc;
+ BitField<53, 1, u64> x;
+ BitField<54, 1, u64> sat;
+ BitField<55, 2, u64> three_for_po;
+ BitField<56, 1, u64> neg_a;
+ } const iadd32i{insn};
+
+ const bool po{iadd32i.three_for_po == 3};
+ const bool neg_a{!po && iadd32i.neg_a != 0};
+ IADD(*this, insn, GetImm32(insn), neg_a, po, iadd32i.sat != 0, iadd32i.x != 0, iadd32i.cc != 0);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp
new file mode 100644
index 000000000..040cfc10f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp
@@ -0,0 +1,122 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Shift : u64 {
+ None,
+ Right,
+ Left,
+};
+enum class Half : u64 {
+ All,
+ Lower,
+ Upper,
+};
+
+[[nodiscard]] IR::U32 IntegerHalf(IR::IREmitter& ir, const IR::U32& value, Half half) {
+ constexpr bool is_signed{false};
+ switch (half) {
+ case Half::All:
+ return value;
+ case Half::Lower:
+ return ir.BitFieldExtract(value, ir.Imm32(0), ir.Imm32(16), is_signed);
+ case Half::Upper:
+ return ir.BitFieldExtract(value, ir.Imm32(16), ir.Imm32(16), is_signed);
+ }
+ throw NotImplementedException("Invalid half");
+}
+
+[[nodiscard]] IR::U32 IntegerShift(IR::IREmitter& ir, const IR::U32& value, Shift shift) {
+ switch (shift) {
+ case Shift::None:
+ return value;
+ case Shift::Right: {
+ // 33-bit RS IADD3 edge case
+ const IR::U1 edge_case{ir.GetCarryFromOp(value)};
+ const IR::U32 shifted{ir.ShiftRightLogical(value, ir.Imm32(16))};
+ return IR::U32{ir.Select(edge_case, ir.IAdd(shifted, ir.Imm32(0x10000)), shifted)};
+ }
+ case Shift::Left:
+ return ir.ShiftLeftLogical(value, ir.Imm32(16));
+ }
+ throw NotImplementedException("Invalid shift");
+}
+
+void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_a, IR::U32 op_b, IR::U32 op_c,
+ Shift shift = Shift::None) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> x;
+ BitField<49, 1, u64> neg_c;
+ BitField<50, 1, u64> neg_b;
+ BitField<51, 1, u64> neg_a;
+    } const iadd3{insn};
+
+ if (iadd3.neg_a != 0) {
+ op_a = v.ir.INeg(op_a);
+ }
+ if (iadd3.neg_b != 0) {
+ op_b = v.ir.INeg(op_b);
+ }
+ if (iadd3.neg_c != 0) {
+ op_c = v.ir.INeg(op_c);
+ }
+ IR::U32 lhs_1{v.ir.IAdd(op_a, op_b)};
+ if (iadd3.x != 0) {
+ // TODO: How does RS behave when X is set?
+ if (shift == Shift::Right) {
+ throw NotImplementedException("IADD3 X+RS");
+ }
+ const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))};
+ lhs_1 = v.ir.IAdd(lhs_1, carry);
+ }
+ const IR::U32 lhs_2{IntegerShift(v.ir, lhs_1, shift)};
+ const IR::U32 result{v.ir.IAdd(lhs_2, op_c)};
+
+ v.X(iadd3.dest_reg, result);
+ if (iadd3.cc != 0) {
+ // TODO: How does CC behave when X is set?
+ if (iadd3.x != 0) {
+ throw NotImplementedException("IADD3 X+CC");
+ }
+ v.SetZFlag(v.ir.GetZeroFromOp(result));
+ v.SetSFlag(v.ir.GetSignFromOp(result));
+ v.SetCFlag(v.ir.GetCarryFromOp(result));
+ const IR::U1 of_1{v.ir.ILessThan(lhs_1, op_a, false)};
+ v.SetOFlag(v.ir.LogicalOr(v.ir.GetOverflowFromOp(result), of_1));
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::IADD3_reg(u64 insn) {
+ union {
+ u64 insn;
+ BitField<37, 2, Shift> shift;
+ BitField<35, 2, Half> half_a;
+ BitField<33, 2, Half> half_b;
+ BitField<31, 2, Half> half_c;
+ } const iadd3{insn};
+
+ const auto op_a{IntegerHalf(ir, GetReg8(insn), iadd3.half_a)};
+ const auto op_b{IntegerHalf(ir, GetReg20(insn), iadd3.half_b)};
+ const auto op_c{IntegerHalf(ir, GetReg39(insn), iadd3.half_c)};
+ IADD3(*this, insn, op_a, op_b, op_c, iadd3.shift);
+}
+
+void TranslatorVisitor::IADD3_cbuf(u64 insn) {
+ IADD3(*this, insn, GetReg8(insn), GetCbuf(insn), GetReg39(insn));
+}
+
+void TranslatorVisitor::IADD3_imm(u64 insn) {
+ IADD3(*this, insn, GetReg8(insn), GetImm20(insn), GetReg39(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp
new file mode 100644
index 000000000..ba6e01926
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp
@@ -0,0 +1,48 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void ICMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& operand) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<48, 1, u64> is_signed;
+ BitField<49, 3, CompareOp> compare_op;
+ } const icmp{insn};
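+    // Select src_reg when the comparison of the third operand against zero passes, otherwise
+    // select src_a.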
+
+ const IR::U32 zero{v.ir.Imm32(0)};
+ const bool is_signed{icmp.is_signed != 0};
+ const IR::U1 cmp_result{IntegerCompare(v.ir, operand, zero, icmp.compare_op, is_signed)};
+
+ const IR::U32 src_reg{v.X(icmp.src_reg)};
+ const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)};
+
+ v.X(icmp.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::ICMP_reg(u64 insn) {
+ ICMP(*this, insn, GetReg20(insn), GetReg39(insn));
+}
+
+void TranslatorVisitor::ICMP_rc(u64 insn) {
+ ICMP(*this, insn, GetReg39(insn), GetCbuf(insn));
+}
+
+void TranslatorVisitor::ICMP_cr(u64 insn) {
+ ICMP(*this, insn, GetCbuf(insn), GetReg39(insn));
+}
+
+void TranslatorVisitor::ICMP_imm(u64 insn) {
+ ICMP(*this, insn, GetImm20(insn), GetReg39(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp
new file mode 100644
index 000000000..8ce1aee04
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp
@@ -0,0 +1,80 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+IR::U1 IsetCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
+ CompareOp compare_op, bool is_signed, bool x) {
+ return x ? ExtendedIntegerCompare(ir, operand_1, operand_2, compare_op, is_signed)
+ : IntegerCompare(ir, operand_1, operand_2, compare_op, is_signed);
+}
+
+void ISET(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<39, 3, IR::Pred> pred;
+ BitField<42, 1, u64> neg_pred;
+ BitField<43, 1, u64> x;
+ BitField<44, 1, u64> bf;
+ BitField<45, 2, BooleanOp> bop;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> is_signed;
+ BitField<49, 3, CompareOp> compare_op;
+ } const iset{insn};
+
+ const IR::U32 src_a{v.X(iset.src_reg)};
+ const bool is_signed{iset.is_signed != 0};
+ const IR::U32 zero{v.ir.Imm32(0)};
+ const bool x{iset.x != 0};
+ const IR::U1 cmp_result{IsetCompare(v.ir, src_a, src_b, iset.compare_op, is_signed, x)};
+
+ IR::U1 pred{v.ir.GetPred(iset.pred)};
+ if (iset.neg_pred != 0) {
+ pred = v.ir.LogicalNot(pred);
+ }
+ const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, iset.bop)};
+
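+    // BF writes a floating-point 1.0 on pass instead of an all-ones integer mask.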
+ const IR::U32 one_mask{v.ir.Imm32(-1)};
+ const IR::U32 fp_one{v.ir.Imm32(0x3f800000)};
+ const IR::U32 pass_result{iset.bf == 0 ? one_mask : fp_one};
+ const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)};
+
+ v.X(iset.dest_reg, result);
+ if (iset.cc != 0) {
+ if (x) {
+ throw NotImplementedException("ISET.CC + X");
+ }
+ const IR::U1 is_zero{v.ir.IEqual(result, zero)};
+ v.SetZFlag(is_zero);
+ if (iset.bf != 0) {
+ v.ResetSFlag();
+ } else {
+ v.SetSFlag(v.ir.LogicalNot(is_zero));
+ }
+ v.ResetCFlag();
+ v.ResetOFlag();
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::ISET_reg(u64 insn) {
+ ISET(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::ISET_cbuf(u64 insn) {
+ ISET(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::ISET_imm(u64 insn) {
+ ISET(*this, insn, GetImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp
new file mode 100644
index 000000000..0b8119ddd
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp
@@ -0,0 +1,182 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class FloatFormat : u64 {
+ F16 = 1,
+ F32 = 2,
+ F64 = 3,
+};
+
+enum class IntFormat : u64 {
+ U8 = 0,
+ U16 = 1,
+ U32 = 2,
+ U64 = 3,
+};
+
+union Encoding {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 2, FloatFormat> float_format;
+ BitField<10, 2, IntFormat> int_format;
+ BitField<13, 1, u64> is_signed;
+ BitField<39, 2, FpRounding> fp_rounding;
+ BitField<41, 2, u64> selector;
+ BitField<47, 1, u64> cc;
+ BitField<45, 1, u64> neg;
+ BitField<49, 1, u64> abs;
+};
+
+bool Is64(u64 insn) {
+ return Encoding{insn}.int_format == IntFormat::U64;
+}
+
+int BitSize(FloatFormat format) {
+ switch (format) {
+ case FloatFormat::F16:
+ return 16;
+ case FloatFormat::F32:
+ return 32;
+ case FloatFormat::F64:
+ return 64;
+ }
+ throw NotImplementedException("Invalid float format {}", format);
+}
+
+IR::U32 SmallAbs(TranslatorVisitor& v, const IR::U32& value, int bitsize) {
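+    // Branchless absolute value of a sign-extended sub-32-bit integer; the most negative value
+    // has no positive counterpart and is returned unchanged.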
+ const IR::U32 least_value{v.ir.Imm32(-(1 << (bitsize - 1)))};
+ const IR::U32 mask{v.ir.ShiftRightArithmetic(value, v.ir.Imm32(bitsize - 1))};
+ const IR::U32 absolute{v.ir.BitwiseXor(v.ir.IAdd(value, mask), mask)};
+ const IR::U1 is_least{v.ir.IEqual(value, least_value)};
+ return IR::U32{v.ir.Select(is_least, value, absolute)};
+}
+
+void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) {
+ const Encoding i2f{insn};
+ if (i2f.cc != 0) {
+ throw NotImplementedException("I2F CC");
+ }
+ const bool is_signed{i2f.is_signed != 0};
+ int src_bitsize{};
+ switch (i2f.int_format) {
+ case IntFormat::U8:
+ src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast<u32>(i2f.selector) * 8),
+ v.ir.Imm32(8), is_signed);
+ if (i2f.abs != 0) {
+ src = SmallAbs(v, src, 8);
+ }
+ src_bitsize = 8;
+ break;
+ case IntFormat::U16:
+ if (i2f.selector == 1 || i2f.selector == 3) {
+ throw NotImplementedException("Invalid U16 selector {}", i2f.selector.Value());
+ }
+ src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast<u32>(i2f.selector) * 8),
+ v.ir.Imm32(16), is_signed);
+ if (i2f.abs != 0) {
+ src = SmallAbs(v, src, 16);
+ }
+ src_bitsize = 16;
+ break;
+ case IntFormat::U32:
+ case IntFormat::U64:
+ if (i2f.selector != 0) {
+ throw NotImplementedException("Unexpected selector {}", i2f.selector.Value());
+ }
+ if (i2f.abs != 0 && is_signed) {
+ src = v.ir.IAbs(src);
+ }
+ src_bitsize = i2f.int_format == IntFormat::U64 ? 64 : 32;
+ break;
+ }
+ const int conversion_src_bitsize{i2f.int_format == IntFormat::U64 ? 64 : 32};
+ const int dst_bitsize{BitSize(i2f.float_format)};
+ const IR::FpControl fp_control{
+ .no_contraction = false,
+ .rounding = CastFpRounding(i2f.fp_rounding),
+ .fmz_mode = IR::FmzMode::DontCare,
+ };
+ auto value{v.ir.ConvertIToF(static_cast<size_t>(dst_bitsize),
+ static_cast<size_t>(conversion_src_bitsize), is_signed, src,
+ fp_control)};
+ if (i2f.neg != 0) {
+ if (i2f.abs != 0 || !is_signed) {
+ // We know the value is positive
+ value = v.ir.FPNeg(value);
+ } else {
+ // Only negate if the input isn't the lowest value
+ IR::U1 is_least;
+ if (src_bitsize == 64) {
+ is_least = v.ir.IEqual(src, v.ir.Imm64(std::numeric_limits<s64>::min()));
+ } else if (src_bitsize == 32) {
+ is_least = v.ir.IEqual(src, v.ir.Imm32(std::numeric_limits<s32>::min()));
+ } else {
+ const IR::U32 least_value{v.ir.Imm32(-(1 << (src_bitsize - 1)))};
+ is_least = v.ir.IEqual(src, least_value);
+ }
+ value = IR::F16F32F64{v.ir.Select(is_least, value, v.ir.FPNeg(value))};
+ }
+ }
+ switch (i2f.float_format) {
+ case FloatFormat::F16: {
+ const IR::F16 zero{v.ir.FPConvert(16, v.ir.Imm32(0.0f))};
+ v.X(i2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(value, zero)));
+ break;
+ }
+ case FloatFormat::F32:
+ v.F(i2f.dest_reg, value);
+ break;
+ case FloatFormat::F64: {
+ if (!IR::IsAligned(i2f.dest_reg, 2)) {
+ throw NotImplementedException("Unaligned destination {}", i2f.dest_reg.Value());
+ }
+ const IR::Value vector{v.ir.UnpackDouble2x32(value)};
+ for (int i = 0; i < 2; ++i) {
+ v.X(i2f.dest_reg + i, IR::U32{v.ir.CompositeExtract(vector, static_cast<size_t>(i))});
+ }
+ break;
+ }
+ default:
+ throw NotImplementedException("Invalid float format {}", i2f.float_format.Value());
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::I2F_reg(u64 insn) {
+ if (Is64(insn)) {
+ union {
+ u64 raw;
+ BitField<20, 8, IR::Reg> reg;
+ } const value{insn};
+ const IR::Value regs{ir.CompositeConstruct(ir.GetReg(value.reg), ir.GetReg(value.reg + 1))};
+ I2F(*this, insn, ir.PackUint2x32(regs));
+ } else {
+ I2F(*this, insn, GetReg20(insn));
+ }
+}
+
+void TranslatorVisitor::I2F_cbuf(u64 insn) {
+ if (Is64(insn)) {
+ I2F(*this, insn, GetPackedCbuf(insn));
+ } else {
+ I2F(*this, insn, GetCbuf(insn));
+ }
+}
+
+void TranslatorVisitor::I2F_imm(u64 insn) {
+ if (Is64(insn)) {
+ I2F(*this, insn, GetPackedImm20(insn));
+ } else {
+ I2F(*this, insn, GetImm20(insn));
+ }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp
new file mode 100644
index 000000000..5feefc0ce
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp
@@ -0,0 +1,82 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class MaxShift : u64 {
+ U32,
+ Undefined,
+ U64,
+ S64,
+};
+
+IR::U64 PackedShift(IR::IREmitter& ir, const IR::U64& packed_int, const IR::U32& safe_shift,
+ bool right_shift, bool is_signed) {
+ if (!right_shift) {
+ return ir.ShiftLeftLogical(packed_int, safe_shift);
+ }
+ if (is_signed) {
+ return ir.ShiftRightArithmetic(packed_int, safe_shift);
+ }
+ return ir.ShiftRightLogical(packed_int, safe_shift);
+}
+
+void SHF(TranslatorVisitor& v, u64 insn, const IR::U32& shift, const IR::U32& high_bits,
+ bool right_shift) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<0, 8, IR::Reg> lo_bits_reg;
+ BitField<37, 2, MaxShift> max_shift;
+ BitField<47, 1, u64> cc;
+ BitField<48, 2, u64> x_mode;
+ BitField<50, 1, u64> wrap;
+ } const shf{insn};
+
+ if (shf.cc != 0) {
+ throw NotImplementedException("SHF CC");
+ }
+ if (shf.x_mode != 0) {
+ throw NotImplementedException("SHF X Mode");
+ }
+ if (shf.max_shift == MaxShift::Undefined) {
+ throw NotImplementedException("SHF Use of undefined MaxShift value");
+ }
+ const IR::U32 low_bits{v.X(shf.lo_bits_reg)};
+ const IR::U64 packed_int{v.ir.PackUint2x32(v.ir.CompositeConstruct(low_bits, high_bits))};
+ const IR::U32 max_shift{shf.max_shift == MaxShift::U32 ? v.ir.Imm32(32) : v.ir.Imm32(63)};
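+    // WRAP masks the shift amount instead of clamping it to max_shift.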
+ const IR::U32 safe_shift{shf.wrap != 0
+ ? v.ir.BitwiseAnd(shift, v.ir.ISub(max_shift, v.ir.Imm32(1)))
+ : v.ir.UMin(shift, max_shift)};
+
+ const bool is_signed{shf.max_shift == MaxShift::S64};
+ const IR::U64 shifted_value{PackedShift(v.ir, packed_int, safe_shift, right_shift, is_signed)};
+ const IR::Value unpacked_value{v.ir.UnpackUint2x32(shifted_value)};
+
+ const IR::U32 result{v.ir.CompositeExtract(unpacked_value, right_shift ? 0 : 1)};
+ v.X(shf.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::SHF_l_reg(u64 insn) {
+ SHF(*this, insn, GetReg20(insn), GetReg39(insn), false);
+}
+
+void TranslatorVisitor::SHF_l_imm(u64 insn) {
+ SHF(*this, insn, GetImm20(insn), GetReg39(insn), false);
+}
+
+void TranslatorVisitor::SHF_r_reg(u64 insn) {
+ SHF(*this, insn, GetReg20(insn), GetReg39(insn), true);
+}
+
+void TranslatorVisitor::SHF_r_imm(u64 insn) {
+ SHF(*this, insn, GetImm20(insn), GetReg39(insn), true);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp
new file mode 100644
index 000000000..1badbacc4
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp
@@ -0,0 +1,64 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void IMNMX(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<39, 3, IR::Pred> pred;
+ BitField<42, 1, u64> neg_pred;
+ BitField<43, 2, u64> mode;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> is_signed;
+ } const imnmx{insn};
+
+ if (imnmx.cc != 0) {
+ throw NotImplementedException("IMNMX CC");
+ }
+
+ if (imnmx.mode != 0) {
+ throw NotImplementedException("IMNMX.MODE");
+ }
+
+ const IR::U1 pred{v.ir.GetPred(imnmx.pred)};
+ const IR::U32 op_a{v.X(imnmx.src_reg)};
+ IR::U32 min;
+ IR::U32 max;
+
+ if (imnmx.is_signed != 0) {
+ min = IR::U32{v.ir.SMin(op_a, op_b)};
+ max = IR::U32{v.ir.SMax(op_a, op_b)};
+ } else {
+ min = IR::U32{v.ir.UMin(op_a, op_b)};
+ max = IR::U32{v.ir.UMax(op_a, op_b)};
+ }
+ if (imnmx.neg_pred != 0) {
+ std::swap(min, max);
+ }
+
+ const IR::U32 result{v.ir.Select(pred, min, max)};
+ v.X(imnmx.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::IMNMX_reg(u64 insn) {
+ IMNMX(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::IMNMX_cbuf(u64 insn) {
+ IMNMX(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::IMNMX_imm(u64 insn) {
+ IMNMX(*this, insn, GetImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp
new file mode 100644
index 000000000..5ece7678d
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp
@@ -0,0 +1,36 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void POPC(TranslatorVisitor& v, u64 insn, const IR::U32& src) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<40, 1, u64> tilde;
+ } const popc{insn};
+
+ const IR::U32 operand = popc.tilde == 0 ? src : v.ir.BitwiseNot(src);
+ const IR::U32 result = v.ir.BitCount(operand);
+ v.X(popc.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::POPC_reg(u64 insn) {
+ POPC(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::POPC_cbuf(u64 insn) {
+ POPC(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::POPC_imm(u64 insn) {
+ POPC(*this, insn, GetImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp
new file mode 100644
index 000000000..044671943
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp
@@ -0,0 +1,86 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool cc, bool neg_a, bool neg_b,
+ u64 scale_imm) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> op_a;
+ } const iscadd{insn};
+
+ const bool po{neg_a && neg_b};
+ IR::U32 op_a{v.X(iscadd.op_a)};
+ if (po) {
+ // When PO is present, add one
+ op_b = v.ir.IAdd(op_b, v.ir.Imm32(1));
+ } else {
+ // When PO is not present, the bits are interpreted as negation
+ if (neg_a) {
+ op_a = v.ir.INeg(op_a);
+ }
+ if (neg_b) {
+ op_b = v.ir.INeg(op_b);
+ }
+ }
+ // With the operands already processed, scale A
+ const IR::U32 scale{v.ir.Imm32(static_cast<u32>(scale_imm))};
+ const IR::U32 scaled_a{v.ir.ShiftLeftLogical(op_a, scale)};
+
+ const IR::U32 result{v.ir.IAdd(scaled_a, op_b)};
+ v.X(iscadd.dest_reg, result);
+
+ if (cc) {
+ v.SetZFlag(v.ir.GetZeroFromOp(result));
+ v.SetSFlag(v.ir.GetSignFromOp(result));
+ const IR::U1 carry{v.ir.GetCarryFromOp(result)};
+ const IR::U1 overflow{v.ir.GetOverflowFromOp(result)};
+ v.SetCFlag(po ? v.ir.LogicalOr(carry, v.ir.GetCarryFromOp(op_b)) : carry);
+ v.SetOFlag(po ? v.ir.LogicalOr(overflow, v.ir.GetOverflowFromOp(op_b)) : overflow);
+ }
+}
+
+void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) {
+ union {
+ u64 raw;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> neg_b;
+ BitField<49, 1, u64> neg_a;
+ BitField<39, 5, u64> scale;
+ } const iscadd{insn};
+
+ ISCADD(v, insn, op_b, iscadd.cc != 0, iscadd.neg_a != 0, iscadd.neg_b != 0, iscadd.scale);
+}
+
+} // Anonymous namespace
+
+void TranslatorVisitor::ISCADD_reg(u64 insn) {
+ ISCADD(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::ISCADD_cbuf(u64 insn) {
+ ISCADD(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::ISCADD_imm(u64 insn) {
+ ISCADD(*this, insn, GetImm20(insn));
+}
+
+void TranslatorVisitor::ISCADD32I(u64 insn) {
+ union {
+ u64 raw;
+ BitField<52, 1, u64> cc;
+ BitField<53, 5, u64> scale;
+ } const iscadd{insn};
+
+ return ISCADD(*this, insn, GetImm32(insn), iscadd.cc != 0, false, false, iscadd.scale);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp
new file mode 100644
index 000000000..bee10e5b9
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp
@@ -0,0 +1,58 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+IR::U1 IsetpCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
+ CompareOp compare_op, bool is_signed, bool x) {
+ return x ? ExtendedIntegerCompare(ir, operand_1, operand_2, compare_op, is_signed)
+ : IntegerCompare(ir, operand_1, operand_2, compare_op, is_signed);
+}
+
+void ISETP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) {
+ union {
+ u64 raw;
+ BitField<0, 3, IR::Pred> dest_pred_b;
+ BitField<3, 3, IR::Pred> dest_pred_a;
+ BitField<8, 8, IR::Reg> src_reg_a;
+ BitField<39, 3, IR::Pred> bop_pred;
+ BitField<42, 1, u64> neg_bop_pred;
+ BitField<43, 1, u64> x;
+ BitField<45, 2, BooleanOp> bop;
+ BitField<48, 1, u64> is_signed;
+ BitField<49, 3, CompareOp> compare_op;
+ } const isetp{insn};
+
+ const bool is_signed{isetp.is_signed != 0};
+ const bool x{isetp.x != 0};
+ const BooleanOp bop{isetp.bop};
+ const CompareOp compare_op{isetp.compare_op};
+ const IR::U32 op_a{v.X(isetp.src_reg_a)};
+ const IR::U1 comparison{IsetpCompare(v.ir, op_a, op_b, compare_op, is_signed, x)};
+ const IR::U1 bop_pred{v.ir.GetPred(isetp.bop_pred, isetp.neg_bop_pred != 0)};
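+    // Predicate A receives the comparison combined with bop_pred; predicate B receives the
+    // negated comparison combined with bop_pred.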
+ const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)};
+ const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)};
+ v.ir.SetPred(isetp.dest_pred_a, result_a);
+ v.ir.SetPred(isetp.dest_pred_b, result_b);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::ISETP_reg(u64 insn) {
+ ISETP(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::ISETP_cbuf(u64 insn) {
+ ISETP(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::ISETP_imm(u64 insn) {
+ ISETP(*this, insn, GetImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp
new file mode 100644
index 000000000..20af68852
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp
@@ -0,0 +1,71 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void SHL(TranslatorVisitor& v, u64 insn, const IR::U32& unsafe_shift) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg_a;
+ BitField<39, 1, u64> w;
+ BitField<43, 1, u64> x;
+ BitField<47, 1, u64> cc;
+ } const shl{insn};
+
+ if (shl.x != 0) {
+ throw NotImplementedException("SHL.X");
+ }
+ if (shl.cc != 0) {
+ throw NotImplementedException("SHL.CC");
+ }
+ const IR::U32 base{v.X(shl.src_reg_a)};
+ IR::U32 result;
+ if (shl.w != 0) {
+        // When .W is set, the shift value is wrapped modulo 32.
+        // To emulate this we just have to wrap it ourselves.
+ const IR::U32 shift{v.ir.BitwiseAnd(unsafe_shift, v.ir.Imm32(31))};
+ result = v.ir.ShiftLeftLogical(base, shift);
+ } else {
+ // When .W is not set, the shift value is clamped between 0 and 32.
+        // To emulate this we have to keep in mind the special shift of 32, which evaluates to 0.
+        // We can safely evaluate an out-of-bounds shift according to the SPIR-V specification:
+ //
+ // https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#OpShiftLeftLogical
+ // "Shift is treated as unsigned. The resulting value is undefined if Shift is greater than
+ // or equal to the bit width of the components of Base."
+ //
+        // And according to the GLASM specification it is also safe to evaluate out-of-bounds shifts:
+ //
+ // https://www.khronos.org/registry/OpenGL/extensions/NV/NV_gpu_program4.txt
+ // "The results of a shift operation ("<<") are undefined if the value of the second operand
+ // is negative, or greater than or equal to the number of bits in the first operand."
+ //
+ // Emphasis on undefined results in contrast to undefined behavior.
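+        // For example, a requested shift of 32 or more fails the is_safe check below and the
+        // result is forced to zero.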
+ //
+ const IR::U1 is_safe{v.ir.ILessThan(unsafe_shift, v.ir.Imm32(32), false)};
+ const IR::U32 unsafe_result{v.ir.ShiftLeftLogical(base, unsafe_shift)};
+ result = IR::U32{v.ir.Select(is_safe, unsafe_result, v.ir.Imm32(0))};
+ }
+ v.X(shl.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::SHL_reg(u64 insn) {
+ SHL(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::SHL_cbuf(u64 insn) {
+ SHL(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::SHL_imm(u64 insn) {
+ SHL(*this, insn, GetImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp
new file mode 100644
index 000000000..be00bb605
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp
@@ -0,0 +1,66 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void SHR(TranslatorVisitor& v, u64 insn, const IR::U32& shift) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg_a;
+ BitField<39, 1, u64> is_wrapped;
+ BitField<40, 1, u64> brev;
+ BitField<43, 1, u64> xmode;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> is_signed;
+ } const shr{insn};
+
+ if (shr.xmode != 0) {
+ throw NotImplementedException("SHR.XMODE");
+ }
+ if (shr.cc != 0) {
+ throw NotImplementedException("SHR.CC");
+ }
+
+ IR::U32 base{v.X(shr.src_reg_a)};
+ if (shr.brev == 1) {
+ base = v.ir.BitReverse(base);
+ }
+ IR::U32 result;
+ const IR::U32 safe_shift = shr.is_wrapped == 0 ? shift : v.ir.BitwiseAnd(shift, v.ir.Imm32(31));
+ if (shr.is_signed == 1) {
+ result = IR::U32{v.ir.ShiftRightArithmetic(base, safe_shift)};
+ } else {
+ result = IR::U32{v.ir.ShiftRightLogical(base, safe_shift)};
+ }
+
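+    // Emulate the clamp for non-wrapped shifts: out-of-range logical shifts yield zero and
+    // arithmetic shifts yield all sign bits, derived here from the sign of the shifted result.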
+ if (shr.is_wrapped == 0) {
+ const IR::U32 zero{v.ir.Imm32(0)};
+ const IR::U32 safe_bits{v.ir.Imm32(32)};
+
+ const IR::U1 is_negative{v.ir.ILessThan(result, zero, true)};
+ const IR::U1 is_safe{v.ir.ILessThan(shift, safe_bits, false)};
+ const IR::U32 clamped_value{v.ir.Select(is_negative, v.ir.Imm32(-1), zero)};
+ result = IR::U32{v.ir.Select(is_safe, result, clamped_value)};
+ }
+ v.X(shr.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::SHR_reg(u64 insn) {
+ SHR(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::SHR_cbuf(u64 insn) {
+ SHR(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::SHR_imm(u64 insn) {
+ SHR(*this, insn, GetImm20(insn));
+}
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp
new file mode 100644
index 000000000..2932cdc42
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp
@@ -0,0 +1,135 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class SelectMode : u64 {
+ Default,
+ CLO,
+ CHI,
+ CSFU,
+ CBCC,
+};
+
+enum class Half : u64 {
+ H0, // Least-significant bits (15:0)
+ H1, // Most-significant bits (31:16)
+};
+
+IR::U32 ExtractHalf(TranslatorVisitor& v, const IR::U32& src, Half half, bool is_signed) {
+ const IR::U32 offset{v.ir.Imm32(half == Half::H1 ? 16 : 0)};
+ return v.ir.BitFieldExtract(src, offset, v.ir.Imm32(16), is_signed);
+}
+
+void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c,
+ SelectMode select_mode, Half half_b, bool psl, bool mrg, bool x) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg_a;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> is_a_signed;
+ BitField<49, 1, u64> is_b_signed;
+ BitField<53, 1, Half> half_a;
+ } const xmad{insn};
+
+ if (x) {
+ throw NotImplementedException("XMAD X");
+ }
+ const IR::U32 op_a{ExtractHalf(v, v.X(xmad.src_reg_a), xmad.half_a, xmad.is_a_signed != 0)};
+ const IR::U32 op_b{ExtractHalf(v, src_b, half_b, xmad.is_b_signed != 0)};
+
+ IR::U32 product{v.ir.IMul(op_a, op_b)};
+ if (psl) {
+        // .PSL shifts the product left by 16 bits
+ product = v.ir.ShiftLeftLogical(product, v.ir.Imm32(16));
+ }
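+    // Select the third operand; CBCC adds src_b shifted left by 16 bits to src_c.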
+ const IR::U32 op_c{[&]() -> IR::U32 {
+ switch (select_mode) {
+ case SelectMode::Default:
+ return src_c;
+ case SelectMode::CLO:
+ return ExtractHalf(v, src_c, Half::H0, false);
+ case SelectMode::CHI:
+ return ExtractHalf(v, src_c, Half::H1, false);
+ case SelectMode::CBCC:
+ return v.ir.IAdd(v.ir.ShiftLeftLogical(src_b, v.ir.Imm32(16)), src_c);
+ case SelectMode::CSFU:
+ throw NotImplementedException("XMAD CSFU");
+ }
+ throw NotImplementedException("Invalid XMAD select mode {}", select_mode);
+ }()};
+ IR::U32 result{v.ir.IAdd(product, op_c)};
+ if (mrg) {
+ // .MRG inserts src_b [15:0] into result's [31:16].
+ const IR::U32 lsb_b{ExtractHalf(v, src_b, Half::H0, false)};
+ result = v.ir.BitFieldInsert(result, lsb_b, v.ir.Imm32(16), v.ir.Imm32(16));
+ }
+ if (xmad.cc) {
+ throw NotImplementedException("XMAD CC");
+ }
+ // Store result
+ v.X(xmad.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::XMAD_reg(u64 insn) {
+ union {
+ u64 raw;
+ BitField<35, 1, Half> half_b;
+ BitField<36, 1, u64> psl;
+ BitField<37, 1, u64> mrg;
+ BitField<38, 1, u64> x;
+ BitField<50, 3, SelectMode> select_mode;
+ } const xmad{insn};
+
+ XMAD(*this, insn, GetReg20(insn), GetReg39(insn), xmad.select_mode, xmad.half_b, xmad.psl != 0,
+ xmad.mrg != 0, xmad.x != 0);
+}
+
+void TranslatorVisitor::XMAD_rc(u64 insn) {
+ union {
+ u64 raw;
+ BitField<50, 2, SelectMode> select_mode;
+ BitField<52, 1, Half> half_b;
+ BitField<54, 1, u64> x;
+ } const xmad{insn};
+
+ XMAD(*this, insn, GetReg39(insn), GetCbuf(insn), xmad.select_mode, xmad.half_b, false, false,
+ xmad.x != 0);
+}
+
+void TranslatorVisitor::XMAD_cr(u64 insn) {
+ union {
+ u64 raw;
+ BitField<50, 2, SelectMode> select_mode;
+ BitField<52, 1, Half> half_b;
+ BitField<54, 1, u64> x;
+ BitField<55, 1, u64> psl;
+ BitField<56, 1, u64> mrg;
+ } const xmad{insn};
+
+ XMAD(*this, insn, GetCbuf(insn), GetReg39(insn), xmad.select_mode, xmad.half_b, xmad.psl != 0,
+ xmad.mrg != 0, xmad.x != 0);
+}
+
+void TranslatorVisitor::XMAD_imm(u64 insn) {
+ union {
+ u64 raw;
+ BitField<20, 16, u64> src_b;
+ BitField<36, 1, u64> psl;
+ BitField<37, 1, u64> mrg;
+ BitField<38, 1, u64> x;
+ BitField<50, 3, SelectMode> select_mode;
+ } const xmad{insn};
+
+ XMAD(*this, insn, ir.Imm32(static_cast<u32>(xmad.src_b)), GetReg39(insn), xmad.select_mode,
+ Half::H0, xmad.psl != 0, xmad.mrg != 0, xmad.x != 0);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp
new file mode 100644
index 000000000..53e8d8923
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp
@@ -0,0 +1,126 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class IntegerWidth : u64 {
+ Byte,
+ Short,
+ Word,
+};
+
+[[nodiscard]] IR::U32 WidthSize(IR::IREmitter& ir, IntegerWidth width) {
+ switch (width) {
+ case IntegerWidth::Byte:
+ return ir.Imm32(8);
+ case IntegerWidth::Short:
+ return ir.Imm32(16);
+ case IntegerWidth::Word:
+ return ir.Imm32(32);
+ default:
+ throw NotImplementedException("Invalid width {}", width);
+ }
+}
+
+[[nodiscard]] IR::U32 ConvertInteger(IR::IREmitter& ir, const IR::U32& src,
+ IntegerWidth dst_width) {
+ const IR::U32 zero{ir.Imm32(0)};
+ const IR::U32 count{WidthSize(ir, dst_width)};
+ return ir.BitFieldExtract(src, zero, count, false);
+}
+
+[[nodiscard]] IR::U32 SaturateInteger(IR::IREmitter& ir, const IR::U32& src, IntegerWidth dst_width,
+ bool dst_signed, bool src_signed) {
+ IR::U32 min{};
+ IR::U32 max{};
+ const IR::U32 zero{ir.Imm32(0)};
+ switch (dst_width) {
+ case IntegerWidth::Byte:
+ min = dst_signed && src_signed ? ir.Imm32(0xffffff80) : zero;
+ max = dst_signed ? ir.Imm32(0x7f) : ir.Imm32(0xff);
+ break;
+ case IntegerWidth::Short:
+ min = dst_signed && src_signed ? ir.Imm32(0xffff8000) : zero;
+ max = dst_signed ? ir.Imm32(0x7fff) : ir.Imm32(0xffff);
+ break;
+ case IntegerWidth::Word:
+ min = dst_signed && src_signed ? ir.Imm32(0x80000000) : zero;
+ max = dst_signed ? ir.Imm32(0x7fffffff) : ir.Imm32(0xffffffff);
+ break;
+ default:
+ throw NotImplementedException("Invalid width {}", dst_width);
+ }
+ const IR::U32 value{!dst_signed && src_signed ? ir.SMax(zero, src) : src};
+ return dst_signed && src_signed ? ir.SClamp(value, min, max) : ir.UClamp(value, min, max);
+}
+
+void I2I(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 2, IntegerWidth> dst_fmt;
+ BitField<12, 1, u64> dst_fmt_sign;
+ BitField<10, 2, IntegerWidth> src_fmt;
+ BitField<13, 1, u64> src_fmt_sign;
+ BitField<41, 3, u64> selector;
+ BitField<45, 1, u64> neg;
+ BitField<47, 1, u64> cc;
+ BitField<49, 1, u64> abs;
+ BitField<50, 1, u64> sat;
+ } const i2i{insn};
+
+ if (i2i.src_fmt == IntegerWidth::Short && (i2i.selector == 1 || i2i.selector == 3)) {
+ throw NotImplementedException("16-bit source format incompatible with selector {}",
+ i2i.selector);
+ }
+ if (i2i.src_fmt == IntegerWidth::Word && i2i.selector != 0) {
+ throw NotImplementedException("32-bit source format incompatible with selector {}",
+ i2i.selector);
+ }
+
+ const s32 selector{static_cast<s32>(i2i.selector)};
+ const IR::U32 offset{v.ir.Imm32(selector * 8)};
+ const IR::U32 count{WidthSize(v.ir, i2i.src_fmt)};
+ const bool src_signed{i2i.src_fmt_sign != 0};
+ const bool dst_signed{i2i.dst_fmt_sign != 0};
+ const bool sat{i2i.sat != 0};
+
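+    // The selector picks which source byte to start extracting from; abs and neg are applied
+    // before the conversion or saturation.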
+ IR::U32 src_values{v.ir.BitFieldExtract(src_a, offset, count, src_signed)};
+ if (i2i.abs != 0) {
+ src_values = v.ir.IAbs(src_values);
+ }
+ if (i2i.neg != 0) {
+ src_values = v.ir.INeg(src_values);
+ }
+ const IR::U32 result{
+ sat ? SaturateInteger(v.ir, src_values, i2i.dst_fmt, dst_signed, src_signed)
+ : ConvertInteger(v.ir, src_values, i2i.dst_fmt)};
+
+ v.X(i2i.dest_reg, result);
+ if (i2i.cc != 0) {
+ v.SetZFlag(v.ir.GetZeroFromOp(result));
+ v.SetSFlag(v.ir.GetSignFromOp(result));
+ v.ResetCFlag();
+ v.ResetOFlag();
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::I2I_reg(u64 insn) {
+ I2I(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::I2I_cbuf(u64 insn) {
+ I2I(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::I2I_imm(u64 insn) {
+ I2I(*this, insn, GetImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp
new file mode 100644
index 000000000..9b85f8059
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp
@@ -0,0 +1,53 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Mode : u64 {
+ Default,
+ Patch,
+ Prim,
+ Attr,
+};
+
+enum class Shift : u64 {
+ Default,
+ U16,
+ B32,
+};
+
+} // Anonymous namespace
+
+void TranslatorVisitor::ISBERD(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<31, 1, u64> skew;
+ BitField<32, 1, u64> o;
+ BitField<33, 2, Mode> mode;
+ BitField<47, 2, Shift> shift;
+ } const isberd{insn};
+
+ if (isberd.skew != 0) {
+ throw NotImplementedException("SKEW");
+ }
+ if (isberd.o != 0) {
+ throw NotImplementedException("O");
+ }
+ if (isberd.mode != Mode::Default) {
+ throw NotImplementedException("Mode {}", isberd.mode.Value());
+ }
+ if (isberd.shift != Shift::Default) {
+ throw NotImplementedException("Shift {}", isberd.shift.Value());
+ }
+ LOG_WARNING(Shader, "(STUBBED) called");
+ X(isberd.dest_reg, X(isberd.src_reg));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp
new file mode 100644
index 000000000..2300088e3
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp
@@ -0,0 +1,62 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/load_constant.h"
+
+namespace Shader::Maxwell {
+using namespace LDC;
+namespace {
+std::pair<IR::U32, IR::U32> Slot(IR::IREmitter& ir, Mode mode, const IR::U32& imm_index,
+ const IR::U32& reg, const IR::U32& imm) {
+ switch (mode) {
+ case Mode::Default:
+ return {imm_index, ir.IAdd(reg, imm)};
+ default:
+ break;
+ }
+ throw NotImplementedException("Mode {}", mode);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::LDC(u64 insn) {
+ const Encoding ldc{insn};
+ const IR::U32 imm_index{ir.Imm32(static_cast<u32>(ldc.index))};
+ const IR::U32 reg{X(ldc.src_reg)};
+ const IR::U32 imm{ir.Imm32(static_cast<s32>(ldc.offset))};
+ const auto [index, offset]{Slot(ir, ldc.mode, imm_index, reg, imm)};
+ switch (ldc.size) {
+ case Size::U8:
+ X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 8, false)});
+ break;
+ case Size::S8:
+ X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 8, true)});
+ break;
+ case Size::U16:
+ X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 16, false)});
+ break;
+ case Size::S16:
+ X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 16, true)});
+ break;
+ case Size::B32:
+ X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 32, false)});
+ break;
+ case Size::B64: {
+ if (!IR::IsAligned(ldc.dest_reg, 2)) {
+ throw NotImplementedException("Unaligned destination register");
+ }
+ const IR::Value vector{ir.GetCbuf(index, offset, 64, false)};
+ for (int i = 0; i < 2; ++i) {
+ X(ldc.dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))});
+ }
+ break;
+ }
+ default:
+ throw NotImplementedException("Invalid size {}", ldc.size.Value());
+ }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h
new file mode 100644
index 000000000..3074ea0e3
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h
@@ -0,0 +1,39 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/reg.h"
+
+namespace Shader::Maxwell::LDC {
+
+enum class Mode : u64 {
+ Default,
+ IL,
+ IS,
+ ISL,
+};
+
+enum class Size : u64 {
+ U8,
+ S8,
+ U16,
+ S16,
+ B32,
+ B64,
+};
+
+union Encoding {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<20, 16, s64> offset;
+ BitField<36, 5, u64> index;
+ BitField<44, 2, Mode> mode;
+ BitField<48, 3, Size> size;
+};
+
+} // namespace Shader::Maxwell::LDC
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp
new file mode 100644
index 000000000..4a0f04e47
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp
@@ -0,0 +1,108 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void LEA_hi(TranslatorVisitor& v, u64 insn, const IR::U32& base, IR::U32 offset_hi, u64 scale,
+ bool neg, bool x) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> offset_lo_reg;
+ BitField<47, 1, u64> cc;
+ BitField<48, 3, IR::Pred> pred;
+ } const lea{insn};
+
+ if (x) {
+ throw NotImplementedException("LEA.HI X");
+ }
+ if (lea.pred != IR::Pred::PT) {
+ throw NotImplementedException("LEA.HI Pred");
+ }
+ if (lea.cc != 0) {
+ throw NotImplementedException("LEA.HI CC");
+ }
+
+ const IR::U32 offset_lo{v.X(lea.offset_lo_reg)};
+ const IR::U64 packed_offset{v.ir.PackUint2x32(v.ir.CompositeConstruct(offset_lo, offset_hi))};
+ const IR::U64 offset{neg ? IR::U64{v.ir.INeg(packed_offset)} : packed_offset};
+
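+    // Shifting the packed offset right by (32 - scale) leaves the scaled high part of the
+    // offset in the low word.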
+ const s32 hi_scale{32 - static_cast<s32>(scale)};
+ const IR::U64 scaled_offset{v.ir.ShiftRightLogical(offset, v.ir.Imm32(hi_scale))};
+ const IR::U32 scaled_offset_w0{v.ir.CompositeExtract(v.ir.UnpackUint2x32(scaled_offset), 0)};
+
+ IR::U32 result{v.ir.IAdd(base, scaled_offset_w0)};
+ v.X(lea.dest_reg, result);
+}
+
+void LEA_lo(TranslatorVisitor& v, u64 insn, const IR::U32& base) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> offset_lo_reg;
+ BitField<39, 5, u64> scale;
+ BitField<45, 1, u64> neg;
+ BitField<46, 1, u64> x;
+ BitField<47, 1, u64> cc;
+ BitField<48, 3, IR::Pred> pred;
+ } const lea{insn};
+ if (lea.x != 0) {
+ throw NotImplementedException("LEA.LO X");
+ }
+ if (lea.pred != IR::Pred::PT) {
+ throw NotImplementedException("LEA.LO Pred");
+ }
+ if (lea.cc != 0) {
+ throw NotImplementedException("LEA.LO CC");
+ }
+
+ const IR::U32 offset_lo{v.X(lea.offset_lo_reg)};
+ const s32 scale{static_cast<s32>(lea.scale)};
+ const IR::U32 offset{lea.neg != 0 ? IR::U32{v.ir.INeg(offset_lo)} : offset_lo};
+ const IR::U32 scaled_offset{v.ir.ShiftLeftLogical(offset, v.ir.Imm32(scale))};
+
+ IR::U32 result{v.ir.IAdd(base, scaled_offset)};
+ v.X(lea.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::LEA_hi_reg(u64 insn) {
+ union {
+ u64 insn;
+ BitField<28, 5, u64> scale;
+ BitField<37, 1, u64> neg;
+ BitField<38, 1, u64> x;
+ } const lea{insn};
+
+ LEA_hi(*this, insn, GetReg20(insn), GetReg39(insn), lea.scale, lea.neg != 0, lea.x != 0);
+}
+
+void TranslatorVisitor::LEA_hi_cbuf(u64 insn) {
+ union {
+ u64 insn;
+ BitField<51, 5, u64> scale;
+ BitField<56, 1, u64> neg;
+ BitField<57, 1, u64> x;
+ } const lea{insn};
+
+ LEA_hi(*this, insn, GetCbuf(insn), GetReg39(insn), lea.scale, lea.neg != 0, lea.x != 0);
+}
+
+void TranslatorVisitor::LEA_lo_reg(u64 insn) {
+ LEA_lo(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::LEA_lo_cbuf(u64 insn) {
+ LEA_lo(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::LEA_lo_imm(u64 insn) {
+ LEA_lo(*this, insn, GetImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp
new file mode 100644
index 000000000..924fb7a40
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp
@@ -0,0 +1,196 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Size : u64 {
+ B32,
+ B64,
+ B96,
+ B128,
+};
+
+enum class InterpolationMode : u64 {
+ Pass,
+ Multiply,
+ Constant,
+ Sc,
+};
+
+enum class SampleMode : u64 {
+ Default,
+ Centroid,
+ Offset,
+};
+
+u32 NumElements(Size size) {
+ switch (size) {
+ case Size::B32:
+ return 1;
+ case Size::B64:
+ return 2;
+ case Size::B96:
+ return 3;
+ case Size::B128:
+ return 4;
+ }
+ throw InvalidArgument("Invalid size {}", size);
+}
+
+template <typename F>
+void HandleIndexed(TranslatorVisitor& v, IR::Reg index_reg, u32 num_elements, F&& f) {
+ const IR::U32 index_value{v.X(index_reg)};
+ for (u32 element = 0; element < num_elements; ++element) {
+ const IR::U32 final_offset{
+ element == 0 ? index_value : IR::U32{v.ir.IAdd(index_value, v.ir.Imm32(element * 4U))}};
+ f(element, final_offset);
+ }
+}
+
+} // Anonymous namespace
+
+void TranslatorVisitor::ALD(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> index_reg;
+ BitField<20, 10, u64> absolute_offset;
+ BitField<20, 11, s64> relative_offset;
+ BitField<39, 8, IR::Reg> vertex_reg;
+ BitField<32, 1, u64> o;
+ BitField<31, 1, u64> patch;
+ BitField<47, 2, Size> size;
+ } const ald{insn};
+
+ const u64 offset{ald.absolute_offset.Value()};
+ if (offset % 4 != 0) {
+ throw NotImplementedException("Unaligned absolute offset {}", offset);
+ }
+ const IR::U32 vertex{X(ald.vertex_reg)};
+ const u32 num_elements{NumElements(ald.size)};
+ if (ald.index_reg == IR::Reg::RZ) {
+ for (u32 element = 0; element < num_elements; ++element) {
+ if (ald.patch != 0) {
+ const IR::Patch patch{offset / 4 + element};
+ F(ald.dest_reg + static_cast<int>(element), ir.GetPatch(patch));
+ } else {
+ const IR::Attribute attr{offset / 4 + element};
+ F(ald.dest_reg + static_cast<int>(element), ir.GetAttribute(attr, vertex));
+ }
+ }
+ return;
+ }
+ if (ald.patch != 0) {
+ throw NotImplementedException("Indirect patch read");
+ }
+ HandleIndexed(*this, ald.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) {
+ F(ald.dest_reg + static_cast<int>(element), ir.GetAttributeIndexed(final_offset, vertex));
+ });
+}
+
+void TranslatorVisitor::AST(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> src_reg;
+ BitField<8, 8, IR::Reg> index_reg;
+ BitField<20, 10, u64> absolute_offset;
+ BitField<20, 11, s64> relative_offset;
+ BitField<31, 1, u64> patch;
+ BitField<39, 8, IR::Reg> vertex_reg;
+ BitField<47, 2, Size> size;
+ } const ast{insn};
+
+ const u64 offset{ast.absolute_offset.Value()};
+ if (offset % 4 != 0) {
+ throw NotImplementedException("Unaligned absolute offset {}", offset);
+ }
+ const IR::U32 vertex{X(ast.vertex_reg)};
+ const u32 num_elements{NumElements(ast.size)};
+ if (ast.index_reg == IR::Reg::RZ) {
+ for (u32 element = 0; element < num_elements; ++element) {
+ if (ast.patch != 0) {
+ const IR::Patch patch{offset / 4 + element};
+ ir.SetPatch(patch, F(ast.src_reg + static_cast<int>(element)));
+ } else {
+ const IR::Attribute attr{offset / 4 + element};
+ ir.SetAttribute(attr, F(ast.src_reg + static_cast<int>(element)), vertex);
+ }
+ }
+ return;
+ }
+ if (ast.patch != 0) {
+ throw NotImplementedException("Indexed tessellation patch store");
+ }
+ HandleIndexed(*this, ast.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) {
+ ir.SetAttributeIndexed(final_offset, F(ast.src_reg + static_cast<int>(element)), vertex);
+ });
+}
+
+void TranslatorVisitor::IPA(u64 insn) {
+    // IPA is the instruction used to read varyings in a fragment shader.
+ // gl_FragCoord is mapped to the gl_Position attribute.
+ // It yields unknown results when used outside of the fragment shader stage.
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> index_reg;
+ BitField<20, 8, IR::Reg> multiplier;
+ BitField<30, 8, IR::Attribute> attribute;
+ BitField<38, 1, u64> idx;
+ BitField<51, 1, u64> sat;
+ BitField<52, 2, SampleMode> sample_mode;
+ BitField<54, 2, InterpolationMode> interpolation_mode;
+ } const ipa{insn};
+
+ // Indexed IPAs are used for indexed varyings.
+ // For example:
+ //
+ // in vec4 colors[4];
+ // uniform int idx;
+ // void main() {
+ // gl_FragColor = colors[idx];
+ // }
+ const bool is_indexed{ipa.idx != 0 && ipa.index_reg != IR::Reg::RZ};
+ const IR::Attribute attribute{ipa.attribute};
+ IR::F32 value{is_indexed ? ir.GetAttributeIndexed(X(ipa.index_reg))
+ : ir.GetAttribute(attribute)};
+ if (IR::IsGeneric(attribute)) {
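+        // Generic inputs mapped as perspective in the program header are scaled by the
+        // position's W component.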
+ const ProgramHeader& sph{env.SPH()};
+ const u32 attr_index{IR::GenericAttributeIndex(attribute)};
+ const u32 element{static_cast<u32>(attribute) % 4};
+ const std::array input_map{sph.ps.GenericInputMap(attr_index)};
+ const bool is_perspective{input_map[element] == Shader::PixelImap::Perspective};
+ if (is_perspective) {
+ const IR::F32 position_w{ir.GetAttribute(IR::Attribute::PositionW)};
+ value = ir.FPMul(value, position_w);
+ }
+ }
+ if (ipa.interpolation_mode == InterpolationMode::Multiply) {
+ value = ir.FPMul(value, F(ipa.multiplier));
+ }
+
+    // Saturated IPAs are generally generated from clamped varyings.
+ // For example: clamp(some_varying, 0.0, 1.0)
+ const bool is_saturated{ipa.sat != 0};
+ if (is_saturated) {
+ if (attribute == IR::Attribute::FrontFace) {
+ throw NotImplementedException("IPA.SAT on FrontFace");
+ }
+ value = ir.FPSaturate(value);
+ }
+
+ F(ipa.dest_reg, value);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp
new file mode 100644
index 000000000..d2a1dbf61
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp
@@ -0,0 +1,218 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Size : u64 {
+ U8,
+ S8,
+ U16,
+ S16,
+ B32,
+ B64,
+ B128,
+};
+
+IR::U32 Offset(TranslatorVisitor& v, u64 insn) {
+ union {
+ u64 raw;
+ BitField<8, 8, IR::Reg> offset_reg;
+ BitField<20, 24, u64> absolute_offset;
+ BitField<20, 24, s64> relative_offset;
+ } const encoding{insn};
+
+ if (encoding.offset_reg == IR::Reg::RZ) {
+ return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset));
+ } else {
+ const s32 relative{static_cast<s32>(encoding.relative_offset.Value())};
+ return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative));
+ }
+}
+
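+// Returns the 32-bit word index used by local memory accesses and the original byte offset.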
+std::pair<IR::U32, IR::U32> WordOffset(TranslatorVisitor& v, u64 insn) {
+ const IR::U32 offset{Offset(v, insn)};
+ if (offset.IsImmediate()) {
+ return {v.ir.Imm32(offset.U32() / 4), offset};
+ } else {
+ return {v.ir.ShiftRightArithmetic(offset, v.ir.Imm32(2)), offset};
+ }
+}
+
+std::pair<int, bool> GetSize(u64 insn) {
+ union {
+ u64 raw;
+ BitField<48, 3, Size> size;
+ } const encoding{insn};
+
+ switch (encoding.size) {
+ case Size::U8:
+ return {8, false};
+ case Size::S8:
+ return {8, true};
+ case Size::U16:
+ return {16, false};
+ case Size::S16:
+ return {16, true};
+ case Size::B32:
+ return {32, false};
+ case Size::B64:
+ return {64, false};
+ case Size::B128:
+ return {128, false};
+ default:
+ throw NotImplementedException("Invalid size {}", encoding.size.Value());
+ }
+}
+
+IR::Reg Reg(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> reg;
+ } const encoding{insn};
+
+ return encoding.reg;
+}
+
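+// ByteOffset and ShortOffset return the bit position of the addressed byte or halfword within
+// its 32-bit word.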
+IR::U32 ByteOffset(IR::IREmitter& ir, const IR::U32& offset) {
+ return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(24));
+}
+
+IR::U32 ShortOffset(IR::IREmitter& ir, const IR::U32& offset) {
+ return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(16));
+}
+
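+// Loads a word from local memory, returning zero for offsets outside the local memory size.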
+IR::U32 LoadLocal(TranslatorVisitor& v, const IR::U32& word_offset, const IR::U32& offset) {
+ const IR::U32 local_memory_size{v.ir.Imm32(v.env.LocalMemorySize())};
+ const IR::U1 in_bounds{v.ir.ILessThan(offset, local_memory_size, false)};
+ return IR::U32{v.ir.Select(in_bounds, v.ir.LoadLocal(word_offset), v.ir.Imm32(0))};
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::LDL(u64 insn) {
+ const auto [word_offset, offset]{WordOffset(*this, insn)};
+ const IR::U32 word{LoadLocal(*this, word_offset, offset)};
+ const IR::Reg dest{Reg(insn)};
+ const auto [bit_size, is_signed]{GetSize(insn)};
+ switch (bit_size) {
+ case 8: {
+ const IR::U32 bit{ByteOffset(ir, offset)};
+ X(dest, ir.BitFieldExtract(word, bit, ir.Imm32(8), is_signed));
+ break;
+ }
+ case 16: {
+ const IR::U32 bit{ShortOffset(ir, offset)};
+ X(dest, ir.BitFieldExtract(word, bit, ir.Imm32(16), is_signed));
+ break;
+ }
+ case 32:
+ case 64:
+ case 128:
+ if (!IR::IsAligned(dest, static_cast<size_t>(bit_size / 32))) {
+ throw NotImplementedException("Unaligned destination register {}", dest);
+ }
+ X(dest, word);
+ for (int i = 1; i < bit_size / 32; ++i) {
+ const IR::U32 sub_word_offset{ir.IAdd(word_offset, ir.Imm32(i))};
+ const IR::U32 sub_offset{ir.IAdd(offset, ir.Imm32(i * 4))};
+ X(dest + i, LoadLocal(*this, sub_word_offset, sub_offset));
+ }
+ break;
+ }
+}
+
+void TranslatorVisitor::LDS(u64 insn) {
+ const IR::U32 offset{Offset(*this, insn)};
+ const IR::Reg dest{Reg(insn)};
+ const auto [bit_size, is_signed]{GetSize(insn)};
+ const IR::Value value{ir.LoadShared(bit_size, is_signed, offset)};
+ switch (bit_size) {
+ case 8:
+ case 16:
+ case 32:
+ X(dest, IR::U32{value});
+ break;
+ case 64:
+ case 128:
+ if (!IR::IsAligned(dest, static_cast<size_t>(bit_size / 32))) {
+ throw NotImplementedException("Unaligned destination register {}", dest);
+ }
+ for (int element = 0; element < bit_size / 32; ++element) {
+ X(dest + element, IR::U32{ir.CompositeExtract(value, static_cast<size_t>(element))});
+ }
+ break;
+ }
+}
+
+void TranslatorVisitor::STL(u64 insn) {
+ const auto [word_offset, offset]{WordOffset(*this, insn)};
+ if (offset.IsImmediate()) {
+ // TODO: Support storing out of bounds at runtime
+ if (offset.U32() >= env.LocalMemorySize()) {
+ LOG_WARNING(Shader, "Storing local memory at 0x{:x} with a size of 0x{:x}, dropping",
+ offset.U32(), env.LocalMemorySize());
+ return;
+ }
+ }
+ const IR::Reg reg{Reg(insn)};
+ const IR::U32 src{X(reg)};
+ const int bit_size{GetSize(insn).first};
+ switch (bit_size) {
+ case 8: {
+ const IR::U32 bit{ByteOffset(ir, offset)};
+ const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(8))};
+ ir.WriteLocal(word_offset, value);
+ break;
+ }
+ case 16: {
+ const IR::U32 bit{ShortOffset(ir, offset)};
+ const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(16))};
+ ir.WriteLocal(word_offset, value);
+ break;
+ }
+ case 32:
+ case 64:
+ case 128:
+ if (!IR::IsAligned(reg, static_cast<size_t>(bit_size / 32))) {
+ throw NotImplementedException("Unaligned source register");
+ }
+ ir.WriteLocal(word_offset, src);
+ for (int i = 1; i < bit_size / 32; ++i) {
+ ir.WriteLocal(ir.IAdd(word_offset, ir.Imm32(i)), X(reg + i));
+ }
+ break;
+ }
+}
+
+void TranslatorVisitor::STS(u64 insn) {
+ const IR::U32 offset{Offset(*this, insn)};
+ const IR::Reg reg{Reg(insn)};
+ const int bit_size{GetSize(insn).first};
+ switch (bit_size) {
+ case 8:
+ case 16:
+ case 32:
+ ir.WriteShared(bit_size, offset, X(reg));
+ break;
+ case 64:
+ if (!IR::IsAligned(reg, 2)) {
+ throw NotImplementedException("Unaligned source register {}", reg);
+ }
+ ir.WriteShared(64, offset, ir.CompositeConstruct(X(reg), X(reg + 1)));
+ break;
+ case 128: {
+ if (!IR::IsAligned(reg, 2)) {
+ throw NotImplementedException("Unaligned source register {}", reg);
+ }
+ const IR::Value vector{ir.CompositeConstruct(X(reg), X(reg + 1), X(reg + 2), X(reg + 3))};
+ ir.WriteShared(128, offset, vector);
+ break;
+ }
+ }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
new file mode 100644
index 000000000..36c5cff2f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
@@ -0,0 +1,184 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class LoadSize : u64 {
+ U8, // Zero-extend
+ S8, // Sign-extend
+ U16, // Zero-extend
+ S16, // Sign-extend
+ B32,
+ B64,
+ B128,
+ U128, // ???
+};
+
+enum class StoreSize : u64 {
+ U8, // Zero-extend
+ S8, // Sign-extend
+ U16, // Zero-extend
+ S16, // Sign-extend
+ B32,
+ B64,
+ B128,
+};
+
+// See Table 27 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html
+enum class LoadCache : u64 {
+ CA, // Cache at all levels, likely to be accessed again
+ CG, // Cache at global level (cache in L2 and below, not L1)
+ CI, // ???
+ CV, // Don't cache and fetch again (consider cached system memory lines stale, fetch again)
+};
+
+// See Table 28 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html
+enum class StoreCache : u64 {
+ WB, // Cache write-back all coherent levels
+ CG, // Cache at global level
+ CS, // Cache streaming, likely to be accessed once
+ WT, // Cache write-through (to system memory)
+};
+
+IR::U64 Address(TranslatorVisitor& v, u64 insn) {
+ union {
+ u64 raw;
+ BitField<8, 8, IR::Reg> addr_reg;
+ BitField<20, 24, s64> addr_offset;
+ BitField<20, 24, u64> rz_addr_offset;
+ BitField<45, 1, u64> e;
+ } const mem{insn};
+
+ const IR::U64 address{[&]() -> IR::U64 {
+ if (mem.e == 0) {
+ // LDG/STG without .E uses a 32-bit pointer, zero-extend it
+ return v.ir.UConvert(64, v.X(mem.addr_reg));
+ }
+ if (!IR::IsAligned(mem.addr_reg, 2)) {
+ throw NotImplementedException("Unaligned address register");
+ }
+ // Pack two registers to build the 64-bit address
+ return v.ir.PackUint2x32(v.ir.CompositeConstruct(v.X(mem.addr_reg), v.X(mem.addr_reg + 1)));
+ }()};
+ const u64 addr_offset{[&]() -> u64 {
+ if (mem.addr_reg == IR::Reg::RZ) {
+ // When RZ is used, the address is an absolute address
+ return static_cast<u64>(mem.rz_addr_offset.Value());
+ } else {
+ return static_cast<u64>(mem.addr_offset.Value());
+ }
+ }()};
+ // Apply the offset
+ return v.ir.IAdd(address, v.ir.Imm64(addr_offset));
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::LDG(u64 insn) {
+ // LDG loads global memory into registers
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<46, 2, LoadCache> cache;
+ BitField<48, 3, LoadSize> size;
+ } const ldg{insn};
+
+ // Pointer to load data from
+ const IR::U64 address{Address(*this, insn)};
+ const IR::Reg dest_reg{ldg.dest_reg};
+ switch (ldg.size) {
+ case LoadSize::U8:
+ X(dest_reg, ir.LoadGlobalU8(address));
+ break;
+ case LoadSize::S8:
+ X(dest_reg, ir.LoadGlobalS8(address));
+ break;
+ case LoadSize::U16:
+ X(dest_reg, ir.LoadGlobalU16(address));
+ break;
+ case LoadSize::S16:
+ X(dest_reg, ir.LoadGlobalS16(address));
+ break;
+ case LoadSize::B32:
+ X(dest_reg, ir.LoadGlobal32(address));
+ break;
+ case LoadSize::B64: {
+ if (!IR::IsAligned(dest_reg, 2)) {
+ throw NotImplementedException("Unaligned data registers");
+ }
+ const IR::Value vector{ir.LoadGlobal64(address)};
+ for (int i = 0; i < 2; ++i) {
+ X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))});
+ }
+ break;
+ }
+ case LoadSize::B128:
+ case LoadSize::U128: {
+ if (!IR::IsAligned(dest_reg, 4)) {
+ throw NotImplementedException("Unaligned data registers");
+ }
+ const IR::Value vector{ir.LoadGlobal128(address)};
+ for (int i = 0; i < 4; ++i) {
+ X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))});
+ }
+ break;
+ }
+ default:
+ throw NotImplementedException("Invalid LDG size {}", ldg.size.Value());
+ }
+}
+
+void TranslatorVisitor::STG(u64 insn) {
+ // STG stores registers into global memory.
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> data_reg;
+ BitField<46, 2, StoreCache> cache;
+ BitField<48, 3, StoreSize> size;
+ } const stg{insn};
+
+ // Pointer to store data into
+ const IR::U64 address{Address(*this, insn)};
+ const IR::Reg data_reg{stg.data_reg};
+ switch (stg.size) {
+ case StoreSize::U8:
+ ir.WriteGlobalU8(address, X(data_reg));
+ break;
+ case StoreSize::S8:
+ ir.WriteGlobalS8(address, X(data_reg));
+ break;
+ case StoreSize::U16:
+ ir.WriteGlobalU16(address, X(data_reg));
+ break;
+ case StoreSize::S16:
+ ir.WriteGlobalS16(address, X(data_reg));
+ break;
+ case StoreSize::B32:
+ ir.WriteGlobal32(address, X(data_reg));
+ break;
+ case StoreSize::B64: {
+ if (!IR::IsAligned(data_reg, 2)) {
+ throw NotImplementedException("Unaligned data registers");
+ }
+ const IR::Value vector{ir.CompositeConstruct(X(data_reg), X(data_reg + 1))};
+ ir.WriteGlobal64(address, vector);
+ break;
+ }
+ case StoreSize::B128:
+ if (!IR::IsAligned(data_reg, 4)) {
+ throw NotImplementedException("Unaligned data registers");
+ }
+ const IR::Value vector{
+ ir.CompositeConstruct(X(data_reg), X(data_reg + 1), X(data_reg + 2), X(data_reg + 3))};
+ ir.WriteGlobal128(address, vector);
+ break;
+ }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp
new file mode 100644
index 000000000..92cd27ed4
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp
@@ -0,0 +1,116 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class LogicalOp : u64 {
+ AND,
+ OR,
+ XOR,
+ PASS_B,
+};
+
+[[nodiscard]] IR::U32 LogicalOperation(IR::IREmitter& ir, const IR::U32& operand_1,
+ const IR::U32& operand_2, LogicalOp op) {
+ switch (op) {
+ case LogicalOp::AND:
+ return ir.BitwiseAnd(operand_1, operand_2);
+ case LogicalOp::OR:
+ return ir.BitwiseOr(operand_1, operand_2);
+ case LogicalOp::XOR:
+ return ir.BitwiseXor(operand_1, operand_2);
+ case LogicalOp::PASS_B:
+ return operand_2;
+ default:
+ throw NotImplementedException("Invalid Logical operation {}", op);
+ }
+}
+
+void LOP(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool x, bool cc, bool inv_a, bool inv_b,
+ LogicalOp bit_op, std::optional<PredicateOp> pred_op = std::nullopt,
+ IR::Pred dest_pred = IR::Pred::PT) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ } const lop{insn};
+
+ if (x) {
+ throw NotImplementedException("X");
+ }
+ IR::U32 op_a{v.X(lop.src_reg)};
+ if (inv_a != 0) {
+ op_a = v.ir.BitwiseNot(op_a);
+ }
+ if (inv_b != 0) {
+ op_b = v.ir.BitwiseNot(op_b);
+ }
+
+ const IR::U32 result{LogicalOperation(v.ir, op_a, op_b, bit_op)};
+ if (pred_op) {
+ const IR::U1 pred_result{PredicateOperation(v.ir, result, *pred_op)};
+ v.ir.SetPred(dest_pred, pred_result);
+ }
+ if (cc) {
+ if (bit_op == LogicalOp::PASS_B) {
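+            // PASS_B forwards operand B unchanged, so Z and S are computed explicitly here.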
+ v.SetZFlag(v.ir.IEqual(result, v.ir.Imm32(0)));
+ v.SetSFlag(v.ir.ILessThan(result, v.ir.Imm32(0), true));
+ } else {
+ v.SetZFlag(v.ir.GetZeroFromOp(result));
+ v.SetSFlag(v.ir.GetSignFromOp(result));
+ }
+ v.ResetCFlag();
+ v.ResetOFlag();
+ }
+ v.X(lop.dest_reg, result);
+}
+
+void LOP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) {
+ union {
+ u64 insn;
+ BitField<39, 1, u64> inv_a;
+ BitField<40, 1, u64> inv_b;
+ BitField<41, 2, LogicalOp> bit_op;
+ BitField<43, 1, u64> x;
+ BitField<44, 2, PredicateOp> pred_op;
+ BitField<47, 1, u64> cc;
+ BitField<48, 3, IR::Pred> dest_pred;
+ } const lop{insn};
+
+ LOP(v, insn, op_b, lop.x != 0, lop.cc != 0, lop.inv_a != 0, lop.inv_b != 0, lop.bit_op,
+ lop.pred_op, lop.dest_pred);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::LOP_reg(u64 insn) {
+ LOP(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::LOP_cbuf(u64 insn) {
+ LOP(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::LOP_imm(u64 insn) {
+ LOP(*this, insn, GetImm20(insn));
+}
+
+void TranslatorVisitor::LOP32I(u64 insn) {
+ union {
+ u64 raw;
+ BitField<53, 2, LogicalOp> bit_op;
+ BitField<57, 1, u64> x;
+ BitField<52, 1, u64> cc;
+ BitField<55, 1, u64> inv_a;
+ BitField<56, 1, u64> inv_b;
+ } const lop32i{insn};
+
+ LOP(*this, insn, GetImm32(insn), lop32i.x != 0, lop32i.cc != 0, lop32i.inv_a != 0,
+ lop32i.inv_b != 0, lop32i.bit_op);
+}
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp
new file mode 100644
index 000000000..e0fe47912
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp
@@ -0,0 +1,122 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+// https://forums.developer.nvidia.com/t/reverse-lut-for-lop3-lut/110651
+// Emulate GPU's LOP3.LUT (three-input logic op with 8-bit truth table)
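+// Bit i of the truth table is the output for inputs (a, b, c) where i = (a << 2) | (b << 1) | c,
+// e.g. a table of 0xC0 implements a & b and 0xEA implements (a & b) | c.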
+IR::U32 ApplyLUT(IR::IREmitter& ir, const IR::U32& a, const IR::U32& b, const IR::U32& c,
+ u64 ttbl) {
+ IR::U32 r{ir.Imm32(0)};
+ const IR::U32 not_a{ir.BitwiseNot(a)};
+ const IR::U32 not_b{ir.BitwiseNot(b)};
+ const IR::U32 not_c{ir.BitwiseNot(c)};
+ if (ttbl & 0x01) {
+ // r |= ~a & ~b & ~c;
+ const auto lhs{ir.BitwiseAnd(not_a, not_b)};
+ const auto rhs{ir.BitwiseAnd(lhs, not_c)};
+ r = ir.BitwiseOr(r, rhs);
+ }
+ if (ttbl & 0x02) {
+ // r |= ~a & ~b & c;
+ const auto lhs{ir.BitwiseAnd(not_a, not_b)};
+ const auto rhs{ir.BitwiseAnd(lhs, c)};
+ r = ir.BitwiseOr(r, rhs);
+ }
+ if (ttbl & 0x04) {
+ // r |= ~a & b & ~c;
+ const auto lhs{ir.BitwiseAnd(not_a, b)};
+ const auto rhs{ir.BitwiseAnd(lhs, not_c)};
+ r = ir.BitwiseOr(r, rhs);
+ }
+ if (ttbl & 0x08) {
+ // r |= ~a & b & c;
+ const auto lhs{ir.BitwiseAnd(not_a, b)};
+ const auto rhs{ir.BitwiseAnd(lhs, c)};
+ r = ir.BitwiseOr(r, rhs);
+ }
+ if (ttbl & 0x10) {
+ // r |= a & ~b & ~c;
+ const auto lhs{ir.BitwiseAnd(a, not_b)};
+ const auto rhs{ir.BitwiseAnd(lhs, not_c)};
+ r = ir.BitwiseOr(r, rhs);
+ }
+ if (ttbl & 0x20) {
+ // r |= a & ~b & c;
+ const auto lhs{ir.BitwiseAnd(a, not_b)};
+ const auto rhs{ir.BitwiseAnd(lhs, c)};
+ r = ir.BitwiseOr(r, rhs);
+ }
+ if (ttbl & 0x40) {
+ // r |= a & b & ~c;
+ const auto lhs{ir.BitwiseAnd(a, b)};
+ const auto rhs{ir.BitwiseAnd(lhs, not_c)};
+ r = ir.BitwiseOr(r, rhs);
+ }
+ if (ttbl & 0x80) {
+ // r |= a & b & c;
+ const auto lhs{ir.BitwiseAnd(a, b)};
+ const auto rhs{ir.BitwiseAnd(lhs, c)};
+ r = ir.BitwiseOr(r, rhs);
+ }
+ return r;
+}
+
+IR::U32 LOP3(TranslatorVisitor& v, u64 insn, const IR::U32& op_b, const IR::U32& op_c, u64 lut) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<47, 1, u64> cc;
+ } const lop3{insn};
+
+ if (lop3.cc != 0) {
+ throw NotImplementedException("LOP3 CC");
+ }
+
+ const IR::U32 op_a{v.X(lop3.src_reg)};
+ const IR::U32 result{ApplyLUT(v.ir, op_a, op_b, op_c, lut)};
+ v.X(lop3.dest_reg, result);
+ return result;
+}
+
+u64 GetLut48(u64 insn) {
+ union {
+ u64 raw;
+ BitField<48, 8, u64> lut;
+ } const lut{insn};
+ return lut.lut;
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::LOP3_reg(u64 insn) {
+ union {
+ u64 insn;
+ BitField<28, 8, u64> lut;
+ BitField<38, 1, u64> x;
+ BitField<36, 2, PredicateOp> pred_op;
+ BitField<48, 3, IR::Pred> pred;
+ } const lop3{insn};
+
+ if (lop3.x != 0) {
+ throw NotImplementedException("LOP3 X");
+ }
+ const IR::U32 result{LOP3(*this, insn, GetReg20(insn), GetReg39(insn), lop3.lut)};
+ const IR::U1 pred_result{PredicateOperation(ir, result, lop3.pred_op)};
+ ir.SetPred(lop3.pred, pred_result);
+}
+
+void TranslatorVisitor::LOP3_cbuf(u64 insn) {
+ LOP3(*this, insn, GetCbuf(insn), GetReg39(insn), GetLut48(insn));
+}
+
+void TranslatorVisitor::LOP3_imm(u64 insn) {
+ LOP3(*this, insn, GetImm20(insn), GetReg39(insn), GetLut48(insn));
+}
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp
new file mode 100644
index 000000000..4324fd443
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp
@@ -0,0 +1,66 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Mode : u64 {
+ PR,
+ CC,
+};
+} // Anonymous namespace
+
+void TranslatorVisitor::P2R_reg(u64) {
+ throw NotImplementedException("P2R (reg)");
+}
+
+void TranslatorVisitor::P2R_cbuf(u64) {
+ throw NotImplementedException("P2R (cbuf)");
+}
+
+void TranslatorVisitor::P2R_imm(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src;
+ BitField<40, 1, Mode> mode;
+ BitField<41, 2, u64> byte_selector;
+ } const p2r{insn};
+
+ const u32 mask{GetImm20(insn).U32()};
+ const bool pr_mode{p2r.mode == Mode::PR};
+ const u32 num_items{pr_mode ? 7U : 4U};
+ const u32 offset{static_cast<u32>(p2r.byte_selector) * 8};
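+    // Build a bitmask from the selected predicates (PR mode) or condition code flags (CC mode)
+    // and merge it into the source register at the selected byte.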
+ IR::U32 insert{ir.Imm32(0)};
+ for (u32 index = 0; index < num_items; ++index) {
+ if (((mask >> index) & 1) == 0) {
+ continue;
+ }
+ const IR::U1 cond{[this, index, pr_mode] {
+ if (pr_mode) {
+ return ir.GetPred(IR::Pred{index});
+ }
+ switch (index) {
+ case 0:
+ return ir.GetZFlag();
+ case 1:
+ return ir.GetSFlag();
+ case 2:
+ return ir.GetCFlag();
+ case 3:
+ return ir.GetOFlag();
+ }
+ throw LogicError("Unreachable P2R index");
+ }()};
+ const IR::U32 bit{ir.Select(cond, ir.Imm32(1U << (index + offset)), ir.Imm32(0))};
+ insert = ir.BitwiseOr(insert, bit);
+ }
+ const IR::U32 masked_out{ir.BitwiseAnd(X(p2r.src), ir.Imm32(~(mask << offset)))};
+ X(p2r.dest_reg, ir.BitwiseOr(masked_out, insert));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp
new file mode 100644
index 000000000..6bb08db8a
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp
@@ -0,0 +1,44 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void MOV(TranslatorVisitor& v, u64 insn, const IR::U32& src, bool is_mov32i = false) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<39, 4, u64> mask;
+ BitField<12, 4, u64> mov32i_mask;
+ } const mov{insn};
+
+ if ((is_mov32i ? mov.mov32i_mask : mov.mask) != 0xf) {
+ throw NotImplementedException("Non-full move mask");
+ }
+ v.X(mov.dest_reg, src);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::MOV_reg(u64 insn) {
+ MOV(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::MOV_cbuf(u64 insn) {
+ MOV(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::MOV_imm(u64 insn) {
+ MOV(*this, insn, GetImm20(insn));
+}
+
+void TranslatorVisitor::MOV32I(u64 insn) {
+ MOV(*this, insn, GetImm32(insn), true);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp
new file mode 100644
index 000000000..eda5f177b
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp
@@ -0,0 +1,71 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Mode : u64 {
+ PR,
+ CC,
+};
+
+void SetFlag(IR::IREmitter& ir, const IR::U1& inv_mask_bit, const IR::U1& src_bit, u32 index) {
+ switch (index) {
+ case 0:
+ return ir.SetZFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetZFlag(), src_bit)});
+ case 1:
+ return ir.SetSFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetSFlag(), src_bit)});
+ case 2:
+ return ir.SetCFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetCFlag(), src_bit)});
+ case 3:
+ return ir.SetOFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetOFlag(), src_bit)});
+ default:
+ throw LogicError("Unreachable R2P index");
+ }
+}
+
+void R2P(TranslatorVisitor& v, u64 insn, const IR::U32& mask) {
+ union {
+ u64 raw;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<40, 1, Mode> mode;
+ BitField<41, 2, u64> byte_selector;
+ } const r2p{insn};
+ const IR::U32 src{v.X(r2p.src_reg)};
+ const IR::U32 count{v.ir.Imm32(1)};
+ const bool pr_mode{r2p.mode == Mode::PR};
+ const u32 num_items{pr_mode ? 7U : 4U};
+ const u32 offset_base{static_cast<u32>(r2p.byte_selector) * 8};
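+    // Each selected bit of the source register updates the matching predicate (PR mode) or
+    // condition code (CC mode); bits with a cleared mask keep their previous value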
+ for (u32 index = 0; index < num_items; ++index) {
+ const IR::U32 offset{v.ir.Imm32(offset_base + index)};
+ const IR::U1 src_zero{v.ir.GetZeroFromOp(v.ir.BitFieldExtract(src, offset, count, false))};
+ const IR::U1 src_bit{v.ir.LogicalNot(src_zero)};
+ const IR::U32 mask_bfe{v.ir.BitFieldExtract(mask, v.ir.Imm32(index), count, false)};
+ const IR::U1 inv_mask_bit{v.ir.GetZeroFromOp(mask_bfe)};
+ if (pr_mode) {
+ const IR::Pred pred{index};
+ v.ir.SetPred(pred, IR::U1{v.ir.Select(inv_mask_bit, v.ir.GetPred(pred), src_bit)});
+ } else {
+ SetFlag(v.ir, inv_mask_bit, src_bit, index);
+ }
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::R2P_reg(u64 insn) {
+ R2P(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::R2P_cbuf(u64 insn) {
+ R2P(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::R2P_imm(u64 insn) {
+ R2P(*this, insn, GetImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
new file mode 100644
index 000000000..20cb2674e
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
@@ -0,0 +1,181 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class SpecialRegister : u64 {
+ SR_LANEID = 0,
+ SR_CLOCK = 1,
+ SR_VIRTCFG = 2,
+ SR_VIRTID = 3,
+ SR_PM0 = 4,
+ SR_PM1 = 5,
+ SR_PM2 = 6,
+ SR_PM3 = 7,
+ SR_PM4 = 8,
+ SR_PM5 = 9,
+ SR_PM6 = 10,
+ SR_PM7 = 11,
+ SR12 = 12,
+ SR13 = 13,
+ SR14 = 14,
+ SR_ORDERING_TICKET = 15,
+ SR_PRIM_TYPE = 16,
+ SR_INVOCATION_ID = 17,
+ SR_Y_DIRECTION = 18,
+ SR_THREAD_KILL = 19,
+ SM_SHADER_TYPE = 20,
+ SR_DIRECTCBEWRITEADDRESSLOW = 21,
+ SR_DIRECTCBEWRITEADDRESSHIGH = 22,
+ SR_DIRECTCBEWRITEENABLE = 23,
+ SR_MACHINE_ID_0 = 24,
+ SR_MACHINE_ID_1 = 25,
+ SR_MACHINE_ID_2 = 26,
+ SR_MACHINE_ID_3 = 27,
+ SR_AFFINITY = 28,
+ SR_INVOCATION_INFO = 29,
+ SR_WSCALEFACTOR_XY = 30,
+ SR_WSCALEFACTOR_Z = 31,
+ SR_TID = 32,
+ SR_TID_X = 33,
+ SR_TID_Y = 34,
+ SR_TID_Z = 35,
+ SR_CTA_PARAM = 36,
+ SR_CTAID_X = 37,
+ SR_CTAID_Y = 38,
+ SR_CTAID_Z = 39,
+ SR_NTID = 40,
+ SR_CirQueueIncrMinusOne = 41,
+ SR_NLATC = 42,
+ SR43 = 43,
+ SR_SM_SPA_VERSION = 44,
+ SR_MULTIPASSSHADERINFO = 45,
+ SR_LWINHI = 46,
+ SR_SWINHI = 47,
+ SR_SWINLO = 48,
+ SR_SWINSZ = 49,
+ SR_SMEMSZ = 50,
+ SR_SMEMBANKS = 51,
+ SR_LWINLO = 52,
+ SR_LWINSZ = 53,
+ SR_LMEMLOSZ = 54,
+ SR_LMEMHIOFF = 55,
+ SR_EQMASK = 56,
+ SR_LTMASK = 57,
+ SR_LEMASK = 58,
+ SR_GTMASK = 59,
+ SR_GEMASK = 60,
+ SR_REGALLOC = 61,
+ SR_BARRIERALLOC = 62,
+ SR63 = 63,
+ SR_GLOBALERRORSTATUS = 64,
+ SR65 = 65,
+ SR_WARPERRORSTATUS = 66,
+ SR_WARPERRORSTATUSCLEAR = 67,
+ SR68 = 68,
+ SR69 = 69,
+ SR70 = 70,
+ SR71 = 71,
+ SR_PM_HI0 = 72,
+ SR_PM_HI1 = 73,
+ SR_PM_HI2 = 74,
+ SR_PM_HI3 = 75,
+ SR_PM_HI4 = 76,
+ SR_PM_HI5 = 77,
+ SR_PM_HI6 = 78,
+ SR_PM_HI7 = 79,
+ SR_CLOCKLO = 80,
+ SR_CLOCKHI = 81,
+ SR_GLOBALTIMERLO = 82,
+ SR_GLOBALTIMERHI = 83,
+ SR84 = 84,
+ SR85 = 85,
+ SR86 = 86,
+ SR87 = 87,
+ SR88 = 88,
+ SR89 = 89,
+ SR90 = 90,
+ SR91 = 91,
+ SR92 = 92,
+ SR93 = 93,
+ SR94 = 94,
+ SR95 = 95,
+ SR_HWTASKID = 96,
+ SR_CIRCULARQUEUEENTRYINDEX = 97,
+ SR_CIRCULARQUEUEENTRYADDRESSLOW = 98,
+ SR_CIRCULARQUEUEENTRYADDRESSHIGH = 99,
+};
+
+[[nodiscard]] IR::U32 Read(IR::IREmitter& ir, SpecialRegister special_register) {
+ switch (special_register) {
+ case SpecialRegister::SR_INVOCATION_ID:
+ return ir.InvocationId();
+ case SpecialRegister::SR_THREAD_KILL:
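+        // All ones when this is a helper invocation, zero otherwise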
+ return IR::U32{ir.Select(ir.IsHelperInvocation(), ir.Imm32(-1), ir.Imm32(0))};
+ case SpecialRegister::SR_INVOCATION_INFO:
+ LOG_WARNING(Shader, "(STUBBED) SR_INVOCATION_INFO");
+ return ir.Imm32(0x00ff'0000);
+ case SpecialRegister::SR_TID: {
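+        // Pack the local invocation id as x in bits [0, 16), y in [16, 24) and z in [26, 32)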
+ const IR::Value tid{ir.LocalInvocationId()};
+ return ir.BitFieldInsert(ir.BitFieldInsert(IR::U32{ir.CompositeExtract(tid, 0)},
+ IR::U32{ir.CompositeExtract(tid, 1)},
+ ir.Imm32(16), ir.Imm32(8)),
+ IR::U32{ir.CompositeExtract(tid, 2)}, ir.Imm32(26), ir.Imm32(6));
+ }
+ case SpecialRegister::SR_TID_X:
+ return ir.LocalInvocationIdX();
+ case SpecialRegister::SR_TID_Y:
+ return ir.LocalInvocationIdY();
+ case SpecialRegister::SR_TID_Z:
+ return ir.LocalInvocationIdZ();
+ case SpecialRegister::SR_CTAID_X:
+ return ir.WorkgroupIdX();
+ case SpecialRegister::SR_CTAID_Y:
+ return ir.WorkgroupIdY();
+ case SpecialRegister::SR_CTAID_Z:
+ return ir.WorkgroupIdZ();
+ case SpecialRegister::SR_WSCALEFACTOR_XY:
+ LOG_WARNING(Shader, "(STUBBED) SR_WSCALEFACTOR_XY");
+ return ir.Imm32(Common::BitCast<u32>(1.0f));
+ case SpecialRegister::SR_WSCALEFACTOR_Z:
+ LOG_WARNING(Shader, "(STUBBED) SR_WSCALEFACTOR_Z");
+ return ir.Imm32(Common::BitCast<u32>(1.0f));
+ case SpecialRegister::SR_LANEID:
+ return ir.LaneId();
+ case SpecialRegister::SR_EQMASK:
+ return ir.SubgroupEqMask();
+ case SpecialRegister::SR_LTMASK:
+ return ir.SubgroupLtMask();
+ case SpecialRegister::SR_LEMASK:
+ return ir.SubgroupLeMask();
+ case SpecialRegister::SR_GTMASK:
+ return ir.SubgroupGtMask();
+ case SpecialRegister::SR_GEMASK:
+ return ir.SubgroupGeMask();
+ case SpecialRegister::SR_Y_DIRECTION:
+ return ir.BitCast<IR::U32>(ir.YDirection());
+ case SpecialRegister::SR_AFFINITY:
+ LOG_WARNING(Shader, "(STUBBED) SR_AFFINITY");
+ return ir.Imm32(0); // This is the default value hardware returns.
+ default:
+ throw NotImplementedException("S2R special register {}", special_register);
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::S2R(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<20, 8, SpecialRegister> src_reg;
+ } const s2r{insn};
+
+ X(s2r.dest_reg, Read(ir, s2r.src_reg));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
new file mode 100644
index 000000000..7e26ab359
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -0,0 +1,283 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+
+[[noreturn]] static void ThrowNotImplemented(Opcode opcode) {
+ throw NotImplementedException("Instruction {} is not implemented", opcode);
+}
+
+void TranslatorVisitor::ATOM_cas(u64) {
+ ThrowNotImplemented(Opcode::ATOM_cas);
+}
+
+void TranslatorVisitor::ATOMS_cas(u64) {
+ ThrowNotImplemented(Opcode::ATOMS_cas);
+}
+
+void TranslatorVisitor::B2R(u64) {
+ ThrowNotImplemented(Opcode::B2R);
+}
+
+void TranslatorVisitor::BPT(u64) {
+ ThrowNotImplemented(Opcode::BPT);
+}
+
+void TranslatorVisitor::BRA(u64) {
+ ThrowNotImplemented(Opcode::BRA);
+}
+
+void TranslatorVisitor::BRK(u64) {
+ ThrowNotImplemented(Opcode::BRK);
+}
+
+void TranslatorVisitor::CAL() {
+ // CAL is a no-op
+}
+
+void TranslatorVisitor::CCTL(u64) {
+ ThrowNotImplemented(Opcode::CCTL);
+}
+
+void TranslatorVisitor::CCTLL(u64) {
+ ThrowNotImplemented(Opcode::CCTLL);
+}
+
+void TranslatorVisitor::CONT(u64) {
+ ThrowNotImplemented(Opcode::CONT);
+}
+
+void TranslatorVisitor::CS2R(u64) {
+ ThrowNotImplemented(Opcode::CS2R);
+}
+
+void TranslatorVisitor::FCHK_reg(u64) {
+ ThrowNotImplemented(Opcode::FCHK_reg);
+}
+
+void TranslatorVisitor::FCHK_cbuf(u64) {
+ ThrowNotImplemented(Opcode::FCHK_cbuf);
+}
+
+void TranslatorVisitor::FCHK_imm(u64) {
+ ThrowNotImplemented(Opcode::FCHK_imm);
+}
+
+void TranslatorVisitor::GETCRSPTR(u64) {
+ ThrowNotImplemented(Opcode::GETCRSPTR);
+}
+
+void TranslatorVisitor::GETLMEMBASE(u64) {
+ ThrowNotImplemented(Opcode::GETLMEMBASE);
+}
+
+void TranslatorVisitor::IDE(u64) {
+ ThrowNotImplemented(Opcode::IDE);
+}
+
+void TranslatorVisitor::IDP_reg(u64) {
+ ThrowNotImplemented(Opcode::IDP_reg);
+}
+
+void TranslatorVisitor::IDP_imm(u64) {
+ ThrowNotImplemented(Opcode::IDP_imm);
+}
+
+void TranslatorVisitor::IMAD_reg(u64) {
+ ThrowNotImplemented(Opcode::IMAD_reg);
+}
+
+void TranslatorVisitor::IMAD_rc(u64) {
+ ThrowNotImplemented(Opcode::IMAD_rc);
+}
+
+void TranslatorVisitor::IMAD_cr(u64) {
+ ThrowNotImplemented(Opcode::IMAD_cr);
+}
+
+void TranslatorVisitor::IMAD_imm(u64) {
+ ThrowNotImplemented(Opcode::IMAD_imm);
+}
+
+void TranslatorVisitor::IMAD32I(u64) {
+ ThrowNotImplemented(Opcode::IMAD32I);
+}
+
+void TranslatorVisitor::IMADSP_reg(u64) {
+ ThrowNotImplemented(Opcode::IMADSP_reg);
+}
+
+void TranslatorVisitor::IMADSP_rc(u64) {
+ ThrowNotImplemented(Opcode::IMADSP_rc);
+}
+
+void TranslatorVisitor::IMADSP_cr(u64) {
+ ThrowNotImplemented(Opcode::IMADSP_cr);
+}
+
+void TranslatorVisitor::IMADSP_imm(u64) {
+ ThrowNotImplemented(Opcode::IMADSP_imm);
+}
+
+void TranslatorVisitor::IMUL_reg(u64) {
+ ThrowNotImplemented(Opcode::IMUL_reg);
+}
+
+void TranslatorVisitor::IMUL_cbuf(u64) {
+ ThrowNotImplemented(Opcode::IMUL_cbuf);
+}
+
+void TranslatorVisitor::IMUL_imm(u64) {
+ ThrowNotImplemented(Opcode::IMUL_imm);
+}
+
+void TranslatorVisitor::IMUL32I(u64) {
+ ThrowNotImplemented(Opcode::IMUL32I);
+}
+
+void TranslatorVisitor::JCAL(u64) {
+ ThrowNotImplemented(Opcode::JCAL);
+}
+
+void TranslatorVisitor::JMP(u64) {
+ ThrowNotImplemented(Opcode::JMP);
+}
+
+void TranslatorVisitor::KIL() {
+ // KIL is a no-op
+}
+
+void TranslatorVisitor::LD(u64) {
+ ThrowNotImplemented(Opcode::LD);
+}
+
+void TranslatorVisitor::LEPC(u64) {
+ ThrowNotImplemented(Opcode::LEPC);
+}
+
+void TranslatorVisitor::LONGJMP(u64) {
+ ThrowNotImplemented(Opcode::LONGJMP);
+}
+
+void TranslatorVisitor::NOP(u64) {
+    // NOP is a no-op
+}
+
+void TranslatorVisitor::PBK() {
+ // PBK is a no-op
+}
+
+void TranslatorVisitor::PCNT() {
+ // PCNT is a no-op
+}
+
+void TranslatorVisitor::PEXIT(u64) {
+ ThrowNotImplemented(Opcode::PEXIT);
+}
+
+void TranslatorVisitor::PLONGJMP(u64) {
+ ThrowNotImplemented(Opcode::PLONGJMP);
+}
+
+void TranslatorVisitor::PRET(u64) {
+ ThrowNotImplemented(Opcode::PRET);
+}
+
+void TranslatorVisitor::PRMT_reg(u64) {
+ ThrowNotImplemented(Opcode::PRMT_reg);
+}
+
+void TranslatorVisitor::PRMT_rc(u64) {
+ ThrowNotImplemented(Opcode::PRMT_rc);
+}
+
+void TranslatorVisitor::PRMT_cr(u64) {
+ ThrowNotImplemented(Opcode::PRMT_cr);
+}
+
+void TranslatorVisitor::PRMT_imm(u64) {
+ ThrowNotImplemented(Opcode::PRMT_imm);
+}
+
+void TranslatorVisitor::R2B(u64) {
+ ThrowNotImplemented(Opcode::R2B);
+}
+
+void TranslatorVisitor::RAM(u64) {
+ ThrowNotImplemented(Opcode::RAM);
+}
+
+void TranslatorVisitor::RET(u64) {
+ ThrowNotImplemented(Opcode::RET);
+}
+
+void TranslatorVisitor::RTT(u64) {
+ ThrowNotImplemented(Opcode::RTT);
+}
+
+void TranslatorVisitor::SAM(u64) {
+ ThrowNotImplemented(Opcode::SAM);
+}
+
+void TranslatorVisitor::SETCRSPTR(u64) {
+ ThrowNotImplemented(Opcode::SETCRSPTR);
+}
+
+void TranslatorVisitor::SETLMEMBASE(u64) {
+ ThrowNotImplemented(Opcode::SETLMEMBASE);
+}
+
+void TranslatorVisitor::SSY() {
+ // SSY is a no-op
+}
+
+void TranslatorVisitor::ST(u64) {
+ ThrowNotImplemented(Opcode::ST);
+}
+
+void TranslatorVisitor::STP(u64) {
+ ThrowNotImplemented(Opcode::STP);
+}
+
+void TranslatorVisitor::SUATOM_cas(u64) {
+ ThrowNotImplemented(Opcode::SUATOM_cas);
+}
+
+void TranslatorVisitor::SYNC(u64) {
+ ThrowNotImplemented(Opcode::SYNC);
+}
+
+void TranslatorVisitor::TXA(u64) {
+ ThrowNotImplemented(Opcode::TXA);
+}
+
+void TranslatorVisitor::VABSDIFF(u64) {
+ ThrowNotImplemented(Opcode::VABSDIFF);
+}
+
+void TranslatorVisitor::VABSDIFF4(u64) {
+ ThrowNotImplemented(Opcode::VABSDIFF4);
+}
+
+void TranslatorVisitor::VADD(u64) {
+ ThrowNotImplemented(Opcode::VADD);
+}
+
+void TranslatorVisitor::VSET(u64) {
+ ThrowNotImplemented(Opcode::VSET);
+}
+
+void TranslatorVisitor::VSHL(u64) {
+ ThrowNotImplemented(Opcode::VSHL);
+}
+
+void TranslatorVisitor::VSHR(u64) {
+ ThrowNotImplemented(Opcode::VSHR);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp
new file mode 100644
index 000000000..01cfad88d
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp
@@ -0,0 +1,45 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void OUT(TranslatorVisitor& v, u64 insn, IR::U32 stream_index) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> output_reg; // Not needed on host
+ BitField<39, 1, u64> emit;
+ BitField<40, 1, u64> cut;
+ } const out{insn};
+
+ stream_index = v.ir.BitwiseAnd(stream_index, v.ir.Imm32(0b11));
+
+ if (out.emit != 0) {
+ v.ir.EmitVertex(stream_index);
+ }
+ if (out.cut != 0) {
+ v.ir.EndPrimitive(stream_index);
+ }
+ // Host doesn't need the output register, but we can write to it to avoid undefined reads
+ v.X(out.dest_reg, v.ir.Imm32(0));
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::OUT_reg(u64 insn) {
+ OUT(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::OUT_cbuf(u64 insn) {
+ OUT(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::OUT_imm(u64 insn) {
+ OUT(*this, insn, GetImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp
new file mode 100644
index 000000000..b4767afb5
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp
@@ -0,0 +1,46 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Mode : u64 {
+ Default,
+ CovMask,
+ Covered,
+ Offset,
+ CentroidOffset,
+ MyIndex,
+};
+} // Anonymous namespace
+
+void TranslatorVisitor::PIXLD(u64 insn) {
+ union {
+ u64 raw;
+ BitField<31, 3, Mode> mode;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> addr_reg;
+ BitField<20, 8, s64> addr_offset;
+ BitField<45, 3, IR::Pred> dest_pred;
+ } const pixld{insn};
+
+ if (pixld.dest_pred != IR::Pred::PT) {
+ throw NotImplementedException("Destination predicate");
+ }
+ if (pixld.addr_reg != IR::Reg::RZ || pixld.addr_offset != 0) {
+ throw NotImplementedException("Non-zero source register");
+ }
+ switch (pixld.mode) {
+ case Mode::MyIndex:
+ X(pixld.dest_reg, ir.SampleId());
+ break;
+ default:
+ throw NotImplementedException("Mode {}", pixld.mode.Value());
+ }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp
new file mode 100644
index 000000000..75d1fa8c1
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp
@@ -0,0 +1,38 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+void TranslatorVisitor::PSETP(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 3, IR::Pred> dest_pred_b;
+ BitField<3, 3, IR::Pred> dest_pred_a;
+ BitField<12, 3, IR::Pred> pred_a;
+ BitField<15, 1, u64> neg_pred_a;
+ BitField<24, 2, BooleanOp> bop_1;
+ BitField<29, 3, IR::Pred> pred_b;
+ BitField<32, 1, u64> neg_pred_b;
+ BitField<39, 3, IR::Pred> pred_c;
+ BitField<42, 1, u64> neg_pred_c;
+ BitField<45, 2, BooleanOp> bop_2;
+ } const pset{insn};
+
+ const IR::U1 pred_a{ir.GetPred(pset.pred_a, pset.neg_pred_a != 0)};
+ const IR::U1 pred_b{ir.GetPred(pset.pred_b, pset.neg_pred_b != 0)};
+ const IR::U1 pred_c{ir.GetPred(pset.pred_c, pset.neg_pred_c != 0)};
+
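+    // dest_pred_a receives (pred_a bop_1 pred_b) bop_2 pred_c, dest_pred_b receives the same
+    // expression with pred_a negated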
+ const IR::U1 lhs_a{PredicateCombine(ir, pred_a, pred_b, pset.bop_1)};
+ const IR::U1 lhs_b{PredicateCombine(ir, ir.LogicalNot(pred_a), pred_b, pset.bop_1)};
+ const IR::U1 result_a{PredicateCombine(ir, lhs_a, pred_c, pset.bop_2)};
+ const IR::U1 result_b{PredicateCombine(ir, lhs_b, pred_c, pset.bop_2)};
+
+ ir.SetPred(pset.dest_pred_a, result_a);
+ ir.SetPred(pset.dest_pred_b, result_b);
+}
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp
new file mode 100644
index 000000000..b02789874
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp
@@ -0,0 +1,53 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+void TranslatorVisitor::PSET(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<12, 3, IR::Pred> pred_a;
+ BitField<15, 1, u64> neg_pred_a;
+ BitField<24, 2, BooleanOp> bop_1;
+ BitField<29, 3, IR::Pred> pred_b;
+ BitField<32, 1, u64> neg_pred_b;
+ BitField<39, 3, IR::Pred> pred_c;
+ BitField<42, 1, u64> neg_pred_c;
+ BitField<44, 1, u64> bf;
+ BitField<45, 2, BooleanOp> bop_2;
+ BitField<47, 1, u64> cc;
+ } const pset{insn};
+
+ const IR::U1 pred_a{ir.GetPred(pset.pred_a, pset.neg_pred_a != 0)};
+ const IR::U1 pred_b{ir.GetPred(pset.pred_b, pset.neg_pred_b != 0)};
+ const IR::U1 pred_c{ir.GetPred(pset.pred_c, pset.neg_pred_c != 0)};
+
+ const IR::U1 res_1{PredicateCombine(ir, pred_a, pred_b, pset.bop_1)};
+ const IR::U1 res_2{PredicateCombine(ir, res_1, pred_c, pset.bop_2)};
+
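+    // With the BF bit set a true result is 1.0f (0x3f800000), otherwise it is all ones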
+ const IR::U32 true_result{pset.bf != 0 ? ir.Imm32(0x3f800000) : ir.Imm32(-1)};
+ const IR::U32 zero{ir.Imm32(0)};
+
+ const IR::U32 result{ir.Select(res_2, true_result, zero)};
+
+ X(pset.dest_reg, result);
+ if (pset.cc != 0) {
+ const IR::U1 is_zero{ir.IEqual(result, zero)};
+ SetZFlag(is_zero);
+ if (pset.bf != 0) {
+ ResetSFlag();
+ } else {
+ SetSFlag(ir.LogicalNot(is_zero));
+ }
+ ResetOFlag();
+ ResetCFlag();
+ }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp
new file mode 100644
index 000000000..93baa75a9
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp
@@ -0,0 +1,44 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+
+void SEL(TranslatorVisitor& v, u64 insn, const IR::U32& src) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<39, 3, IR::Pred> pred;
+ BitField<42, 1, u64> neg_pred;
+ } const sel{insn};
+
+    const IR::U1 pred{v.ir.GetPred(sel.pred)};
+ IR::U32 op_a{v.X(sel.src_reg)};
+ IR::U32 op_b{src};
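+    // A negated predicate is folded by swapping the operands of the select below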
+ if (sel.neg_pred != 0) {
+ std::swap(op_a, op_b);
+ }
+ const IR::U32 result{v.ir.Select(pred, op_a, op_b)};
+
+ v.X(sel.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::SEL_reg(u64 insn) {
+ SEL(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::SEL_cbuf(u64 insn) {
+ SEL(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::SEL_imm(u64 insn) {
+ SEL(*this, insn, GetImm20(insn));
+}
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp
new file mode 100644
index 000000000..63b588ad4
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp
@@ -0,0 +1,205 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <array>
+#include <bit>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Type : u64 {
+ _1D,
+ BUFFER_1D,
+ ARRAY_1D,
+ _2D,
+ ARRAY_2D,
+ _3D,
+};
+
+enum class Size : u64 {
+ U32,
+ S32,
+ U64,
+ S64,
+ F32FTZRN,
+ F16x2FTZRN,
+ SD32,
+ SD64,
+};
+
+enum class AtomicOp : u64 {
+ ADD,
+ MIN,
+ MAX,
+ INC,
+ DEC,
+ AND,
+ OR,
+ XOR,
+ EXCH,
+};
+
+enum class Clamp : u64 {
+ IGN,
+ Default,
+ TRAP,
+};
+
+TextureType GetType(Type type) {
+ switch (type) {
+ case Type::_1D:
+ return TextureType::Color1D;
+ case Type::BUFFER_1D:
+ return TextureType::Buffer;
+ case Type::ARRAY_1D:
+ return TextureType::ColorArray1D;
+ case Type::_2D:
+ return TextureType::Color2D;
+ case Type::ARRAY_2D:
+ return TextureType::ColorArray2D;
+ case Type::_3D:
+ return TextureType::Color3D;
+ }
+ throw NotImplementedException("Invalid type {}", type);
+}
+
+IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) {
+ switch (type) {
+ case Type::_1D:
+ case Type::BUFFER_1D:
+ return v.X(reg);
+ case Type::_2D:
+ return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1));
+ case Type::_3D:
+ return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2));
+ default:
+ break;
+ }
+ throw NotImplementedException("Invalid type {}", type);
+}
+
+IR::Value ApplyAtomicOp(IR::IREmitter& ir, const IR::U32& handle, const IR::Value& coords,
+ const IR::Value& op_b, IR::TextureInstInfo info, AtomicOp op,
+ bool is_signed) {
+ switch (op) {
+ case AtomicOp::ADD:
+ return ir.ImageAtomicIAdd(handle, coords, op_b, info);
+ case AtomicOp::MIN:
+ return ir.ImageAtomicIMin(handle, coords, op_b, is_signed, info);
+ case AtomicOp::MAX:
+ return ir.ImageAtomicIMax(handle, coords, op_b, is_signed, info);
+ case AtomicOp::INC:
+ return ir.ImageAtomicInc(handle, coords, op_b, info);
+ case AtomicOp::DEC:
+ return ir.ImageAtomicDec(handle, coords, op_b, info);
+ case AtomicOp::AND:
+ return ir.ImageAtomicAnd(handle, coords, op_b, info);
+ case AtomicOp::OR:
+ return ir.ImageAtomicOr(handle, coords, op_b, info);
+ case AtomicOp::XOR:
+ return ir.ImageAtomicXor(handle, coords, op_b, info);
+ case AtomicOp::EXCH:
+ return ir.ImageAtomicExchange(handle, coords, op_b, info);
+ default:
+ throw NotImplementedException("Atomic Operation {}", op);
+ }
+}
+
+ImageFormat Format(Size size) {
+ switch (size) {
+ case Size::U32:
+ case Size::S32:
+ case Size::SD32:
+ return ImageFormat::R32_UINT;
+ default:
+ break;
+ }
+ throw NotImplementedException("Invalid size {}", size);
+}
+
+bool IsSizeInt32(Size size) {
+ switch (size) {
+ case Size::U32:
+ case Size::S32:
+ case Size::SD32:
+ return true;
+ default:
+ return false;
+ }
+}
+
+void ImageAtomOp(TranslatorVisitor& v, IR::Reg dest_reg, IR::Reg operand_reg, IR::Reg coord_reg,
+ IR::Reg bindless_reg, AtomicOp op, Clamp clamp, Size size, Type type,
+ u64 bound_offset, bool is_bindless, bool write_result) {
+ if (clamp != Clamp::IGN) {
+ throw NotImplementedException("Clamp {}", clamp);
+ }
+ if (!IsSizeInt32(size)) {
+ throw NotImplementedException("Size {}", size);
+ }
+ const bool is_signed{size == Size::S32};
+ const ImageFormat format{Format(size)};
+ const TextureType tex_type{GetType(type)};
+ const IR::Value coords{MakeCoords(v, coord_reg, type)};
+
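+    // The handle is read from a register when bindless, otherwise built from the encoded offset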
+    const IR::U32 handle{is_bindless ? v.X(bindless_reg)
+                                     : v.ir.Imm32(static_cast<u32>(bound_offset * 4))};
+ IR::TextureInstInfo info{};
+ info.type.Assign(tex_type);
+ info.image_format.Assign(format);
+
+ // TODO: float/64-bit operand
+ const IR::Value op_b{v.X(operand_reg)};
+ const IR::Value color{ApplyAtomicOp(v.ir, handle, coords, op_b, info, op, is_signed)};
+
+ if (write_result) {
+ v.X(dest_reg, IR::U32{color});
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::SUATOM(u64 insn) {
+ union {
+ u64 raw;
+ BitField<54, 1, u64> is_bindless;
+ BitField<29, 4, AtomicOp> op;
+ BitField<33, 3, Type> type;
+ BitField<51, 3, Size> size;
+ BitField<49, 2, Clamp> clamp;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> coord_reg;
+ BitField<20, 8, IR::Reg> operand_reg;
+ BitField<36, 13, u64> bound_offset; // !is_bindless
+ BitField<39, 8, IR::Reg> bindless_reg; // is_bindless
+ } const suatom{insn};
+
+ ImageAtomOp(*this, suatom.dest_reg, suatom.operand_reg, suatom.coord_reg, suatom.bindless_reg,
+ suatom.op, suatom.clamp, suatom.size, suatom.type, suatom.bound_offset,
+ suatom.is_bindless != 0, true);
+}
+
+void TranslatorVisitor::SURED(u64 insn) {
+ // TODO: confirm offsets
+ union {
+ u64 raw;
+ BitField<51, 1, u64> is_bound;
+ BitField<21, 3, AtomicOp> op;
+ BitField<33, 3, Type> type;
+ BitField<20, 3, Size> size;
+ BitField<49, 2, Clamp> clamp;
+ BitField<0, 8, IR::Reg> operand_reg;
+ BitField<8, 8, IR::Reg> coord_reg;
+ BitField<36, 13, u64> bound_offset; // is_bound
+ BitField<39, 8, IR::Reg> bindless_reg; // !is_bound
+ } const sured{insn};
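+    // SURED is a reduction: apply the atomic operation without writing a result register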
+ ImageAtomOp(*this, IR::Reg::RZ, sured.operand_reg, sured.coord_reg, sured.bindless_reg,
+ sured.op, sured.clamp, sured.size, sured.type, sured.bound_offset,
+ sured.is_bound == 0, false);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp
new file mode 100644
index 000000000..681220a8d
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp
@@ -0,0 +1,281 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <array>
+#include <bit>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Type : u64 {
+ _1D,
+ BUFFER_1D,
+ ARRAY_1D,
+ _2D,
+ ARRAY_2D,
+ _3D,
+};
+
+constexpr unsigned R = 1 << 0;
+constexpr unsigned G = 1 << 1;
+constexpr unsigned B = 1 << 2;
+constexpr unsigned A = 1 << 3;
+
+constexpr std::array MASK{
+ 0U, //
+ R, //
+ G, //
+ R | G, //
+ B, //
+ R | B, //
+ G | B, //
+ R | G | B, //
+ A, //
+ R | A, //
+ G | A, //
+ R | G | A, //
+ B | A, //
+ R | B | A, //
+ G | B | A, //
+ R | G | B | A, //
+};
+
+enum class Size : u64 {
+ U8,
+ S8,
+ U16,
+ S16,
+ B32,
+ B64,
+ B128,
+};
+
+enum class Clamp : u64 {
+ IGN,
+ Default,
+ TRAP,
+};
+
+// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#cache-operators
+enum class LoadCache : u64 {
+ CA, // Cache at all levels, likely to be accessed again
+ CG, // Cache at global level (L2 and below, not L1)
+ CI, // ???
+ CV, // Don't cache and fetch again (volatile)
+};
+
+enum class StoreCache : u64 {
+ WB, // Cache write-back all coherent levels
+ CG, // Cache at global level (L2 and below, not L1)
+ CS, // Cache streaming, likely to be accessed once
+ WT, // Cache write-through (to system memory, volatile?)
+};
+
+ImageFormat Format(Size size) {
+ switch (size) {
+ case Size::U8:
+ return ImageFormat::R8_UINT;
+ case Size::S8:
+ return ImageFormat::R8_SINT;
+ case Size::U16:
+ return ImageFormat::R16_UINT;
+ case Size::S16:
+ return ImageFormat::R16_SINT;
+ case Size::B32:
+ return ImageFormat::R32_UINT;
+ case Size::B64:
+ return ImageFormat::R32G32_UINT;
+ case Size::B128:
+ return ImageFormat::R32G32B32A32_UINT;
+ }
+ throw NotImplementedException("Invalid size {}", size);
+}
+
+int SizeInRegs(Size size) {
+ switch (size) {
+ case Size::U8:
+ case Size::S8:
+ case Size::U16:
+ case Size::S16:
+ case Size::B32:
+ return 1;
+ case Size::B64:
+ return 2;
+ case Size::B128:
+ return 4;
+ }
+ throw NotImplementedException("Invalid size {}", size);
+}
+
+TextureType GetType(Type type) {
+ switch (type) {
+ case Type::_1D:
+ return TextureType::Color1D;
+ case Type::BUFFER_1D:
+ return TextureType::Buffer;
+ case Type::ARRAY_1D:
+ return TextureType::ColorArray1D;
+ case Type::_2D:
+ return TextureType::Color2D;
+ case Type::ARRAY_2D:
+ return TextureType::ColorArray2D;
+ case Type::_3D:
+ return TextureType::Color3D;
+ }
+ throw NotImplementedException("Invalid type {}", type);
+}
+
+IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) {
+ const auto array{[&](int index) {
+ return v.ir.BitFieldExtract(v.X(reg + index), v.ir.Imm32(0), v.ir.Imm32(16));
+ }};
+ switch (type) {
+ case Type::_1D:
+ case Type::BUFFER_1D:
+ return v.X(reg);
+ case Type::ARRAY_1D:
+ return v.ir.CompositeConstruct(v.X(reg), array(1));
+ case Type::_2D:
+ return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1));
+ case Type::ARRAY_2D:
+ return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), array(2));
+ case Type::_3D:
+ return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2));
+ }
+ throw NotImplementedException("Invalid type {}", type);
+}
+
+unsigned SwizzleMask(u64 swizzle) {
+ if (swizzle == 0 || swizzle >= MASK.size()) {
+ throw NotImplementedException("Invalid swizzle {}", swizzle);
+ }
+ return MASK[swizzle];
+}
+
+IR::Value MakeColor(IR::IREmitter& ir, IR::Reg reg, int num_regs) {
+ std::array<IR::U32, 4> colors;
+ for (int i = 0; i < num_regs; ++i) {
+ colors[static_cast<size_t>(i)] = ir.GetReg(reg + i);
+ }
+ for (int i = num_regs; i < 4; ++i) {
+ colors[static_cast<size_t>(i)] = ir.Imm32(0);
+ }
+ return ir.CompositeConstruct(colors[0], colors[1], colors[2], colors[3]);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::SULD(u64 insn) {
+ union {
+ u64 raw;
+ BitField<51, 1, u64> is_bound;
+ BitField<52, 1, u64> d;
+ BitField<23, 1, u64> ba;
+ BitField<33, 3, Type> type;
+ BitField<24, 2, LoadCache> cache;
+ BitField<20, 3, Size> size; // .D
+ BitField<20, 4, u64> swizzle; // .P
+ BitField<49, 2, Clamp> clamp;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> coord_reg;
+ BitField<36, 13, u64> bound_offset; // is_bound
+ BitField<39, 8, IR::Reg> bindless_reg; // !is_bound
+ } const suld{insn};
+
+ if (suld.clamp != Clamp::IGN) {
+ throw NotImplementedException("Clamp {}", suld.clamp.Value());
+ }
+ if (suld.cache != LoadCache::CA && suld.cache != LoadCache::CG) {
+ throw NotImplementedException("Cache {}", suld.cache.Value());
+ }
+ const bool is_typed{suld.d != 0};
+ if (is_typed && suld.ba != 0) {
+ throw NotImplementedException("BA");
+ }
+
+ const ImageFormat format{is_typed ? Format(suld.size) : ImageFormat::Typeless};
+ const TextureType type{GetType(suld.type)};
+ const IR::Value coords{MakeCoords(*this, suld.coord_reg, suld.type)};
+ const IR::U32 handle{suld.is_bound != 0 ? ir.Imm32(static_cast<u32>(suld.bound_offset * 4))
+ : X(suld.bindless_reg)};
+ IR::TextureInstInfo info{};
+ info.type.Assign(type);
+ info.image_format.Assign(format);
+
+ const IR::Value result{ir.ImageRead(handle, coords, info)};
+ IR::Reg dest_reg{suld.dest_reg};
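+    // Typed (.D) loads write consecutive registers; untyped (.P) loads follow the swizzle mask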
+ if (is_typed) {
+ const int num_regs{SizeInRegs(suld.size)};
+ for (int i = 0; i < num_regs; ++i) {
+ X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))});
+ }
+ } else {
+ const unsigned mask{SwizzleMask(suld.swizzle)};
+ const int bits{std::popcount(mask)};
+ if (!IR::IsAligned(dest_reg, bits == 3 ? 4 : static_cast<size_t>(bits))) {
+ throw NotImplementedException("Unaligned destination register");
+ }
+ for (unsigned component = 0; component < 4; ++component) {
+ if (((mask >> component) & 1) == 0) {
+ continue;
+ }
+ X(dest_reg, IR::U32{ir.CompositeExtract(result, component)});
+ ++dest_reg;
+ }
+ }
+}
+
+void TranslatorVisitor::SUST(u64 insn) {
+ union {
+ u64 raw;
+ BitField<51, 1, u64> is_bound;
+ BitField<52, 1, u64> d;
+ BitField<23, 1, u64> ba;
+ BitField<33, 3, Type> type;
+ BitField<24, 2, StoreCache> cache;
+ BitField<20, 3, Size> size; // .D
+ BitField<20, 4, u64> swizzle; // .P
+ BitField<49, 2, Clamp> clamp;
+ BitField<0, 8, IR::Reg> data_reg;
+ BitField<8, 8, IR::Reg> coord_reg;
+ BitField<36, 13, u64> bound_offset; // is_bound
+ BitField<39, 8, IR::Reg> bindless_reg; // !is_bound
+ } const sust{insn};
+
+ if (sust.clamp != Clamp::IGN) {
+ throw NotImplementedException("Clamp {}", sust.clamp.Value());
+ }
+ if (sust.cache != StoreCache::WB && sust.cache != StoreCache::CG) {
+ throw NotImplementedException("Cache {}", sust.cache.Value());
+ }
+ const bool is_typed{sust.d != 0};
+ if (is_typed && sust.ba != 0) {
+ throw NotImplementedException("BA");
+ }
+ const ImageFormat format{is_typed ? Format(sust.size) : ImageFormat::Typeless};
+ const TextureType type{GetType(sust.type)};
+ const IR::Value coords{MakeCoords(*this, sust.coord_reg, sust.type)};
+ const IR::U32 handle{sust.is_bound != 0 ? ir.Imm32(static_cast<u32>(sust.bound_offset * 4))
+ : X(sust.bindless_reg)};
+ IR::TextureInstInfo info{};
+ info.type.Assign(type);
+ info.image_format.Assign(format);
+
+ IR::Value color;
+ if (is_typed) {
+ color = MakeColor(ir, sust.data_reg, SizeInRegs(sust.size));
+ } else {
+ const unsigned mask{SwizzleMask(sust.swizzle)};
+ if (mask != 0xf) {
+ throw NotImplementedException("Non-full mask");
+ }
+ color = MakeColor(ir, sust.data_reg, 4);
+ }
+ ir.ImageWrite(handle, coords, color, info);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp
new file mode 100644
index 000000000..0046b5edd
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp
@@ -0,0 +1,236 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <optional>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Blod : u64 {
+ None,
+ LZ,
+ LB,
+ LL,
+ INVALIDBLOD4,
+ INVALIDBLOD5,
+ LBA,
+ LLA,
+};
+
+enum class TextureType : u64 {
+ _1D,
+ ARRAY_1D,
+ _2D,
+ ARRAY_2D,
+ _3D,
+ ARRAY_3D,
+ CUBE,
+ ARRAY_CUBE,
+};
+
+Shader::TextureType GetType(TextureType type) {
+ switch (type) {
+ case TextureType::_1D:
+ return Shader::TextureType::Color1D;
+ case TextureType::ARRAY_1D:
+ return Shader::TextureType::ColorArray1D;
+ case TextureType::_2D:
+ return Shader::TextureType::Color2D;
+ case TextureType::ARRAY_2D:
+ return Shader::TextureType::ColorArray2D;
+ case TextureType::_3D:
+ return Shader::TextureType::Color3D;
+ case TextureType::ARRAY_3D:
+ throw NotImplementedException("3D array texture type");
+ case TextureType::CUBE:
+ return Shader::TextureType::ColorCube;
+ case TextureType::ARRAY_CUBE:
+ return Shader::TextureType::ColorArrayCube;
+ }
+ throw NotImplementedException("Invalid texture type {}", type);
+}
+
+IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
+ const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }};
+ switch (type) {
+ case TextureType::_1D:
+ return v.F(reg);
+ case TextureType::ARRAY_1D:
+ return v.ir.CompositeConstruct(v.F(reg + 1), read_array());
+ case TextureType::_2D:
+ return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1));
+ case TextureType::ARRAY_2D:
+ return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), read_array());
+ case TextureType::_3D:
+ return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
+ case TextureType::ARRAY_3D:
+ throw NotImplementedException("3D array texture type");
+ case TextureType::CUBE:
+ return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
+ case TextureType::ARRAY_CUBE:
+ return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3), read_array());
+ }
+ throw NotImplementedException("Invalid texture type {}", type);
+}
+
+IR::F32 MakeLod(TranslatorVisitor& v, IR::Reg& reg, Blod blod) {
+ switch (blod) {
+ case Blod::None:
+ return v.ir.Imm32(0.0f);
+ case Blod::LZ:
+ return v.ir.Imm32(0.0f);
+ case Blod::LB:
+ case Blod::LL:
+ case Blod::LBA:
+ case Blod::LLA:
+ return v.F(reg++);
+ case Blod::INVALIDBLOD4:
+ case Blod::INVALIDBLOD5:
+ break;
+ }
+ throw NotImplementedException("Invalid blod {}", blod);
+}
+
+IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) {
+ const IR::U32 value{v.X(reg++)};
+ switch (type) {
+ case TextureType::_1D:
+ case TextureType::ARRAY_1D:
+ return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true);
+ case TextureType::_2D:
+ case TextureType::ARRAY_2D:
+ return v.ir.CompositeConstruct(
+ v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true));
+ case TextureType::_3D:
+ case TextureType::ARRAY_3D:
+ return v.ir.CompositeConstruct(
+ v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4), true));
+ case TextureType::CUBE:
+ case TextureType::ARRAY_CUBE:
+ throw NotImplementedException("Illegal offset on CUBE sample");
+ }
+ throw NotImplementedException("Invalid texture type {}", type);
+}
+
+bool HasExplicitLod(Blod blod) {
+ switch (blod) {
+ case Blod::LL:
+ case Blod::LLA:
+ case Blod::LZ:
+ return true;
+ default:
+ return false;
+ }
+}
+
+void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc,
+ std::optional<u32> cbuf_offset) {
+ union {
+ u64 raw;
+ BitField<35, 1, u64> ndv;
+ BitField<49, 1, u64> nodep;
+ BitField<50, 1, u64> dc;
+ BitField<51, 3, IR::Pred> sparse_pred;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> coord_reg;
+ BitField<20, 8, IR::Reg> meta_reg;
+ BitField<28, 3, TextureType> type;
+ BitField<31, 4, u64> mask;
+ } const tex{insn};
+
+ if (lc) {
+ throw NotImplementedException("LC");
+ }
+ const IR::Value coords{MakeCoords(v, tex.coord_reg, tex.type)};
+
+ IR::Reg meta_reg{tex.meta_reg};
+ IR::Value handle;
+ IR::Value offset;
+ IR::F32 dref;
+ IR::F32 lod_clamp;
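+    // The optional bindless handle, LOD, AOFFI offset and depth-compare reference are read from
+    // consecutive registers starting at meta_reg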
+ if (cbuf_offset) {
+ handle = v.ir.Imm32(*cbuf_offset);
+ } else {
+ handle = v.X(meta_reg++);
+ }
+ const IR::F32 lod{MakeLod(v, meta_reg, blod)};
+ if (aoffi) {
+ offset = MakeOffset(v, meta_reg, tex.type);
+ }
+ if (tex.dc != 0) {
+ dref = v.F(meta_reg++);
+ }
+ IR::TextureInstInfo info{};
+ info.type.Assign(GetType(tex.type));
+ info.is_depth.Assign(tex.dc != 0 ? 1 : 0);
+ info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0);
+ info.has_lod_clamp.Assign(lc ? 1 : 0);
+
+ const IR::Value sample{[&]() -> IR::Value {
+ if (tex.dc == 0) {
+ if (HasExplicitLod(blod)) {
+ return v.ir.ImageSampleExplicitLod(handle, coords, lod, offset, info);
+ } else {
+ return v.ir.ImageSampleImplicitLod(handle, coords, lod, offset, lod_clamp, info);
+ }
+ }
+ if (HasExplicitLod(blod)) {
+ return v.ir.ImageSampleDrefExplicitLod(handle, coords, dref, lod, offset, info);
+ } else {
+ return v.ir.ImageSampleDrefImplicitLod(handle, coords, dref, lod, offset, lod_clamp,
+ info);
+ }
+ }()};
+
+ IR::Reg dest_reg{tex.dest_reg};
+ for (int element = 0; element < 4; ++element) {
+ if (((tex.mask >> element) & 1) == 0) {
+ continue;
+ }
+ IR::F32 value;
+ if (tex.dc != 0) {
+ value = element < 3 ? IR::F32{sample} : v.ir.Imm32(1.0f);
+ } else {
+ value = IR::F32{v.ir.CompositeExtract(sample, static_cast<size_t>(element))};
+ }
+ v.F(dest_reg, value);
+ ++dest_reg;
+ }
+ if (tex.sparse_pred != IR::Pred::PT) {
+ v.ir.SetPred(tex.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::TEX(u64 insn) {
+ union {
+ u64 raw;
+ BitField<54, 1, u64> aoffi;
+ BitField<55, 3, Blod> blod;
+ BitField<58, 1, u64> lc;
+ BitField<36, 13, u64> cbuf_offset;
+ } const tex{insn};
+
+ Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast<u32>(tex.cbuf_offset * 4));
+}
+
+void TranslatorVisitor::TEX_b(u64 insn) {
+ union {
+ u64 raw;
+ BitField<36, 1, u64> aoffi;
+ BitField<37, 3, Blod> blod;
+ BitField<40, 1, u64> lc;
+ } const tex{insn};
+
+ Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, std::nullopt);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
new file mode 100644
index 000000000..154e7f1a1
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
@@ -0,0 +1,266 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <utility>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Precision : u64 {
+ F16,
+ F32,
+};
+
+union Encoding {
+ u64 raw;
+ BitField<59, 1, Precision> precision;
+ BitField<53, 4, u64> encoding;
+ BitField<49, 1, u64> nodep;
+ BitField<28, 8, IR::Reg> dest_reg_b;
+ BitField<0, 8, IR::Reg> dest_reg_a;
+ BitField<8, 8, IR::Reg> src_reg_a;
+ BitField<20, 8, IR::Reg> src_reg_b;
+ BitField<36, 13, u64> cbuf_offset;
+ BitField<50, 3, u64> swizzle;
+};
+
+constexpr unsigned R = 1;
+constexpr unsigned G = 2;
+constexpr unsigned B = 4;
+constexpr unsigned A = 8;
+
+constexpr std::array RG_LUT{
+ R, //
+ G, //
+ B, //
+ A, //
+ R | G, //
+ R | A, //
+ G | A, //
+ B | A, //
+};
+
+constexpr std::array RGBA_LUT{
+ R | G | B, //
+ R | G | A, //
+ R | B | A, //
+ G | B | A, //
+ R | G | B | A, //
+};
+
+void CheckAlignment(IR::Reg reg, size_t alignment) {
+ if (!IR::IsAligned(reg, alignment)) {
+ throw NotImplementedException("Unaligned source register {}", reg);
+ }
+}
+
+template <typename... Args>
+IR::Value Composite(TranslatorVisitor& v, Args... regs) {
+ return v.ir.CompositeConstruct(v.F(regs)...);
+}
+
+IR::F32 ReadArray(TranslatorVisitor& v, const IR::U32& value) {
+ return v.ir.ConvertUToF(32, 16, v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(16)));
+}
+
+IR::Value Sample(TranslatorVisitor& v, u64 insn) {
+ const Encoding texs{insn};
+ const IR::U32 handle{v.ir.Imm32(static_cast<u32>(texs.cbuf_offset * 4))};
+ const IR::F32 zero{v.ir.Imm32(0.0f)};
+ const IR::Reg reg_a{texs.src_reg_a};
+ const IR::Reg reg_b{texs.src_reg_b};
+ IR::TextureInstInfo info{};
+ if (texs.precision == Precision::F16) {
+ info.relaxed_precision.Assign(1);
+ }
+ switch (texs.encoding) {
+ case 0: // 1D.LZ
+ info.type.Assign(TextureType::Color1D);
+ return v.ir.ImageSampleExplicitLod(handle, v.F(reg_a), zero, {}, info);
+ case 1: // 2D
+ info.type.Assign(TextureType::Color2D);
+ return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_b), {}, {}, {}, info);
+ case 2: // 2D.LZ
+ info.type.Assign(TextureType::Color2D);
+ return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_b), zero, {}, info);
+ case 3: // 2D.LL
+ CheckAlignment(reg_a, 2);
+ info.type.Assign(TextureType::Color2D);
+ return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), {},
+ info);
+ case 4: // 2D.DC
+ CheckAlignment(reg_a, 2);
+ info.type.Assign(TextureType::Color2D);
+ info.is_depth.Assign(1);
+ return v.ir.ImageSampleDrefImplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b),
+ {}, {}, {}, info);
+ case 5: // 2D.LL.DC
+ CheckAlignment(reg_a, 2);
+ CheckAlignment(reg_b, 2);
+ info.type.Assign(TextureType::Color2D);
+ info.is_depth.Assign(1);
+ return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1),
+ v.F(reg_b + 1), v.F(reg_b), {}, info);
+ case 6: // 2D.LZ.DC
+ CheckAlignment(reg_a, 2);
+ info.type.Assign(TextureType::Color2D);
+ info.is_depth.Assign(1);
+ return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b),
+ zero, {}, info);
+ case 7: // ARRAY_2D
+ CheckAlignment(reg_a, 2);
+ info.type.Assign(TextureType::ColorArray2D);
+ return v.ir.ImageSampleImplicitLod(
+ handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))),
+ {}, {}, {}, info);
+ case 8: // ARRAY_2D.LZ
+ CheckAlignment(reg_a, 2);
+ info.type.Assign(TextureType::ColorArray2D);
+ return v.ir.ImageSampleExplicitLod(
+ handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))),
+ zero, {}, info);
+ case 9: // ARRAY_2D.LZ.DC
+ CheckAlignment(reg_a, 2);
+ CheckAlignment(reg_b, 2);
+ info.type.Assign(TextureType::ColorArray2D);
+ info.is_depth.Assign(1);
+ return v.ir.ImageSampleDrefExplicitLod(
+ handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))),
+ v.F(reg_b + 1), zero, {}, info);
+ case 10: // 3D
+ CheckAlignment(reg_a, 2);
+ info.type.Assign(TextureType::Color3D);
+ return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), {}, {},
+ {}, info);
+ case 11: // 3D.LZ
+ CheckAlignment(reg_a, 2);
+ info.type.Assign(TextureType::Color3D);
+ return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), zero, {},
+ info);
+ case 12: // CUBE
+ CheckAlignment(reg_a, 2);
+ info.type.Assign(TextureType::ColorCube);
+ return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), {}, {},
+ {}, info);
+ case 13: // CUBE.LL
+ CheckAlignment(reg_a, 2);
+ CheckAlignment(reg_b, 2);
+ info.type.Assign(TextureType::ColorCube);
+ return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b),
+ v.F(reg_b + 1), {}, info);
+ default:
+ throw NotImplementedException("Illegal encoding {}", texs.encoding.Value());
+ }
+}
+
+unsigned Swizzle(u64 insn) {
+ const Encoding texs{insn};
+ const size_t encoding{texs.swizzle};
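+    // RZ as second destination selects a one or two component swizzle, otherwise three or four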
+ if (texs.dest_reg_b == IR::Reg::RZ) {
+ if (encoding >= RG_LUT.size()) {
+ throw NotImplementedException("Illegal RG encoding {}", encoding);
+ }
+ return RG_LUT[encoding];
+ } else {
+ if (encoding >= RGBA_LUT.size()) {
+ throw NotImplementedException("Illegal RGBA encoding {}", encoding);
+ }
+ return RGBA_LUT[encoding];
+ }
+}
+
+IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) {
+ const bool is_shadow{sample.Type() == IR::Type::F32};
+ if (is_shadow) {
+ const bool is_alpha{component == 3};
+ return is_alpha ? v.ir.Imm32(1.0f) : IR::F32{sample};
+ } else {
+ return IR::F32{v.ir.CompositeExtract(sample, component)};
+ }
+}
+
+IR::Reg RegStoreComponent32(u64 insn, unsigned index) {
+ const Encoding texs{insn};
+ switch (index) {
+ case 0:
+ return texs.dest_reg_a;
+ case 1:
+ CheckAlignment(texs.dest_reg_a, 2);
+ return texs.dest_reg_a + 1;
+ case 2:
+ return texs.dest_reg_b;
+ case 3:
+ CheckAlignment(texs.dest_reg_b, 2);
+ return texs.dest_reg_b + 1;
+ }
+ throw LogicError("Invalid store index {}", index);
+}
+
+void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
+ const unsigned swizzle{Swizzle(insn)};
+ unsigned store_index{0};
+ for (unsigned component = 0; component < 4; ++component) {
+ if (((swizzle >> component) & 1) == 0) {
+ continue;
+ }
+ const IR::Reg dest{RegStoreComponent32(insn, store_index)};
+ v.F(dest, Extract(v, sample, component));
+ ++store_index;
+ }
+}
+
+IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) {
+ return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs));
+}
+
+void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
+ const unsigned swizzle{Swizzle(insn)};
+ unsigned store_index{0};
+ std::array<IR::F32, 4> swizzled;
+ for (unsigned component = 0; component < 4; ++component) {
+ if (((swizzle >> component) & 1) == 0) {
+ continue;
+ }
+ swizzled[store_index] = Extract(v, sample, component);
+ ++store_index;
+ }
+ const IR::F32 zero{v.ir.Imm32(0.0f)};
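+    // Half-precision results are packed two components per destination register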
+ const Encoding texs{insn};
+ switch (store_index) {
+ case 1:
+ v.X(texs.dest_reg_a, Pack(v, swizzled[0], zero));
+ break;
+ case 2:
+ case 3:
+ case 4:
+ v.X(texs.dest_reg_a, Pack(v, swizzled[0], swizzled[1]));
+ switch (store_index) {
+ case 2:
+ break;
+ case 3:
+ v.X(texs.dest_reg_b, Pack(v, swizzled[2], zero));
+ break;
+ case 4:
+ v.X(texs.dest_reg_b, Pack(v, swizzled[2], swizzled[3]));
+ break;
+ }
+ break;
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::TEXS(u64 insn) {
+ const IR::Value sample{Sample(*this, insn)};
+ if (Encoding{insn}.precision == Precision::F32) {
+ Store32(*this, insn, sample);
+ } else {
+ Store16(*this, insn, sample);
+ }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp
new file mode 100644
index 000000000..218cbc1a8
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp
@@ -0,0 +1,208 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <optional>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+
+enum class TextureType : u64 {
+ _1D,
+ ARRAY_1D,
+ _2D,
+ ARRAY_2D,
+ _3D,
+ ARRAY_3D,
+ CUBE,
+ ARRAY_CUBE,
+};
+
+enum class OffsetType : u64 {
+ None = 0,
+ AOFFI,
+ PTP,
+ Invalid,
+};
+
+enum class ComponentType : u64 {
+ R = 0,
+ G = 1,
+ B = 2,
+ A = 3,
+};
+
+Shader::TextureType GetType(TextureType type) {
+ switch (type) {
+ case TextureType::_1D:
+ return Shader::TextureType::Color1D;
+ case TextureType::ARRAY_1D:
+ return Shader::TextureType::ColorArray1D;
+ case TextureType::_2D:
+ return Shader::TextureType::Color2D;
+ case TextureType::ARRAY_2D:
+ return Shader::TextureType::ColorArray2D;
+ case TextureType::_3D:
+ return Shader::TextureType::Color3D;
+ case TextureType::ARRAY_3D:
+ throw NotImplementedException("3D array texture type");
+ case TextureType::CUBE:
+ return Shader::TextureType::ColorCube;
+ case TextureType::ARRAY_CUBE:
+ return Shader::TextureType::ColorArrayCube;
+ }
+ throw NotImplementedException("Invalid texture type {}", type);
+}
+
+IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
+ const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }};
+ switch (type) {
+ case TextureType::_1D:
+ return v.F(reg);
+ case TextureType::ARRAY_1D:
+ return v.ir.CompositeConstruct(v.F(reg + 1), read_array());
+ case TextureType::_2D:
+ return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1));
+ case TextureType::ARRAY_2D:
+ return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), read_array());
+ case TextureType::_3D:
+ return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
+ case TextureType::ARRAY_3D:
+ throw NotImplementedException("3D array texture type");
+ case TextureType::CUBE:
+ return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
+ case TextureType::ARRAY_CUBE:
+ return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3), read_array());
+ }
+ throw NotImplementedException("Invalid texture type {}", type);
+}
+
+IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) {
+ const IR::U32 value{v.X(reg++)};
+ switch (type) {
+ case TextureType::_1D:
+ case TextureType::ARRAY_1D:
+ return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true);
+ case TextureType::_2D:
+ case TextureType::ARRAY_2D:
+ return v.ir.CompositeConstruct(
+ v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true));
+ case TextureType::_3D:
+ case TextureType::ARRAY_3D:
+ return v.ir.CompositeConstruct(
+ v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(16), v.ir.Imm32(6), true));
+ case TextureType::CUBE:
+ case TextureType::ARRAY_CUBE:
+ throw NotImplementedException("Illegal offset on CUBE sample");
+ }
+ throw NotImplementedException("Invalid texture type {}", type);
+}
+
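+// Each PTP source register packs four signed 6-bit offsets at byte boundaries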
+std::pair<IR::Value, IR::Value> MakeOffsetPTP(TranslatorVisitor& v, IR::Reg& reg) {
+ const IR::U32 value1{v.X(reg++)};
+ const IR::U32 value2{v.X(reg++)};
+ const IR::U32 bitsize{v.ir.Imm32(6)};
+ const auto make_vector{[&v, &bitsize](const IR::U32& value) {
+ return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), bitsize, true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(8), bitsize, true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(16), bitsize, true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(24), bitsize, true));
+ }};
+ return {make_vector(value1), make_vector(value2)};
+}
+
+void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetType offset_type,
+ bool is_bindless) {
+ union {
+ u64 raw;
+ BitField<35, 1, u64> ndv;
+ BitField<49, 1, u64> nodep;
+ BitField<50, 1, u64> dc;
+ BitField<51, 3, IR::Pred> sparse_pred;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> coord_reg;
+ BitField<20, 8, IR::Reg> meta_reg;
+ BitField<28, 3, TextureType> type;
+ BitField<31, 4, u64> mask;
+ BitField<36, 13, u64> cbuf_offset;
+ } const tld4{insn};
+
+ const IR::Value coords{MakeCoords(v, tld4.coord_reg, tld4.type)};
+
+ IR::Reg meta_reg{tld4.meta_reg};
+ IR::Value handle;
+ IR::Value offset;
+ IR::Value offset2;
+ IR::F32 dref;
+ if (!is_bindless) {
+ handle = v.ir.Imm32(static_cast<u32>(tld4.cbuf_offset.Value() * 4));
+ } else {
+ handle = v.X(meta_reg++);
+ }
+ switch (offset_type) {
+ case OffsetType::None:
+ break;
+ case OffsetType::AOFFI:
+ offset = MakeOffset(v, meta_reg, tld4.type);
+ break;
+ case OffsetType::PTP:
+ std::tie(offset, offset2) = MakeOffsetPTP(v, meta_reg);
+ break;
+ default:
+ throw NotImplementedException("Invalid offset type {}", offset_type);
+ }
+ if (tld4.dc != 0) {
+ dref = v.F(meta_reg++);
+ }
+ IR::TextureInstInfo info{};
+ info.type.Assign(GetType(tld4.type));
+ info.is_depth.Assign(tld4.dc != 0 ? 1 : 0);
+ info.gather_component.Assign(static_cast<u32>(component_type));
+ const IR::Value sample{[&] {
+ if (tld4.dc == 0) {
+ return v.ir.ImageGather(handle, coords, offset, offset2, info);
+ }
+ return v.ir.ImageGatherDref(handle, coords, offset, offset2, dref, info);
+ }()};
+
+ IR::Reg dest_reg{tld4.dest_reg};
+ for (size_t element = 0; element < 4; ++element) {
+ if (((tld4.mask >> element) & 1) == 0) {
+ continue;
+ }
+ v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)});
+ ++dest_reg;
+ }
+ if (tld4.sparse_pred != IR::Pred::PT) {
+ v.ir.SetPred(tld4.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::TLD4(u64 insn) {
+ union {
+ u64 raw;
+ BitField<56, 2, ComponentType> component;
+ BitField<54, 2, OffsetType> offset;
+ } const tld4{insn};
+ Impl(*this, insn, tld4.component, tld4.offset, false);
+}
+
+void TranslatorVisitor::TLD4_b(u64 insn) {
+ union {
+ u64 raw;
+ BitField<38, 2, ComponentType> component;
+ BitField<36, 2, OffsetType> offset;
+ } const tld4{insn};
+ Impl(*this, insn, tld4.component, tld4.offset, true);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp
new file mode 100644
index 000000000..34efa2d50
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp
@@ -0,0 +1,134 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <utility>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Precision : u64 {
+ F32,
+ F16,
+};
+
+enum class ComponentType : u64 {
+ R = 0,
+ G = 1,
+ B = 2,
+ A = 3,
+};
+
+union Encoding {
+ u64 raw;
+ BitField<55, 1, Precision> precision;
+ BitField<52, 2, ComponentType> component_type;
+ BitField<51, 1, u64> aoffi;
+ BitField<50, 1, u64> dc;
+ BitField<49, 1, u64> nodep;
+ BitField<28, 8, IR::Reg> dest_reg_b;
+ BitField<0, 8, IR::Reg> dest_reg_a;
+ BitField<8, 8, IR::Reg> src_reg_a;
+ BitField<20, 8, IR::Reg> src_reg_b;
+ BitField<36, 13, u64> cbuf_offset;
+};
+
+void CheckAlignment(IR::Reg reg, size_t alignment) {
+ if (!IR::IsAligned(reg, alignment)) {
+ throw NotImplementedException("Unaligned source register {}", reg);
+ }
+}
+
+IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) {
+ const IR::U32 value{v.X(reg)};
+ return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true));
+}
+
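+// TLD4S is a compact TLD4 encoding limited to 2D textures: the handle always comes from the
+// constant buffer, and registers A and B are shared between coordinates, the packed offset and
+// the depth-compare reference depending on the AOFFI and DC modifiers.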
+IR::Value Sample(TranslatorVisitor& v, u64 insn) {
+ const Encoding tld4s{insn};
+ const IR::U32 handle{v.ir.Imm32(static_cast<u32>(tld4s.cbuf_offset * 4))};
+ const IR::Reg reg_a{tld4s.src_reg_a};
+ const IR::Reg reg_b{tld4s.src_reg_b};
+ IR::TextureInstInfo info{};
+ if (tld4s.precision == Precision::F16) {
+ info.relaxed_precision.Assign(1);
+ }
+ info.gather_component.Assign(static_cast<u32>(tld4s.component_type.Value()));
+ info.type.Assign(Shader::TextureType::Color2D);
+ info.is_depth.Assign(tld4s.dc != 0 ? 1 : 0);
+ IR::Value coords;
+ if (tld4s.aoffi != 0) {
+ CheckAlignment(reg_a, 2);
+ coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_a + 1));
+ IR::Value offset = MakeOffset(v, reg_b);
+ if (tld4s.dc != 0) {
+ CheckAlignment(reg_b, 2);
+ IR::F32 dref = v.F(reg_b + 1);
+ return v.ir.ImageGatherDref(handle, coords, offset, {}, dref, info);
+ }
+ return v.ir.ImageGather(handle, coords, offset, {}, info);
+ }
+ if (tld4s.dc != 0) {
+ CheckAlignment(reg_a, 2);
+ coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_a + 1));
+ IR::F32 dref = v.F(reg_b);
+ return v.ir.ImageGatherDref(handle, coords, {}, {}, dref, info);
+ }
+ coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_b));
+ return v.ir.ImageGather(handle, coords, {}, {}, info);
+}
+
+IR::Reg RegStoreComponent32(u64 insn, size_t index) {
+ const Encoding tlds4{insn};
+ switch (index) {
+ case 0:
+ return tlds4.dest_reg_a;
+ case 1:
+ CheckAlignment(tlds4.dest_reg_a, 2);
+ return tlds4.dest_reg_a + 1;
+ case 2:
+ return tlds4.dest_reg_b;
+ case 3:
+ CheckAlignment(tlds4.dest_reg_b, 2);
+ return tlds4.dest_reg_b + 1;
+ }
+ throw LogicError("Invalid store index {}", index);
+}
+
+void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
+ for (size_t component = 0; component < 4; ++component) {
+ const IR::Reg dest{RegStoreComponent32(insn, component)};
+ v.F(dest, IR::F32{v.ir.CompositeExtract(sample, component)});
+ }
+}
+
+IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) {
+ return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs));
+}
+
+void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
+ std::array<IR::F32, 4> swizzled;
+ for (size_t component = 0; component < 4; ++component) {
+ swizzled[component] = IR::F32{v.ir.CompositeExtract(sample, component)};
+ }
+ const Encoding tld4s{insn};
+ v.X(tld4s.dest_reg_a, Pack(v, swizzled[0], swizzled[1]));
+ v.X(tld4s.dest_reg_b, Pack(v, swizzled[2], swizzled[3]));
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::TLD4S(u64 insn) {
+ const IR::Value sample{Sample(*this, insn)};
+ if (Encoding{insn}.precision == Precision::F32) {
+ Store32(*this, insn, sample);
+ } else {
+ Store16(*this, insn, sample);
+ }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp
new file mode 100644
index 000000000..c3fe3ffda
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp
@@ -0,0 +1,182 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <optional>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+
+enum class TextureType : u64 {
+ _1D,
+ ARRAY_1D,
+ _2D,
+ ARRAY_2D,
+ _3D,
+ ARRAY_3D,
+ CUBE,
+ ARRAY_CUBE,
+};
+
+Shader::TextureType GetType(TextureType type) {
+ switch (type) {
+ case TextureType::_1D:
+ return Shader::TextureType::Color1D;
+ case TextureType::ARRAY_1D:
+ return Shader::TextureType::ColorArray1D;
+ case TextureType::_2D:
+ return Shader::TextureType::Color2D;
+ case TextureType::ARRAY_2D:
+ return Shader::TextureType::ColorArray2D;
+ case TextureType::_3D:
+ return Shader::TextureType::Color3D;
+ case TextureType::ARRAY_3D:
+ throw NotImplementedException("3D array texture type");
+ case TextureType::CUBE:
+ return Shader::TextureType::ColorCube;
+ case TextureType::ARRAY_CUBE:
+ return Shader::TextureType::ColorArrayCube;
+ }
+ throw NotImplementedException("Invalid texture type {}", type);
+}
+
+IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg, bool has_lod_clamp) {
+ const IR::U32 value{v.X(reg)};
+ const u32 base{has_lod_clamp ? 12U : 16U};
+ return v.ir.CompositeConstruct(
+ v.ir.BitFieldExtract(value, v.ir.Imm32(base), v.ir.Imm32(4), true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(base + 4), v.ir.Imm32(4), true));
+}
+
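+// TXD samples a texture with explicitly provided derivatives: one derivative pair is read per
+// coordinate component, so 1D types consume two derivative registers and 2D types consume four.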
+void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
+ union {
+ u64 raw;
+ BitField<49, 1, u64> nodep;
+ BitField<35, 1, u64> aoffi;
+ BitField<50, 1, u64> lc;
+ BitField<51, 3, IR::Pred> sparse_pred;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> coord_reg;
+ BitField<20, 8, IR::Reg> derivate_reg;
+ BitField<28, 3, TextureType> type;
+ BitField<31, 4, u64> mask;
+ BitField<36, 13, u64> cbuf_offset;
+ } const txd{insn};
+
+ const bool has_lod_clamp = txd.lc != 0;
+ if (has_lod_clamp) {
+ throw NotImplementedException("TXD.LC - CLAMP is not implemented");
+ }
+
+ IR::Value coords;
+ u32 num_derivates{};
+ IR::Reg base_reg{txd.coord_reg};
+ IR::Reg last_reg;
+ IR::Value handle;
+ if (is_bindless) {
+ handle = v.X(base_reg++);
+ } else {
+ handle = v.ir.Imm32(static_cast<u32>(txd.cbuf_offset.Value() * 4));
+ }
+
+ const auto read_array{[&]() -> IR::F32 {
+ const IR::U32 base{v.ir.Imm32(0)};
+ const IR::U32 count{v.ir.Imm32(has_lod_clamp ? 12 : 16)};
+ const IR::U32 array_index{v.ir.BitFieldExtract(v.X(last_reg), base, count)};
+ return v.ir.ConvertUToF(32, 16, array_index);
+ }};
+ switch (txd.type) {
+ case TextureType::_1D: {
+ coords = v.F(base_reg);
+ num_derivates = 1;
+ last_reg = base_reg + 1;
+ break;
+ }
+ case TextureType::ARRAY_1D: {
+ last_reg = base_reg + 1;
+ coords = v.ir.CompositeConstruct(v.F(base_reg), read_array());
+ num_derivates = 1;
+ break;
+ }
+ case TextureType::_2D: {
+ last_reg = base_reg + 2;
+ coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1));
+ num_derivates = 2;
+ break;
+ }
+ case TextureType::ARRAY_2D: {
+ last_reg = base_reg + 2;
+ coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1), read_array());
+ num_derivates = 2;
+ break;
+ }
+ default:
+ throw NotImplementedException("Invalid texture type");
+ }
+
+ const IR::Reg derivate_reg{txd.derivate_reg};
+ IR::Value derivates;
+ switch (num_derivates) {
+ case 1: {
+ derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1));
+ break;
+ }
+ case 2: {
+ derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1),
+ v.F(derivate_reg + 2), v.F(derivate_reg + 3));
+ break;
+ }
+ default:
+        throw NotImplementedException("Invalid number of derivatives");
+ }
+
+ IR::Value offset;
+ if (txd.aoffi != 0) {
+ offset = MakeOffset(v, last_reg, has_lod_clamp);
+ }
+
+ IR::F32 lod_clamp;
+ if (has_lod_clamp) {
+        // The LOD clamp is a 4.8 fixed-point value that has to be converted to float.
+        // A fixed-point value is converted as float(value) / float(1 << fraction_bits);
+        // in this case the number of fraction bits is 8.
+ const IR::F32 conv4_8fixp_f{v.ir.Imm32(static_cast<f32>(1U << 8))};
+ const IR::F32 fixp_lc{v.ir.ConvertUToF(
+ 32, 16, v.ir.BitFieldExtract(v.X(last_reg), v.ir.Imm32(20), v.ir.Imm32(12)))};
+ lod_clamp = v.ir.FPMul(fixp_lc, conv4_8fixp_f);
+ }
+
+ IR::TextureInstInfo info{};
+ info.type.Assign(GetType(txd.type));
+ info.num_derivates.Assign(num_derivates);
+ info.has_lod_clamp.Assign(has_lod_clamp ? 1 : 0);
+ const IR::Value sample{v.ir.ImageGradient(handle, coords, derivates, offset, lod_clamp, info)};
+
+ IR::Reg dest_reg{txd.dest_reg};
+ for (size_t element = 0; element < 4; ++element) {
+ if (((txd.mask >> element) & 1) == 0) {
+ continue;
+ }
+ v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)});
+ ++dest_reg;
+ }
+ if (txd.sparse_pred != IR::Pred::PT) {
+ v.ir.SetPred(txd.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::TXD(u64 insn) {
+ Impl(*this, insn, false);
+}
+
+void TranslatorVisitor::TXD_b(u64 insn) {
+ Impl(*this, insn, true);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp
new file mode 100644
index 000000000..983058303
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp
@@ -0,0 +1,165 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <optional>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+
+enum class TextureType : u64 {
+ _1D,
+ ARRAY_1D,
+ _2D,
+ ARRAY_2D,
+ _3D,
+ ARRAY_3D,
+ CUBE,
+ ARRAY_CUBE,
+};
+
+Shader::TextureType GetType(TextureType type) {
+ switch (type) {
+ case TextureType::_1D:
+ return Shader::TextureType::Color1D;
+ case TextureType::ARRAY_1D:
+ return Shader::TextureType::ColorArray1D;
+ case TextureType::_2D:
+ return Shader::TextureType::Color2D;
+ case TextureType::ARRAY_2D:
+ return Shader::TextureType::ColorArray2D;
+ case TextureType::_3D:
+ return Shader::TextureType::Color3D;
+ case TextureType::ARRAY_3D:
+ throw NotImplementedException("3D array texture type");
+ case TextureType::CUBE:
+ return Shader::TextureType::ColorCube;
+ case TextureType::ARRAY_CUBE:
+ return Shader::TextureType::ColorArrayCube;
+ }
+ throw NotImplementedException("Invalid texture type {}", type);
+}
+
+IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
+ const auto read_array{
+ [&]() -> IR::U32 { return v.ir.BitFieldExtract(v.X(reg), v.ir.Imm32(0), v.ir.Imm32(16)); }};
+ switch (type) {
+ case TextureType::_1D:
+ return v.X(reg);
+ case TextureType::ARRAY_1D:
+ return v.ir.CompositeConstruct(v.X(reg + 1), read_array());
+ case TextureType::_2D:
+ return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1));
+ case TextureType::ARRAY_2D:
+ return v.ir.CompositeConstruct(v.X(reg + 1), v.X(reg + 2), read_array());
+ case TextureType::_3D:
+ return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2));
+ case TextureType::ARRAY_3D:
+ throw NotImplementedException("3D array texture type");
+ case TextureType::CUBE:
+ return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2));
+ case TextureType::ARRAY_CUBE:
+ return v.ir.CompositeConstruct(v.X(reg + 1), v.X(reg + 2), v.X(reg + 3), read_array());
+ }
+ throw NotImplementedException("Invalid texture type {}", type);
+}
+
+IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) {
+ const IR::U32 value{v.X(reg++)};
+ switch (type) {
+ case TextureType::_1D:
+ case TextureType::ARRAY_1D:
+ return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true);
+ case TextureType::_2D:
+ case TextureType::ARRAY_2D:
+ return v.ir.CompositeConstruct(
+ v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true));
+ case TextureType::_3D:
+ case TextureType::ARRAY_3D:
+ return v.ir.CompositeConstruct(
+ v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4), true));
+ case TextureType::CUBE:
+ case TextureType::ARRAY_CUBE:
+ throw NotImplementedException("Illegal offset on CUBE sample");
+ }
+ throw NotImplementedException("Invalid texture type {}", type);
+}
+
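+// TLD fetches texels with unnormalized integer coordinates. The meta register stream is
+// consumed in order: bindless handle, LOD, packed AOFFI offsets and finally the multisample
+// index, matching the modifier bits decoded below.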
+void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
+ union {
+ u64 raw;
+ BitField<49, 1, u64> nodep;
+ BitField<55, 1, u64> lod;
+ BitField<50, 1, u64> multisample;
+ BitField<35, 1, u64> aoffi;
+ BitField<54, 1, u64> clamp;
+ BitField<51, 3, IR::Pred> sparse_pred;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> coord_reg;
+ BitField<20, 8, IR::Reg> meta_reg;
+ BitField<28, 3, TextureType> type;
+ BitField<31, 4, u64> mask;
+ BitField<36, 13, u64> cbuf_offset;
+ } const tld{insn};
+
+ const IR::Value coords{MakeCoords(v, tld.coord_reg, tld.type)};
+
+ IR::Reg meta_reg{tld.meta_reg};
+ IR::Value handle;
+ IR::Value offset;
+ IR::U32 lod;
+ IR::U32 multisample;
+ if (is_bindless) {
+ handle = v.X(meta_reg++);
+ } else {
+ handle = v.ir.Imm32(static_cast<u32>(tld.cbuf_offset.Value() * 4));
+ }
+ if (tld.lod != 0) {
+ lod = v.X(meta_reg++);
+ } else {
+ lod = v.ir.Imm32(0U);
+ }
+ if (tld.aoffi != 0) {
+ offset = MakeOffset(v, meta_reg, tld.type);
+ }
+ if (tld.multisample != 0) {
+ multisample = v.X(meta_reg++);
+ }
+ if (tld.clamp != 0) {
+        throw NotImplementedException("TLD.CL - CLAMP is not implemented");
+ }
+ IR::TextureInstInfo info{};
+ info.type.Assign(GetType(tld.type));
+ const IR::Value sample{v.ir.ImageFetch(handle, coords, offset, lod, multisample, info)};
+
+ IR::Reg dest_reg{tld.dest_reg};
+ for (size_t element = 0; element < 4; ++element) {
+ if (((tld.mask >> element) & 1) == 0) {
+ continue;
+ }
+ v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)});
+ ++dest_reg;
+ }
+ if (tld.sparse_pred != IR::Pred::PT) {
+ v.ir.SetPred(tld.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::TLD(u64 insn) {
+ Impl(*this, insn, false);
+}
+
+void TranslatorVisitor::TLD_b(u64 insn) {
+ Impl(*this, insn, true);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp
new file mode 100644
index 000000000..5dd7e31b2
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp
@@ -0,0 +1,242 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <array>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Precision : u64 {
+ F16,
+ F32,
+};
+
+constexpr unsigned R = 1;
+constexpr unsigned G = 2;
+constexpr unsigned B = 4;
+constexpr unsigned A = 8;
+
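+// Swizzle look-up tables: when dest_reg_b is RZ at most two components are written (RG_LUT),
+// otherwise up to four components are written (RGBA_LUT).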
+constexpr std::array RG_LUT{
+ R, //
+ G, //
+ B, //
+ A, //
+ R | G, //
+ R | A, //
+ G | A, //
+ B | A, //
+};
+
+constexpr std::array RGBA_LUT{
+ R | G | B, //
+ R | G | A, //
+ R | B | A, //
+ G | B | A, //
+ R | G | B | A, //
+};
+
+union Encoding {
+ u64 raw;
+ BitField<59, 1, Precision> precision;
+ BitField<54, 1, u64> aoffi;
+ BitField<53, 1, u64> lod;
+ BitField<55, 1, u64> ms;
+ BitField<49, 1, u64> nodep;
+ BitField<28, 8, IR::Reg> dest_reg_b;
+ BitField<0, 8, IR::Reg> dest_reg_a;
+ BitField<8, 8, IR::Reg> src_reg_a;
+ BitField<20, 8, IR::Reg> src_reg_b;
+ BitField<36, 13, u64> cbuf_offset;
+ BitField<50, 3, u64> swizzle;
+ BitField<53, 4, u64> encoding;
+};
+
+void CheckAlignment(IR::Reg reg, size_t alignment) {
+ if (!IR::IsAligned(reg, alignment)) {
+ throw NotImplementedException("Unaligned source register {}", reg);
+ }
+}
+
+IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) {
+ const IR::U32 value{v.X(reg)};
+ return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
+ v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true));
+}
+
+IR::Value Sample(TranslatorVisitor& v, u64 insn) {
+ const Encoding tlds{insn};
+ const IR::U32 handle{v.ir.Imm32(static_cast<u32>(tlds.cbuf_offset * 4))};
+ const IR::Reg reg_a{tlds.src_reg_a};
+ const IR::Reg reg_b{tlds.src_reg_b};
+ IR::Value coords;
+ IR::U32 lod{v.ir.Imm32(0U)};
+ IR::Value offsets;
+ IR::U32 multisample;
+ Shader::TextureType texture_type{};
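+    // The 4-bit encoding field overlaps the lod/aoffi/ms flags and selects both the texture
+    // type and how coordinates, LOD, offsets and the multisample index are split across the
+    // two source registers.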
+ switch (tlds.encoding) {
+ case 0:
+ texture_type = Shader::TextureType::Color1D;
+ coords = v.X(reg_a);
+ break;
+ case 1:
+ texture_type = Shader::TextureType::Color1D;
+ coords = v.X(reg_a);
+ lod = v.X(reg_b);
+ break;
+ case 2:
+ texture_type = Shader::TextureType::Color2D;
+ coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_b));
+ break;
+ case 4:
+ CheckAlignment(reg_a, 2);
+ texture_type = Shader::TextureType::Color2D;
+ coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1));
+ offsets = MakeOffset(v, reg_b);
+ break;
+ case 5:
+ CheckAlignment(reg_a, 2);
+ texture_type = Shader::TextureType::Color2D;
+ coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1));
+ lod = v.X(reg_b);
+ break;
+ case 6:
+ CheckAlignment(reg_a, 2);
+ texture_type = Shader::TextureType::Color2D;
+ coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1));
+ multisample = v.X(reg_b);
+ break;
+ case 7:
+ CheckAlignment(reg_a, 2);
+ texture_type = Shader::TextureType::Color3D;
+ coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1), v.X(reg_b));
+ break;
+ case 8: {
+ CheckAlignment(reg_b, 2);
+ const IR::U32 array{v.ir.BitFieldExtract(v.X(reg_a), v.ir.Imm32(0), v.ir.Imm32(16))};
+ texture_type = Shader::TextureType::ColorArray2D;
+ coords = v.ir.CompositeConstruct(v.X(reg_b), v.X(reg_b + 1), array);
+ break;
+ }
+ case 12:
+ CheckAlignment(reg_a, 2);
+ CheckAlignment(reg_b, 2);
+ texture_type = Shader::TextureType::Color2D;
+ coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1));
+ lod = v.X(reg_b);
+ offsets = MakeOffset(v, reg_b + 1);
+ break;
+ default:
+ throw NotImplementedException("Illegal encoding {}", tlds.encoding.Value());
+ }
+ IR::TextureInstInfo info{};
+ if (tlds.precision == Precision::F16) {
+ info.relaxed_precision.Assign(1);
+ }
+ info.type.Assign(texture_type);
+ return v.ir.ImageFetch(handle, coords, offsets, lod, multisample, info);
+}
+
+unsigned Swizzle(u64 insn) {
+ const Encoding tlds{insn};
+ const size_t encoding{tlds.swizzle};
+ if (tlds.dest_reg_b == IR::Reg::RZ) {
+ if (encoding >= RG_LUT.size()) {
+ throw NotImplementedException("Illegal RG encoding {}", encoding);
+ }
+ return RG_LUT[encoding];
+ } else {
+ if (encoding >= RGBA_LUT.size()) {
+ throw NotImplementedException("Illegal RGBA encoding {}", encoding);
+ }
+ return RGBA_LUT[encoding];
+ }
+}
+
+IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) {
+ return IR::F32{v.ir.CompositeExtract(sample, component)};
+}
+
+IR::Reg RegStoreComponent32(u64 insn, unsigned index) {
+ const Encoding tlds{insn};
+ switch (index) {
+ case 0:
+ return tlds.dest_reg_a;
+ case 1:
+ CheckAlignment(tlds.dest_reg_a, 2);
+ return tlds.dest_reg_a + 1;
+ case 2:
+ return tlds.dest_reg_b;
+ case 3:
+ CheckAlignment(tlds.dest_reg_b, 2);
+ return tlds.dest_reg_b + 1;
+ }
+ throw LogicError("Invalid store index {}", index);
+}
+
+void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
+ const unsigned swizzle{Swizzle(insn)};
+ unsigned store_index{0};
+ for (unsigned component = 0; component < 4; ++component) {
+ if (((swizzle >> component) & 1) == 0) {
+ continue;
+ }
+ const IR::Reg dest{RegStoreComponent32(insn, store_index)};
+ v.F(dest, Extract(v, sample, component));
+ ++store_index;
+ }
+}
+
+IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) {
+ return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs));
+}
+
+void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
+ const unsigned swizzle{Swizzle(insn)};
+ unsigned store_index{0};
+ std::array<IR::F32, 4> swizzled;
+ for (unsigned component = 0; component < 4; ++component) {
+ if (((swizzle >> component) & 1) == 0) {
+ continue;
+ }
+ swizzled[store_index] = Extract(v, sample, component);
+ ++store_index;
+ }
+ const IR::F32 zero{v.ir.Imm32(0.0f)};
+ const Encoding tlds{insn};
+ switch (store_index) {
+ case 1:
+ v.X(tlds.dest_reg_a, Pack(v, swizzled[0], zero));
+ break;
+ case 2:
+ case 3:
+ case 4:
+ v.X(tlds.dest_reg_a, Pack(v, swizzled[0], swizzled[1]));
+ switch (store_index) {
+ case 2:
+ break;
+ case 3:
+ v.X(tlds.dest_reg_b, Pack(v, swizzled[2], zero));
+ break;
+ case 4:
+ v.X(tlds.dest_reg_b, Pack(v, swizzled[2], swizzled[3]));
+ break;
+ }
+ break;
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::TLDS(u64 insn) {
+ const IR::Value sample{Sample(*this, insn)};
+ if (Encoding{insn}.precision == Precision::F32) {
+ Store32(*this, insn, sample);
+ } else {
+ Store16(*this, insn, sample);
+ }
+}
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp
new file mode 100644
index 000000000..aea3c0e62
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp
@@ -0,0 +1,131 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <optional>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+
+enum class TextureType : u64 {
+ _1D,
+ ARRAY_1D,
+ _2D,
+ ARRAY_2D,
+ _3D,
+ ARRAY_3D,
+ CUBE,
+ ARRAY_CUBE,
+};
+
+Shader::TextureType GetType(TextureType type) {
+ switch (type) {
+ case TextureType::_1D:
+ return Shader::TextureType::Color1D;
+ case TextureType::ARRAY_1D:
+ return Shader::TextureType::ColorArray1D;
+ case TextureType::_2D:
+ return Shader::TextureType::Color2D;
+ case TextureType::ARRAY_2D:
+ return Shader::TextureType::ColorArray2D;
+ case TextureType::_3D:
+ return Shader::TextureType::Color3D;
+ case TextureType::ARRAY_3D:
+ throw NotImplementedException("3D array texture type");
+ case TextureType::CUBE:
+ return Shader::TextureType::ColorCube;
+ case TextureType::ARRAY_CUBE:
+ return Shader::TextureType::ColorArrayCube;
+ }
+ throw NotImplementedException("Invalid texture type {}", type);
+}
+
+IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
+    // The ISA reads an array component here, but it is not needed in high-level shading
+    // languages, so this information is dropped.
+ switch (type) {
+ case TextureType::_1D:
+ return v.F(reg);
+ case TextureType::ARRAY_1D:
+ return v.F(reg + 1);
+ case TextureType::_2D:
+ return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1));
+ case TextureType::ARRAY_2D:
+ return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2));
+ case TextureType::_3D:
+ return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
+ case TextureType::ARRAY_3D:
+ throw NotImplementedException("3D array texture type");
+ case TextureType::CUBE:
+ return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
+ case TextureType::ARRAY_CUBE:
+ return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3));
+ }
+ throw NotImplementedException("Invalid texture type {}", type);
+}
+
+void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
+ union {
+ u64 raw;
+ BitField<49, 1, u64> nodep;
+ BitField<35, 1, u64> ndv;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> coord_reg;
+ BitField<20, 8, IR::Reg> meta_reg;
+ BitField<28, 3, TextureType> type;
+ BitField<31, 4, u64> mask;
+ BitField<36, 13, u64> cbuf_offset;
+ } const tmml{insn};
+
+ if ((tmml.mask & 0b1100) != 0) {
+        throw NotImplementedException("TMML BA results are not implemented");
+ }
+ const IR::Value coords{MakeCoords(v, tmml.coord_reg, tmml.type)};
+
+ IR::U32 handle;
+ IR::Reg meta_reg{tmml.meta_reg};
+ if (is_bindless) {
+ handle = v.X(meta_reg++);
+ } else {
+ handle = v.ir.Imm32(static_cast<u32>(tmml.cbuf_offset.Value() * 4));
+ }
+ IR::TextureInstInfo info{};
+ info.type.Assign(GetType(tmml.type));
+ const IR::Value sample{v.ir.ImageQueryLod(handle, coords, info)};
+
+ IR::Reg dest_reg{tmml.dest_reg};
+ for (size_t element = 0; element < 4; ++element) {
+ if (((tmml.mask >> element) & 1) == 0) {
+ continue;
+ }
+ IR::F32 value{v.ir.CompositeExtract(sample, element)};
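+        // The first two components are LOD values returned as fixed point: the float query
+        // result is converted to an integer and shifted left by 8 bits, dropping the fraction.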
+ if (element < 2) {
+ IR::U32 casted_value;
+ if (element == 0) {
+ casted_value = v.ir.ConvertFToU(32, value);
+ } else {
+ casted_value = v.ir.ConvertFToS(16, value);
+ }
+ v.X(dest_reg, v.ir.ShiftLeftLogical(casted_value, v.ir.Imm32(8)));
+ } else {
+ v.F(dest_reg, value);
+ }
+ ++dest_reg;
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::TMML(u64 insn) {
+ Impl(*this, insn, false);
+}
+
+void TranslatorVisitor::TMML_b(u64 insn) {
+ Impl(*this, insn, true);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp
new file mode 100644
index 000000000..0459e5473
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp
@@ -0,0 +1,76 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <optional>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Mode : u64 {
+ Dimension = 1,
+ TextureType = 2,
+ SamplePos = 5,
+};
+
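+// Only the Dimension query mode is implemented; TextureType and SamplePos queries fall through
+// to the NotImplemented exception below.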
+IR::Value Query(TranslatorVisitor& v, const IR::U32& handle, Mode mode, IR::Reg src_reg) {
+ switch (mode) {
+ case Mode::Dimension: {
+ const IR::U32 lod{v.X(src_reg)};
+ return v.ir.ImageQueryDimension(handle, lod);
+ }
+ case Mode::TextureType:
+ case Mode::SamplePos:
+ default:
+ throw NotImplementedException("Mode {}", mode);
+ }
+}
+
+void Impl(TranslatorVisitor& v, u64 insn, std::optional<u32> cbuf_offset) {
+ union {
+ u64 raw;
+ BitField<49, 1, u64> nodep;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<22, 3, Mode> mode;
+ BitField<31, 4, u64> mask;
+ } const txq{insn};
+
+ IR::Reg src_reg{txq.src_reg};
+ IR::U32 handle;
+ if (cbuf_offset) {
+ handle = v.ir.Imm32(*cbuf_offset);
+ } else {
+ handle = v.X(src_reg);
+ ++src_reg;
+ }
+ const IR::Value query{Query(v, handle, txq.mode, src_reg)};
+ IR::Reg dest_reg{txq.dest_reg};
+ for (int element = 0; element < 4; ++element) {
+ if (((txq.mask >> element) & 1) == 0) {
+ continue;
+ }
+ v.X(dest_reg, IR::U32{v.ir.CompositeExtract(query, static_cast<size_t>(element))});
+ ++dest_reg;
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::TXQ(u64 insn) {
+ union {
+ u64 raw;
+ BitField<36, 13, u64> cbuf_offset;
+ } const txq{insn};
+
+ Impl(*this, insn, static_cast<u32>(txq.cbuf_offset * 4));
+}
+
+void TranslatorVisitor::TXQ_b(u64 insn) {
+ Impl(*this, insn, std::nullopt);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp
new file mode 100644
index 000000000..e1f4174cf
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp
@@ -0,0 +1,30 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
+
+namespace Shader::Maxwell {
+
+IR::U32 ExtractVideoOperandValue(IR::IREmitter& ir, const IR::U32& value, VideoWidth width,
+ u32 selector, bool is_signed) {
+ switch (width) {
+ case VideoWidth::Byte:
+ case VideoWidth::Unknown:
+ return ir.BitFieldExtract(value, ir.Imm32(selector * 8), ir.Imm32(8), is_signed);
+ case VideoWidth::Short:
+ return ir.BitFieldExtract(value, ir.Imm32(selector * 16), ir.Imm32(16), is_signed);
+ case VideoWidth::Word:
+ return value;
+ default:
+ throw NotImplementedException("Unknown VideoWidth {}", width);
+ }
+}
+
+VideoWidth GetVideoSourceWidth(VideoWidth width, bool is_immediate) {
+    // Immediates must be in 16-bit format.
+ return is_immediate ? VideoWidth::Short : width;
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h
new file mode 100644
index 000000000..40c0b907c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h
@@ -0,0 +1,23 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+enum class VideoWidth : u64 {
+ Byte,
+ Unknown,
+ Short,
+ Word,
+};
+
+[[nodiscard]] IR::U32 ExtractVideoOperandValue(IR::IREmitter& ir, const IR::U32& value,
+ VideoWidth width, u32 selector, bool is_signed);
+
+[[nodiscard]] VideoWidth GetVideoSourceWidth(VideoWidth width, bool is_immediate);
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp
new file mode 100644
index 000000000..78869601f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp
@@ -0,0 +1,92 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class VideoMinMaxOps : u64 {
+ MRG_16H,
+ MRG_16L,
+ MRG_8B0,
+ MRG_8B2,
+ ACC,
+ MIN,
+ MAX,
+};
+
+[[nodiscard]] IR::U32 ApplyVideoMinMaxOp(IR::IREmitter& ir, const IR::U32& lhs, const IR::U32& rhs,
+ VideoMinMaxOps op, bool is_signed) {
+ switch (op) {
+ case VideoMinMaxOps::MIN:
+ return ir.IMin(lhs, rhs, is_signed);
+ case VideoMinMaxOps::MAX:
+ return ir.IMax(lhs, rhs, is_signed);
+ default:
+ throw NotImplementedException("VMNMX op {}", op);
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::VMNMX(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<20, 16, u64> src_b_imm;
+ BitField<28, 2, u64> src_b_selector;
+ BitField<29, 2, VideoWidth> src_b_width;
+ BitField<36, 2, u64> src_a_selector;
+ BitField<37, 2, VideoWidth> src_a_width;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> src_a_sign;
+ BitField<49, 1, u64> src_b_sign;
+ BitField<50, 1, u64> is_src_b_reg;
+ BitField<51, 3, VideoMinMaxOps> op;
+ BitField<54, 1, u64> dest_sign;
+ BitField<55, 1, u64> sat;
+ BitField<56, 1, u64> mx;
+ } const vmnmx{insn};
+
+ if (vmnmx.cc != 0) {
+ throw NotImplementedException("VMNMX CC");
+ }
+ if (vmnmx.sat != 0) {
+ throw NotImplementedException("VMNMX SAT");
+ }
+ // Selectors were shown to default to 2 in unit tests
+ if (vmnmx.src_a_selector != 2) {
+ throw NotImplementedException("VMNMX Selector {}", vmnmx.src_a_selector.Value());
+ }
+ if (vmnmx.src_b_selector != 2) {
+ throw NotImplementedException("VMNMX Selector {}", vmnmx.src_b_selector.Value());
+ }
+ if (vmnmx.src_a_width != VideoWidth::Word) {
+ throw NotImplementedException("VMNMX Source Width {}", vmnmx.src_a_width.Value());
+ }
+
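+    // VMNMX is a two-stage operation: operands A and B are first combined with a min or max
+    // (selected by the MX bit) and the intermediate result is then combined with operand C.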
+ const bool is_b_imm{vmnmx.is_src_b_reg == 0};
+ const IR::U32 src_a{GetReg8(insn)};
+ const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vmnmx.src_b_imm)) : GetReg20(insn)};
+ const IR::U32 src_c{GetReg39(insn)};
+
+ const VideoWidth a_width{vmnmx.src_a_width};
+ const VideoWidth b_width{GetVideoSourceWidth(vmnmx.src_b_width, is_b_imm)};
+
+ const bool src_a_signed{vmnmx.src_a_sign != 0};
+ const bool src_b_signed{vmnmx.src_b_sign != 0};
+ const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, 0, src_a_signed)};
+ const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, 0, src_b_signed)};
+
+ // First operation's sign is only dependent on operand b's sign
+ const bool op_1_signed{src_b_signed};
+
+ const IR::U32 lhs{vmnmx.mx != 0 ? ir.IMax(op_a, op_b, op_1_signed)
+ : ir.IMin(op_a, op_b, op_1_signed)};
+ X(vmnmx.dest_reg, ApplyVideoMinMaxOp(ir, lhs, src_c, vmnmx.op, vmnmx.dest_sign != 0));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp
new file mode 100644
index 000000000..cc2e6d6e6
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp
@@ -0,0 +1,64 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
+
+namespace Shader::Maxwell {
+void TranslatorVisitor::VMAD(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<20, 16, u64> src_b_imm;
+ BitField<28, 2, u64> src_b_selector;
+ BitField<29, 2, VideoWidth> src_b_width;
+ BitField<36, 2, u64> src_a_selector;
+ BitField<37, 2, VideoWidth> src_a_width;
+ BitField<47, 1, u64> cc;
+ BitField<48, 1, u64> src_a_sign;
+ BitField<49, 1, u64> src_b_sign;
+ BitField<50, 1, u64> is_src_b_reg;
+ BitField<51, 2, u64> scale;
+ BitField<53, 1, u64> src_c_neg;
+ BitField<54, 1, u64> src_a_neg;
+ BitField<55, 1, u64> sat;
+ } const vmad{insn};
+
+ if (vmad.cc != 0) {
+ throw NotImplementedException("VMAD CC");
+ }
+ if (vmad.sat != 0) {
+ throw NotImplementedException("VMAD SAT");
+ }
+ if (vmad.scale != 0) {
+ throw NotImplementedException("VMAD SCALE");
+ }
+ if (vmad.src_a_neg != 0 && vmad.src_c_neg != 0) {
+ throw NotImplementedException("VMAD PO");
+ }
+ if (vmad.src_a_neg != 0 || vmad.src_c_neg != 0) {
+ throw NotImplementedException("VMAD NEG");
+ }
+ const bool is_b_imm{vmad.is_src_b_reg == 0};
+ const IR::U32 src_a{GetReg8(insn)};
+ const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vmad.src_b_imm)) : GetReg20(insn)};
+ const IR::U32 src_c{GetReg39(insn)};
+
+ const u32 a_selector{static_cast<u32>(vmad.src_a_selector)};
+ // Immediate values can't have a selector
+ const u32 b_selector{is_b_imm ? 0U : static_cast<u32>(vmad.src_b_selector)};
+ const VideoWidth a_width{vmad.src_a_width};
+ const VideoWidth b_width{GetVideoSourceWidth(vmad.src_b_width, is_b_imm)};
+
+ const bool src_a_signed{vmad.src_a_sign != 0};
+ const bool src_b_signed{vmad.src_b_sign != 0};
+ const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, a_selector, src_a_signed)};
+ const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, b_selector, src_b_signed)};
+
+ X(vmad.dest_reg, ir.IAdd(ir.IMul(op_a, op_b), src_c));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp
new file mode 100644
index 000000000..1b66abc33
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp
@@ -0,0 +1,92 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class VsetpCompareOp : u64 {
+ False = 0,
+ LessThan,
+ Equal,
+ LessThanEqual,
+ GreaterThan = 16,
+ NotEqual,
+ GreaterThanEqual,
+ True,
+};
+
+CompareOp VsetpToShaderCompareOp(VsetpCompareOp op) {
+ switch (op) {
+ case VsetpCompareOp::False:
+ return CompareOp::False;
+ case VsetpCompareOp::LessThan:
+ return CompareOp::LessThan;
+ case VsetpCompareOp::Equal:
+ return CompareOp::Equal;
+ case VsetpCompareOp::LessThanEqual:
+ return CompareOp::LessThanEqual;
+ case VsetpCompareOp::GreaterThan:
+ return CompareOp::GreaterThan;
+ case VsetpCompareOp::NotEqual:
+ return CompareOp::NotEqual;
+ case VsetpCompareOp::GreaterThanEqual:
+ return CompareOp::GreaterThanEqual;
+ case VsetpCompareOp::True:
+ return CompareOp::True;
+ default:
+ throw NotImplementedException("Invalid compare op {}", op);
+ }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::VSETP(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 3, IR::Pred> dest_pred_b;
+ BitField<3, 3, IR::Pred> dest_pred_a;
+ BitField<20, 16, u64> src_b_imm;
+ BitField<28, 2, u64> src_b_selector;
+ BitField<29, 2, VideoWidth> src_b_width;
+ BitField<36, 2, u64> src_a_selector;
+ BitField<37, 2, VideoWidth> src_a_width;
+ BitField<39, 3, IR::Pred> bop_pred;
+ BitField<42, 1, u64> neg_bop_pred;
+ BitField<43, 5, VsetpCompareOp> compare_op;
+ BitField<45, 2, BooleanOp> bop;
+ BitField<48, 1, u64> src_a_sign;
+ BitField<49, 1, u64> src_b_sign;
+ BitField<50, 1, u64> is_src_b_reg;
+ } const vsetp{insn};
+
+ const bool is_b_imm{vsetp.is_src_b_reg == 0};
+ const IR::U32 src_a{GetReg8(insn)};
+ const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vsetp.src_b_imm)) : GetReg20(insn)};
+
+ const u32 a_selector{static_cast<u32>(vsetp.src_a_selector)};
+ const u32 b_selector{static_cast<u32>(vsetp.src_b_selector)};
+ const VideoWidth a_width{vsetp.src_a_width};
+ const VideoWidth b_width{GetVideoSourceWidth(vsetp.src_b_width, is_b_imm)};
+
+ const bool src_a_signed{vsetp.src_a_sign != 0};
+ const bool src_b_signed{vsetp.src_b_sign != 0};
+ const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, a_selector, src_a_signed)};
+ const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, b_selector, src_b_signed)};
+
+ // Compare operation's sign is only dependent on operand b's sign
+ const bool compare_signed{src_b_signed};
+ const CompareOp compare_op{VsetpToShaderCompareOp(vsetp.compare_op)};
+ const IR::U1 comparison{IntegerCompare(ir, op_a, op_b, compare_op, compare_signed)};
+ const IR::U1 bop_pred{ir.GetPred(vsetp.bop_pred, vsetp.neg_bop_pred != 0)};
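+    // dest_pred_a receives the comparison combined with the boolean operand predicate, while
+    // dest_pred_b receives the same combination applied to the negated comparison.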
+ const IR::U1 result_a{PredicateCombine(ir, comparison, bop_pred, vsetp.bop)};
+ const IR::U1 result_b{PredicateCombine(ir, ir.LogicalNot(comparison), bop_pred, vsetp.bop)};
+ ir.SetPred(vsetp.dest_pred_a, result_a);
+ ir.SetPred(vsetp.dest_pred_b, result_b);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp
new file mode 100644
index 000000000..7ce370f09
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp
@@ -0,0 +1,54 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class VoteOp : u64 {
+ ALL,
+ ANY,
+ EQ,
+};
+
+[[nodiscard]] IR::U1 VoteOperation(IR::IREmitter& ir, const IR::U1& pred, VoteOp vote_op) {
+ switch (vote_op) {
+ case VoteOp::ALL:
+ return ir.VoteAll(pred);
+ case VoteOp::ANY:
+ return ir.VoteAny(pred);
+ case VoteOp::EQ:
+ return ir.VoteEqual(pred);
+ default:
+ throw NotImplementedException("Invalid VOTE op {}", vote_op);
+ }
+}
+
+void Vote(TranslatorVisitor& v, u64 insn) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<39, 3, IR::Pred> pred_a;
+ BitField<42, 1, u64> neg_pred_a;
+ BitField<45, 3, IR::Pred> pred_b;
+ BitField<48, 2, VoteOp> vote_op;
+ } const vote{insn};
+
+ const IR::U1 vote_pred{v.ir.GetPred(vote.pred_a, vote.neg_pred_a != 0)};
+ v.ir.SetPred(vote.pred_b, VoteOperation(v.ir, vote_pred, vote.vote_op));
+ v.X(vote.dest_reg, v.ir.SubgroupBallot(vote_pred));
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::VOTE(u64 insn) {
+ Vote(*this, insn);
+}
+
+void TranslatorVisitor::VOTE_vtg(u64) {
+ LOG_WARNING(Shader, "(STUBBED) called");
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp
new file mode 100644
index 000000000..550fed55c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp
@@ -0,0 +1,69 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <optional>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class ShuffleMode : u64 {
+ IDX,
+ UP,
+ DOWN,
+ BFLY,
+};
+
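+// The mask operand packs two 5-bit fields: the lane clamp value in bits 0-4 and the segment
+// mask in bits 8-12, both of which are forwarded to the subgroup shuffle operations.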
+[[nodiscard]] IR::U32 ShuffleOperation(IR::IREmitter& ir, const IR::U32& value,
+ const IR::U32& index, const IR::U32& mask,
+ ShuffleMode shfl_op) {
+ const IR::U32 clamp{ir.BitFieldExtract(mask, ir.Imm32(0), ir.Imm32(5))};
+ const IR::U32 seg_mask{ir.BitFieldExtract(mask, ir.Imm32(8), ir.Imm32(5))};
+ switch (shfl_op) {
+ case ShuffleMode::IDX:
+ return ir.ShuffleIndex(value, index, clamp, seg_mask);
+ case ShuffleMode::UP:
+ return ir.ShuffleUp(value, index, clamp, seg_mask);
+ case ShuffleMode::DOWN:
+ return ir.ShuffleDown(value, index, clamp, seg_mask);
+ case ShuffleMode::BFLY:
+ return ir.ShuffleButterfly(value, index, clamp, seg_mask);
+ default:
+ throw NotImplementedException("Invalid SHFL op {}", shfl_op);
+ }
+}
+
+void Shuffle(TranslatorVisitor& v, u64 insn, const IR::U32& index, const IR::U32& mask) {
+ union {
+ u64 insn;
+ BitField<0, 8, IR::Reg> dest_reg;
+ BitField<8, 8, IR::Reg> src_reg;
+ BitField<30, 2, ShuffleMode> mode;
+ BitField<48, 3, IR::Pred> pred;
+ } const shfl{insn};
+
+ const IR::U32 result{ShuffleOperation(v.ir, v.X(shfl.src_reg), index, mask, shfl.mode)};
+ v.ir.SetPred(shfl.pred, v.ir.GetInBoundsFromOp(result));
+ v.X(shfl.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::SHFL(u64 insn) {
+ union {
+ u64 insn;
+ BitField<20, 5, u64> src_a_imm;
+ BitField<28, 1, u64> src_a_flag;
+ BitField<29, 1, u64> src_b_flag;
+ BitField<34, 13, u64> src_b_imm;
+ } const flags{insn};
+ const IR::U32 src_a{flags.src_a_flag != 0 ? ir.Imm32(static_cast<u32>(flags.src_a_imm))
+ : GetReg20(insn)};
+ const IR::U32 src_b{flags.src_b_flag != 0 ? ir.Imm32(static_cast<u32>(flags.src_b_imm))
+ : GetReg39(insn)};
+ Shuffle(*this, insn, src_a, src_b);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp
new file mode 100644
index 000000000..8e3c4c5d5
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp
@@ -0,0 +1,52 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/maxwell/decode.h"
+#include "shader_recompiler/frontend/maxwell/location.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+#include "shader_recompiler/frontend/maxwell/translate/translate.h"
+
+namespace Shader::Maxwell {
+
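+// Dispatches to the visitor overload that matches the handler's signature, allowing handlers
+// to take (pc, insn), just (insn), or no arguments at all.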
+template <auto method>
+static void Invoke(TranslatorVisitor& visitor, Location pc, u64 insn) {
+ using MethodType = decltype(method);
+ if constexpr (std::is_invocable_r_v<void, MethodType, TranslatorVisitor&, Location, u64>) {
+ (visitor.*method)(pc, insn);
+ } else if constexpr (std::is_invocable_r_v<void, MethodType, TranslatorVisitor&, u64>) {
+ (visitor.*method)(insn);
+ } else {
+ (visitor.*method)();
+ }
+}
+
+void Translate(Environment& env, IR::Block* block, u32 location_begin, u32 location_end) {
+ if (location_begin == location_end) {
+ return;
+ }
+ TranslatorVisitor visitor{env, *block};
+ for (Location pc = location_begin; pc != location_end; ++pc) {
+ const u64 insn{env.ReadInstruction(pc.Offset())};
+ try {
+ const Opcode opcode{Decode(insn)};
+ switch (opcode) {
+#define INST(name, cute, mask) \
+ case Opcode::name: \
+ Invoke<&TranslatorVisitor::name>(visitor, pc, insn); \
+ break;
+#include "shader_recompiler/frontend/maxwell/maxwell.inc"
+#undef INST
+ default:
+ throw LogicError("Invalid opcode {}", opcode);
+ }
+ } catch (Exception& exception) {
+ exception.Prepend(fmt::format("Translate {}: ", Decode(insn)));
+ throw;
+ }
+ }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.h b/src/shader_recompiler/frontend/maxwell/translate/translate.h
new file mode 100644
index 000000000..a3edd2e46
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/translate.h
@@ -0,0 +1,14 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+
+namespace Shader::Maxwell {
+
+void Translate(Environment& env, IR::Block* block, u32 location_begin, u32 location_end);
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
new file mode 100644
index 000000000..c067d459c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
@@ -0,0 +1,223 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <memory>
+#include <vector>
+
+#include "common/settings.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/post_order.h"
+#include "shader_recompiler/frontend/maxwell/structured_control_flow.h"
+#include "shader_recompiler/frontend/maxwell/translate/translate.h"
+#include "shader_recompiler/frontend/maxwell/translate_program.h"
+#include "shader_recompiler/host_translate_info.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Maxwell {
+namespace {
+IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) {
+ size_t num_syntax_blocks{};
+ for (const auto& node : syntax_list) {
+ if (node.type == IR::AbstractSyntaxNode::Type::Block) {
+ ++num_syntax_blocks;
+ }
+ }
+ IR::BlockList blocks;
+ blocks.reserve(num_syntax_blocks);
+ for (const auto& node : syntax_list) {
+ if (node.type == IR::AbstractSyntaxNode::Type::Block) {
+ blocks.push_back(node.data.block);
+ }
+ }
+ return blocks;
+}
+
+void RemoveUnreachableBlocks(IR::Program& program) {
+    // Some blocks might be unreachable if a function call exits unconditionally.
+    // If this happens, the number of blocks and post-order blocks will mismatch.
+ if (program.blocks.size() == program.post_order_blocks.size()) {
+ return;
+ }
+ const auto begin{program.blocks.begin() + 1};
+ const auto end{program.blocks.end()};
+ const auto pred{[](IR::Block* block) { return block->ImmPredecessors().empty(); }};
+ program.blocks.erase(std::remove_if(begin, end, pred), end);
+}
+
+void CollectInterpolationInfo(Environment& env, IR::Program& program) {
+ if (program.stage != Stage::Fragment) {
+ return;
+ }
+ const ProgramHeader& sph{env.SPH()};
+ for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
+ std::optional<PixelImap> imap;
+ for (const PixelImap value : sph.ps.GenericInputMap(static_cast<u32>(index))) {
+ if (value == PixelImap::Unused) {
+ continue;
+ }
+ if (imap && imap != value) {
+ throw NotImplementedException("Per component interpolation");
+ }
+ imap = value;
+ }
+ if (!imap) {
+ continue;
+ }
+ program.info.interpolation[index] = [&] {
+ switch (*imap) {
+ case PixelImap::Unused:
+ case PixelImap::Perspective:
+ return Interpolation::Smooth;
+ case PixelImap::Constant:
+ return Interpolation::Flat;
+ case PixelImap::ScreenLinear:
+ return Interpolation::NoPerspective;
+ }
+ throw NotImplementedException("Unknown interpolation {}", *imap);
+ }();
+ }
+}
+
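+// NVN games pass storage buffer addresses through the driver constant buffer (cbuf 0); each
+// stage reserves sixteen 0x10-byte descriptor slots starting at a fixed per-stage offset.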
+void AddNVNStorageBuffers(IR::Program& program) {
+ if (!program.info.uses_global_memory) {
+ return;
+ }
+ const u32 driver_cbuf{0};
+ const u32 descriptor_size{0x10};
+ const u32 num_buffers{16};
+ const u32 base{[&] {
+ switch (program.stage) {
+ case Stage::VertexA:
+ case Stage::VertexB:
+ return 0x110u;
+ case Stage::TessellationControl:
+ return 0x210u;
+ case Stage::TessellationEval:
+ return 0x310u;
+ case Stage::Geometry:
+ return 0x410u;
+ case Stage::Fragment:
+ return 0x510u;
+ case Stage::Compute:
+ return 0x310u;
+ }
+ throw InvalidArgument("Invalid stage {}", program.stage);
+ }()};
+ auto& descs{program.info.storage_buffers_descriptors};
+ for (u32 index = 0; index < num_buffers; ++index) {
+ if (!program.info.nvn_buffer_used[index]) {
+ continue;
+ }
+ const u32 offset{base + index * descriptor_size};
+ const auto it{std::ranges::find(descs, offset, &StorageBufferDescriptor::cbuf_offset)};
+ if (it != descs.end()) {
+ it->is_written |= program.info.stores_global_memory;
+ continue;
+ }
+ descs.push_back({
+ .cbuf_index = driver_cbuf,
+ .cbuf_offset = offset,
+ .count = 1,
+ .is_written = program.info.stores_global_memory,
+ });
+ }
+}
+} // Anonymous namespace
+
+IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
+ Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) {
+ IR::Program program;
+ program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg);
+ program.blocks = GenerateBlocks(program.syntax_list);
+ program.post_order_blocks = PostOrder(program.syntax_list.front());
+ program.stage = env.ShaderStage();
+ program.local_memory_size = env.LocalMemorySize();
+ switch (program.stage) {
+ case Stage::TessellationControl: {
+ const ProgramHeader& sph{env.SPH()};
+ program.invocations = sph.common2.threads_per_input_primitive;
+ break;
+ }
+ case Stage::Geometry: {
+ const ProgramHeader& sph{env.SPH()};
+ program.output_topology = sph.common3.output_topology;
+ program.output_vertices = sph.common4.max_output_vertices;
+ program.invocations = sph.common2.threads_per_input_primitive;
+ program.is_geometry_passthrough = sph.common0.geometry_passthrough != 0;
+ if (program.is_geometry_passthrough) {
+ const auto& mask{env.GpPassthroughMask()};
+ for (size_t i = 0; i < program.info.passthrough.mask.size(); ++i) {
+ program.info.passthrough.mask[i] = ((mask[i / 32] >> (i % 32)) & 1) == 0;
+ }
+ }
+ break;
+ }
+ case Stage::Compute:
+ program.workgroup_size = env.WorkgroupSize();
+ program.shared_memory_size = env.SharedMemorySize();
+ break;
+ default:
+ break;
+ }
+ RemoveUnreachableBlocks(program);
+
+ // Replace instructions before the SSA rewrite
+ if (!host_info.support_float16) {
+ Optimization::LowerFp16ToFp32(program);
+ }
+ if (!host_info.support_int64) {
+ Optimization::LowerInt64ToInt32(program);
+ }
+ Optimization::SsaRewritePass(program);
+
+ Optimization::GlobalMemoryToStorageBufferPass(program);
+ Optimization::TexturePass(env, program);
+
+ Optimization::ConstantPropagationPass(program);
+ Optimization::DeadCodeEliminationPass(program);
+ if (Settings::values.renderer_debug) {
+ Optimization::VerificationPass(program);
+ }
+ Optimization::CollectShaderInfoPass(env, program);
+ CollectInterpolationInfo(env, program);
+ AddNVNStorageBuffers(program);
+ return program;
+}
+
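+// Merges a dual vertex program: vertex A's syntax list is copied without its Return nodes,
+// vertex B is appended, and the combined program is optimized again as a single VertexB stage.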
+IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b,
+ Environment& env_vertex_b) {
+ IR::Program result{};
+ Optimization::VertexATransformPass(vertex_a);
+ Optimization::VertexBTransformPass(vertex_b);
+ for (const auto& term : vertex_a.syntax_list) {
+ if (term.type != IR::AbstractSyntaxNode::Type::Return) {
+ result.syntax_list.push_back(term);
+ }
+ }
+ result.syntax_list.insert(result.syntax_list.end(), vertex_b.syntax_list.begin(),
+ vertex_b.syntax_list.end());
+ result.blocks = GenerateBlocks(result.syntax_list);
+ result.post_order_blocks = vertex_b.post_order_blocks;
+ for (const auto& block : vertex_a.post_order_blocks) {
+ result.post_order_blocks.push_back(block);
+ }
+ result.stage = Stage::VertexB;
+ result.info = vertex_a.info;
+ result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size);
+ result.info.loads.mask |= vertex_b.info.loads.mask;
+ result.info.stores.mask |= vertex_b.info.stores.mask;
+
+ Optimization::JoinTextureInfo(result.info, vertex_b.info);
+ Optimization::JoinStorageInfo(result.info, vertex_b.info);
+ Optimization::DeadCodeEliminationPass(result);
+ if (Settings::values.renderer_debug) {
+ Optimization::VerificationPass(result);
+ }
+ Optimization::CollectShaderInfoPass(env_vertex_b, result);
+ return result;
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.h b/src/shader_recompiler/frontend/maxwell/translate_program.h
new file mode 100644
index 000000000..a84814811
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.h
@@ -0,0 +1,23 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/frontend/maxwell/control_flow.h"
+#include "shader_recompiler/host_translate_info.h"
+#include "shader_recompiler/object_pool.h"
+
+namespace Shader::Maxwell {
+
+[[nodiscard]] IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool,
+ ObjectPool<IR::Block>& block_pool, Environment& env,
+ Flow::CFG& cfg, const HostTranslateInfo& host_info);
+
+[[nodiscard]] IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b,
+ Environment& env_vertex_b);
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h
new file mode 100644
index 000000000..94a584219
--- /dev/null
+++ b/src/shader_recompiler/host_translate_info.h
@@ -0,0 +1,18 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+namespace Shader {
+
+// Try to keep entries here to a minimum: they influence translation and can
+// unintentionally change the information cached for a shader
+
+/// Misc information about the host
+struct HostTranslateInfo {
+ bool support_float16{}; ///< True when the device supports 16-bit floats
+ bool support_int64{}; ///< True when the device supports 64-bit integers
+};
+
+} // namespace Shader
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
new file mode 100644
index 000000000..5ead930f1
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -0,0 +1,928 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/alignment.h"
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/ir_opt/passes.h"
+#include "shader_recompiler/shader_info.h"
+
+namespace Shader::Optimization {
+namespace {
+void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) {
+ if (count != 1) {
+ throw NotImplementedException("Constant buffer descriptor indexing");
+ }
+ if ((info.constant_buffer_mask & (1U << index)) != 0) {
+ return;
+ }
+ info.constant_buffer_mask |= 1U << index;
+
+ auto& cbufs{info.constant_buffer_descriptors};
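+ // Keep the descriptor list sorted by constant buffer index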
+ cbufs.insert(std::ranges::lower_bound(cbufs, index, {}, &ConstantBufferDescriptor::index),
+ ConstantBufferDescriptor{
+ .index = index,
+ .count = 1,
+ });
+}
+
+void GetPatch(Info& info, IR::Patch patch) {
+ if (!IR::IsGeneric(patch)) {
+ throw NotImplementedException("Reading non-generic patch {}", patch);
+ }
+ info.uses_patches.at(IR::GenericPatchIndex(patch)) = true;
+}
+
+void SetPatch(Info& info, IR::Patch patch) {
+ if (IR::IsGeneric(patch)) {
+ info.uses_patches.at(IR::GenericPatchIndex(patch)) = true;
+ return;
+ }
+ switch (patch) {
+ case IR::Patch::TessellationLodLeft:
+ case IR::Patch::TessellationLodTop:
+ case IR::Patch::TessellationLodRight:
+ case IR::Patch::TessellationLodBottom:
+ info.stores_tess_level_outer = true;
+ break;
+ case IR::Patch::TessellationLodInteriorU:
+ case IR::Patch::TessellationLodInteriorV:
+ info.stores_tess_level_inner = true;
+ break;
+ default:
+ throw NotImplementedException("Set patch {}", patch);
+ }
+}
+
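+/// Marks which NVN storage buffer descriptor slots in constant buffer 0 may be touched by a
+/// constant buffer read; when the index or offset is not an immediate, every slot is flagged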
+void CheckCBufNVN(Info& info, IR::Inst& inst) {
+ const IR::Value cbuf_index{inst.Arg(0)};
+ if (!cbuf_index.IsImmediate()) {
+ info.nvn_buffer_used.set();
+ return;
+ }
+ const u32 index{cbuf_index.U32()};
+ if (index != 0) {
+ return;
+ }
+ const IR::Value cbuf_offset{inst.Arg(1)};
+ if (!cbuf_offset.IsImmediate()) {
+ info.nvn_buffer_used.set();
+ return;
+ }
+ const u32 offset{cbuf_offset.U32()};
+ const u32 descriptor_size{0x10};
+ const u32 upper_limit{info.nvn_buffer_base + descriptor_size * 16};
+ if (offset >= info.nvn_buffer_base && offset < upper_limit) {
+ const std::size_t nvn_index{(offset - info.nvn_buffer_base) / descriptor_size};
+ info.nvn_buffer_used.set(nvn_index, true);
+ }
+}
+
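+/// Records which features and resource types an instruction requires in the shader info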
+void VisitUsages(Info& info, IR::Inst& inst) {
+ switch (inst.GetOpcode()) {
+ case IR::Opcode::CompositeConstructF16x2:
+ case IR::Opcode::CompositeConstructF16x3:
+ case IR::Opcode::CompositeConstructF16x4:
+ case IR::Opcode::CompositeExtractF16x2:
+ case IR::Opcode::CompositeExtractF16x3:
+ case IR::Opcode::CompositeExtractF16x4:
+ case IR::Opcode::CompositeInsertF16x2:
+ case IR::Opcode::CompositeInsertF16x3:
+ case IR::Opcode::CompositeInsertF16x4:
+ case IR::Opcode::SelectF16:
+ case IR::Opcode::BitCastU16F16:
+ case IR::Opcode::BitCastF16U16:
+ case IR::Opcode::PackFloat2x16:
+ case IR::Opcode::UnpackFloat2x16:
+ case IR::Opcode::ConvertS16F16:
+ case IR::Opcode::ConvertS32F16:
+ case IR::Opcode::ConvertS64F16:
+ case IR::Opcode::ConvertU16F16:
+ case IR::Opcode::ConvertU32F16:
+ case IR::Opcode::ConvertU64F16:
+ case IR::Opcode::ConvertF16S8:
+ case IR::Opcode::ConvertF16S16:
+ case IR::Opcode::ConvertF16S32:
+ case IR::Opcode::ConvertF16S64:
+ case IR::Opcode::ConvertF16U8:
+ case IR::Opcode::ConvertF16U16:
+ case IR::Opcode::ConvertF16U32:
+ case IR::Opcode::ConvertF16U64:
+ case IR::Opcode::FPAbs16:
+ case IR::Opcode::FPAdd16:
+ case IR::Opcode::FPCeil16:
+ case IR::Opcode::FPFloor16:
+ case IR::Opcode::FPFma16:
+ case IR::Opcode::FPMul16:
+ case IR::Opcode::FPNeg16:
+ case IR::Opcode::FPRoundEven16:
+ case IR::Opcode::FPSaturate16:
+ case IR::Opcode::FPClamp16:
+ case IR::Opcode::FPTrunc16:
+ case IR::Opcode::FPOrdEqual16:
+ case IR::Opcode::FPUnordEqual16:
+ case IR::Opcode::FPOrdNotEqual16:
+ case IR::Opcode::FPUnordNotEqual16:
+ case IR::Opcode::FPOrdLessThan16:
+ case IR::Opcode::FPUnordLessThan16:
+ case IR::Opcode::FPOrdGreaterThan16:
+ case IR::Opcode::FPUnordGreaterThan16:
+ case IR::Opcode::FPOrdLessThanEqual16:
+ case IR::Opcode::FPUnordLessThanEqual16:
+ case IR::Opcode::FPOrdGreaterThanEqual16:
+ case IR::Opcode::FPUnordGreaterThanEqual16:
+ case IR::Opcode::FPIsNan16:
+ case IR::Opcode::GlobalAtomicAddF16x2:
+ case IR::Opcode::GlobalAtomicMinF16x2:
+ case IR::Opcode::GlobalAtomicMaxF16x2:
+ case IR::Opcode::StorageAtomicAddF16x2:
+ case IR::Opcode::StorageAtomicMinF16x2:
+ case IR::Opcode::StorageAtomicMaxF16x2:
+ info.uses_fp16 = true;
+ break;
+ case IR::Opcode::CompositeConstructF64x2:
+ case IR::Opcode::CompositeConstructF64x3:
+ case IR::Opcode::CompositeConstructF64x4:
+ case IR::Opcode::CompositeExtractF64x2:
+ case IR::Opcode::CompositeExtractF64x3:
+ case IR::Opcode::CompositeExtractF64x4:
+ case IR::Opcode::CompositeInsertF64x2:
+ case IR::Opcode::CompositeInsertF64x3:
+ case IR::Opcode::CompositeInsertF64x4:
+ case IR::Opcode::SelectF64:
+ case IR::Opcode::BitCastU64F64:
+ case IR::Opcode::BitCastF64U64:
+ case IR::Opcode::PackDouble2x32:
+ case IR::Opcode::UnpackDouble2x32:
+ case IR::Opcode::FPAbs64:
+ case IR::Opcode::FPAdd64:
+ case IR::Opcode::FPCeil64:
+ case IR::Opcode::FPFloor64:
+ case IR::Opcode::FPFma64:
+ case IR::Opcode::FPMax64:
+ case IR::Opcode::FPMin64:
+ case IR::Opcode::FPMul64:
+ case IR::Opcode::FPNeg64:
+ case IR::Opcode::FPRecip64:
+ case IR::Opcode::FPRecipSqrt64:
+ case IR::Opcode::FPRoundEven64:
+ case IR::Opcode::FPSaturate64:
+ case IR::Opcode::FPClamp64:
+ case IR::Opcode::FPTrunc64:
+ case IR::Opcode::FPOrdEqual64:
+ case IR::Opcode::FPUnordEqual64:
+ case IR::Opcode::FPOrdNotEqual64:
+ case IR::Opcode::FPUnordNotEqual64:
+ case IR::Opcode::FPOrdLessThan64:
+ case IR::Opcode::FPUnordLessThan64:
+ case IR::Opcode::FPOrdGreaterThan64:
+ case IR::Opcode::FPUnordGreaterThan64:
+ case IR::Opcode::FPOrdLessThanEqual64:
+ case IR::Opcode::FPUnordLessThanEqual64:
+ case IR::Opcode::FPOrdGreaterThanEqual64:
+ case IR::Opcode::FPUnordGreaterThanEqual64:
+ case IR::Opcode::FPIsNan64:
+ case IR::Opcode::ConvertS16F64:
+ case IR::Opcode::ConvertS32F64:
+ case IR::Opcode::ConvertS64F64:
+ case IR::Opcode::ConvertU16F64:
+ case IR::Opcode::ConvertU32F64:
+ case IR::Opcode::ConvertU64F64:
+ case IR::Opcode::ConvertF32F64:
+ case IR::Opcode::ConvertF64F32:
+ case IR::Opcode::ConvertF64S8:
+ case IR::Opcode::ConvertF64S16:
+ case IR::Opcode::ConvertF64S32:
+ case IR::Opcode::ConvertF64S64:
+ case IR::Opcode::ConvertF64U8:
+ case IR::Opcode::ConvertF64U16:
+ case IR::Opcode::ConvertF64U32:
+ case IR::Opcode::ConvertF64U64:
+ info.uses_fp64 = true;
+ break;
+ default:
+ break;
+ }
+ switch (inst.GetOpcode()) {
+ case IR::Opcode::GetCbufU8:
+ case IR::Opcode::GetCbufS8:
+ case IR::Opcode::UndefU8:
+ case IR::Opcode::LoadGlobalU8:
+ case IR::Opcode::LoadGlobalS8:
+ case IR::Opcode::WriteGlobalU8:
+ case IR::Opcode::WriteGlobalS8:
+ case IR::Opcode::LoadStorageU8:
+ case IR::Opcode::LoadStorageS8:
+ case IR::Opcode::WriteStorageU8:
+ case IR::Opcode::WriteStorageS8:
+ case IR::Opcode::LoadSharedU8:
+ case IR::Opcode::LoadSharedS8:
+ case IR::Opcode::WriteSharedU8:
+ case IR::Opcode::SelectU8:
+ case IR::Opcode::ConvertF16S8:
+ case IR::Opcode::ConvertF16U8:
+ case IR::Opcode::ConvertF32S8:
+ case IR::Opcode::ConvertF32U8:
+ case IR::Opcode::ConvertF64S8:
+ case IR::Opcode::ConvertF64U8:
+ info.uses_int8 = true;
+ break;
+ default:
+ break;
+ }
+ switch (inst.GetOpcode()) {
+ case IR::Opcode::GetCbufU16:
+ case IR::Opcode::GetCbufS16:
+ case IR::Opcode::UndefU16:
+ case IR::Opcode::LoadGlobalU16:
+ case IR::Opcode::LoadGlobalS16:
+ case IR::Opcode::WriteGlobalU16:
+ case IR::Opcode::WriteGlobalS16:
+ case IR::Opcode::LoadStorageU16:
+ case IR::Opcode::LoadStorageS16:
+ case IR::Opcode::WriteStorageU16:
+ case IR::Opcode::WriteStorageS16:
+ case IR::Opcode::LoadSharedU16:
+ case IR::Opcode::LoadSharedS16:
+ case IR::Opcode::WriteSharedU16:
+ case IR::Opcode::SelectU16:
+ case IR::Opcode::BitCastU16F16:
+ case IR::Opcode::BitCastF16U16:
+ case IR::Opcode::ConvertS16F16:
+ case IR::Opcode::ConvertS16F32:
+ case IR::Opcode::ConvertS16F64:
+ case IR::Opcode::ConvertU16F16:
+ case IR::Opcode::ConvertU16F32:
+ case IR::Opcode::ConvertU16F64:
+ case IR::Opcode::ConvertF16S16:
+ case IR::Opcode::ConvertF16U16:
+ case IR::Opcode::ConvertF32S16:
+ case IR::Opcode::ConvertF32U16:
+ case IR::Opcode::ConvertF64S16:
+ case IR::Opcode::ConvertF64U16:
+ info.uses_int16 = true;
+ break;
+ default:
+ break;
+ }
+ switch (inst.GetOpcode()) {
+ case IR::Opcode::UndefU64:
+ case IR::Opcode::LoadGlobalU8:
+ case IR::Opcode::LoadGlobalS8:
+ case IR::Opcode::LoadGlobalU16:
+ case IR::Opcode::LoadGlobalS16:
+ case IR::Opcode::LoadGlobal32:
+ case IR::Opcode::LoadGlobal64:
+ case IR::Opcode::LoadGlobal128:
+ case IR::Opcode::WriteGlobalU8:
+ case IR::Opcode::WriteGlobalS8:
+ case IR::Opcode::WriteGlobalU16:
+ case IR::Opcode::WriteGlobalS16:
+ case IR::Opcode::WriteGlobal32:
+ case IR::Opcode::WriteGlobal64:
+ case IR::Opcode::WriteGlobal128:
+ case IR::Opcode::SelectU64:
+ case IR::Opcode::BitCastU64F64:
+ case IR::Opcode::BitCastF64U64:
+ case IR::Opcode::PackUint2x32:
+ case IR::Opcode::UnpackUint2x32:
+ case IR::Opcode::IAdd64:
+ case IR::Opcode::ISub64:
+ case IR::Opcode::INeg64:
+ case IR::Opcode::ShiftLeftLogical64:
+ case IR::Opcode::ShiftRightLogical64:
+ case IR::Opcode::ShiftRightArithmetic64:
+ case IR::Opcode::ConvertS64F16:
+ case IR::Opcode::ConvertS64F32:
+ case IR::Opcode::ConvertS64F64:
+ case IR::Opcode::ConvertU64F16:
+ case IR::Opcode::ConvertU64F32:
+ case IR::Opcode::ConvertU64F64:
+ case IR::Opcode::ConvertU64U32:
+ case IR::Opcode::ConvertU32U64:
+ case IR::Opcode::ConvertF16U64:
+ case IR::Opcode::ConvertF32U64:
+ case IR::Opcode::ConvertF64U64:
+ case IR::Opcode::SharedAtomicExchange64:
+ case IR::Opcode::GlobalAtomicIAdd64:
+ case IR::Opcode::GlobalAtomicSMin64:
+ case IR::Opcode::GlobalAtomicUMin64:
+ case IR::Opcode::GlobalAtomicSMax64:
+ case IR::Opcode::GlobalAtomicUMax64:
+ case IR::Opcode::GlobalAtomicAnd64:
+ case IR::Opcode::GlobalAtomicOr64:
+ case IR::Opcode::GlobalAtomicXor64:
+ case IR::Opcode::GlobalAtomicExchange64:
+ case IR::Opcode::StorageAtomicIAdd64:
+ case IR::Opcode::StorageAtomicSMin64:
+ case IR::Opcode::StorageAtomicUMin64:
+ case IR::Opcode::StorageAtomicSMax64:
+ case IR::Opcode::StorageAtomicUMax64:
+ case IR::Opcode::StorageAtomicAnd64:
+ case IR::Opcode::StorageAtomicOr64:
+ case IR::Opcode::StorageAtomicXor64:
+ case IR::Opcode::StorageAtomicExchange64:
+ info.uses_int64 = true;
+ break;
+ default:
+ break;
+ }
+ switch (inst.GetOpcode()) {
+ case IR::Opcode::WriteGlobalU8:
+ case IR::Opcode::WriteGlobalS8:
+ case IR::Opcode::WriteGlobalU16:
+ case IR::Opcode::WriteGlobalS16:
+ case IR::Opcode::WriteGlobal32:
+ case IR::Opcode::WriteGlobal64:
+ case IR::Opcode::WriteGlobal128:
+ case IR::Opcode::GlobalAtomicIAdd32:
+ case IR::Opcode::GlobalAtomicSMin32:
+ case IR::Opcode::GlobalAtomicUMin32:
+ case IR::Opcode::GlobalAtomicSMax32:
+ case IR::Opcode::GlobalAtomicUMax32:
+ case IR::Opcode::GlobalAtomicInc32:
+ case IR::Opcode::GlobalAtomicDec32:
+ case IR::Opcode::GlobalAtomicAnd32:
+ case IR::Opcode::GlobalAtomicOr32:
+ case IR::Opcode::GlobalAtomicXor32:
+ case IR::Opcode::GlobalAtomicExchange32:
+ case IR::Opcode::GlobalAtomicIAdd64:
+ case IR::Opcode::GlobalAtomicSMin64:
+ case IR::Opcode::GlobalAtomicUMin64:
+ case IR::Opcode::GlobalAtomicSMax64:
+ case IR::Opcode::GlobalAtomicUMax64:
+ case IR::Opcode::GlobalAtomicAnd64:
+ case IR::Opcode::GlobalAtomicOr64:
+ case IR::Opcode::GlobalAtomicXor64:
+ case IR::Opcode::GlobalAtomicExchange64:
+ case IR::Opcode::GlobalAtomicAddF32:
+ case IR::Opcode::GlobalAtomicAddF16x2:
+ case IR::Opcode::GlobalAtomicAddF32x2:
+ case IR::Opcode::GlobalAtomicMinF16x2:
+ case IR::Opcode::GlobalAtomicMinF32x2:
+ case IR::Opcode::GlobalAtomicMaxF16x2:
+ case IR::Opcode::GlobalAtomicMaxF32x2:
+ info.stores_global_memory = true;
+ [[fallthrough]];
+ case IR::Opcode::LoadGlobalU8:
+ case IR::Opcode::LoadGlobalS8:
+ case IR::Opcode::LoadGlobalU16:
+ case IR::Opcode::LoadGlobalS16:
+ case IR::Opcode::LoadGlobal32:
+ case IR::Opcode::LoadGlobal64:
+ case IR::Opcode::LoadGlobal128:
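+ // Global memory is addressed with 64-bit pointers read from constant buffers and may later
+ // be demoted to storage buffer accesses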
+ info.uses_int64 = true;
+ info.uses_global_memory = true;
+ info.used_constant_buffer_types |= IR::Type::U32 | IR::Type::U32x2;
+ info.used_storage_buffer_types |= IR::Type::U32 | IR::Type::U32x2 | IR::Type::U32x4;
+ break;
+ default:
+ break;
+ }
+ switch (inst.GetOpcode()) {
+ case IR::Opcode::DemoteToHelperInvocation:
+ info.uses_demote_to_helper_invocation = true;
+ break;
+ case IR::Opcode::GetAttribute:
+ info.loads.mask[static_cast<size_t>(inst.Arg(0).Attribute())] = true;
+ break;
+ case IR::Opcode::SetAttribute:
+ info.stores.mask[static_cast<size_t>(inst.Arg(0).Attribute())] = true;
+ break;
+ case IR::Opcode::GetPatch:
+ GetPatch(info, inst.Arg(0).Patch());
+ break;
+ case IR::Opcode::SetPatch:
+ SetPatch(info, inst.Arg(0).Patch());
+ break;
+ case IR::Opcode::GetAttributeIndexed:
+ info.loads_indexed_attributes = true;
+ break;
+ case IR::Opcode::SetAttributeIndexed:
+ info.stores_indexed_attributes = true;
+ break;
+ case IR::Opcode::SetFragColor:
+ info.stores_frag_color[inst.Arg(0).U32()] = true;
+ break;
+ case IR::Opcode::SetSampleMask:
+ info.stores_sample_mask = true;
+ break;
+ case IR::Opcode::SetFragDepth:
+ info.stores_frag_depth = true;
+ break;
+ case IR::Opcode::WorkgroupId:
+ info.uses_workgroup_id = true;
+ break;
+ case IR::Opcode::LocalInvocationId:
+ info.uses_local_invocation_id = true;
+ break;
+ case IR::Opcode::InvocationId:
+ info.uses_invocation_id = true;
+ break;
+ case IR::Opcode::SampleId:
+ info.uses_sample_id = true;
+ break;
+ case IR::Opcode::IsHelperInvocation:
+ info.uses_is_helper_invocation = true;
+ break;
+ case IR::Opcode::LaneId:
+ info.uses_subgroup_invocation_id = true;
+ break;
+ case IR::Opcode::ShuffleIndex:
+ case IR::Opcode::ShuffleUp:
+ case IR::Opcode::ShuffleDown:
+ case IR::Opcode::ShuffleButterfly:
+ info.uses_subgroup_shuffles = true;
+ break;
+ case IR::Opcode::GetCbufU8:
+ case IR::Opcode::GetCbufS8:
+ case IR::Opcode::GetCbufU16:
+ case IR::Opcode::GetCbufS16:
+ case IR::Opcode::GetCbufU32:
+ case IR::Opcode::GetCbufF32:
+ case IR::Opcode::GetCbufU32x2: {
+ const IR::Value index{inst.Arg(0)};
+ const IR::Value offset{inst.Arg(1)};
+ if (!index.IsImmediate()) {
+ throw NotImplementedException("Constant buffer with non-immediate index");
+ }
+ AddConstantBufferDescriptor(info, index.U32(), 1);
+ u32 element_size{};
+ switch (inst.GetOpcode()) {
+ case IR::Opcode::GetCbufU8:
+ case IR::Opcode::GetCbufS8:
+ info.used_constant_buffer_types |= IR::Type::U8;
+ element_size = 1;
+ break;
+ case IR::Opcode::GetCbufU16:
+ case IR::Opcode::GetCbufS16:
+ info.used_constant_buffer_types |= IR::Type::U16;
+ element_size = 2;
+ break;
+ case IR::Opcode::GetCbufU32:
+ info.used_constant_buffer_types |= IR::Type::U32;
+ element_size = 4;
+ break;
+ case IR::Opcode::GetCbufF32:
+ info.used_constant_buffer_types |= IR::Type::F32;
+ element_size = 4;
+ break;
+ case IR::Opcode::GetCbufU32x2:
+ info.used_constant_buffer_types |= IR::Type::U32x2;
+ element_size = 8;
+ break;
+ default:
+ break;
+ }
+ u32& size{info.constant_buffer_used_sizes[index.U32()]};
+ if (offset.IsImmediate()) {
+ size = Common::AlignUp(std::max(size, offset.U32() + element_size), 16u);
+ } else {
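+ // The offset is not known at compile time; assume the largest possible constant buffer size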
+ size = 0x10'000;
+ }
+ break;
+ }
+ case IR::Opcode::BindlessImageSampleImplicitLod:
+ case IR::Opcode::BindlessImageSampleExplicitLod:
+ case IR::Opcode::BindlessImageSampleDrefImplicitLod:
+ case IR::Opcode::BindlessImageSampleDrefExplicitLod:
+ case IR::Opcode::BindlessImageGather:
+ case IR::Opcode::BindlessImageGatherDref:
+ case IR::Opcode::BindlessImageFetch:
+ case IR::Opcode::BindlessImageQueryDimensions:
+ case IR::Opcode::BindlessImageQueryLod:
+ case IR::Opcode::BindlessImageGradient:
+ case IR::Opcode::BoundImageSampleImplicitLod:
+ case IR::Opcode::BoundImageSampleExplicitLod:
+ case IR::Opcode::BoundImageSampleDrefImplicitLod:
+ case IR::Opcode::BoundImageSampleDrefExplicitLod:
+ case IR::Opcode::BoundImageGather:
+ case IR::Opcode::BoundImageGatherDref:
+ case IR::Opcode::BoundImageFetch:
+ case IR::Opcode::BoundImageQueryDimensions:
+ case IR::Opcode::BoundImageQueryLod:
+ case IR::Opcode::BoundImageGradient:
+ case IR::Opcode::ImageGather:
+ case IR::Opcode::ImageGatherDref:
+ case IR::Opcode::ImageFetch:
+ case IR::Opcode::ImageQueryDimensions:
+ case IR::Opcode::ImageGradient: {
+ const TextureType type{inst.Flags<IR::TextureInstInfo>().type};
+ info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D;
+ info.uses_sparse_residency |=
+ inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr;
+ break;
+ }
+ case IR::Opcode::ImageSampleImplicitLod:
+ case IR::Opcode::ImageSampleExplicitLod:
+ case IR::Opcode::ImageSampleDrefImplicitLod:
+ case IR::Opcode::ImageSampleDrefExplicitLod:
+ case IR::Opcode::ImageQueryLod: {
+ const auto flags{inst.Flags<IR::TextureInstInfo>()};
+ const TextureType type{flags.type};
+ info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D;
+ info.uses_shadow_lod |= flags.is_depth != 0;
+ info.uses_sparse_residency |=
+ inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr;
+ break;
+ }
+ case IR::Opcode::ImageRead: {
+ const auto flags{inst.Flags<IR::TextureInstInfo>()};
+ info.uses_typeless_image_reads |= flags.image_format == ImageFormat::Typeless;
+ info.uses_sparse_residency |=
+ inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr;
+ break;
+ }
+ case IR::Opcode::ImageWrite: {
+ const auto flags{inst.Flags<IR::TextureInstInfo>()};
+ info.uses_typeless_image_writes |= flags.image_format == ImageFormat::Typeless;
+ info.uses_image_buffers |= flags.type == TextureType::Buffer;
+ break;
+ }
+ case IR::Opcode::SubgroupEqMask:
+ case IR::Opcode::SubgroupLtMask:
+ case IR::Opcode::SubgroupLeMask:
+ case IR::Opcode::SubgroupGtMask:
+ case IR::Opcode::SubgroupGeMask:
+ info.uses_subgroup_mask = true;
+ break;
+ case IR::Opcode::VoteAll:
+ case IR::Opcode::VoteAny:
+ case IR::Opcode::VoteEqual:
+ case IR::Opcode::SubgroupBallot:
+ info.uses_subgroup_vote = true;
+ break;
+ case IR::Opcode::FSwizzleAdd:
+ info.uses_fswzadd = true;
+ break;
+ case IR::Opcode::DPdxFine:
+ case IR::Opcode::DPdyFine:
+ case IR::Opcode::DPdxCoarse:
+ case IR::Opcode::DPdyCoarse:
+ info.uses_derivatives = true;
+ break;
+ case IR::Opcode::LoadStorageU8:
+ case IR::Opcode::LoadStorageS8:
+ case IR::Opcode::WriteStorageU8:
+ case IR::Opcode::WriteStorageS8:
+ info.used_storage_buffer_types |= IR::Type::U8;
+ break;
+ case IR::Opcode::LoadStorageU16:
+ case IR::Opcode::LoadStorageS16:
+ case IR::Opcode::WriteStorageU16:
+ case IR::Opcode::WriteStorageS16:
+ info.used_storage_buffer_types |= IR::Type::U16;
+ break;
+ case IR::Opcode::LoadStorage32:
+ case IR::Opcode::WriteStorage32:
+ case IR::Opcode::StorageAtomicIAdd32:
+ case IR::Opcode::StorageAtomicUMin32:
+ case IR::Opcode::StorageAtomicUMax32:
+ case IR::Opcode::StorageAtomicAnd32:
+ case IR::Opcode::StorageAtomicOr32:
+ case IR::Opcode::StorageAtomicXor32:
+ case IR::Opcode::StorageAtomicExchange32:
+ info.used_storage_buffer_types |= IR::Type::U32;
+ break;
+ case IR::Opcode::LoadStorage64:
+ case IR::Opcode::WriteStorage64:
+ info.used_storage_buffer_types |= IR::Type::U32x2;
+ break;
+ case IR::Opcode::LoadStorage128:
+ case IR::Opcode::WriteStorage128:
+ info.used_storage_buffer_types |= IR::Type::U32x4;
+ break;
+ case IR::Opcode::SharedAtomicSMin32:
+ info.uses_atomic_s32_min = true;
+ break;
+ case IR::Opcode::SharedAtomicSMax32:
+ info.uses_atomic_s32_max = true;
+ break;
+ case IR::Opcode::SharedAtomicInc32:
+ info.uses_shared_increment = true;
+ break;
+ case IR::Opcode::SharedAtomicDec32:
+ info.uses_shared_decrement = true;
+ break;
+ case IR::Opcode::SharedAtomicExchange64:
+ info.uses_int64_bit_atomics = true;
+ break;
+ case IR::Opcode::GlobalAtomicInc32:
+ case IR::Opcode::StorageAtomicInc32:
+ info.used_storage_buffer_types |= IR::Type::U32;
+ info.uses_global_increment = true;
+ break;
+ case IR::Opcode::GlobalAtomicDec32:
+ case IR::Opcode::StorageAtomicDec32:
+ info.used_storage_buffer_types |= IR::Type::U32;
+ info.uses_global_decrement = true;
+ break;
+ case IR::Opcode::GlobalAtomicAddF32:
+ case IR::Opcode::StorageAtomicAddF32:
+ info.used_storage_buffer_types |= IR::Type::U32;
+ info.uses_atomic_f32_add = true;
+ break;
+ case IR::Opcode::GlobalAtomicAddF16x2:
+ case IR::Opcode::StorageAtomicAddF16x2:
+ info.used_storage_buffer_types |= IR::Type::U32;
+ info.uses_atomic_f16x2_add = true;
+ break;
+ case IR::Opcode::GlobalAtomicAddF32x2:
+ case IR::Opcode::StorageAtomicAddF32x2:
+ info.used_storage_buffer_types |= IR::Type::U32;
+ info.uses_atomic_f32x2_add = true;
+ break;
+ case IR::Opcode::GlobalAtomicMinF16x2:
+ case IR::Opcode::StorageAtomicMinF16x2:
+ info.used_storage_buffer_types |= IR::Type::U32;
+ info.uses_atomic_f16x2_min = true;
+ break;
+ case IR::Opcode::GlobalAtomicMinF32x2:
+ case IR::Opcode::StorageAtomicMinF32x2:
+ info.used_storage_buffer_types |= IR::Type::U32;
+ info.uses_atomic_f32x2_min = true;
+ break;
+ case IR::Opcode::GlobalAtomicMaxF16x2:
+ case IR::Opcode::StorageAtomicMaxF16x2:
+ info.used_storage_buffer_types |= IR::Type::U32;
+ info.uses_atomic_f16x2_max = true;
+ break;
+ case IR::Opcode::GlobalAtomicMaxF32x2:
+ case IR::Opcode::StorageAtomicMaxF32x2:
+ info.used_storage_buffer_types |= IR::Type::U32;
+ info.uses_atomic_f32x2_max = true;
+ break;
+ case IR::Opcode::StorageAtomicSMin32:
+ info.used_storage_buffer_types |= IR::Type::U32;
+ info.uses_atomic_s32_min = true;
+ break;
+ case IR::Opcode::StorageAtomicSMax32:
+ info.used_storage_buffer_types |= IR::Type::U32;
+ info.uses_atomic_s32_max = true;
+ break;
+ case IR::Opcode::GlobalAtomicIAdd64:
+ case IR::Opcode::GlobalAtomicSMin64:
+ case IR::Opcode::GlobalAtomicUMin64:
+ case IR::Opcode::GlobalAtomicSMax64:
+ case IR::Opcode::GlobalAtomicUMax64:
+ case IR::Opcode::GlobalAtomicAnd64:
+ case IR::Opcode::GlobalAtomicOr64:
+ case IR::Opcode::GlobalAtomicXor64:
+ case IR::Opcode::GlobalAtomicExchange64:
+ case IR::Opcode::StorageAtomicIAdd64:
+ case IR::Opcode::StorageAtomicSMin64:
+ case IR::Opcode::StorageAtomicUMin64:
+ case IR::Opcode::StorageAtomicSMax64:
+ case IR::Opcode::StorageAtomicUMax64:
+ case IR::Opcode::StorageAtomicAnd64:
+ case IR::Opcode::StorageAtomicOr64:
+ case IR::Opcode::StorageAtomicXor64:
+ info.used_storage_buffer_types |= IR::Type::U64;
+ info.uses_int64_bit_atomics = true;
+ break;
+ case IR::Opcode::BindlessImageAtomicIAdd32:
+ case IR::Opcode::BindlessImageAtomicSMin32:
+ case IR::Opcode::BindlessImageAtomicUMin32:
+ case IR::Opcode::BindlessImageAtomicSMax32:
+ case IR::Opcode::BindlessImageAtomicUMax32:
+ case IR::Opcode::BindlessImageAtomicInc32:
+ case IR::Opcode::BindlessImageAtomicDec32:
+ case IR::Opcode::BindlessImageAtomicAnd32:
+ case IR::Opcode::BindlessImageAtomicOr32:
+ case IR::Opcode::BindlessImageAtomicXor32:
+ case IR::Opcode::BindlessImageAtomicExchange32:
+ case IR::Opcode::BoundImageAtomicIAdd32:
+ case IR::Opcode::BoundImageAtomicSMin32:
+ case IR::Opcode::BoundImageAtomicUMin32:
+ case IR::Opcode::BoundImageAtomicSMax32:
+ case IR::Opcode::BoundImageAtomicUMax32:
+ case IR::Opcode::BoundImageAtomicInc32:
+ case IR::Opcode::BoundImageAtomicDec32:
+ case IR::Opcode::BoundImageAtomicAnd32:
+ case IR::Opcode::BoundImageAtomicOr32:
+ case IR::Opcode::BoundImageAtomicXor32:
+ case IR::Opcode::BoundImageAtomicExchange32:
+ case IR::Opcode::ImageAtomicIAdd32:
+ case IR::Opcode::ImageAtomicSMin32:
+ case IR::Opcode::ImageAtomicUMin32:
+ case IR::Opcode::ImageAtomicSMax32:
+ case IR::Opcode::ImageAtomicUMax32:
+ case IR::Opcode::ImageAtomicInc32:
+ case IR::Opcode::ImageAtomicDec32:
+ case IR::Opcode::ImageAtomicAnd32:
+ case IR::Opcode::ImageAtomicOr32:
+ case IR::Opcode::ImageAtomicXor32:
+ case IR::Opcode::ImageAtomicExchange32:
+ info.uses_atomic_image_u32 = true;
+ break;
+ default:
+ break;
+ }
+}
+
+void VisitFpModifiers(Info& info, IR::Inst& inst) {
+ switch (inst.GetOpcode()) {
+ case IR::Opcode::FPAdd16:
+ case IR::Opcode::FPFma16:
+ case IR::Opcode::FPMul16:
+ case IR::Opcode::FPRoundEven16:
+ case IR::Opcode::FPFloor16:
+ case IR::Opcode::FPCeil16:
+ case IR::Opcode::FPTrunc16: {
+ const auto control{inst.Flags<IR::FpControl>()};
+ switch (control.fmz_mode) {
+ case IR::FmzMode::DontCare:
+ break;
+ case IR::FmzMode::FTZ:
+ case IR::FmzMode::FMZ:
+ info.uses_fp16_denorms_flush = true;
+ break;
+ case IR::FmzMode::None:
+ info.uses_fp16_denorms_preserve = true;
+ break;
+ }
+ break;
+ }
+ case IR::Opcode::FPAdd32:
+ case IR::Opcode::FPFma32:
+ case IR::Opcode::FPMul32:
+ case IR::Opcode::FPRoundEven32:
+ case IR::Opcode::FPFloor32:
+ case IR::Opcode::FPCeil32:
+ case IR::Opcode::FPTrunc32:
+ case IR::Opcode::FPOrdEqual32:
+ case IR::Opcode::FPUnordEqual32:
+ case IR::Opcode::FPOrdNotEqual32:
+ case IR::Opcode::FPUnordNotEqual32:
+ case IR::Opcode::FPOrdLessThan32:
+ case IR::Opcode::FPUnordLessThan32:
+ case IR::Opcode::FPOrdGreaterThan32:
+ case IR::Opcode::FPUnordGreaterThan32:
+ case IR::Opcode::FPOrdLessThanEqual32:
+ case IR::Opcode::FPUnordLessThanEqual32:
+ case IR::Opcode::FPOrdGreaterThanEqual32:
+ case IR::Opcode::FPUnordGreaterThanEqual32:
+ case IR::Opcode::ConvertF16F32:
+ case IR::Opcode::ConvertF64F32: {
+ const auto control{inst.Flags<IR::FpControl>()};
+ switch (control.fmz_mode) {
+ case IR::FmzMode::DontCare:
+ break;
+ case IR::FmzMode::FTZ:
+ case IR::FmzMode::FMZ:
+ info.uses_fp32_denorms_flush = true;
+ break;
+ case IR::FmzMode::None:
+ info.uses_fp32_denorms_preserve = true;
+ break;
+ }
+ break;
+ }
+ default:
+ break;
+ }
+}
+
+void VisitCbufs(Info& info, IR::Inst& inst) {
+ switch (inst.GetOpcode()) {
+ case IR::Opcode::GetCbufU8:
+ case IR::Opcode::GetCbufS8:
+ case IR::Opcode::GetCbufU16:
+ case IR::Opcode::GetCbufS16:
+ case IR::Opcode::GetCbufU32:
+ case IR::Opcode::GetCbufF32:
+ case IR::Opcode::GetCbufU32x2: {
+ CheckCBufNVN(info, inst);
+ break;
+ }
+ default:
+ break;
+ }
+}
+
+void Visit(Info& info, IR::Inst& inst) {
+ VisitUsages(info, inst);
+ VisitFpModifiers(info, inst);
+ VisitCbufs(info, inst);
+}
+
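+/// Completes the attribute load/store masks from the shader program header when attributes are
+/// accessed through dynamic indexing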
+void GatherInfoFromHeader(Environment& env, Info& info) {
+ Stage stage{env.ShaderStage()};
+ if (stage == Stage::Compute) {
+ return;
+ }
+ const auto& header{env.SPH()};
+ if (stage == Stage::Fragment) {
+ if (!info.loads_indexed_attributes) {
+ return;
+ }
+ for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
+ const size_t offset{static_cast<size_t>(IR::Attribute::Generic0X) + index * 4};
+ const auto vector{header.ps.imap_generic_vector[index]};
+ info.loads.mask[offset + 0] = vector.x != PixelImap::Unused;
+ info.loads.mask[offset + 1] = vector.y != PixelImap::Unused;
+ info.loads.mask[offset + 2] = vector.z != PixelImap::Unused;
+ info.loads.mask[offset + 3] = vector.w != PixelImap::Unused;
+ }
+ return;
+ }
+ if (info.loads_indexed_attributes) {
+ for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
+ const IR::Attribute attribute{IR::Attribute::Generic0X + index * 4};
+ const auto mask = header.vtg.InputGeneric(index);
+ for (size_t i = 0; i < 4; ++i) {
+ info.loads.Set(attribute + i, mask[i]);
+ }
+ }
+ for (size_t index = 0; index < 8; ++index) {
+ const u16 mask{header.vtg.clip_distances};
+ info.loads.Set(IR::Attribute::ClipDistance0 + index, ((mask >> index) & 1) != 0);
+ }
+ info.loads.Set(IR::Attribute::PrimitiveId, header.vtg.imap_systemb.primitive_array_id != 0);
+ info.loads.Set(IR::Attribute::Layer, header.vtg.imap_systemb.rt_array_index != 0);
+ info.loads.Set(IR::Attribute::ViewportIndex, header.vtg.imap_systemb.viewport_index != 0);
+ info.loads.Set(IR::Attribute::PointSize, header.vtg.imap_systemb.point_size != 0);
+ info.loads.Set(IR::Attribute::PositionX, header.vtg.imap_systemb.position_x != 0);
+ info.loads.Set(IR::Attribute::PositionY, header.vtg.imap_systemb.position_y != 0);
+ info.loads.Set(IR::Attribute::PositionZ, header.vtg.imap_systemb.position_z != 0);
+ info.loads.Set(IR::Attribute::PositionW, header.vtg.imap_systemb.position_w != 0);
+ info.loads.Set(IR::Attribute::PointSpriteS, header.vtg.point_sprite_s != 0);
+ info.loads.Set(IR::Attribute::PointSpriteT, header.vtg.point_sprite_t != 0);
+ info.loads.Set(IR::Attribute::FogCoordinate, header.vtg.fog_coordinate != 0);
+ info.loads.Set(IR::Attribute::TessellationEvaluationPointU,
+ header.vtg.tessellation_eval_point_u != 0);
+ info.loads.Set(IR::Attribute::TessellationEvaluationPointV,
+ header.vtg.tessellation_eval_point_v != 0);
+ info.loads.Set(IR::Attribute::InstanceId, header.vtg.instance_id != 0);
+ info.loads.Set(IR::Attribute::VertexId, header.vtg.vertex_id != 0);
+ // TODO: Legacy varyings
+ }
+ if (info.stores_indexed_attributes) {
+ for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
+ const IR::Attribute attribute{IR::Attribute::Generic0X + index * 4};
+ const auto mask{header.vtg.OutputGeneric(index)};
+ for (size_t i = 0; i < 4; ++i) {
+ info.stores.Set(attribute + i, mask[i]);
+ }
+ }
+ for (size_t index = 0; index < 8; ++index) {
+ const u16 mask{header.vtg.omap_systemc.clip_distances};
+ info.stores.Set(IR::Attribute::ClipDistance0 + index, ((mask >> index) & 1) != 0);
+ }
+ info.stores.Set(IR::Attribute::PrimitiveId,
+ header.vtg.omap_systemb.primitive_array_id != 0);
+ info.stores.Set(IR::Attribute::Layer, header.vtg.omap_systemb.rt_array_index != 0);
+ info.stores.Set(IR::Attribute::ViewportIndex, header.vtg.omap_systemb.viewport_index != 0);
+ info.stores.Set(IR::Attribute::PointSize, header.vtg.omap_systemb.point_size != 0);
+ info.stores.Set(IR::Attribute::PositionX, header.vtg.omap_systemb.position_x != 0);
+ info.stores.Set(IR::Attribute::PositionY, header.vtg.omap_systemb.position_y != 0);
+ info.stores.Set(IR::Attribute::PositionZ, header.vtg.omap_systemb.position_z != 0);
+ info.stores.Set(IR::Attribute::PositionW, header.vtg.omap_systemb.position_w != 0);
+ info.stores.Set(IR::Attribute::PointSpriteS, header.vtg.omap_systemc.point_sprite_s != 0);
+ info.stores.Set(IR::Attribute::PointSpriteT, header.vtg.omap_systemc.point_sprite_t != 0);
+ info.stores.Set(IR::Attribute::FogCoordinate, header.vtg.omap_systemc.fog_coordinate != 0);
+ info.stores.Set(IR::Attribute::TessellationEvaluationPointU,
+ header.vtg.omap_systemc.tessellation_eval_point_u != 0);
+ info.stores.Set(IR::Attribute::TessellationEvaluationPointV,
+ header.vtg.omap_systemc.tessellation_eval_point_v != 0);
+ info.stores.Set(IR::Attribute::InstanceId, header.vtg.omap_systemc.instance_id != 0);
+ info.stores.Set(IR::Attribute::VertexId, header.vtg.omap_systemc.vertex_id != 0);
+ // TODO: Legacy varyings
+ }
+}
+} // Anonymous namespace
+
+void CollectShaderInfoPass(Environment& env, IR::Program& program) {
+ Info& info{program.info};
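+ // Offset within constant buffer 0 where the driver is expected to place the per-stage
+ // storage buffer descriptors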
+ const u32 base{[&] {
+ switch (program.stage) {
+ case Stage::VertexA:
+ case Stage::VertexB:
+ return 0x110u;
+ case Stage::TessellationControl:
+ return 0x210u;
+ case Stage::TessellationEval:
+ return 0x310u;
+ case Stage::Geometry:
+ return 0x410u;
+ case Stage::Fragment:
+ return 0x510u;
+ case Stage::Compute:
+ return 0x310u;
+ }
+ throw InvalidArgument("Invalid stage {}", program.stage);
+ }()};
+ info.nvn_buffer_base = base;
+
+ for (IR::Block* const block : program.post_order_blocks) {
+ for (IR::Inst& inst : block->Instructions()) {
+ Visit(info, inst);
+ }
+ }
+ GatherInfoFromHeader(env, info);
+}
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
new file mode 100644
index 000000000..8dd6d6c2c
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -0,0 +1,610 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <tuple>
+#include <type_traits>
+
+#include "common/bit_cast.h"
+#include "common/bit_util.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+namespace {
+// Metaprogramming helpers to extract argument type information from a lambda
+template <typename Func>
+struct LambdaTraits : LambdaTraits<decltype(&std::remove_reference_t<Func>::operator())> {};
+
+template <typename ReturnType, typename LambdaType, typename... Args>
+struct LambdaTraits<ReturnType (LambdaType::*)(Args...) const> {
+ template <size_t I>
+ using ArgType = std::tuple_element_t<I, std::tuple<Args...>>;
+
+ static constexpr size_t NUM_ARGS{sizeof...(Args)};
+};
+
+template <typename T>
+[[nodiscard]] T Arg(const IR::Value& value) {
+ if constexpr (std::is_same_v<T, bool>) {
+ return value.U1();
+ } else if constexpr (std::is_same_v<T, u32>) {
+ return value.U32();
+ } else if constexpr (std::is_same_v<T, s32>) {
+ return static_cast<s32>(value.U32());
+ } else if constexpr (std::is_same_v<T, f32>) {
+ return value.F32();
+ } else if constexpr (std::is_same_v<T, u64>) {
+ return value.U64();
+ }
+}
+
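+/// Folds a commutative operation when both operands are immediates and reassociates chains with
+/// an immediate operand; returns false when the instruction has been replaced with an immediate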
+template <typename T, typename ImmFn>
+bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) {
+ const IR::Value lhs{inst.Arg(0)};
+ const IR::Value rhs{inst.Arg(1)};
+
+ const bool is_lhs_immediate{lhs.IsImmediate()};
+ const bool is_rhs_immediate{rhs.IsImmediate()};
+
+ if (is_lhs_immediate && is_rhs_immediate) {
+ const auto result{imm_fn(Arg<T>(lhs), Arg<T>(rhs))};
+ inst.ReplaceUsesWith(IR::Value{result});
+ return false;
+ }
+ if (is_lhs_immediate && !is_rhs_immediate) {
+ IR::Inst* const rhs_inst{rhs.InstRecursive()};
+ if (rhs_inst->GetOpcode() == inst.GetOpcode() && rhs_inst->Arg(1).IsImmediate()) {
+ const auto combined{imm_fn(Arg<T>(lhs), Arg<T>(rhs_inst->Arg(1)))};
+ inst.SetArg(0, rhs_inst->Arg(0));
+ inst.SetArg(1, IR::Value{combined});
+ } else {
+ // Normalize
+ inst.SetArg(0, rhs);
+ inst.SetArg(1, lhs);
+ }
+ }
+ if (!is_lhs_immediate && is_rhs_immediate) {
+ const IR::Inst* const lhs_inst{lhs.InstRecursive()};
+ if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->Arg(1).IsImmediate()) {
+ const auto combined{imm_fn(Arg<T>(rhs), Arg<T>(lhs_inst->Arg(1)))};
+ inst.SetArg(0, lhs_inst->Arg(0));
+ inst.SetArg(1, IR::Value{combined});
+ }
+ }
+ return true;
+}
+
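+/// Evaluates the instruction at compile time when every argument is an immediate; returns true
+/// when the instruction was folded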
+template <typename Func>
+bool FoldWhenAllImmediates(IR::Inst& inst, Func&& func) {
+ if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) {
+ return false;
+ }
+ using Indices = std::make_index_sequence<LambdaTraits<decltype(func)>::NUM_ARGS>;
+ inst.ReplaceUsesWith(EvalImmediates(inst, func, Indices{}));
+ return true;
+}
+
+void FoldGetRegister(IR::Inst& inst) {
+ if (inst.Arg(0).Reg() == IR::Reg::RZ) {
+ inst.ReplaceUsesWith(IR::Value{u32{0}});
+ }
+}
+
+void FoldGetPred(IR::Inst& inst) {
+ if (inst.Arg(0).Pred() == IR::Pred::PT) {
+ inst.ReplaceUsesWith(IR::Value{true});
+ }
+}
+
+/// Replaces the pattern generated by two XMAD multiplications
+bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) {
+ /*
+ * We are looking for this pattern:
+ * %rhs_bfe = BitFieldUExtract %factor_a, #0, #16
+ * %rhs_mul = IMul32 %rhs_bfe, %factor_b
+ * %lhs_bfe = BitFieldUExtract %factor_a, #16, #16
+ * %lhs_mul = IMul32 %lhs_bfe, %factor_b
+ * %lhs_shl = ShiftLeftLogical32 %lhs_mul, #16
+ * %result = IAdd32 %lhs_shl, %rhs_mul
+ *
+ * And replacing it with
+ * %result = IMul32 %factor_a, %factor_b
+ *
+ * LLVM and MSVC perform this same transformation, so it is considered safe.
+ */
+ const IR::Value lhs_arg{inst.Arg(0)};
+ const IR::Value rhs_arg{inst.Arg(1)};
+ if (lhs_arg.IsImmediate() || rhs_arg.IsImmediate()) {
+ return false;
+ }
+ IR::Inst* const lhs_shl{lhs_arg.InstRecursive()};
+ if (lhs_shl->GetOpcode() != IR::Opcode::ShiftLeftLogical32 ||
+ lhs_shl->Arg(1) != IR::Value{16U}) {
+ return false;
+ }
+ if (lhs_shl->Arg(0).IsImmediate()) {
+ return false;
+ }
+ IR::Inst* const lhs_mul{lhs_shl->Arg(0).InstRecursive()};
+ IR::Inst* const rhs_mul{rhs_arg.InstRecursive()};
+ if (lhs_mul->GetOpcode() != IR::Opcode::IMul32 || rhs_mul->GetOpcode() != IR::Opcode::IMul32) {
+ return false;
+ }
+ if (lhs_mul->Arg(1).Resolve() != rhs_mul->Arg(1).Resolve()) {
+ return false;
+ }
+ const IR::U32 factor_b{lhs_mul->Arg(1)};
+ if (lhs_mul->Arg(0).IsImmediate() || rhs_mul->Arg(0).IsImmediate()) {
+ return false;
+ }
+ IR::Inst* const lhs_bfe{lhs_mul->Arg(0).InstRecursive()};
+ IR::Inst* const rhs_bfe{rhs_mul->Arg(0).InstRecursive()};
+ if (lhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) {
+ return false;
+ }
+ if (rhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) {
+ return false;
+ }
+ if (lhs_bfe->Arg(1) != IR::Value{16U} || lhs_bfe->Arg(2) != IR::Value{16U}) {
+ return false;
+ }
+ if (rhs_bfe->Arg(1) != IR::Value{0U} || rhs_bfe->Arg(2) != IR::Value{16U}) {
+ return false;
+ }
+ if (lhs_bfe->Arg(0).Resolve() != rhs_bfe->Arg(0).Resolve()) {
+ return false;
+ }
+ const IR::U32 factor_a{lhs_bfe->Arg(0)};
+ IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
+ inst.ReplaceUsesWith(ir.IMul(factor_a, factor_b));
+ return true;
+}
+
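+/// Folds additions: evaluates immediate operands, removes additions of zero, and, for 32-bit
+/// adds, tries to match the XMAD multiply pattern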
+template <typename T>
+void FoldAdd(IR::Block& block, IR::Inst& inst) {
+ if (inst.HasAssociatedPseudoOperation()) {
+ return;
+ }
+ if (!FoldCommutative<T>(inst, [](T a, T b) { return a + b; })) {
+ return;
+ }
+ const IR::Value rhs{inst.Arg(1)};
+ if (rhs.IsImmediate() && Arg<T>(rhs) == 0) {
+ inst.ReplaceUsesWith(inst.Arg(0));
+ return;
+ }
+ if constexpr (std::is_same_v<T, u32>) {
+ if (FoldXmadMultiply(block, inst)) {
+ return;
+ }
+ }
+}
+
+void FoldISub32(IR::Inst& inst) {
+ if (FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a - b; })) {
+ return;
+ }
+ if (inst.Arg(0).IsImmediate() || inst.Arg(1).IsImmediate()) {
+ return;
+ }
+ // ISub32 is commonly used to subtract two constant buffer reads; when both operands read the
+ // same constant buffer value, replace the result with zero.
+ const auto equal_cbuf{[](IR::Inst* a, IR::Inst* b) {
+ return a->GetOpcode() == IR::Opcode::GetCbufU32 &&
+ b->GetOpcode() == IR::Opcode::GetCbufU32 && a->Arg(0) == b->Arg(0) &&
+ a->Arg(1) == b->Arg(1);
+ }};
+ IR::Inst* op_a{inst.Arg(0).InstRecursive()};
+ IR::Inst* op_b{inst.Arg(1).InstRecursive()};
+ if (equal_cbuf(op_a, op_b)) {
+ inst.ReplaceUsesWith(IR::Value{u32{0}});
+ return;
+ }
+ // It's also possible a value is being added to a cbuf and then subtracted
+ if (op_b->GetOpcode() == IR::Opcode::IAdd32) {
+ // Canonicalize local variables to simplify the following logic
+ std::swap(op_a, op_b);
+ }
+ if (op_b->GetOpcode() != IR::Opcode::GetCbufU32) {
+ return;
+ }
+ IR::Inst* const inst_cbuf{op_b};
+ if (op_a->GetOpcode() != IR::Opcode::IAdd32) {
+ return;
+ }
+ IR::Value add_op_a{op_a->Arg(0)};
+ IR::Value add_op_b{op_a->Arg(1)};
+ if (add_op_b.IsImmediate()) {
+ // Canonicalize
+ std::swap(add_op_a, add_op_b);
+ }
+ if (add_op_b.IsImmediate()) {
+ return;
+ }
+ IR::Inst* const add_cbuf{add_op_b.InstRecursive()};
+ if (equal_cbuf(add_cbuf, inst_cbuf)) {
+ inst.ReplaceUsesWith(add_op_a);
+ }
+}
+
+void FoldSelect(IR::Inst& inst) {
+ const IR::Value cond{inst.Arg(0)};
+ if (cond.IsImmediate()) {
+ inst.ReplaceUsesWith(cond.U1() ? inst.Arg(1) : inst.Arg(2));
+ }
+}
+
+void FoldFPMul32(IR::Inst& inst) {
+ const auto control{inst.Flags<IR::FpControl>()};
+ if (control.no_contraction) {
+ return;
+ }
+ // Fold the interpolation pattern (x * attr) * (1 / attr) => x
+ const IR::Value lhs_value{inst.Arg(0)};
+ const IR::Value rhs_value{inst.Arg(1)};
+ if (lhs_value.IsImmediate() || rhs_value.IsImmediate()) {
+ return;
+ }
+ IR::Inst* const lhs_op{lhs_value.InstRecursive()};
+ IR::Inst* const rhs_op{rhs_value.InstRecursive()};
+ if (lhs_op->GetOpcode() != IR::Opcode::FPMul32 ||
+ rhs_op->GetOpcode() != IR::Opcode::FPRecip32) {
+ return;
+ }
+ const IR::Value recip_source{rhs_op->Arg(0)};
+ const IR::Value lhs_mul_source{lhs_op->Arg(1).Resolve()};
+ if (recip_source.IsImmediate() || lhs_mul_source.IsImmediate()) {
+ return;
+ }
+ IR::Inst* const attr_a{recip_source.InstRecursive()};
+ IR::Inst* const attr_b{lhs_mul_source.InstRecursive()};
+ if (attr_a->GetOpcode() != IR::Opcode::GetAttribute ||
+ attr_b->GetOpcode() != IR::Opcode::GetAttribute) {
+ return;
+ }
+ if (attr_a->Arg(0).Attribute() == attr_b->Arg(0).Attribute()) {
+ inst.ReplaceUsesWith(lhs_op->Arg(0));
+ }
+}
+
+void FoldLogicalAnd(IR::Inst& inst) {
+ if (!FoldCommutative<bool>(inst, [](bool a, bool b) { return a && b; })) {
+ return;
+ }
+ const IR::Value rhs{inst.Arg(1)};
+ if (rhs.IsImmediate()) {
+ if (rhs.U1()) {
+ inst.ReplaceUsesWith(inst.Arg(0));
+ } else {
+ inst.ReplaceUsesWith(IR::Value{false});
+ }
+ }
+}
+
+void FoldLogicalOr(IR::Inst& inst) {
+ if (!FoldCommutative<bool>(inst, [](bool a, bool b) { return a || b; })) {
+ return;
+ }
+ const IR::Value rhs{inst.Arg(1)};
+ if (rhs.IsImmediate()) {
+ if (rhs.U1()) {
+ inst.ReplaceUsesWith(IR::Value{true});
+ } else {
+ inst.ReplaceUsesWith(inst.Arg(0));
+ }
+ }
+}
+
+void FoldLogicalNot(IR::Inst& inst) {
+ const IR::U1 value{inst.Arg(0)};
+ if (value.IsImmediate()) {
+ inst.ReplaceUsesWith(IR::Value{!value.U1()});
+ return;
+ }
+ IR::Inst* const arg{value.InstRecursive()};
+ if (arg->GetOpcode() == IR::Opcode::LogicalNot) {
+ inst.ReplaceUsesWith(arg->Arg(0));
+ }
+}
+
+template <IR::Opcode op, typename Dest, typename Source>
+void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) {
+ const IR::Value value{inst.Arg(0)};
+ if (value.IsImmediate()) {
+ inst.ReplaceUsesWith(IR::Value{Common::BitCast<Dest>(Arg<Source>(value))});
+ return;
+ }
+ IR::Inst* const arg_inst{value.InstRecursive()};
+ if (arg_inst->GetOpcode() == reverse) {
+ inst.ReplaceUsesWith(arg_inst->Arg(0));
+ return;
+ }
+ if constexpr (op == IR::Opcode::BitCastF32U32) {
+ if (arg_inst->GetOpcode() == IR::Opcode::GetCbufU32) {
+ // Replace the bitcast with a typed constant buffer read
+ inst.ReplaceOpcode(IR::Opcode::GetCbufF32);
+ inst.SetArg(0, arg_inst->Arg(0));
+ inst.SetArg(1, arg_inst->Arg(1));
+ return;
+ }
+ }
+}
+
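+/// Folds an operation applied to the result of its inverse, e.g. PackHalf2x16(UnpackHalf2x16(x))
+/// becomes x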
+void FoldInverseFunc(IR::Inst& inst, IR::Opcode reverse) {
+ const IR::Value value{inst.Arg(0)};
+ if (value.IsImmediate()) {
+ return;
+ }
+ IR::Inst* const arg_inst{value.InstRecursive()};
+ if (arg_inst->GetOpcode() == reverse) {
+ inst.ReplaceUsesWith(arg_inst->Arg(0));
+ return;
+ }
+}
+
+template <typename Func, size_t... I>
+IR::Value EvalImmediates(const IR::Inst& inst, Func&& func, std::index_sequence<I...>) {
+ using Traits = LambdaTraits<decltype(func)>;
+ return IR::Value{func(Arg<typename Traits::template ArgType<I>>(inst.Arg(I))...)};
+}
+
+std::optional<IR::Value> FoldCompositeExtractImpl(IR::Value inst_value, IR::Opcode insert,
+ IR::Opcode construct, u32 first_index) {
+ IR::Inst* const inst{inst_value.InstRecursive()};
+ if (inst->GetOpcode() == construct) {
+ return inst->Arg(first_index);
+ }
+ if (inst->GetOpcode() != insert) {
+ return std::nullopt;
+ }
+ IR::Value value_index{inst->Arg(2)};
+ if (!value_index.IsImmediate()) {
+ return std::nullopt;
+ }
+ const u32 second_index{value_index.U32()};
+ if (first_index != second_index) {
+ IR::Value value_composite{inst->Arg(0)};
+ if (value_composite.IsImmediate()) {
+ return std::nullopt;
+ }
+ return FoldCompositeExtractImpl(value_composite, insert, construct, first_index);
+ }
+ return inst->Arg(1);
+}
+
+void FoldCompositeExtract(IR::Inst& inst, IR::Opcode construct, IR::Opcode insert) {
+ const IR::Value value_1{inst.Arg(0)};
+ const IR::Value value_2{inst.Arg(1)};
+ if (value_1.IsImmediate()) {
+ return;
+ }
+ if (!value_2.IsImmediate()) {
+ return;
+ }
+ const u32 first_index{value_2.U32()};
+ const std::optional result{FoldCompositeExtractImpl(value_1, insert, construct, first_index)};
+ if (!result) {
+ return;
+ }
+ inst.ReplaceUsesWith(*result);
+}
+
+IR::Value GetThroughCast(IR::Value value, IR::Opcode expected_cast) {
+ if (value.IsImmediate()) {
+ return value;
+ }
+ IR::Inst* const inst{value.InstRecursive()};
+ if (inst->GetOpcode() == expected_cast) {
+ return inst->Arg(0).Resolve();
+ }
+ return value;
+}
+
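+/// Folds the FSwizzleAdd + ShuffleButterfly sequence used to emulate fine derivatives into
+/// DPdxFine or DPdyFine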
+void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) {
+ const IR::Value swizzle{inst.Arg(2)};
+ if (!swizzle.IsImmediate()) {
+ return;
+ }
+ const IR::Value value_1{GetThroughCast(inst.Arg(0).Resolve(), IR::Opcode::BitCastF32U32)};
+ const IR::Value value_2{GetThroughCast(inst.Arg(1).Resolve(), IR::Opcode::BitCastF32U32)};
+ if (value_1.IsImmediate()) {
+ return;
+ }
+ const u32 swizzle_value{swizzle.U32()};
+ if (swizzle_value != 0x99 && swizzle_value != 0xA5) {
+ return;
+ }
+ IR::Inst* const inst2{value_1.InstRecursive()};
+ if (inst2->GetOpcode() != IR::Opcode::ShuffleButterfly) {
+ return;
+ }
+ const IR::Value value_3{GetThroughCast(inst2->Arg(0).Resolve(), IR::Opcode::BitCastU32F32)};
+ if (value_2 != value_3) {
+ return;
+ }
+ const IR::Value index{inst2->Arg(1)};
+ const IR::Value clamp{inst2->Arg(2)};
+ const IR::Value segmentation_mask{inst2->Arg(3)};
+ if (!index.IsImmediate() || !clamp.IsImmediate() || !segmentation_mask.IsImmediate()) {
+ return;
+ }
+ if (clamp.U32() != 3 || segmentation_mask.U32() != 28) {
+ return;
+ }
+ if (swizzle_value == 0x99) {
+ // DPdxFine
+ if (index.U32() == 1) {
+ IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
+ inst.ReplaceUsesWith(ir.DPdxFine(IR::F32{inst.Arg(1)}));
+ }
+ } else if (swizzle_value == 0xA5) {
+ // DPdyFine
+ if (index.U32() == 2) {
+ IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
+ inst.ReplaceUsesWith(ir.DPdyFine(IR::F32{inst.Arg(1)}));
+ }
+ }
+}
+
+void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
+ switch (inst.GetOpcode()) {
+ case IR::Opcode::GetRegister:
+ return FoldGetRegister(inst);
+ case IR::Opcode::GetPred:
+ return FoldGetPred(inst);
+ case IR::Opcode::IAdd32:
+ return FoldAdd<u32>(block, inst);
+ case IR::Opcode::ISub32:
+ return FoldISub32(inst);
+ case IR::Opcode::IMul32:
+ FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a * b; });
+ return;
+ case IR::Opcode::ShiftRightArithmetic32:
+ FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return static_cast<u32>(a >> b); });
+ return;
+ case IR::Opcode::BitCastF32U32:
+ return FoldBitCast<IR::Opcode::BitCastF32U32, f32, u32>(inst, IR::Opcode::BitCastU32F32);
+ case IR::Opcode::BitCastU32F32:
+ return FoldBitCast<IR::Opcode::BitCastU32F32, u32, f32>(inst, IR::Opcode::BitCastF32U32);
+ case IR::Opcode::IAdd64:
+ return FoldAdd<u64>(block, inst);
+ case IR::Opcode::PackHalf2x16:
+ return FoldInverseFunc(inst, IR::Opcode::UnpackHalf2x16);
+ case IR::Opcode::UnpackHalf2x16:
+ return FoldInverseFunc(inst, IR::Opcode::PackHalf2x16);
+ case IR::Opcode::SelectU1:
+ case IR::Opcode::SelectU8:
+ case IR::Opcode::SelectU16:
+ case IR::Opcode::SelectU32:
+ case IR::Opcode::SelectU64:
+ case IR::Opcode::SelectF16:
+ case IR::Opcode::SelectF32:
+ case IR::Opcode::SelectF64:
+ return FoldSelect(inst);
+ case IR::Opcode::FPMul32:
+ return FoldFPMul32(inst);
+ case IR::Opcode::LogicalAnd:
+ return FoldLogicalAnd(inst);
+ case IR::Opcode::LogicalOr:
+ return FoldLogicalOr(inst);
+ case IR::Opcode::LogicalNot:
+ return FoldLogicalNot(inst);
+ case IR::Opcode::SLessThan:
+ FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a < b; });
+ return;
+ case IR::Opcode::ULessThan:
+ FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a < b; });
+ return;
+ case IR::Opcode::SLessThanEqual:
+ FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a <= b; });
+ return;
+ case IR::Opcode::ULessThanEqual:
+ FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a <= b; });
+ return;
+ case IR::Opcode::SGreaterThan:
+ FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a > b; });
+ return;
+ case IR::Opcode::UGreaterThan:
+ FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a > b; });
+ return;
+ case IR::Opcode::SGreaterThanEqual:
+ FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a >= b; });
+ return;
+ case IR::Opcode::UGreaterThanEqual:
+ FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a >= b; });
+ return;
+ case IR::Opcode::IEqual:
+ FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a == b; });
+ return;
+ case IR::Opcode::INotEqual:
+ FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a != b; });
+ return;
+ case IR::Opcode::BitwiseAnd32:
+ FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a & b; });
+ return;
+ case IR::Opcode::BitwiseOr32:
+ FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a | b; });
+ return;
+ case IR::Opcode::BitwiseXor32:
+ FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a ^ b; });
+ return;
+ case IR::Opcode::BitFieldUExtract:
+ FoldWhenAllImmediates(inst, [](u32 base, u32 shift, u32 count) {
+ if (static_cast<size_t>(shift) + static_cast<size_t>(count) > 32) {
+ throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldUExtract,
+ base, shift, count);
+ }
+ // Build the mask conditionally: when count is 32, 1U << count would be undefined
+ const u32 bit_mask{count < 32 ? (1U << count) - 1U : ~0U};
+ return (base >> shift) & bit_mask;
+ });
+ return;
+ case IR::Opcode::BitFieldSExtract:
+ FoldWhenAllImmediates(inst, [](s32 base, u32 shift, u32 count) {
+ const size_t back_shift{static_cast<size_t>(shift) + static_cast<size_t>(count)};
+ const size_t left_shift{32 - back_shift};
+ const size_t right_shift{static_cast<size_t>(32 - count)};
+ if (back_shift > 32 || left_shift >= 32 || right_shift >= 32) {
+ throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldSExtract,
+ base, shift, count);
+ }
+ return static_cast<u32>((base << left_shift) >> right_shift);
+ });
+ return;
+ case IR::Opcode::BitFieldInsert:
+ FoldWhenAllImmediates(inst, [](u32 base, u32 insert, u32 offset, u32 bits) {
+ if (bits >= 32 || offset >= 32) {
+ throw LogicError("Undefined result in {}({}, {}, {}, {})",
+ IR::Opcode::BitFieldInsert, base, insert, offset, bits);
+ }
+ return (base & ~(~(~0u << bits) << offset)) | (insert << offset);
+ });
+ return;
+ case IR::Opcode::CompositeExtractU32x2:
+ return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructU32x2,
+ IR::Opcode::CompositeInsertU32x2);
+ case IR::Opcode::CompositeExtractU32x3:
+ return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructU32x3,
+ IR::Opcode::CompositeInsertU32x3);
+ case IR::Opcode::CompositeExtractU32x4:
+ return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructU32x4,
+ IR::Opcode::CompositeInsertU32x4);
+ case IR::Opcode::CompositeExtractF32x2:
+ return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x2,
+ IR::Opcode::CompositeInsertF32x2);
+ case IR::Opcode::CompositeExtractF32x3:
+ return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x3,
+ IR::Opcode::CompositeInsertF32x3);
+ case IR::Opcode::CompositeExtractF32x4:
+ return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x4,
+ IR::Opcode::CompositeInsertF32x4);
+ case IR::Opcode::CompositeExtractF16x2:
+ return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x2,
+ IR::Opcode::CompositeInsertF16x2);
+ case IR::Opcode::CompositeExtractF16x3:
+ return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x3,
+ IR::Opcode::CompositeInsertF16x3);
+ case IR::Opcode::CompositeExtractF16x4:
+ return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x4,
+ IR::Opcode::CompositeInsertF16x4);
+ case IR::Opcode::FSwizzleAdd:
+ return FoldFSwizzleAdd(block, inst);
+ default:
+ break;
+ }
+}
+} // Anonymous namespace
+
+void ConstantPropagationPass(IR::Program& program) {
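+ // Visit blocks in reverse post order so operand definitions are folded before their uses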
+ const auto end{program.post_order_blocks.rend()};
+ for (auto it = program.post_order_blocks.rbegin(); it != end; ++it) {
+ IR::Block* const block{*it};
+ for (IR::Inst& inst : block->Instructions()) {
+ ConstantPropagation(*block, inst);
+ }
+ }
+}
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp
new file mode 100644
index 000000000..400836301
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp
@@ -0,0 +1,26 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+
+void DeadCodeEliminationPass(IR::Program& program) {
+ // We iterate over the instructions in reverse order.
+ // This is because removing an instruction reduces the number of uses for earlier instructions.
+ for (IR::Block* const block : program.post_order_blocks) {
+ auto it{block->end()};
+ while (it != block->begin()) {
+ --it;
+ if (!it->HasUses() && !it->MayHaveSideEffects()) {
+ it->Invalidate();
+ it = block->Instructions().erase(it);
+ }
+ }
+ }
+}
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp
new file mode 100644
index 000000000..055ba9c54
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp
@@ -0,0 +1,30 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+
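+/// Removes the epilogue from vertex A so its code falls through into vertex B when merged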
+void VertexATransformPass(IR::Program& program) {
+ for (IR::Block* const block : program.blocks) {
+ for (IR::Inst& inst : block->Instructions()) {
+ if (inst.GetOpcode() == IR::Opcode::Epilogue) {
+ return inst.Invalidate();
+ }
+ }
+ }
+}
+
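+/// Removes the prologue from vertex B so the merged dual vertex program keeps a single prologue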
+void VertexBTransformPass(IR::Program& program) {
+ for (IR::Block* const block : program.blocks) {
+ for (IR::Inst& inst : block->Instructions()) {
+ if (inst.GetOpcode() == IR::Opcode::Prologue) {
+ return inst.Invalidate();
+ }
+ }
+ }
+}
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
new file mode 100644
index 000000000..4197b0095
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -0,0 +1,526 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <compare>
+#include <optional>
+#include <queue>
+
+#include <boost/container/flat_set.hpp>
+#include <boost/container/small_vector.hpp>
+
+#include "common/alignment.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/breadth_first_search.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+namespace {
+/// Address within a constant buffer of a storage buffer descriptor
+struct StorageBufferAddr {
+ auto operator<=>(const StorageBufferAddr&) const noexcept = default;
+
+ u32 index;
+ u32 offset;
+};
+
+/// Global memory instruction, the block that contains it, and the storage buffer it uses
+struct StorageInst {
+ StorageBufferAddr storage_buffer;
+ IR::Inst* inst;
+ IR::Block* block;
+};
+
+/// Bias towards a certain range of constant buffers when looking for storage buffers
+struct Bias {
+ u32 index;
+ u32 offset_begin;
+ u32 offset_end;
+};
+
+using boost::container::flat_set;
+using boost::container::small_vector;
+using StorageBufferSet =
+ flat_set<StorageBufferAddr, std::less<StorageBufferAddr>, small_vector<StorageBufferAddr, 16>>;
+using StorageInstVector = small_vector<StorageInst, 24>;
+using StorageWritesSet =
+ flat_set<StorageBufferAddr, std::less<StorageBufferAddr>, small_vector<StorageBufferAddr, 16>>;
+
+struct StorageInfo {
+ StorageBufferSet set;
+ StorageInstVector to_replace;
+ StorageWritesSet writes;
+};
+
+/// Returns true when the instruction is a global memory instruction
+bool IsGlobalMemory(const IR::Inst& inst) {
+ switch (inst.GetOpcode()) {
+ case IR::Opcode::LoadGlobalS8:
+ case IR::Opcode::LoadGlobalU8:
+ case IR::Opcode::LoadGlobalS16:
+ case IR::Opcode::LoadGlobalU16:
+ case IR::Opcode::LoadGlobal32:
+ case IR::Opcode::LoadGlobal64:
+ case IR::Opcode::LoadGlobal128:
+ case IR::Opcode::WriteGlobalS8:
+ case IR::Opcode::WriteGlobalU8:
+ case IR::Opcode::WriteGlobalS16:
+ case IR::Opcode::WriteGlobalU16:
+ case IR::Opcode::WriteGlobal32:
+ case IR::Opcode::WriteGlobal64:
+ case IR::Opcode::WriteGlobal128:
+ case IR::Opcode::GlobalAtomicIAdd32:
+ case IR::Opcode::GlobalAtomicSMin32:
+ case IR::Opcode::GlobalAtomicUMin32:
+ case IR::Opcode::GlobalAtomicSMax32:
+ case IR::Opcode::GlobalAtomicUMax32:
+ case IR::Opcode::GlobalAtomicInc32:
+ case IR::Opcode::GlobalAtomicDec32:
+ case IR::Opcode::GlobalAtomicAnd32:
+ case IR::Opcode::GlobalAtomicOr32:
+ case IR::Opcode::GlobalAtomicXor32:
+ case IR::Opcode::GlobalAtomicExchange32:
+ case IR::Opcode::GlobalAtomicIAdd64:
+ case IR::Opcode::GlobalAtomicSMin64:
+ case IR::Opcode::GlobalAtomicUMin64:
+ case IR::Opcode::GlobalAtomicSMax64:
+ case IR::Opcode::GlobalAtomicUMax64:
+ case IR::Opcode::GlobalAtomicAnd64:
+ case IR::Opcode::GlobalAtomicOr64:
+ case IR::Opcode::GlobalAtomicXor64:
+ case IR::Opcode::GlobalAtomicExchange64:
+ case IR::Opcode::GlobalAtomicAddF32:
+ case IR::Opcode::GlobalAtomicAddF16x2:
+ case IR::Opcode::GlobalAtomicAddF32x2:
+ case IR::Opcode::GlobalAtomicMinF16x2:
+ case IR::Opcode::GlobalAtomicMinF32x2:
+ case IR::Opcode::GlobalAtomicMaxF16x2:
+ case IR::Opcode::GlobalAtomicMaxF32x2:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// Returns true when the instruction writes to global memory (stores and atomics)
+bool IsGlobalMemoryWrite(const IR::Inst& inst) {
+ switch (inst.GetOpcode()) {
+ case IR::Opcode::WriteGlobalS8:
+ case IR::Opcode::WriteGlobalU8:
+ case IR::Opcode::WriteGlobalS16:
+ case IR::Opcode::WriteGlobalU16:
+ case IR::Opcode::WriteGlobal32:
+ case IR::Opcode::WriteGlobal64:
+ case IR::Opcode::WriteGlobal128:
+ case IR::Opcode::GlobalAtomicIAdd32:
+ case IR::Opcode::GlobalAtomicSMin32:
+ case IR::Opcode::GlobalAtomicUMin32:
+ case IR::Opcode::GlobalAtomicSMax32:
+ case IR::Opcode::GlobalAtomicUMax32:
+ case IR::Opcode::GlobalAtomicInc32:
+ case IR::Opcode::GlobalAtomicDec32:
+ case IR::Opcode::GlobalAtomicAnd32:
+ case IR::Opcode::GlobalAtomicOr32:
+ case IR::Opcode::GlobalAtomicXor32:
+ case IR::Opcode::GlobalAtomicExchange32:
+ case IR::Opcode::GlobalAtomicIAdd64:
+ case IR::Opcode::GlobalAtomicSMin64:
+ case IR::Opcode::GlobalAtomicUMin64:
+ case IR::Opcode::GlobalAtomicSMax64:
+ case IR::Opcode::GlobalAtomicUMax64:
+ case IR::Opcode::GlobalAtomicAnd64:
+ case IR::Opcode::GlobalAtomicOr64:
+ case IR::Opcode::GlobalAtomicXor64:
+ case IR::Opcode::GlobalAtomicExchange64:
+ case IR::Opcode::GlobalAtomicAddF32:
+ case IR::Opcode::GlobalAtomicAddF16x2:
+ case IR::Opcode::GlobalAtomicAddF32x2:
+ case IR::Opcode::GlobalAtomicMinF16x2:
+ case IR::Opcode::GlobalAtomicMinF32x2:
+ case IR::Opcode::GlobalAtomicMaxF16x2:
+ case IR::Opcode::GlobalAtomicMaxF32x2:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// Converts a global memory opcode to its storage buffer equivalent
+IR::Opcode GlobalToStorage(IR::Opcode opcode) {
+ switch (opcode) {
+ case IR::Opcode::LoadGlobalS8:
+ return IR::Opcode::LoadStorageS8;
+ case IR::Opcode::LoadGlobalU8:
+ return IR::Opcode::LoadStorageU8;
+ case IR::Opcode::LoadGlobalS16:
+ return IR::Opcode::LoadStorageS16;
+ case IR::Opcode::LoadGlobalU16:
+ return IR::Opcode::LoadStorageU16;
+ case IR::Opcode::LoadGlobal32:
+ return IR::Opcode::LoadStorage32;
+ case IR::Opcode::LoadGlobal64:
+ return IR::Opcode::LoadStorage64;
+ case IR::Opcode::LoadGlobal128:
+ return IR::Opcode::LoadStorage128;
+ case IR::Opcode::WriteGlobalS8:
+ return IR::Opcode::WriteStorageS8;
+ case IR::Opcode::WriteGlobalU8:
+ return IR::Opcode::WriteStorageU8;
+ case IR::Opcode::WriteGlobalS16:
+ return IR::Opcode::WriteStorageS16;
+ case IR::Opcode::WriteGlobalU16:
+ return IR::Opcode::WriteStorageU16;
+ case IR::Opcode::WriteGlobal32:
+ return IR::Opcode::WriteStorage32;
+ case IR::Opcode::WriteGlobal64:
+ return IR::Opcode::WriteStorage64;
+ case IR::Opcode::WriteGlobal128:
+ return IR::Opcode::WriteStorage128;
+ case IR::Opcode::GlobalAtomicIAdd32:
+ return IR::Opcode::StorageAtomicIAdd32;
+ case IR::Opcode::GlobalAtomicSMin32:
+ return IR::Opcode::StorageAtomicSMin32;
+ case IR::Opcode::GlobalAtomicUMin32:
+ return IR::Opcode::StorageAtomicUMin32;
+ case IR::Opcode::GlobalAtomicSMax32:
+ return IR::Opcode::StorageAtomicSMax32;
+ case IR::Opcode::GlobalAtomicUMax32:
+ return IR::Opcode::StorageAtomicUMax32;
+ case IR::Opcode::GlobalAtomicInc32:
+ return IR::Opcode::StorageAtomicInc32;
+ case IR::Opcode::GlobalAtomicDec32:
+ return IR::Opcode::StorageAtomicDec32;
+ case IR::Opcode::GlobalAtomicAnd32:
+ return IR::Opcode::StorageAtomicAnd32;
+ case IR::Opcode::GlobalAtomicOr32:
+ return IR::Opcode::StorageAtomicOr32;
+ case IR::Opcode::GlobalAtomicXor32:
+ return IR::Opcode::StorageAtomicXor32;
+ case IR::Opcode::GlobalAtomicIAdd64:
+ return IR::Opcode::StorageAtomicIAdd64;
+ case IR::Opcode::GlobalAtomicSMin64:
+ return IR::Opcode::StorageAtomicSMin64;
+ case IR::Opcode::GlobalAtomicUMin64:
+ return IR::Opcode::StorageAtomicUMin64;
+ case IR::Opcode::GlobalAtomicSMax64:
+ return IR::Opcode::StorageAtomicSMax64;
+ case IR::Opcode::GlobalAtomicUMax64:
+ return IR::Opcode::StorageAtomicUMax64;
+ case IR::Opcode::GlobalAtomicAnd64:
+ return IR::Opcode::StorageAtomicAnd64;
+ case IR::Opcode::GlobalAtomicOr64:
+ return IR::Opcode::StorageAtomicOr64;
+ case IR::Opcode::GlobalAtomicXor64:
+ return IR::Opcode::StorageAtomicXor64;
+ case IR::Opcode::GlobalAtomicExchange32:
+ return IR::Opcode::StorageAtomicExchange32;
+ case IR::Opcode::GlobalAtomicExchange64:
+ return IR::Opcode::StorageAtomicExchange64;
+ case IR::Opcode::GlobalAtomicAddF32:
+ return IR::Opcode::StorageAtomicAddF32;
+ case IR::Opcode::GlobalAtomicAddF16x2:
+ return IR::Opcode::StorageAtomicAddF16x2;
+ case IR::Opcode::GlobalAtomicMinF16x2:
+ return IR::Opcode::StorageAtomicMinF16x2;
+ case IR::Opcode::GlobalAtomicMaxF16x2:
+ return IR::Opcode::StorageAtomicMaxF16x2;
+ case IR::Opcode::GlobalAtomicAddF32x2:
+ return IR::Opcode::StorageAtomicAddF32x2;
+ case IR::Opcode::GlobalAtomicMinF32x2:
+ return IR::Opcode::StorageAtomicMinF32x2;
+ case IR::Opcode::GlobalAtomicMaxF32x2:
+ return IR::Opcode::StorageAtomicMaxF32x2;
+ default:
+ throw InvalidArgument("Invalid global memory opcode {}", opcode);
+ }
+}
+
+/// Returns true when a storage buffer address satisfies a bias
+bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexcept {
+ return storage_buffer.index == bias.index && storage_buffer.offset >= bias.offset_begin &&
+ storage_buffer.offset < bias.offset_end;
+}
+
+struct LowAddrInfo {
+ IR::U32 value;
+ s32 imm_offset;
+};
+
+/// Tries to track the low 32 bits of a global memory instruction's address
+std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) {
+ // The first argument is the raw 64-bit GPU address used by the global memory instruction
+ const IR::Value addr{inst->Arg(0)};
+ if (addr.IsImmediate()) {
+ // Not much we can do if it's an immediate
+ return std::nullopt;
+ }
+ // This address is expected to be either a PackUint2x32, an IAdd64, or a CompositeConstructU32x2
+ IR::Inst* addr_inst{addr.InstRecursive()};
+ s32 imm_offset{0};
+ if (addr_inst->GetOpcode() == IR::Opcode::IAdd64) {
+ // If it's an IAdd64, extract the immediate offset it applies and grab the address
+ // instruction. The instruction is expected to be canonicalized, with the address as the
+ // first argument and the immediate offset as the second.
+ const IR::U64 imm_offset_value{addr_inst->Arg(1)};
+ if (!imm_offset_value.IsImmediate()) {
+ return std::nullopt;
+ }
+ imm_offset = static_cast<s32>(static_cast<s64>(imm_offset_value.U64()));
+ const IR::U64 iadd_addr{addr_inst->Arg(0)};
+ if (iadd_addr.IsImmediate()) {
+ return std::nullopt;
+ }
+ addr_inst = iadd_addr.InstRecursive();
+ }
+ // With IAdd64 handled, now PackUint2x32 is expected
+ if (addr_inst->GetOpcode() == IR::Opcode::PackUint2x32) {
+ // PackUint2x32 is expected to be generated from a vector
+ const IR::Value vector{addr_inst->Arg(0)};
+ if (vector.IsImmediate()) {
+ return std::nullopt;
+ }
+ addr_inst = vector.InstRecursive();
+ }
+ // The vector is expected to be a CompositeConstructU32x2
+ if (addr_inst->GetOpcode() != IR::Opcode::CompositeConstructU32x2) {
+ return std::nullopt;
+ }
+ // Grab the first argument from the CompositeConstructU32x2; this is the low address.
+ return LowAddrInfo{
+ .value{IR::U32{addr_inst->Arg(0)}},
+ .imm_offset = imm_offset,
+ };
+}
+
+/// Tries to track the storage buffer address used by a global memory instruction
+std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) {
+ const auto pred{[bias](const IR::Inst* inst) -> std::optional<StorageBufferAddr> {
+ if (inst->GetOpcode() != IR::Opcode::GetCbufU32) {
+ return std::nullopt;
+ }
+ const IR::Value index{inst->Arg(0)};
+ const IR::Value offset{inst->Arg(1)};
+ if (!index.IsImmediate()) {
+ // Definitely not a storage buffer if it's read from a
+ // non-immediate index
+ return std::nullopt;
+ }
+ if (!offset.IsImmediate()) {
+ // TODO: Support SSBO arrays
+ return std::nullopt;
+ }
+ const StorageBufferAddr storage_buffer{
+ .index = index.U32(),
+ .offset = offset.U32(),
+ };
+ if (!Common::IsAligned(storage_buffer.offset, 16)) {
+ // The SSBO pointer has to be aligned
+ return std::nullopt;
+ }
+ if (bias && !MeetsBias(storage_buffer, *bias)) {
+ // When a bias is given, reject addresses outside of its range
+ // to avoid false positives
+ return std::nullopt;
+ }
+ return storage_buffer;
+ }};
+ return BreadthFirstSearch(value, pred);
+}
+
+/// Collects the storage buffer used by a global memory instruction and the instruction itself
+void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info) {
+ // NVN puts storage buffers in a specific range; bias towards these addresses to
+ // avoid false positives
+ static constexpr Bias nvn_bias{
+ .index = 0,
+ .offset_begin = 0x110,
+ .offset_end = 0x610,
+ };
+ // Track the low address of the instruction
+ const std::optional<LowAddrInfo> low_addr_info{TrackLowAddress(&inst)};
+ if (!low_addr_info) {
+ // Failed to track the low address, use NVN fallbacks
+ return;
+ }
+ // First try to find storage buffers in the NVN address
+ const IR::U32 low_addr{low_addr_info->value};
+ std::optional<StorageBufferAddr> storage_buffer{Track(low_addr, &nvn_bias)};
+ if (!storage_buffer) {
+ // If it fails, track without a bias
+ storage_buffer = Track(low_addr, nullptr);
+ if (!storage_buffer) {
+ // If that also fails, use NVN fallbacks
+ return;
+ }
+ }
+ // Collect storage buffer and the instruction
+ if (IsGlobalMemoryWrite(inst)) {
+ info.writes.insert(*storage_buffer);
+ }
+ info.set.insert(*storage_buffer);
+ info.to_replace.push_back(StorageInst{
+ .storage_buffer{*storage_buffer},
+ .inst = &inst,
+ .block = &block,
+ });
+}
+
+/// Returns the offset in bytes for an equivalent storage instruction
+IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer) {
+ IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
+ IR::U32 offset;
+ if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&inst)}) {
+ offset = low_addr->value;
+ if (low_addr->imm_offset != 0) {
+ offset = ir.IAdd(offset, ir.Imm32(low_addr->imm_offset));
+ }
+ } else {
+ offset = ir.UConvert(32, IR::U64{inst.Arg(0)});
+ }
+ // Subtract the storage buffer's base address (its least significant 32 bits, read from
+ // the constant buffer) from the guest offset. The result is the storage buffer offset
+ // in bytes.
+ const IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
+ return ir.ISub(offset, low_cbuf);
+}
+
+/// Replace a global memory load instruction with its storage buffer equivalent
+void ReplaceLoad(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
+ const IR::U32& offset) {
+ const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())};
+ const auto it{IR::Block::InstructionList::s_iterator_to(inst)};
+ const IR::Value value{&*block.PrependNewInst(it, new_opcode, {storage_index, offset})};
+ inst.ReplaceUsesWith(value);
+}
+
+/// Replace a global memory write instruction with its storage buffer equivalent
+void ReplaceWrite(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
+ const IR::U32& offset) {
+ const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())};
+ const auto it{IR::Block::InstructionList::s_iterator_to(inst)};
+ block.PrependNewInst(it, new_opcode, {storage_index, offset, inst.Arg(1)});
+ inst.Invalidate();
+}
+
+/// Replace an atomic operation on global memory instruction with its storage buffer equivalent
+void ReplaceAtomic(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
+ const IR::U32& offset) {
+ const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())};
+ const auto it{IR::Block::InstructionList::s_iterator_to(inst)};
+ const IR::Value value{
+ &*block.PrependNewInst(it, new_opcode, {storage_index, offset, inst.Arg(1)})};
+ inst.ReplaceUsesWith(value);
+}
+
+/// Replace a global memory instruction with its storage buffer equivalent
+void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
+ const IR::U32& offset) {
+ switch (inst.GetOpcode()) {
+ case IR::Opcode::LoadGlobalS8:
+ case IR::Opcode::LoadGlobalU8:
+ case IR::Opcode::LoadGlobalS16:
+ case IR::Opcode::LoadGlobalU16:
+ case IR::Opcode::LoadGlobal32:
+ case IR::Opcode::LoadGlobal64:
+ case IR::Opcode::LoadGlobal128:
+ return ReplaceLoad(block, inst, storage_index, offset);
+ case IR::Opcode::WriteGlobalS8:
+ case IR::Opcode::WriteGlobalU8:
+ case IR::Opcode::WriteGlobalS16:
+ case IR::Opcode::WriteGlobalU16:
+ case IR::Opcode::WriteGlobal32:
+ case IR::Opcode::WriteGlobal64:
+ case IR::Opcode::WriteGlobal128:
+ return ReplaceWrite(block, inst, storage_index, offset);
+ case IR::Opcode::GlobalAtomicIAdd32:
+ case IR::Opcode::GlobalAtomicSMin32:
+ case IR::Opcode::GlobalAtomicUMin32:
+ case IR::Opcode::GlobalAtomicSMax32:
+ case IR::Opcode::GlobalAtomicUMax32:
+ case IR::Opcode::GlobalAtomicInc32:
+ case IR::Opcode::GlobalAtomicDec32:
+ case IR::Opcode::GlobalAtomicAnd32:
+ case IR::Opcode::GlobalAtomicOr32:
+ case IR::Opcode::GlobalAtomicXor32:
+ case IR::Opcode::GlobalAtomicExchange32:
+ case IR::Opcode::GlobalAtomicIAdd64:
+ case IR::Opcode::GlobalAtomicSMin64:
+ case IR::Opcode::GlobalAtomicUMin64:
+ case IR::Opcode::GlobalAtomicSMax64:
+ case IR::Opcode::GlobalAtomicUMax64:
+ case IR::Opcode::GlobalAtomicAnd64:
+ case IR::Opcode::GlobalAtomicOr64:
+ case IR::Opcode::GlobalAtomicXor64:
+ case IR::Opcode::GlobalAtomicExchange64:
+ case IR::Opcode::GlobalAtomicAddF32:
+ case IR::Opcode::GlobalAtomicAddF16x2:
+ case IR::Opcode::GlobalAtomicAddF32x2:
+ case IR::Opcode::GlobalAtomicMinF16x2:
+ case IR::Opcode::GlobalAtomicMinF32x2:
+ case IR::Opcode::GlobalAtomicMaxF16x2:
+ case IR::Opcode::GlobalAtomicMaxF32x2:
+ return ReplaceAtomic(block, inst, storage_index, offset);
+ default:
+ throw InvalidArgument("Invalid global memory opcode {}", inst.GetOpcode());
+ }
+}
+} // Anonymous namespace
+
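+// Walks every block looking for global memory instructions whose address can be traced back
+// to a constant buffer pointer, records a storage buffer descriptor for each unique pointer,
+// and rewrites those instructions to use the indexed storage buffer opcodes instead.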
+void GlobalMemoryToStorageBufferPass(IR::Program& program) {
+ StorageInfo info;
+ for (IR::Block* const block : program.post_order_blocks) {
+ for (IR::Inst& inst : block->Instructions()) {
+ if (!IsGlobalMemory(inst)) {
+ continue;
+ }
+ CollectStorageBuffers(*block, inst, info);
+ }
+ }
+ for (const StorageBufferAddr& storage_buffer : info.set) {
+ program.info.storage_buffers_descriptors.push_back({
+ .cbuf_index = storage_buffer.index,
+ .cbuf_offset = storage_buffer.offset,
+ .count = 1,
+ .is_written = info.writes.contains(storage_buffer),
+ });
+ }
+ for (const StorageInst& storage_inst : info.to_replace) {
+ const StorageBufferAddr storage_buffer{storage_inst.storage_buffer};
+ const auto it{info.set.find(storage_inst.storage_buffer)};
+ const IR::U32 index{IR::Value{static_cast<u32>(info.set.index_of(it))}};
+ IR::Block* const block{storage_inst.block};
+ IR::Inst* const inst{storage_inst.inst};
+ const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)};
+ Replace(*block, *inst, index, offset);
+ }
+}
+
+template <typename Descriptors, typename Descriptor, typename Func>
+static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) {
+ // TODO: Handle arrays
+ const auto it{std::ranges::find_if(descriptors, pred)};
+ if (it != descriptors.end()) {
+ return static_cast<u32>(std::distance(descriptors.begin(), it));
+ }
+ descriptors.push_back(desc);
+ return static_cast<u32>(descriptors.size()) - 1;
+}
+
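+// Merges the storage buffer descriptors of a second program into a base program, OR-ing the
+// written flag of entries that describe the same constant buffer location.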
+void JoinStorageInfo(Info& base, Info& source) {
+ auto& descriptors = base.storage_buffers_descriptors;
+ for (auto& desc : source.storage_buffers_descriptors) {
+ auto it{std::ranges::find_if(descriptors, [&desc](const auto& existing) {
+ return desc.cbuf_index == existing.cbuf_index &&
+ desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count;
+ })};
+ if (it != descriptors.end()) {
+ it->is_written |= desc.is_written;
+ continue;
+ }
+ descriptors.push_back(desc);
+ }
+}
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
new file mode 100644
index 000000000..e9b55f835
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
@@ -0,0 +1,38 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <vector>
+
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+
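+// Collapses chains of Identity instructions so every argument points at the value that
+// originally produced it, then erases the now-unreferenced Identity and Void instructions.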
+void IdentityRemovalPass(IR::Program& program) {
+ std::vector<IR::Inst*> to_invalidate;
+ for (IR::Block* const block : program.blocks) {
+ for (auto inst = block->begin(); inst != block->end();) {
+ const size_t num_args{inst->NumArgs()};
+ for (size_t i = 0; i < num_args; ++i) {
+ IR::Value arg;
+ while ((arg = inst->Arg(i)).IsIdentity()) {
+ inst->SetArg(i, arg.Inst()->Arg(0));
+ }
+ }
+ if (inst->GetOpcode() == IR::Opcode::Identity ||
+ inst->GetOpcode() == IR::Opcode::Void) {
+ to_invalidate.push_back(&*inst);
+ inst = block->Instructions().erase(inst);
+ } else {
+ ++inst;
+ }
+ }
+ }
+ for (IR::Inst* const inst : to_invalidate) {
+ inst->Invalidate();
+ }
+}
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
new file mode 100644
index 000000000..773e1f961
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
@@ -0,0 +1,143 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+namespace {
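+// Maps each 16-bit floating-point opcode to its 32-bit equivalent. Conversions between F16
+// and F32 become identities, and opcodes without a 16-bit variant are returned unchanged.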
+IR::Opcode Replace(IR::Opcode op) {
+ switch (op) {
+ case IR::Opcode::FPAbs16:
+ return IR::Opcode::FPAbs32;
+ case IR::Opcode::FPAdd16:
+ return IR::Opcode::FPAdd32;
+ case IR::Opcode::FPCeil16:
+ return IR::Opcode::FPCeil32;
+ case IR::Opcode::FPFloor16:
+ return IR::Opcode::FPFloor32;
+ case IR::Opcode::FPFma16:
+ return IR::Opcode::FPFma32;
+ case IR::Opcode::FPMul16:
+ return IR::Opcode::FPMul32;
+ case IR::Opcode::FPNeg16:
+ return IR::Opcode::FPNeg32;
+ case IR::Opcode::FPRoundEven16:
+ return IR::Opcode::FPRoundEven32;
+ case IR::Opcode::FPSaturate16:
+ return IR::Opcode::FPSaturate32;
+ case IR::Opcode::FPClamp16:
+ return IR::Opcode::FPClamp32;
+ case IR::Opcode::FPTrunc16:
+ return IR::Opcode::FPTrunc32;
+ case IR::Opcode::CompositeConstructF16x2:
+ return IR::Opcode::CompositeConstructF32x2;
+ case IR::Opcode::CompositeConstructF16x3:
+ return IR::Opcode::CompositeConstructF32x3;
+ case IR::Opcode::CompositeConstructF16x4:
+ return IR::Opcode::CompositeConstructF32x4;
+ case IR::Opcode::CompositeExtractF16x2:
+ return IR::Opcode::CompositeExtractF32x2;
+ case IR::Opcode::CompositeExtractF16x3:
+ return IR::Opcode::CompositeExtractF32x3;
+ case IR::Opcode::CompositeExtractF16x4:
+ return IR::Opcode::CompositeExtractF32x4;
+ case IR::Opcode::CompositeInsertF16x2:
+ return IR::Opcode::CompositeInsertF32x2;
+ case IR::Opcode::CompositeInsertF16x3:
+ return IR::Opcode::CompositeInsertF32x3;
+ case IR::Opcode::CompositeInsertF16x4:
+ return IR::Opcode::CompositeInsertF32x4;
+ case IR::Opcode::FPOrdEqual16:
+ return IR::Opcode::FPOrdEqual32;
+ case IR::Opcode::FPUnordEqual16:
+ return IR::Opcode::FPUnordEqual32;
+ case IR::Opcode::FPOrdNotEqual16:
+ return IR::Opcode::FPOrdNotEqual32;
+ case IR::Opcode::FPUnordNotEqual16:
+ return IR::Opcode::FPUnordNotEqual32;
+ case IR::Opcode::FPOrdLessThan16:
+ return IR::Opcode::FPOrdLessThan32;
+ case IR::Opcode::FPUnordLessThan16:
+ return IR::Opcode::FPUnordLessThan32;
+ case IR::Opcode::FPOrdGreaterThan16:
+ return IR::Opcode::FPOrdGreaterThan32;
+ case IR::Opcode::FPUnordGreaterThan16:
+ return IR::Opcode::FPUnordGreaterThan32;
+ case IR::Opcode::FPOrdLessThanEqual16:
+ return IR::Opcode::FPOrdLessThanEqual32;
+ case IR::Opcode::FPUnordLessThanEqual16:
+ return IR::Opcode::FPUnordLessThanEqual32;
+ case IR::Opcode::FPOrdGreaterThanEqual16:
+ return IR::Opcode::FPOrdGreaterThanEqual32;
+ case IR::Opcode::FPUnordGreaterThanEqual16:
+ return IR::Opcode::FPUnordGreaterThanEqual32;
+ case IR::Opcode::FPIsNan16:
+ return IR::Opcode::FPIsNan32;
+ case IR::Opcode::ConvertS16F16:
+ return IR::Opcode::ConvertS16F32;
+ case IR::Opcode::ConvertS32F16:
+ return IR::Opcode::ConvertS32F32;
+ case IR::Opcode::ConvertS64F16:
+ return IR::Opcode::ConvertS64F32;
+ case IR::Opcode::ConvertU16F16:
+ return IR::Opcode::ConvertU16F32;
+ case IR::Opcode::ConvertU32F16:
+ return IR::Opcode::ConvertU32F32;
+ case IR::Opcode::ConvertU64F16:
+ return IR::Opcode::ConvertU64F32;
+ case IR::Opcode::PackFloat2x16:
+ return IR::Opcode::PackHalf2x16;
+ case IR::Opcode::UnpackFloat2x16:
+ return IR::Opcode::UnpackHalf2x16;
+ case IR::Opcode::ConvertF32F16:
+ return IR::Opcode::Identity;
+ case IR::Opcode::ConvertF16F32:
+ return IR::Opcode::Identity;
+ case IR::Opcode::ConvertF16S8:
+ return IR::Opcode::ConvertF32S8;
+ case IR::Opcode::ConvertF16S16:
+ return IR::Opcode::ConvertF32S16;
+ case IR::Opcode::ConvertF16S32:
+ return IR::Opcode::ConvertF32S32;
+ case IR::Opcode::ConvertF16S64:
+ return IR::Opcode::ConvertF32S64;
+ case IR::Opcode::ConvertF16U8:
+ return IR::Opcode::ConvertF32U8;
+ case IR::Opcode::ConvertF16U16:
+ return IR::Opcode::ConvertF32U16;
+ case IR::Opcode::ConvertF16U32:
+ return IR::Opcode::ConvertF32U32;
+ case IR::Opcode::ConvertF16U64:
+ return IR::Opcode::ConvertF32U64;
+ case IR::Opcode::GlobalAtomicAddF16x2:
+ return IR::Opcode::GlobalAtomicAddF32x2;
+ case IR::Opcode::StorageAtomicAddF16x2:
+ return IR::Opcode::StorageAtomicAddF32x2;
+ case IR::Opcode::GlobalAtomicMinF16x2:
+ return IR::Opcode::GlobalAtomicMinF32x2;
+ case IR::Opcode::StorageAtomicMinF16x2:
+ return IR::Opcode::StorageAtomicMinF32x2;
+ case IR::Opcode::GlobalAtomicMaxF16x2:
+ return IR::Opcode::GlobalAtomicMaxF32x2;
+ case IR::Opcode::StorageAtomicMaxF16x2:
+ return IR::Opcode::StorageAtomicMaxF32x2;
+ default:
+ return op;
+ }
+}
+} // Anonymous namespace
+
+void LowerFp16ToFp32(IR::Program& program) {
+ for (IR::Block* const block : program.blocks) {
+ for (IR::Inst& inst : block->Instructions()) {
+ inst.ReplaceOpcode(Replace(inst.GetOpcode()));
+ }
+ }
+}
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp
new file mode 100644
index 000000000..e80d3d1d9
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp
@@ -0,0 +1,218 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <utility>
+
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+namespace {
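+// 64-bit integers are emulated as (low, high) pairs of 32-bit values. Unpack splits a packed
+// 64-bit value, immediate or not, into that pair so the helpers below can operate on halves.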
+std::pair<IR::U32, IR::U32> Unpack(IR::IREmitter& ir, const IR::Value& packed) {
+ if (packed.IsImmediate()) {
+ const u64 value{packed.U64()};
+ return {
+ ir.Imm32(static_cast<u32>(value)),
+ ir.Imm32(static_cast<u32>(value >> 32)),
+ };
+ } else {
+ return std::pair<IR::U32, IR::U32>{
+ ir.CompositeExtract(packed, 0u),
+ ir.CompositeExtract(packed, 1u),
+ };
+ }
+}
+
+void IAdd64To32(IR::Block& block, IR::Inst& inst) {
+ if (inst.HasAssociatedPseudoOperation()) {
+ throw NotImplementedException("IAdd64 emulation with pseudo instructions");
+ }
+ IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+ const auto [a_lo, a_hi]{Unpack(ir, inst.Arg(0))};
+ const auto [b_lo, b_hi]{Unpack(ir, inst.Arg(1))};
+
+ const IR::U32 ret_lo{ir.IAdd(a_lo, b_lo)};
+ const IR::U32 carry{ir.Select(ir.GetCarryFromOp(ret_lo), ir.Imm32(1u), ir.Imm32(0u))};
+
+ const IR::U32 ret_hi{ir.IAdd(ir.IAdd(a_hi, b_hi), carry)};
+ inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
+}
+
+void ISub64To32(IR::Block& block, IR::Inst& inst) {
+ if (inst.HasAssociatedPseudoOperation()) {
+ throw NotImplementedException("ISub64 emulation with pseudo instructions");
+ }
+ IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+ const auto [a_lo, a_hi]{Unpack(ir, inst.Arg(0))};
+ const auto [b_lo, b_hi]{Unpack(ir, inst.Arg(1))};
+
+ const IR::U32 ret_lo{ir.ISub(a_lo, b_lo)};
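+ // An unsigned borrow out of the low half occurred if the 32-bit difference is greater
+ // than the original minuend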
+ const IR::U1 underflow{ir.IGreaterThan(ret_lo, a_lo, false)};
+ const IR::U32 underflow_bit{ir.Select(underflow, ir.Imm32(1u), ir.Imm32(0u))};
+
+ const IR::U32 ret_hi{ir.ISub(ir.ISub(a_hi, b_hi), underflow_bit)};
+ inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
+}
+
+void INeg64To32(IR::Block& block, IR::Inst& inst) {
+ if (inst.HasAssociatedPseudoOperation()) {
+ throw NotImplementedException("INeg64 emulation with pseudo instructions");
+ }
+ IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+ auto [lo, hi]{Unpack(ir, inst.Arg(0))};
+ lo = ir.BitwiseNot(lo);
+ hi = ir.BitwiseNot(hi);
+
+ lo = ir.IAdd(lo, ir.Imm32(1));
+
+ const IR::U32 carry{ir.Select(ir.GetCarryFromOp(lo), ir.Imm32(1u), ir.Imm32(0u))};
+ hi = ir.IAdd(hi, carry);
+
+ inst.ReplaceUsesWith(ir.CompositeConstruct(lo, hi));
+}
+
+void ShiftLeftLogical64To32(IR::Block& block, IR::Inst& inst) {
+ if (inst.HasAssociatedPseudoOperation()) {
+ throw NotImplementedException("ShiftLeftLogical64 emulation with pseudo instructions");
+ }
+ IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+ const auto [lo, hi]{Unpack(ir, inst.Arg(0))};
+ const IR::U32 shift{inst.Arg(1)};
+
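+ // Three cases are handled separately: shift == 0 (the value passes through unchanged),
+ // 0 < shift < 32 (bits spill from the low half into the high half) and shift >= 32
+ // (the low half is shifted directly into the high half and the low half becomes zero)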
+ const IR::U32 shifted_lo{ir.ShiftLeftLogical(lo, shift)};
+ const IR::U32 shifted_hi{ir.ShiftLeftLogical(hi, shift)};
+
+ const IR::U32 inv_shift{ir.ISub(shift, ir.Imm32(32))};
+ const IR::U1 is_long{ir.IGreaterThanEqual(inv_shift, ir.Imm32(0), true)};
+ const IR::U1 is_zero{ir.IEqual(shift, ir.Imm32(0))};
+
+ const IR::U32 long_ret_lo{ir.Imm32(0)};
+ const IR::U32 long_ret_hi{ir.ShiftLeftLogical(lo, inv_shift)};
+
+ const IR::U32 shift_complement{ir.ISub(ir.Imm32(32), shift)};
+ const IR::U32 lo_extract{ir.BitFieldExtract(lo, shift_complement, shift, false)};
+ const IR::U32 short_ret_lo{shifted_lo};
+ const IR::U32 short_ret_hi{ir.BitwiseOr(shifted_hi, lo_extract)};
+
+ const IR::U32 zero_ret_lo{lo};
+ const IR::U32 zero_ret_hi{hi};
+
+ const IR::U32 non_zero_lo{ir.Select(is_long, long_ret_lo, short_ret_lo)};
+ const IR::U32 non_zero_hi{ir.Select(is_long, long_ret_hi, short_ret_hi)};
+
+ const IR::U32 ret_lo{ir.Select(is_zero, zero_ret_lo, non_zero_lo)};
+ const IR::U32 ret_hi{ir.Select(is_zero, zero_ret_hi, non_zero_hi)};
+ inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
+}
+
+void ShiftRightLogical64To32(IR::Block& block, IR::Inst& inst) {
+ if (inst.HasAssociatedPseudoOperation()) {
+ throw NotImplementedException("ShiftRightLogical64 emulation with pseudo instructions");
+ }
+ IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+ const auto [lo, hi]{Unpack(ir, inst.Arg(0))};
+ const IR::U32 shift{inst.Arg(1)};
+
+ const IR::U32 shifted_lo{ir.ShiftRightLogical(lo, shift)};
+ const IR::U32 shifted_hi{ir.ShiftRightLogical(hi, shift)};
+
+ const IR::U32 inv_shift{ir.ISub(shift, ir.Imm32(32))};
+ const IR::U1 is_long{ir.IGreaterThanEqual(inv_shift, ir.Imm32(0), true)};
+ const IR::U1 is_zero{ir.IEqual(shift, ir.Imm32(0))};
+
+ const IR::U32 long_ret_hi{ir.Imm32(0)};
+ const IR::U32 long_ret_lo{ir.ShiftRightLogical(hi, inv_shift)};
+
+ const IR::U32 shift_complement{ir.ISub(ir.Imm32(32), shift)};
+ const IR::U32 short_hi_extract{ir.BitFieldExtract(hi, ir.Imm32(0), shift)};
+ const IR::U32 short_ret_hi{shifted_hi};
+ const IR::U32 short_ret_lo{
+ ir.BitFieldInsert(shifted_lo, short_hi_extract, shift_complement, shift)};
+
+ const IR::U32 zero_ret_lo{lo};
+ const IR::U32 zero_ret_hi{hi};
+
+ const IR::U32 non_zero_lo{ir.Select(is_long, long_ret_lo, short_ret_lo)};
+ const IR::U32 non_zero_hi{ir.Select(is_long, long_ret_hi, short_ret_hi)};
+
+ const IR::U32 ret_lo{ir.Select(is_zero, zero_ret_lo, non_zero_lo)};
+ const IR::U32 ret_hi{ir.Select(is_zero, zero_ret_hi, non_zero_hi)};
+ inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
+}
+
+void ShiftRightArithmetic64To32(IR::Block& block, IR::Inst& inst) {
+ if (inst.HasAssociatedPseudoOperation()) {
+ throw NotImplementedException("ShiftRightArithmetic64 emulation with pseudo instructions");
+ }
+ IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+ const auto [lo, hi]{Unpack(ir, inst.Arg(0))};
+ const IR::U32 shift{inst.Arg(1)};
+
+ const IR::U32 shifted_lo{ir.ShiftRightLogical(lo, shift)};
+ const IR::U32 shifted_hi{ir.ShiftRightArithmetic(hi, shift)};
+
+ const IR::U32 sign_extension{ir.ShiftRightArithmetic(hi, ir.Imm32(31))};
+
+ const IR::U32 inv_shift{ir.ISub(shift, ir.Imm32(32))};
+ const IR::U1 is_long{ir.IGreaterThanEqual(inv_shift, ir.Imm32(0), true)};
+ const IR::U1 is_zero{ir.IEqual(shift, ir.Imm32(0))};
+
+ const IR::U32 long_ret_hi{sign_extension};
+ const IR::U32 long_ret_lo{ir.ShiftRightArithmetic(hi, inv_shift)};
+
+ const IR::U32 shift_complement{ir.ISub(ir.Imm32(32), shift)};
+ const IR::U32 short_hi_extract(ir.BitFieldExtract(hi, ir.Imm32(0), shift));
+ const IR::U32 short_ret_hi{shifted_hi};
+ const IR::U32 short_ret_lo{
+ ir.BitFieldInsert(shifted_lo, short_hi_extract, shift_complement, shift)};
+
+ const IR::U32 zero_ret_lo{lo};
+ const IR::U32 zero_ret_hi{hi};
+
+ const IR::U32 non_zero_lo{ir.Select(is_long, long_ret_lo, short_ret_lo)};
+ const IR::U32 non_zero_hi{ir.Select(is_long, long_ret_hi, short_ret_hi)};
+
+ const IR::U32 ret_lo{ir.Select(is_zero, zero_ret_lo, non_zero_lo)};
+ const IR::U32 ret_hi{ir.Select(is_zero, zero_ret_hi, non_zero_hi)};
+ inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
+}
+
+void Lower(IR::Block& block, IR::Inst& inst) {
+ switch (inst.GetOpcode()) {
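+ // Packing and unpacking become identities because 64-bit values are already
+ // represented as pairs of 32-bit values after lowering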
+ case IR::Opcode::PackUint2x32:
+ case IR::Opcode::UnpackUint2x32:
+ return inst.ReplaceOpcode(IR::Opcode::Identity);
+ case IR::Opcode::IAdd64:
+ return IAdd64To32(block, inst);
+ case IR::Opcode::ISub64:
+ return ISub64To32(block, inst);
+ case IR::Opcode::INeg64:
+ return INeg64To32(block, inst);
+ case IR::Opcode::ShiftLeftLogical64:
+ return ShiftLeftLogical64To32(block, inst);
+ case IR::Opcode::ShiftRightLogical64:
+ return ShiftRightLogical64To32(block, inst);
+ case IR::Opcode::ShiftRightArithmetic64:
+ return ShiftRightArithmetic64To32(block, inst);
+ default:
+ break;
+ }
+}
+} // Anonymous namespace
+
+void LowerInt64ToInt32(IR::Program& program) {
+ const auto end{program.post_order_blocks.rend()};
+ for (auto it = program.post_order_blocks.rbegin(); it != end; ++it) {
+ IR::Block* const block{*it};
+ for (IR::Inst& inst : block->Instructions()) {
+ Lower(*block, inst);
+ }
+ }
+}
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
new file mode 100644
index 000000000..2f89b1ea0
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -0,0 +1,32 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <span>
+
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/program.h"
+
+namespace Shader::Optimization {
+
+void CollectShaderInfoPass(Environment& env, IR::Program& program);
+void ConstantPropagationPass(IR::Program& program);
+void DeadCodeEliminationPass(IR::Program& program);
+void GlobalMemoryToStorageBufferPass(IR::Program& program);
+void IdentityRemovalPass(IR::Program& program);
+void LowerFp16ToFp32(IR::Program& program);
+void LowerInt64ToInt32(IR::Program& program);
+void SsaRewritePass(IR::Program& program);
+void TexturePass(Environment& env, IR::Program& program);
+void VerificationPass(const IR::Program& program);
+
+// Dual Vertex
+void VertexATransformPass(IR::Program& program);
+void VertexBTransformPass(IR::Program& program);
+void JoinTextureInfo(Info& base, Info& source);
+void JoinStorageInfo(Info& base, Info& source);
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
new file mode 100644
index 000000000..53145fb5e
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
@@ -0,0 +1,383 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+// This file implements the SSA rewriting algorithm proposed in
+//
+// Simple and Efficient Construction of Static Single Assignment Form.
+// Braun M., Buchwald S., Hack S., Leißa R., Mallon C., Zwinkau A. (2013)
+// In: Jhala R., De Bosschere K. (eds)
+// Compiler Construction. CC 2013.
+// Lecture Notes in Computer Science, vol 7791.
+// Springer, Berlin, Heidelberg
+//
+// https://link.springer.com/chapter/10.1007/978-3-642-37051-9_6
+//
+
+#include <span>
+#include <variant>
+#include <vector>
+
+#include <boost/container/flat_map.hpp>
+#include <boost/container/flat_set.hpp>
+
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/opcodes.h"
+#include "shader_recompiler/frontend/ir/pred.h"
+#include "shader_recompiler/frontend/ir/reg.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+namespace {
+struct FlagTag {
+ auto operator<=>(const FlagTag&) const noexcept = default;
+};
+struct ZeroFlagTag : FlagTag {};
+struct SignFlagTag : FlagTag {};
+struct CarryFlagTag : FlagTag {};
+struct OverflowFlagTag : FlagTag {};
+
+struct GotoVariable : FlagTag {
+ GotoVariable() = default;
+ explicit GotoVariable(u32 index_) : index{index_} {}
+
+ auto operator<=>(const GotoVariable&) const noexcept = default;
+
+ u32 index;
+};
+
+struct IndirectBranchVariable {
+ auto operator<=>(const IndirectBranchVariable&) const noexcept = default;
+};
+
+using Variant = std::variant<IR::Reg, IR::Pred, ZeroFlagTag, SignFlagTag, CarryFlagTag,
+ OverflowFlagTag, GotoVariable, IndirectBranchVariable>;
+using ValueMap = boost::container::flat_map<IR::Block*, IR::Value>;
+
+struct DefTable {
+ const IR::Value& Def(IR::Block* block, IR::Reg variable) {
+ return block->SsaRegValue(variable);
+ }
+ void SetDef(IR::Block* block, IR::Reg variable, const IR::Value& value) {
+ block->SetSsaRegValue(variable, value);
+ }
+
+ const IR::Value& Def(IR::Block* block, IR::Pred variable) {
+ return preds[IR::PredIndex(variable)][block];
+ }
+ void SetDef(IR::Block* block, IR::Pred variable, const IR::Value& value) {
+ preds[IR::PredIndex(variable)].insert_or_assign(block, value);
+ }
+
+ const IR::Value& Def(IR::Block* block, GotoVariable variable) {
+ return goto_vars[variable.index][block];
+ }
+ void SetDef(IR::Block* block, GotoVariable variable, const IR::Value& value) {
+ goto_vars[variable.index].insert_or_assign(block, value);
+ }
+
+ const IR::Value& Def(IR::Block* block, IndirectBranchVariable) {
+ return indirect_branch_var[block];
+ }
+ void SetDef(IR::Block* block, IndirectBranchVariable, const IR::Value& value) {
+ indirect_branch_var.insert_or_assign(block, value);
+ }
+
+ const IR::Value& Def(IR::Block* block, ZeroFlagTag) {
+ return zero_flag[block];
+ }
+ void SetDef(IR::Block* block, ZeroFlagTag, const IR::Value& value) {
+ zero_flag.insert_or_assign(block, value);
+ }
+
+ const IR::Value& Def(IR::Block* block, SignFlagTag) {
+ return sign_flag[block];
+ }
+ void SetDef(IR::Block* block, SignFlagTag, const IR::Value& value) {
+ sign_flag.insert_or_assign(block, value);
+ }
+
+ const IR::Value& Def(IR::Block* block, CarryFlagTag) {
+ return carry_flag[block];
+ }
+ void SetDef(IR::Block* block, CarryFlagTag, const IR::Value& value) {
+ carry_flag.insert_or_assign(block, value);
+ }
+
+ const IR::Value& Def(IR::Block* block, OverflowFlagTag) {
+ return overflow_flag[block];
+ }
+ void SetDef(IR::Block* block, OverflowFlagTag, const IR::Value& value) {
+ overflow_flag.insert_or_assign(block, value);
+ }
+
+ std::array<ValueMap, IR::NUM_USER_PREDS> preds;
+ boost::container::flat_map<u32, ValueMap> goto_vars;
+ ValueMap indirect_branch_var;
+ ValueMap zero_flag;
+ ValueMap sign_flag;
+ ValueMap carry_flag;
+ ValueMap overflow_flag;
+};
+
+IR::Opcode UndefOpcode(IR::Reg) noexcept {
+ return IR::Opcode::UndefU32;
+}
+
+IR::Opcode UndefOpcode(IR::Pred) noexcept {
+ return IR::Opcode::UndefU1;
+}
+
+IR::Opcode UndefOpcode(const FlagTag&) noexcept {
+ return IR::Opcode::UndefU1;
+}
+
+IR::Opcode UndefOpcode(IndirectBranchVariable) noexcept {
+ return IR::Opcode::UndefU32;
+}
+
+enum class Status {
+ Start,
+ SetValue,
+ PreparePhiArgument,
+ PushPhiArgument,
+};
+
+template <typename Type>
+struct ReadState {
+ ReadState(IR::Block* block_) : block{block_} {}
+ ReadState() = default;
+
+ IR::Block* block{};
+ IR::Value result{};
+ IR::Inst* phi{};
+ IR::Block* const* pred_it{};
+ IR::Block* const* pred_end{};
+ Status pc{Status::Start};
+};
+
+class Pass {
+public:
+ template <typename Type>
+ void WriteVariable(Type variable, IR::Block* block, const IR::Value& value) {
+ current_def.SetDef(block, variable, value);
+ }
+
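+ // Iterative version of the paper's readVariable/readVariableRecursive: an explicit stack
+ // of ReadState entries replaces recursion across predecessor blocks, driven by the
+ // Status state machine below.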
+ template <typename Type>
+ IR::Value ReadVariable(Type variable, IR::Block* root_block) {
+ boost::container::small_vector<ReadState<Type>, 64> stack{
+ ReadState<Type>(nullptr),
+ ReadState<Type>(root_block),
+ };
+ const auto prepare_phi_operand{[&] {
+ if (stack.back().pred_it == stack.back().pred_end) {
+ IR::Inst* const phi{stack.back().phi};
+ IR::Block* const block{stack.back().block};
+ const IR::Value result{TryRemoveTrivialPhi(*phi, block, UndefOpcode(variable))};
+ stack.pop_back();
+ stack.back().result = result;
+ WriteVariable(variable, block, result);
+ } else {
+ IR::Block* const imm_pred{*stack.back().pred_it};
+ stack.back().pc = Status::PushPhiArgument;
+ stack.emplace_back(imm_pred);
+ }
+ }};
+ do {
+ IR::Block* const block{stack.back().block};
+ switch (stack.back().pc) {
+ case Status::Start: {
+ if (const IR::Value& def = current_def.Def(block, variable); !def.IsEmpty()) {
+ stack.back().result = def;
+ } else if (!block->IsSsaSealed()) {
+ // Incomplete CFG
+ IR::Inst* phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)};
+ phi->SetFlags(IR::TypeOf(UndefOpcode(variable)));
+
+ incomplete_phis[block].insert_or_assign(variable, phi);
+ stack.back().result = IR::Value{&*phi};
+ } else if (const std::span imm_preds = block->ImmPredecessors();
+ imm_preds.size() == 1) {
+ // Optimize the common case of one predecessor: no phi needed
+ stack.back().pc = Status::SetValue;
+ stack.emplace_back(imm_preds.front());
+ break;
+ } else {
+ // Break potential cycles with operandless phi
+ IR::Inst* const phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)};
+ phi->SetFlags(IR::TypeOf(UndefOpcode(variable)));
+
+ WriteVariable(variable, block, IR::Value{phi});
+
+ stack.back().phi = phi;
+ stack.back().pred_it = imm_preds.data();
+ stack.back().pred_end = imm_preds.data() + imm_preds.size();
+ prepare_phi_operand();
+ break;
+ }
+ }
+ [[fallthrough]];
+ case Status::SetValue: {
+ const IR::Value result{stack.back().result};
+ WriteVariable(variable, block, result);
+ stack.pop_back();
+ stack.back().result = result;
+ break;
+ }
+ case Status::PushPhiArgument: {
+ IR::Inst* const phi{stack.back().phi};
+ phi->AddPhiOperand(*stack.back().pred_it, stack.back().result);
+ ++stack.back().pred_it;
+ }
+ [[fallthrough]];
+ case Status::PreparePhiArgument:
+ prepare_phi_operand();
+ break;
+ }
+ } while (stack.size() > 1);
+ return stack.back().result;
+ }
+
+ void SealBlock(IR::Block* block) {
+ const auto it{incomplete_phis.find(block)};
+ if (it != incomplete_phis.end()) {
+ for (auto& pair : it->second) {
+ auto& variant{pair.first};
+ auto& phi{pair.second};
+ std::visit([&](auto& variable) { AddPhiOperands(variable, *phi, block); }, variant);
+ }
+ }
+ block->SsaSeal();
+ }
+
+private:
+ template <typename Type>
+ IR::Value AddPhiOperands(Type variable, IR::Inst& phi, IR::Block* block) {
+ for (IR::Block* const imm_pred : block->ImmPredecessors()) {
+ phi.AddPhiOperand(imm_pred, ReadVariable(variable, imm_pred));
+ }
+ return TryRemoveTrivialPhi(phi, block, UndefOpcode(variable));
+ }
+
+ IR::Value TryRemoveTrivialPhi(IR::Inst& phi, IR::Block* block, IR::Opcode undef_opcode) {
+ IR::Value same;
+ const size_t num_args{phi.NumArgs()};
+ for (size_t arg_index = 0; arg_index < num_args; ++arg_index) {
+ const IR::Value& op{phi.Arg(arg_index)};
+ if (op.Resolve() == same.Resolve() || op == IR::Value{&phi}) {
+ // Unique value or self-reference
+ continue;
+ }
+ if (!same.IsEmpty()) {
+ // The phi merges at least two values: not trivial
+ return IR::Value{&phi};
+ }
+ same = op;
+ }
+ // Remove the phi node from the block; it will be reinserted below
+ IR::Block::InstructionList& list{block->Instructions()};
+ list.erase(IR::Block::InstructionList::s_iterator_to(phi));
+
+ // Find the first non-phi instruction and use it as an insertion point
+ IR::Block::iterator reinsert_point{std::ranges::find_if_not(list, IR::IsPhi)};
+ if (same.IsEmpty()) {
+ // The phi is unreachable or in the start block
+ // Insert an undefined instruction and use it as the phi replacement;
+ // the phi node will be reinserted right after it
+ reinsert_point = block->PrependNewInst(reinsert_point, undef_opcode);
+ same = IR::Value{&*reinsert_point};
+ ++reinsert_point;
+ }
+ // Reinsert the phi node and reroute all its uses to the "same" value
+ list.insert(reinsert_point, phi);
+ phi.ReplaceUsesWith(same);
+ // TODO: Try to recursively remove all phi users, which might have become trivial
+ return same;
+ }
+
+ boost::container::flat_map<IR::Block*, boost::container::flat_map<Variant, IR::Inst*>>
+ incomplete_phis;
+ DefTable current_def;
+};
+
+void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
+ switch (inst.GetOpcode()) {
+ case IR::Opcode::SetRegister:
+ if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) {
+ pass.WriteVariable(reg, block, inst.Arg(1));
+ }
+ break;
+ case IR::Opcode::SetPred:
+ if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) {
+ pass.WriteVariable(pred, block, inst.Arg(1));
+ }
+ break;
+ case IR::Opcode::SetGotoVariable:
+ pass.WriteVariable(GotoVariable{inst.Arg(0).U32()}, block, inst.Arg(1));
+ break;
+ case IR::Opcode::SetIndirectBranchVariable:
+ pass.WriteVariable(IndirectBranchVariable{}, block, inst.Arg(0));
+ break;
+ case IR::Opcode::SetZFlag:
+ pass.WriteVariable(ZeroFlagTag{}, block, inst.Arg(0));
+ break;
+ case IR::Opcode::SetSFlag:
+ pass.WriteVariable(SignFlagTag{}, block, inst.Arg(0));
+ break;
+ case IR::Opcode::SetCFlag:
+ pass.WriteVariable(CarryFlagTag{}, block, inst.Arg(0));
+ break;
+ case IR::Opcode::SetOFlag:
+ pass.WriteVariable(OverflowFlagTag{}, block, inst.Arg(0));
+ break;
+ case IR::Opcode::GetRegister:
+ if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) {
+ inst.ReplaceUsesWith(pass.ReadVariable(reg, block));
+ }
+ break;
+ case IR::Opcode::GetPred:
+ if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) {
+ inst.ReplaceUsesWith(pass.ReadVariable(pred, block));
+ }
+ break;
+ case IR::Opcode::GetGotoVariable:
+ inst.ReplaceUsesWith(pass.ReadVariable(GotoVariable{inst.Arg(0).U32()}, block));
+ break;
+ case IR::Opcode::GetIndirectBranchVariable:
+ inst.ReplaceUsesWith(pass.ReadVariable(IndirectBranchVariable{}, block));
+ break;
+ case IR::Opcode::GetZFlag:
+ inst.ReplaceUsesWith(pass.ReadVariable(ZeroFlagTag{}, block));
+ break;
+ case IR::Opcode::GetSFlag:
+ inst.ReplaceUsesWith(pass.ReadVariable(SignFlagTag{}, block));
+ break;
+ case IR::Opcode::GetCFlag:
+ inst.ReplaceUsesWith(pass.ReadVariable(CarryFlagTag{}, block));
+ break;
+ case IR::Opcode::GetOFlag:
+ inst.ReplaceUsesWith(pass.ReadVariable(OverflowFlagTag{}, block));
+ break;
+ default:
+ break;
+ }
+}
+
+void VisitBlock(Pass& pass, IR::Block* block) {
+ for (IR::Inst& inst : block->Instructions()) {
+ VisitInst(pass, block, inst);
+ }
+ pass.SealBlock(block);
+}
+} // Anonymous namespace
+
+void SsaRewritePass(IR::Program& program) {
+ Pass pass;
+ const auto end{program.post_order_blocks.rend()};
+ for (auto block = program.post_order_blocks.rbegin(); block != end; ++block) {
+ VisitBlock(pass, *block);
+ }
+}
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp
new file mode 100644
index 000000000..44ad10d43
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/texture_pass.cpp
@@ -0,0 +1,523 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <bit>
+#include <optional>
+
+#include <boost/container/small_vector.hpp>
+
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/breadth_first_search.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/ir_opt/passes.h"
+#include "shader_recompiler/shader_info.h"
+
+namespace Shader::Optimization {
+namespace {
+struct ConstBufferAddr {
+ u32 index;
+ u32 offset;
+ u32 secondary_index;
+ u32 secondary_offset;
+ IR::U32 dynamic_offset;
+ u32 count;
+ bool has_secondary;
+};
+
+struct TextureInst {
+ ConstBufferAddr cbuf;
+ IR::Inst* inst;
+ IR::Block* block;
+};
+
+using TextureInstVector = boost::container::small_vector<TextureInst, 24>;
+
+constexpr u32 DESCRIPTOR_SIZE = 8;
+constexpr u32 DESCRIPTOR_SIZE_SHIFT = static_cast<u32>(std::countr_zero(DESCRIPTOR_SIZE));
+
+IR::Opcode IndexedInstruction(const IR::Inst& inst) {
+ switch (inst.GetOpcode()) {
+ case IR::Opcode::BindlessImageSampleImplicitLod:
+ case IR::Opcode::BoundImageSampleImplicitLod:
+ return IR::Opcode::ImageSampleImplicitLod;
+ case IR::Opcode::BoundImageSampleExplicitLod:
+ case IR::Opcode::BindlessImageSampleExplicitLod:
+ return IR::Opcode::ImageSampleExplicitLod;
+ case IR::Opcode::BoundImageSampleDrefImplicitLod:
+ case IR::Opcode::BindlessImageSampleDrefImplicitLod:
+ return IR::Opcode::ImageSampleDrefImplicitLod;
+ case IR::Opcode::BoundImageSampleDrefExplicitLod:
+ case IR::Opcode::BindlessImageSampleDrefExplicitLod:
+ return IR::Opcode::ImageSampleDrefExplicitLod;
+ case IR::Opcode::BindlessImageGather:
+ case IR::Opcode::BoundImageGather:
+ return IR::Opcode::ImageGather;
+ case IR::Opcode::BindlessImageGatherDref:
+ case IR::Opcode::BoundImageGatherDref:
+ return IR::Opcode::ImageGatherDref;
+ case IR::Opcode::BindlessImageFetch:
+ case IR::Opcode::BoundImageFetch:
+ return IR::Opcode::ImageFetch;
+ case IR::Opcode::BoundImageQueryDimensions:
+ case IR::Opcode::BindlessImageQueryDimensions:
+ return IR::Opcode::ImageQueryDimensions;
+ case IR::Opcode::BoundImageQueryLod:
+ case IR::Opcode::BindlessImageQueryLod:
+ return IR::Opcode::ImageQueryLod;
+ case IR::Opcode::BoundImageGradient:
+ case IR::Opcode::BindlessImageGradient:
+ return IR::Opcode::ImageGradient;
+ case IR::Opcode::BoundImageRead:
+ case IR::Opcode::BindlessImageRead:
+ return IR::Opcode::ImageRead;
+ case IR::Opcode::BoundImageWrite:
+ case IR::Opcode::BindlessImageWrite:
+ return IR::Opcode::ImageWrite;
+ case IR::Opcode::BoundImageAtomicIAdd32:
+ case IR::Opcode::BindlessImageAtomicIAdd32:
+ return IR::Opcode::ImageAtomicIAdd32;
+ case IR::Opcode::BoundImageAtomicSMin32:
+ case IR::Opcode::BindlessImageAtomicSMin32:
+ return IR::Opcode::ImageAtomicSMin32;
+ case IR::Opcode::BoundImageAtomicUMin32:
+ case IR::Opcode::BindlessImageAtomicUMin32:
+ return IR::Opcode::ImageAtomicUMin32;
+ case IR::Opcode::BoundImageAtomicSMax32:
+ case IR::Opcode::BindlessImageAtomicSMax32:
+ return IR::Opcode::ImageAtomicSMax32;
+ case IR::Opcode::BoundImageAtomicUMax32:
+ case IR::Opcode::BindlessImageAtomicUMax32:
+ return IR::Opcode::ImageAtomicUMax32;
+ case IR::Opcode::BoundImageAtomicInc32:
+ case IR::Opcode::BindlessImageAtomicInc32:
+ return IR::Opcode::ImageAtomicInc32;
+ case IR::Opcode::BoundImageAtomicDec32:
+ case IR::Opcode::BindlessImageAtomicDec32:
+ return IR::Opcode::ImageAtomicDec32;
+ case IR::Opcode::BoundImageAtomicAnd32:
+ case IR::Opcode::BindlessImageAtomicAnd32:
+ return IR::Opcode::ImageAtomicAnd32;
+ case IR::Opcode::BoundImageAtomicOr32:
+ case IR::Opcode::BindlessImageAtomicOr32:
+ return IR::Opcode::ImageAtomicOr32;
+ case IR::Opcode::BoundImageAtomicXor32:
+ case IR::Opcode::BindlessImageAtomicXor32:
+ return IR::Opcode::ImageAtomicXor32;
+ case IR::Opcode::BoundImageAtomicExchange32:
+ case IR::Opcode::BindlessImageAtomicExchange32:
+ return IR::Opcode::ImageAtomicExchange32;
+ default:
+ return IR::Opcode::Void;
+ }
+}
+
+bool IsBindless(const IR::Inst& inst) {
+ switch (inst.GetOpcode()) {
+ case IR::Opcode::BindlessImageSampleImplicitLod:
+ case IR::Opcode::BindlessImageSampleExplicitLod:
+ case IR::Opcode::BindlessImageSampleDrefImplicitLod:
+ case IR::Opcode::BindlessImageSampleDrefExplicitLod:
+ case IR::Opcode::BindlessImageGather:
+ case IR::Opcode::BindlessImageGatherDref:
+ case IR::Opcode::BindlessImageFetch:
+ case IR::Opcode::BindlessImageQueryDimensions:
+ case IR::Opcode::BindlessImageQueryLod:
+ case IR::Opcode::BindlessImageGradient:
+ case IR::Opcode::BindlessImageRead:
+ case IR::Opcode::BindlessImageWrite:
+ case IR::Opcode::BindlessImageAtomicIAdd32:
+ case IR::Opcode::BindlessImageAtomicSMin32:
+ case IR::Opcode::BindlessImageAtomicUMin32:
+ case IR::Opcode::BindlessImageAtomicSMax32:
+ case IR::Opcode::BindlessImageAtomicUMax32:
+ case IR::Opcode::BindlessImageAtomicInc32:
+ case IR::Opcode::BindlessImageAtomicDec32:
+ case IR::Opcode::BindlessImageAtomicAnd32:
+ case IR::Opcode::BindlessImageAtomicOr32:
+ case IR::Opcode::BindlessImageAtomicXor32:
+ case IR::Opcode::BindlessImageAtomicExchange32:
+ return true;
+ case IR::Opcode::BoundImageSampleImplicitLod:
+ case IR::Opcode::BoundImageSampleExplicitLod:
+ case IR::Opcode::BoundImageSampleDrefImplicitLod:
+ case IR::Opcode::BoundImageSampleDrefExplicitLod:
+ case IR::Opcode::BoundImageGather:
+ case IR::Opcode::BoundImageGatherDref:
+ case IR::Opcode::BoundImageFetch:
+ case IR::Opcode::BoundImageQueryDimensions:
+ case IR::Opcode::BoundImageQueryLod:
+ case IR::Opcode::BoundImageGradient:
+ case IR::Opcode::BoundImageRead:
+ case IR::Opcode::BoundImageWrite:
+ case IR::Opcode::BoundImageAtomicIAdd32:
+ case IR::Opcode::BoundImageAtomicSMin32:
+ case IR::Opcode::BoundImageAtomicUMin32:
+ case IR::Opcode::BoundImageAtomicSMax32:
+ case IR::Opcode::BoundImageAtomicUMax32:
+ case IR::Opcode::BoundImageAtomicInc32:
+ case IR::Opcode::BoundImageAtomicDec32:
+ case IR::Opcode::BoundImageAtomicAnd32:
+ case IR::Opcode::BoundImageAtomicOr32:
+ case IR::Opcode::BoundImageAtomicXor32:
+ case IR::Opcode::BoundImageAtomicExchange32:
+ return false;
+ default:
+ throw InvalidArgument("Invalid opcode {}", inst.GetOpcode());
+ }
+}
+
+bool IsTextureInstruction(const IR::Inst& inst) {
+ return IndexedInstruction(inst) != IR::Opcode::Void;
+}
+
+std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst);
+
+std::optional<ConstBufferAddr> Track(const IR::Value& value) {
+ return IR::BreadthFirstSearch(value, TryGetConstBuffer);
+}
+
+std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst) {
+ switch (inst->GetOpcode()) {
+ default:
+ return std::nullopt;
+ case IR::Opcode::BitwiseOr32: {
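+ // A bindless handle may be built by OR-ing two constant buffer reads; track both
+ // operands and record the second one as a secondary constant buffer address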
+ std::optional lhs{Track(inst->Arg(0))};
+ std::optional rhs{Track(inst->Arg(1))};
+ if (!lhs || !rhs) {
+ return std::nullopt;
+ }
+ if (lhs->has_secondary || rhs->has_secondary) {
+ return std::nullopt;
+ }
+ if (lhs->count > 1 || rhs->count > 1) {
+ return std::nullopt;
+ }
+ if (lhs->index > rhs->index || lhs->offset > rhs->offset) {
+ std::swap(lhs, rhs);
+ }
+ return ConstBufferAddr{
+ .index = lhs->index,
+ .offset = lhs->offset,
+ .secondary_index = rhs->index,
+ .secondary_offset = rhs->offset,
+ .dynamic_offset = {},
+ .count = 1,
+ .has_secondary = true,
+ };
+ }
+ case IR::Opcode::GetCbufU32x2:
+ case IR::Opcode::GetCbufU32:
+ break;
+ }
+ const IR::Value index{inst->Arg(0)};
+ const IR::Value offset{inst->Arg(1)};
+ if (!index.IsImmediate()) {
+ // Reading a bindless texture from variable indices is valid
+ // but not supported here at the moment
+ return std::nullopt;
+ }
+ if (offset.IsImmediate()) {
+ return ConstBufferAddr{
+ .index = index.U32(),
+ .offset = offset.U32(),
+ .secondary_index = 0,
+ .secondary_offset = 0,
+ .dynamic_offset = {},
+ .count = 1,
+ .has_secondary = false,
+ };
+ }
+ IR::Inst* const offset_inst{offset.InstRecursive()};
+ if (offset_inst->GetOpcode() != IR::Opcode::IAdd32) {
+ return std::nullopt;
+ }
+ u32 base_offset{};
+ IR::U32 dynamic_offset;
+ if (offset_inst->Arg(0).IsImmediate()) {
+ base_offset = offset_inst->Arg(0).U32();
+ dynamic_offset = IR::U32{offset_inst->Arg(1)};
+ } else if (offset_inst->Arg(1).IsImmediate()) {
+ base_offset = offset_inst->Arg(1).U32();
+ dynamic_offset = IR::U32{offset_inst->Arg(0)};
+ } else {
+ return std::nullopt;
+ }
+ return ConstBufferAddr{
+ .index = index.U32(),
+ .offset = base_offset,
+ .secondary_index = 0,
+ .secondary_offset = 0,
+ .dynamic_offset = dynamic_offset,
+ .count = 8,
+ .has_secondary = false,
+ };
+}
+
+TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) {
+ ConstBufferAddr addr;
+ if (IsBindless(inst)) {
+ const std::optional<ConstBufferAddr> track_addr{Track(inst.Arg(0))};
+ if (!track_addr) {
+ throw NotImplementedException("Failed to track bindless texture constant buffer");
+ }
+ addr = *track_addr;
+ } else {
+ addr = ConstBufferAddr{
+ .index = env.TextureBoundBuffer(),
+ .offset = inst.Arg(0).U32(),
+ .secondary_index = 0,
+ .secondary_offset = 0,
+ .dynamic_offset = {},
+ .count = 1,
+ .has_secondary = false,
+ };
+ }
+ return TextureInst{
+ .cbuf = addr,
+ .inst = &inst,
+ .block = block,
+ };
+}
+
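+// When the handle is split across two constant buffer words, both raw values are OR-ed back
+// together before the texture type is queried from the environment.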
+TextureType ReadTextureType(Environment& env, const ConstBufferAddr& cbuf) {
+ const u32 secondary_index{cbuf.has_secondary ? cbuf.secondary_index : cbuf.index};
+ const u32 secondary_offset{cbuf.has_secondary ? cbuf.secondary_offset : cbuf.offset};
+ const u32 lhs_raw{env.ReadCbufValue(cbuf.index, cbuf.offset)};
+ const u32 rhs_raw{env.ReadCbufValue(secondary_index, secondary_offset)};
+ return env.ReadTextureType(lhs_raw | rhs_raw);
+}
+
+class Descriptors {
+public:
+ explicit Descriptors(TextureBufferDescriptors& texture_buffer_descriptors_,
+ ImageBufferDescriptors& image_buffer_descriptors_,
+ TextureDescriptors& texture_descriptors_,
+ ImageDescriptors& image_descriptors_)
+ : texture_buffer_descriptors{texture_buffer_descriptors_},
+ image_buffer_descriptors{image_buffer_descriptors_},
+ texture_descriptors{texture_descriptors_}, image_descriptors{image_descriptors_} {}
+
+ u32 Add(const TextureBufferDescriptor& desc) {
+ return Add(texture_buffer_descriptors, desc, [&desc](const auto& existing) {
+ return desc.cbuf_index == existing.cbuf_index &&
+ desc.cbuf_offset == existing.cbuf_offset &&
+ desc.secondary_cbuf_index == existing.secondary_cbuf_index &&
+ desc.secondary_cbuf_offset == existing.secondary_cbuf_offset &&
+ desc.count == existing.count && desc.size_shift == existing.size_shift &&
+ desc.has_secondary == existing.has_secondary;
+ });
+ }
+
+ u32 Add(const ImageBufferDescriptor& desc) {
+ const u32 index{Add(image_buffer_descriptors, desc, [&desc](const auto& existing) {
+ return desc.format == existing.format && desc.cbuf_index == existing.cbuf_index &&
+ desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count &&
+ desc.size_shift == existing.size_shift;
+ })};
+ image_buffer_descriptors[index].is_written |= desc.is_written;
+ image_buffer_descriptors[index].is_read |= desc.is_read;
+ return index;
+ }
+
+ u32 Add(const TextureDescriptor& desc) {
+ return Add(texture_descriptors, desc, [&desc](const auto& existing) {
+ return desc.type == existing.type && desc.is_depth == existing.is_depth &&
+ desc.has_secondary == existing.has_secondary &&
+ desc.cbuf_index == existing.cbuf_index &&
+ desc.cbuf_offset == existing.cbuf_offset &&
+ desc.secondary_cbuf_index == existing.secondary_cbuf_index &&
+ desc.secondary_cbuf_offset == existing.secondary_cbuf_offset &&
+ desc.count == existing.count && desc.size_shift == existing.size_shift;
+ });
+ }
+
+ u32 Add(const ImageDescriptor& desc) {
+ const u32 index{Add(image_descriptors, desc, [&desc](const auto& existing) {
+ return desc.type == existing.type && desc.format == existing.format &&
+ desc.cbuf_index == existing.cbuf_index &&
+ desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count &&
+ desc.size_shift == existing.size_shift;
+ })};
+ image_descriptors[index].is_written |= desc.is_written;
+ image_descriptors[index].is_read |= desc.is_read;
+ return index;
+ }
+
+private:
+ template <typename Descriptors, typename Descriptor, typename Func>
+ static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) {
+ // TODO: Handle arrays
+ const auto it{std::ranges::find_if(descriptors, pred)};
+ if (it != descriptors.end()) {
+ return static_cast<u32>(std::distance(descriptors.begin(), it));
+ }
+ descriptors.push_back(desc);
+ return static_cast<u32>(descriptors.size()) - 1;
+ }
+
+ TextureBufferDescriptors& texture_buffer_descriptors;
+ ImageBufferDescriptors& image_buffer_descriptors;
+ TextureDescriptors& texture_descriptors;
+ ImageDescriptors& image_descriptors;
+};
+} // Anonymous namespace
+
+void TexturePass(Environment& env, IR::Program& program) {
+ TextureInstVector to_replace;
+ for (IR::Block* const block : program.post_order_blocks) {
+ for (IR::Inst& inst : block->Instructions()) {
+ if (!IsTextureInstruction(inst)) {
+ continue;
+ }
+ to_replace.push_back(MakeInst(env, block, inst));
+ }
+ }
+ // Sort instructions to visit textures by constant buffer index, then by offset
+ std::ranges::sort(to_replace, [](const auto& lhs, const auto& rhs) {
+ return lhs.cbuf.offset < rhs.cbuf.offset;
+ });
+ std::stable_sort(to_replace.begin(), to_replace.end(), [](const auto& lhs, const auto& rhs) {
+ return lhs.cbuf.index < rhs.cbuf.index;
+ });
+ Descriptors descriptors{
+ program.info.texture_buffer_descriptors,
+ program.info.image_buffer_descriptors,
+ program.info.texture_descriptors,
+ program.info.image_descriptors,
+ };
+ for (TextureInst& texture_inst : to_replace) {
+ // TODO: Handle arrays
+ IR::Inst* const inst{texture_inst.inst};
+ inst->ReplaceOpcode(IndexedInstruction(*inst));
+
+ const auto& cbuf{texture_inst.cbuf};
+ auto flags{inst->Flags<IR::TextureInstInfo>()};
+ switch (inst->GetOpcode()) {
+ case IR::Opcode::ImageQueryDimensions:
+ flags.type.Assign(ReadTextureType(env, cbuf));
+ inst->SetFlags(flags);
+ break;
+ case IR::Opcode::ImageFetch:
+ if (flags.type != TextureType::Color1D) {
+ break;
+ }
+ if (ReadTextureType(env, cbuf) == TextureType::Buffer) {
+ // Replace with the bound texture type only when it's a texture buffer.
+ // If the instruction is 1D and the bound type is 2D, don't change the code
+ // and let the rasterizer's robustness handle it.
+ // This happens in Fire Emblem: Three Houses.
+ flags.type.Assign(TextureType::Buffer);
+ }
+ break;
+ default:
+ break;
+ }
+ u32 index;
+ switch (inst->GetOpcode()) {
+ case IR::Opcode::ImageRead:
+ case IR::Opcode::ImageAtomicIAdd32:
+ case IR::Opcode::ImageAtomicSMin32:
+ case IR::Opcode::ImageAtomicUMin32:
+ case IR::Opcode::ImageAtomicSMax32:
+ case IR::Opcode::ImageAtomicUMax32:
+ case IR::Opcode::ImageAtomicInc32:
+ case IR::Opcode::ImageAtomicDec32:
+ case IR::Opcode::ImageAtomicAnd32:
+ case IR::Opcode::ImageAtomicOr32:
+ case IR::Opcode::ImageAtomicXor32:
+ case IR::Opcode::ImageAtomicExchange32:
+ case IR::Opcode::ImageWrite: {
+ if (cbuf.has_secondary) {
+ throw NotImplementedException("Unexpected separate sampler");
+ }
+ const bool is_written{inst->GetOpcode() != IR::Opcode::ImageRead};
+ const bool is_read{inst->GetOpcode() != IR::Opcode::ImageWrite};
+ if (flags.type == TextureType::Buffer) {
+ index = descriptors.Add(ImageBufferDescriptor{
+ .format = flags.image_format,
+ .is_written = is_written,
+ .is_read = is_read,
+ .cbuf_index = cbuf.index,
+ .cbuf_offset = cbuf.offset,
+ .count = cbuf.count,
+ .size_shift = DESCRIPTOR_SIZE_SHIFT,
+ });
+ } else {
+ index = descriptors.Add(ImageDescriptor{
+ .type = flags.type,
+ .format = flags.image_format,
+ .is_written = is_written,
+ .is_read = is_read,
+ .cbuf_index = cbuf.index,
+ .cbuf_offset = cbuf.offset,
+ .count = cbuf.count,
+ .size_shift = DESCRIPTOR_SIZE_SHIFT,
+ });
+ }
+ break;
+ }
+ default:
+ if (flags.type == TextureType::Buffer) {
+ index = descriptors.Add(TextureBufferDescriptor{
+ .has_secondary = cbuf.has_secondary,
+ .cbuf_index = cbuf.index,
+ .cbuf_offset = cbuf.offset,
+ .secondary_cbuf_index = cbuf.secondary_index,
+ .secondary_cbuf_offset = cbuf.secondary_offset,
+ .count = cbuf.count,
+ .size_shift = DESCRIPTOR_SIZE_SHIFT,
+ });
+ } else {
+ index = descriptors.Add(TextureDescriptor{
+ .type = flags.type,
+ .is_depth = flags.is_depth != 0,
+ .has_secondary = cbuf.has_secondary,
+ .cbuf_index = cbuf.index,
+ .cbuf_offset = cbuf.offset,
+ .secondary_cbuf_index = cbuf.secondary_index,
+ .secondary_cbuf_offset = cbuf.secondary_offset,
+ .count = cbuf.count,
+ .size_shift = DESCRIPTOR_SIZE_SHIFT,
+ });
+ }
+ break;
+ }
+ flags.descriptor_index.Assign(index);
+ inst->SetFlags(flags);
+
+ if (cbuf.count > 1) {
+ const auto insert_point{IR::Block::InstructionList::s_iterator_to(*inst)};
+ IR::IREmitter ir{*texture_inst.block, insert_point};
+ const IR::U32 shift{ir.Imm32(std::countr_zero(DESCRIPTOR_SIZE))};
+ inst->SetArg(0, ir.ShiftRightArithmetic(cbuf.dynamic_offset, shift));
+ } else {
+ inst->SetArg(0, IR::Value{});
+ }
+ }
+}
+
+void JoinTextureInfo(Info& base, Info& source) {
+ Descriptors descriptors{
+ base.texture_buffer_descriptors,
+ base.image_buffer_descriptors,
+ base.texture_descriptors,
+ base.image_descriptors,
+ };
+ for (auto& desc : source.texture_buffer_descriptors) {
+ descriptors.Add(desc);
+ }
+ for (auto& desc : source.image_buffer_descriptors) {
+ descriptors.Add(desc);
+ }
+ for (auto& desc : source.texture_descriptors) {
+ descriptors.Add(desc);
+ }
+ for (auto& desc : source.image_descriptors) {
+ descriptors.Add(desc);
+ }
+}
+
+} // namespace Shader::Optimization
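
Aside from the pass itself, the lookup-or-append idiom in Descriptors::Add is worth isolating. A minimal standalone sketch of the same pattern (AddUnique and the toy main are illustration-only names, not part of the patch):

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    // Return the index of the first element matching 'pred', or append 'value'
    // and return its index -- the same pattern as Descriptors::Add above.
    template <typename Container, typename T, typename Pred>
    std::uint32_t AddUnique(Container& container, const T& value, Pred&& pred) {
        const auto it = std::ranges::find_if(container, pred);
        if (it != container.end()) {
            return static_cast<std::uint32_t>(std::distance(container.begin(), it));
        }
        container.push_back(value);
        return static_cast<std::uint32_t>(container.size() - 1);
    }

    int main() {
        std::vector<int> values{3, 5, 7};
        const auto index = AddUnique(values, 9, [](int v) { return v == 5; });
        // index == 1; 9 is not appended because 5 already matches the predicate.
        return static_cast<int>(index);
    }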
diff --git a/src/shader_recompiler/ir_opt/verification_pass.cpp b/src/shader_recompiler/ir_opt/verification_pass.cpp
new file mode 100644
index 000000000..975d5aadf
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/verification_pass.cpp
@@ -0,0 +1,98 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <map>
+#include <set>
+
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+
+static void ValidateTypes(const IR::Program& program) {
+ for (const auto& block : program.blocks) {
+ for (const IR::Inst& inst : *block) {
+ if (inst.GetOpcode() == IR::Opcode::Phi) {
+ // Skip validation on phi nodes
+ continue;
+ }
+ const size_t num_args{inst.NumArgs()};
+ for (size_t i = 0; i < num_args; ++i) {
+ const IR::Type t1{inst.Arg(i).Type()};
+ const IR::Type t2{IR::ArgTypeOf(inst.GetOpcode(), i)};
+ if (!IR::AreTypesCompatible(t1, t2)) {
+ throw LogicError("Invalid types in block:\n{}", IR::DumpBlock(*block));
+ }
+ }
+ }
+ }
+}
+
+static void ValidateUses(const IR::Program& program) {
+ std::map<IR::Inst*, int> actual_uses;
+ for (const auto& block : program.blocks) {
+ for (const IR::Inst& inst : *block) {
+ const size_t num_args{inst.NumArgs()};
+ for (size_t i = 0; i < num_args; ++i) {
+ const IR::Value arg{inst.Arg(i)};
+ if (!arg.IsImmediate()) {
+ ++actual_uses[arg.Inst()];
+ }
+ }
+ }
+ }
+ for (const auto [inst, uses] : actual_uses) {
+ if (inst->UseCount() != uses) {
+ throw LogicError("Invalid uses in block: {}", IR::DumpProgram(program));
+ }
+ }
+}
+
+static void ValidateForwardDeclarations(const IR::Program& program) {
+ std::set<const IR::Inst*> definitions;
+ for (const IR::Block* const block : program.blocks) {
+ for (const IR::Inst& inst : *block) {
+ definitions.emplace(&inst);
+ if (inst.GetOpcode() == IR::Opcode::Phi) {
+ // Phi nodes can have forward declarations
+ continue;
+ }
+ const size_t num_args{inst.NumArgs()};
+ for (size_t arg = 0; arg < num_args; ++arg) {
+ if (inst.Arg(arg).IsImmediate()) {
+ continue;
+ }
+ if (!definitions.contains(inst.Arg(arg).Inst())) {
+ throw LogicError("Forward declaration in block: {}", IR::DumpBlock(*block));
+ }
+ }
+ }
+ }
+}
+
+static void ValidatePhiNodes(const IR::Program& program) {
+ for (const IR::Block* const block : program.blocks) {
+ bool no_more_phis{false};
+ for (const IR::Inst& inst : *block) {
+ if (inst.GetOpcode() == IR::Opcode::Phi) {
+ if (no_more_phis) {
+ throw LogicError("Interleaved phi nodes: {}", IR::DumpBlock(*block));
+ }
+ } else {
+ no_more_phis = true;
+ }
+ }
+ }
+}
+
+void VerificationPass(const IR::Program& program) {
+ ValidateTypes(program);
+ ValidateUses(program);
+ ValidateForwardDeclarations(program);
+ ValidatePhiNodes(program);
+}
+
+} // namespace Shader::Optimization
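
The use-count check above boils down to recounting operand references and comparing against the incrementally maintained counter. A toy sketch of the same invariant, with Node, args and cached_use_count standing in for the real IR types:

    #include <cassert>
    #include <map>
    #include <vector>

    struct Node {
        std::vector<Node*> args;   // operands of this instruction
        int cached_use_count = 0;  // maintained incrementally elsewhere
    };

    // Recount every operand reference, then compare with the cached counter.
    void ValidateUseCounts(const std::vector<Node*>& nodes) {
        std::map<const Node*, int> actual_uses;
        for (const Node* const node : nodes) {
            for (const Node* const arg : node->args) {
                ++actual_uses[arg];
            }
        }
        for (const Node* const node : nodes) {
            assert(node->cached_use_count == actual_uses[node]);
        }
    }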
diff --git a/src/shader_recompiler/object_pool.h b/src/shader_recompiler/object_pool.h
new file mode 100644
index 000000000..f3b12d04b
--- /dev/null
+++ b/src/shader_recompiler/object_pool.h
@@ -0,0 +1,105 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+namespace Shader {
+
+template <typename T>
+requires std::is_destructible_v<T> class ObjectPool {
+public:
+ explicit ObjectPool(size_t chunk_size = 8192) : new_chunk_size{chunk_size} {
+ node = &chunks.emplace_back(new_chunk_size);
+ }
+
+ template <typename... Args>
+ requires std::is_constructible_v<T, Args...>
+ [[nodiscard]] T* Create(Args&&... args) {
+ return std::construct_at(Memory(), std::forward<Args>(args)...);
+ }
+
+ void ReleaseContents() {
+ if (chunks.empty()) {
+ return;
+ }
+ Chunk& root{chunks.front()};
+ if (root.used_objects == root.num_objects) {
+ // Root chunk has been filled, squash allocations into it
+ const size_t total_objects{root.num_objects + new_chunk_size * (chunks.size() - 1)};
+ chunks.clear();
+ chunks.emplace_back(total_objects);
+ } else {
+ root.Release();
+ chunks.resize(1);
+ }
+ chunks.shrink_to_fit();
+ node = &chunks.front();
+ }
+
+private:
+ struct NonTrivialDummy {
+ NonTrivialDummy() noexcept {}
+ };
+
+ union Storage {
+ Storage() noexcept {}
+ ~Storage() noexcept {}
+
+ NonTrivialDummy dummy{};
+ T object;
+ };
+
+ struct Chunk {
+ explicit Chunk() = default;
+ explicit Chunk(size_t size)
+ : num_objects{size}, storage{std::make_unique<Storage[]>(size)} {}
+
+ Chunk& operator=(Chunk&& rhs) noexcept {
+ Release();
+ used_objects = std::exchange(rhs.used_objects, 0);
+ num_objects = std::exchange(rhs.num_objects, 0);
+ storage = std::move(rhs.storage);
+ return *this;
+ }
+
+ Chunk(Chunk&& rhs) noexcept
+ : used_objects{std::exchange(rhs.used_objects, 0)},
+ num_objects{std::exchange(rhs.num_objects, 0)}, storage{std::move(rhs.storage)} {}
+
+ ~Chunk() {
+ Release();
+ }
+
+ void Release() {
+ std::destroy_n(storage.get(), used_objects);
+ used_objects = 0;
+ }
+
+ size_t used_objects{};
+ size_t num_objects{};
+ std::unique_ptr<Storage[]> storage;
+ };
+
+ [[nodiscard]] T* Memory() {
+ Chunk* const chunk{FreeChunk()};
+ return &chunk->storage[chunk->used_objects++].object;
+ }
+
+ [[nodiscard]] Chunk* FreeChunk() {
+ if (node->used_objects != node->num_objects) {
+ return node;
+ }
+ node = &chunks.emplace_back(new_chunk_size);
+ return node;
+ }
+
+ Chunk* node{};
+ std::vector<Chunk> chunks;
+ size_t new_chunk_size{};
+};
+
+} // namespace Shader
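
A brief usage sketch of the pool; Node and Example are hypothetical names, and the include path follows the patch's layout:

    #include "shader_recompiler/object_pool.h"

    struct Node {
        explicit Node(int value_) : value{value_} {}
        int value;
    };

    int Example() {
        Shader::ObjectPool<Node> pool;   // default chunk size of 8192 objects
        Node* const a = pool.Create(1);  // constructed in-place inside a chunk
        Node* const b = pool.Create(2);
        const int sum = a->value + b->value;
        pool.ReleaseContents();          // destroys a and b, keeps one chunk alive
        Node* const c = pool.Create(3);  // reuses the recycled storage
        return sum + c->value;           // 6
    }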
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
new file mode 100644
index 000000000..f0c3b3b17
--- /dev/null
+++ b/src/shader_recompiler/profile.h
@@ -0,0 +1,74 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+
+namespace Shader {
+
+struct Profile {
+ u32 supported_spirv{0x00010000};
+
+ bool unified_descriptor_binding{};
+ bool support_descriptor_aliasing{};
+ bool support_int8{};
+ bool support_int16{};
+ bool support_int64{};
+ bool support_vertex_instance_id{};
+ bool support_float_controls{};
+ bool support_separate_denorm_behavior{};
+ bool support_separate_rounding_mode{};
+ bool support_fp16_denorm_preserve{};
+ bool support_fp32_denorm_preserve{};
+ bool support_fp16_denorm_flush{};
+ bool support_fp32_denorm_flush{};
+ bool support_fp16_signed_zero_nan_preserve{};
+ bool support_fp32_signed_zero_nan_preserve{};
+ bool support_fp64_signed_zero_nan_preserve{};
+ bool support_explicit_workgroup_layout{};
+ bool support_vote{};
+ bool support_viewport_index_layer_non_geometry{};
+ bool support_viewport_mask{};
+ bool support_typeless_image_loads{};
+ bool support_demote_to_helper_invocation{};
+ bool support_int64_atomics{};
+ bool support_derivative_control{};
+ bool support_geometry_shader_passthrough{};
+ bool support_gl_nv_gpu_shader_5{};
+ bool support_gl_amd_gpu_shader_half_float{};
+ bool support_gl_texture_shadow_lod{};
+ bool support_gl_warp_intrinsics{};
+ bool support_gl_variable_aoffi{};
+ bool support_gl_sparse_textures{};
+ bool support_gl_derivative_control{};
+
+ bool warp_size_potentially_larger_than_guest{};
+
+ bool lower_left_origin_mode{};
+ /// Fragment outputs have to be declared even if they are not written, to avoid undefined values.
+ /// See Ori and the Blind Forest's main menu for reference.
+ bool need_declared_frag_colors{};
+ /// Prevents fast math optimizations that may cause inaccuracies
+ bool need_fastmath_off{};
+
+ /// OpFClamp is broken and OpFMax + OpFMin should be used instead
+ bool has_broken_spirv_clamp{};
+ /// Offset image operands with an unsigned type do not work
+ bool has_broken_unsigned_image_offsets{};
+ /// Signed instructions with unsigned data types are misinterpreted
+ bool has_broken_signed_operations{};
+ /// Float controls break when fp16 is enabled
+ bool has_broken_fp16_float_controls{};
+ /// Dynamic vec4 indexing is broken on some OpenGL drivers
+ bool has_gl_component_indexing_bug{};
+ /// The precise type qualifier is broken in the fragment stage of some drivers
+ bool has_gl_precise_bug{};
+ /// Ignores SPIR-V ordered vs unordered using GLSL semantics
+ bool ignore_nan_fp_comparisons{};
+
+ u32 gl_max_compute_smem_size{};
+};
+
+} // namespace Shader
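
A hedged sketch of filling the profile conservatively. MakeConservativeProfile is a hypothetical helper, and the supported_spirv encoding is assumed to follow the SPIR-V header version word (major << 16 | minor << 8), so the default 0x00010000 targets SPIR-V 1.0:

    #include "shader_recompiler/profile.h"

    // Assumption: start from value-initialized flags (all false) and only
    // enable what the host is known to support.
    Shader::Profile MakeConservativeProfile() {
        Shader::Profile profile{};
        profile.supported_spirv = 0x00010000;  // stick to SPIR-V 1.0 features
        profile.support_descriptor_aliasing = false;
        profile.support_demote_to_helper_invocation = false;
        return profile;
    }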
diff --git a/src/shader_recompiler/program_header.h b/src/shader_recompiler/program_header.h
new file mode 100644
index 000000000..bd6c2bfb5
--- /dev/null
+++ b/src/shader_recompiler/program_header.h
@@ -0,0 +1,219 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <optional>
+
+#include "common/bit_field.h"
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+
+namespace Shader {
+
+enum class OutputTopology : u32 {
+ PointList = 1,
+ LineStrip = 6,
+ TriangleStrip = 7,
+};
+
+enum class PixelImap : u8 {
+ Unused = 0,
+ Constant = 1,
+ Perspective = 2,
+ ScreenLinear = 3,
+};
+
+// Documentation in:
+// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html
+struct ProgramHeader {
+ union {
+ BitField<0, 5, u32> sph_type;
+ BitField<5, 5, u32> version;
+ BitField<10, 4, u32> shader_type;
+ BitField<14, 1, u32> mrt_enable;
+ BitField<15, 1, u32> kills_pixels;
+ BitField<16, 1, u32> does_global_store;
+ BitField<17, 4, u32> sass_version;
+ BitField<21, 2, u32> reserved1;
+ BitField<24, 1, u32> geometry_passthrough;
+ BitField<25, 1, u32> reserved2;
+ BitField<26, 1, u32> does_load_or_store;
+ BitField<27, 1, u32> does_fp64;
+ BitField<28, 4, u32> stream_out_mask;
+ } common0;
+
+ union {
+ BitField<0, 24, u32> shader_local_memory_low_size;
+ BitField<24, 8, u32> per_patch_attribute_count;
+ } common1;
+
+ union {
+ BitField<0, 24, u32> shader_local_memory_high_size;
+ BitField<24, 8, u32> threads_per_input_primitive;
+ } common2;
+
+ union {
+ BitField<0, 24, u32> shader_local_memory_crs_size;
+ BitField<24, 4, OutputTopology> output_topology;
+ BitField<28, 4, u32> reserved;
+ } common3;
+
+ union {
+ BitField<0, 12, u32> max_output_vertices;
+ BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders.
+ BitField<20, 4, u32> reserved;
+ BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders.
+ } common4;
+
+ union {
+ struct {
+ INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA
+
+ union {
+ BitField<0, 1, u8> primitive_array_id;
+ BitField<1, 1, u8> rt_array_index;
+ BitField<2, 1, u8> viewport_index;
+ BitField<3, 1, u8> point_size;
+ BitField<4, 1, u8> position_x;
+ BitField<5, 1, u8> position_y;
+ BitField<6, 1, u8> position_z;
+ BitField<7, 1, u8> position_w;
+ u8 raw;
+ } imap_systemb;
+
+ std::array<u8, 16> imap_generic_vector;
+
+ INSERT_PADDING_BYTES_NOINIT(2); // ImapColor
+ union {
+ BitField<0, 8, u16> clip_distances;
+ BitField<8, 1, u16> point_sprite_s;
+ BitField<9, 1, u16> point_sprite_t;
+ BitField<10, 1, u16> fog_coordinate;
+ BitField<12, 1, u16> tessellation_eval_point_u;
+ BitField<13, 1, u16> tessellation_eval_point_v;
+ BitField<14, 1, u16> instance_id;
+ BitField<15, 1, u16> vertex_id;
+ };
+ INSERT_PADDING_BYTES_NOINIT(5); // ImapFixedFncTexture[10]
+ INSERT_PADDING_BYTES_NOINIT(1); // ImapReserved
+ INSERT_PADDING_BYTES_NOINIT(3); // OmapSystemValuesA
+
+ union {
+ BitField<0, 1, u8> primitive_array_id;
+ BitField<1, 1, u8> rt_array_index;
+ BitField<2, 1, u8> viewport_index;
+ BitField<3, 1, u8> point_size;
+ BitField<4, 1, u8> position_x;
+ BitField<5, 1, u8> position_y;
+ BitField<6, 1, u8> position_z;
+ BitField<7, 1, u8> position_w;
+ u8 raw;
+ } omap_systemb;
+
+ std::array<u8, 16> omap_generic_vector;
+
+ INSERT_PADDING_BYTES_NOINIT(2); // OmapColor
+
+ union {
+ BitField<0, 8, u16> clip_distances;
+ BitField<8, 1, u16> point_sprite_s;
+ BitField<9, 1, u16> point_sprite_t;
+ BitField<10, 1, u16> fog_coordinate;
+ BitField<12, 1, u16> tessellation_eval_point_u;
+ BitField<13, 1, u16> tessellation_eval_point_v;
+ BitField<14, 1, u16> instance_id;
+ BitField<15, 1, u16> vertex_id;
+ } omap_systemc;
+
+ INSERT_PADDING_BYTES_NOINIT(5); // OmapFixedFncTexture[10]
+ INSERT_PADDING_BYTES_NOINIT(1); // OmapReserved
+
+ [[nodiscard]] std::array<bool, 4> InputGeneric(size_t index) const noexcept {
+ const int data{imap_generic_vector[index >> 1] >> ((index % 2) * 4)};
+ return {
+ (data & 1) != 0,
+ (data & 2) != 0,
+ (data & 4) != 0,
+ (data & 8) != 0,
+ };
+ }
+
+ [[nodiscard]] std::array<bool, 4> OutputGeneric(size_t index) const noexcept {
+ const int data{omap_generic_vector[index >> 1] >> ((index % 2) * 4)};
+ return {
+ (data & 1) != 0,
+ (data & 2) != 0,
+ (data & 4) != 0,
+ (data & 8) != 0,
+ };
+ }
+ } vtg;
+
+ struct {
+ INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA
+
+ union {
+ BitField<0, 1, u8> primitive_array_id;
+ BitField<1, 1, u8> rt_array_index;
+ BitField<2, 1, u8> viewport_index;
+ BitField<3, 1, u8> point_size;
+ BitField<4, 1, u8> position_x;
+ BitField<5, 1, u8> position_y;
+ BitField<6, 1, u8> position_z;
+ BitField<7, 1, u8> position_w;
+ BitField<0, 4, u8> first;
+ BitField<4, 4, u8> position;
+ u8 raw;
+ } imap_systemb;
+
+ union {
+ BitField<0, 2, PixelImap> x;
+ BitField<2, 2, PixelImap> y;
+ BitField<4, 2, PixelImap> z;
+ BitField<6, 2, PixelImap> w;
+ u8 raw;
+ } imap_generic_vector[32];
+
+ INSERT_PADDING_BYTES_NOINIT(2); // ImapColor
+ INSERT_PADDING_BYTES_NOINIT(2); // ImapSystemValuesC
+ INSERT_PADDING_BYTES_NOINIT(10); // ImapFixedFncTexture[10]
+ INSERT_PADDING_BYTES_NOINIT(2); // ImapReserved
+
+ struct {
+ u32 target;
+ union {
+ BitField<0, 1, u32> sample_mask;
+ BitField<1, 1, u32> depth;
+ BitField<2, 30, u32> reserved;
+ };
+ } omap;
+
+ [[nodiscard]] std::array<bool, 4> EnabledOutputComponents(u32 rt) const noexcept {
+ const u32 bits{omap.target >> (rt * 4)};
+ return {(bits & 1) != 0, (bits & 2) != 0, (bits & 4) != 0, (bits & 8) != 0};
+ }
+
+ [[nodiscard]] std::array<PixelImap, 4> GenericInputMap(u32 attribute) const {
+ const auto& vector{imap_generic_vector[attribute]};
+ return {vector.x, vector.y, vector.z, vector.w};
+ }
+
+ [[nodiscard]] bool IsGenericVectorActive(size_t index) const {
+ return imap_generic_vector[index].raw != 0;
+ }
+ } ps;
+
+ std::array<u32, 0xf> raw;
+ };
+
+ [[nodiscard]] u64 LocalMemorySize() const noexcept {
+ return static_cast<u64>(common1.shader_local_memory_low_size) |
+ (static_cast<u64>(common2.shader_local_memory_high_size) << 24);
+ }
+};
+static_assert(sizeof(ProgramHeader) == 0x50, "Incorrect structure size");
+
+} // namespace Shader
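
The imap/omap generic vectors pack one 4-bit component mask per attribute, two attributes per byte. A standalone sketch of the decode performed by InputGeneric/OutputGeneric (DecodeNibble is a hypothetical name):

    #include <array>
    #include <cstddef>
    #include <cstdint>

    // Two attributes per byte: even indices use the low nibble, odd the high one.
    std::array<bool, 4> DecodeNibble(const std::array<std::uint8_t, 16>& bytes, std::size_t index) {
        const int data = bytes[index >> 1] >> ((index % 2) * 4);
        return {(data & 1) != 0, (data & 2) != 0, (data & 4) != 0, (data & 8) != 0};
    }

LocalMemorySize applies the same packing idea across two words: the low field supplies bits 0-23 and the high field is shifted up by 24.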
diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h
new file mode 100644
index 000000000..f3f83a258
--- /dev/null
+++ b/src/shader_recompiler/runtime_info.h
@@ -0,0 +1,88 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <bitset>
+#include <optional>
+#include <vector>
+
+#include "common/common_types.h"
+#include "shader_recompiler/varying_state.h"
+
+namespace Shader {
+
+enum class AttributeType : u8 {
+ Float,
+ SignedInt,
+ UnsignedInt,
+ Disabled,
+};
+
+enum class InputTopology {
+ Points,
+ Lines,
+ LinesAdjacency,
+ Triangles,
+ TrianglesAdjacency,
+};
+
+enum class CompareFunction {
+ Never,
+ Less,
+ Equal,
+ LessThanEqual,
+ Greater,
+ NotEqual,
+ GreaterThanEqual,
+ Always,
+};
+
+enum class TessPrimitive {
+ Isolines,
+ Triangles,
+ Quads,
+};
+
+enum class TessSpacing {
+ Equal,
+ FractionalOdd,
+ FractionalEven,
+};
+
+struct TransformFeedbackVarying {
+ u32 buffer{};
+ u32 stride{};
+ u32 offset{};
+ u32 components{};
+};
+
+struct RuntimeInfo {
+ std::array<AttributeType, 32> generic_input_types{};
+ VaryingState previous_stage_stores;
+
+ bool convert_depth_mode{};
+ bool force_early_z{};
+
+ TessPrimitive tess_primitive{};
+ TessSpacing tess_spacing{};
+ bool tess_clockwise{};
+
+ InputTopology input_topology{};
+
+ std::optional<float> fixed_state_point_size;
+ std::optional<CompareFunction> alpha_test_func;
+ float alpha_test_reference{};
+
+ /// Static Y negate value
+ bool y_negate{};
+ /// Use storage buffers instead of global pointers on GLASM
+ bool glasm_use_storage_buffers{};
+
+ /// Transform feedback state for each varying
+ std::vector<TransformFeedbackVarying> xfb_varyings;
+};
+
+} // namespace Shader
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h
new file mode 100644
index 000000000..4ef4dbd40
--- /dev/null
+++ b/src/shader_recompiler/shader_info.h
@@ -0,0 +1,193 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <bitset>
+
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/type.h"
+#include "shader_recompiler/varying_state.h"
+
+#include <boost/container/small_vector.hpp>
+#include <boost/container/static_vector.hpp>
+
+namespace Shader {
+
+enum class TextureType : u32 {
+ Color1D,
+ ColorArray1D,
+ Color2D,
+ ColorArray2D,
+ Color3D,
+ ColorCube,
+ ColorArrayCube,
+ Buffer,
+};
+constexpr u32 NUM_TEXTURE_TYPES = 8;
+
+enum class ImageFormat : u32 {
+ Typeless,
+ R8_UINT,
+ R8_SINT,
+ R16_UINT,
+ R16_SINT,
+ R32_UINT,
+ R32G32_UINT,
+ R32G32B32A32_UINT,
+};
+
+enum class Interpolation {
+ Smooth,
+ Flat,
+ NoPerspective,
+};
+
+struct ConstantBufferDescriptor {
+ u32 index;
+ u32 count;
+};
+
+struct StorageBufferDescriptor {
+ u32 cbuf_index;
+ u32 cbuf_offset;
+ u32 count;
+ bool is_written;
+};
+
+struct TextureBufferDescriptor {
+ bool has_secondary;
+ u32 cbuf_index;
+ u32 cbuf_offset;
+ u32 secondary_cbuf_index;
+ u32 secondary_cbuf_offset;
+ u32 count;
+ u32 size_shift;
+};
+using TextureBufferDescriptors = boost::container::small_vector<TextureBufferDescriptor, 6>;
+
+struct ImageBufferDescriptor {
+ ImageFormat format;
+ bool is_written;
+ bool is_read;
+ u32 cbuf_index;
+ u32 cbuf_offset;
+ u32 count;
+ u32 size_shift;
+};
+using ImageBufferDescriptors = boost::container::small_vector<ImageBufferDescriptor, 2>;
+
+struct TextureDescriptor {
+ TextureType type;
+ bool is_depth;
+ bool has_secondary;
+ u32 cbuf_index;
+ u32 cbuf_offset;
+ u32 secondary_cbuf_index;
+ u32 secondary_cbuf_offset;
+ u32 count;
+ u32 size_shift;
+};
+using TextureDescriptors = boost::container::small_vector<TextureDescriptor, 12>;
+
+struct ImageDescriptor {
+ TextureType type;
+ ImageFormat format;
+ bool is_written;
+ bool is_read;
+ u32 cbuf_index;
+ u32 cbuf_offset;
+ u32 count;
+ u32 size_shift;
+};
+using ImageDescriptors = boost::container::small_vector<ImageDescriptor, 4>;
+
+struct Info {
+ static constexpr size_t MAX_CBUFS{18};
+ static constexpr size_t MAX_SSBOS{32};
+
+ bool uses_workgroup_id{};
+ bool uses_local_invocation_id{};
+ bool uses_invocation_id{};
+ bool uses_sample_id{};
+ bool uses_is_helper_invocation{};
+ bool uses_subgroup_invocation_id{};
+ bool uses_subgroup_shuffles{};
+ std::array<bool, 30> uses_patches{};
+
+ std::array<Interpolation, 32> interpolation{};
+ VaryingState loads;
+ VaryingState stores;
+ VaryingState passthrough;
+
+ bool loads_indexed_attributes{};
+
+ std::array<bool, 8> stores_frag_color{};
+ bool stores_sample_mask{};
+ bool stores_frag_depth{};
+
+ bool stores_tess_level_outer{};
+ bool stores_tess_level_inner{};
+
+ bool stores_indexed_attributes{};
+
+ bool stores_global_memory{};
+
+ bool uses_fp16{};
+ bool uses_fp64{};
+ bool uses_fp16_denorms_flush{};
+ bool uses_fp16_denorms_preserve{};
+ bool uses_fp32_denorms_flush{};
+ bool uses_fp32_denorms_preserve{};
+ bool uses_int8{};
+ bool uses_int16{};
+ bool uses_int64{};
+ bool uses_image_1d{};
+ bool uses_sampled_1d{};
+ bool uses_sparse_residency{};
+ bool uses_demote_to_helper_invocation{};
+ bool uses_subgroup_vote{};
+ bool uses_subgroup_mask{};
+ bool uses_fswzadd{};
+ bool uses_derivatives{};
+ bool uses_typeless_image_reads{};
+ bool uses_typeless_image_writes{};
+ bool uses_image_buffers{};
+ bool uses_shared_increment{};
+ bool uses_shared_decrement{};
+ bool uses_global_increment{};
+ bool uses_global_decrement{};
+ bool uses_atomic_f32_add{};
+ bool uses_atomic_f16x2_add{};
+ bool uses_atomic_f16x2_min{};
+ bool uses_atomic_f16x2_max{};
+ bool uses_atomic_f32x2_add{};
+ bool uses_atomic_f32x2_min{};
+ bool uses_atomic_f32x2_max{};
+ bool uses_atomic_s32_min{};
+ bool uses_atomic_s32_max{};
+ bool uses_int64_bit_atomics{};
+ bool uses_global_memory{};
+ bool uses_atomic_image_u32{};
+ bool uses_shadow_lod{};
+
+ IR::Type used_constant_buffer_types{};
+ IR::Type used_storage_buffer_types{};
+
+ u32 constant_buffer_mask{};
+ std::array<u32, MAX_CBUFS> constant_buffer_used_sizes{};
+ u32 nvn_buffer_base{};
+ std::bitset<16> nvn_buffer_used{};
+
+ boost::container::static_vector<ConstantBufferDescriptor, MAX_CBUFS>
+ constant_buffer_descriptors;
+ boost::container::static_vector<StorageBufferDescriptor, MAX_SSBOS> storage_buffers_descriptors;
+ TextureBufferDescriptors texture_buffer_descriptors;
+ ImageBufferDescriptors image_buffer_descriptors;
+ TextureDescriptors texture_descriptors;
+ ImageDescriptors image_descriptors;
+};
+
+} // namespace Shader
diff --git a/src/shader_recompiler/stage.h b/src/shader_recompiler/stage.h
new file mode 100644
index 000000000..5c1c8d8fc
--- /dev/null
+++ b/src/shader_recompiler/stage.h
@@ -0,0 +1,28 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+
+namespace Shader {
+
+enum class Stage : u32 {
+ VertexB,
+ TessellationControl,
+ TessellationEval,
+ Geometry,
+ Fragment,
+
+ Compute,
+
+ VertexA,
+};
+constexpr u32 MaxStageTypes = 6;
+
+[[nodiscard]] constexpr Stage StageFromIndex(size_t index) noexcept {
+ return static_cast<Stage>(static_cast<size_t>(Stage::VertexB) + index);
+}
+
+} // namespace Shader
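
A small sketch of the index-to-stage mapping, assuming the enum order above (index 0 is VertexB, index 4 is Fragment):

    #include "shader_recompiler/stage.h"

    static_assert(Shader::StageFromIndex(0) == Shader::Stage::VertexB);
    static_assert(Shader::StageFromIndex(3) == Shader::Stage::Geometry);
    static_assert(Shader::StageFromIndex(4) == Shader::Stage::Fragment);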
diff --git a/src/shader_recompiler/varying_state.h b/src/shader_recompiler/varying_state.h
new file mode 100644
index 000000000..9d7b24a76
--- /dev/null
+++ b/src/shader_recompiler/varying_state.h
@@ -0,0 +1,69 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <bitset>
+#include <cstddef>
+
+#include "shader_recompiler/frontend/ir/attribute.h"
+
+namespace Shader {
+
+struct VaryingState {
+ std::bitset<256> mask{};
+
+ void Set(IR::Attribute attribute, bool state = true) {
+ mask[static_cast<size_t>(attribute)] = state;
+ }
+
+ [[nodiscard]] bool operator[](IR::Attribute attribute) const noexcept {
+ return mask[static_cast<size_t>(attribute)];
+ }
+
+ [[nodiscard]] bool AnyComponent(IR::Attribute base) const noexcept {
+ return mask[static_cast<size_t>(base) + 0] || mask[static_cast<size_t>(base) + 1] ||
+ mask[static_cast<size_t>(base) + 2] || mask[static_cast<size_t>(base) + 3];
+ }
+
+ [[nodiscard]] bool AllComponents(IR::Attribute base) const noexcept {
+ return mask[static_cast<size_t>(base) + 0] && mask[static_cast<size_t>(base) + 1] &&
+ mask[static_cast<size_t>(base) + 2] && mask[static_cast<size_t>(base) + 3];
+ }
+
+ [[nodiscard]] bool IsUniform(IR::Attribute base) const noexcept {
+ return AnyComponent(base) == AllComponents(base);
+ }
+
+ [[nodiscard]] bool Generic(size_t index, size_t component) const noexcept {
+ return mask[static_cast<size_t>(IR::Attribute::Generic0X) + index * 4 + component];
+ }
+
+ [[nodiscard]] bool Generic(size_t index) const noexcept {
+ return Generic(index, 0) || Generic(index, 1) || Generic(index, 2) || Generic(index, 3);
+ }
+
+ [[nodiscard]] bool ClipDistances() const noexcept {
+ return AnyComponent(IR::Attribute::ClipDistance0) ||
+ AnyComponent(IR::Attribute::ClipDistance4);
+ }
+
+ [[nodiscard]] bool Legacy() const noexcept {
+ return AnyComponent(IR::Attribute::ColorFrontDiffuseR) ||
+ AnyComponent(IR::Attribute::ColorFrontSpecularR) ||
+ AnyComponent(IR::Attribute::ColorBackDiffuseR) ||
+ AnyComponent(IR::Attribute::ColorBackSpecularR) || FixedFunctionTexture();
+ }
+
+ [[nodiscard]] bool FixedFunctionTexture() const noexcept {
+ for (size_t index = 0; index < 10; ++index) {
+ if (AnyComponent(IR::Attribute::FixedFncTexture0S + index * 4)) {
+ return true;
+ }
+ }
+ return false;
+ }
+};
+
+} // namespace Shader
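
A short usage sketch; StoresAnyOfGeneric3 and StoresGeneric3Y are hypothetical helpers. Each generic attribute occupies four consecutive mask bits starting at Generic0X, so Generic(index, component) resolves to Generic0X + index * 4 + component:

    #include "shader_recompiler/varying_state.h"

    bool StoresAnyOfGeneric3(const Shader::VaryingState& stores) {
        return stores.Generic(3);      // any of X/Y/Z/W of generic attribute 3
    }

    bool StoresGeneric3Y(const Shader::VaryingState& stores) {
        return stores.Generic(3, 1);   // component 1 is Y
    }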
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index 96bc30cac..c4c012f3d 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -5,6 +5,7 @@ add_executable(tests
common/host_memory.cpp
common/param_package.cpp
common/ring_buffer.cpp
+ common/unique_function.cpp
core/core_timing.cpp
core/network/network.cpp
tests.cpp
diff --git a/src/tests/common/host_memory.cpp b/src/tests/common/host_memory.cpp
index e241f8be5..2dc7b5d5e 100644
--- a/src/tests/common/host_memory.cpp
+++ b/src/tests/common/host_memory.cpp
@@ -5,11 +5,13 @@
#include <catch2/catch.hpp>
#include "common/host_memory.h"
+#include "common/literals.h"
using Common::HostMemory;
+using namespace Common::Literals;
static constexpr size_t VIRTUAL_SIZE = 1ULL << 39;
-static constexpr size_t BACKING_SIZE = 4ULL * 1024 * 1024 * 1024;
+static constexpr size_t BACKING_SIZE = 4_GiB;
TEST_CASE("HostMemory: Initialize and deinitialize", "[common]") {
{ HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); }
diff --git a/src/tests/common/unique_function.cpp b/src/tests/common/unique_function.cpp
new file mode 100644
index 000000000..aa6e86593
--- /dev/null
+++ b/src/tests/common/unique_function.cpp
@@ -0,0 +1,110 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string>
+
+#include <catch2/catch.hpp>
+
+#include "common/unique_function.h"
+
+namespace {
+struct Noisy {
+ Noisy() : state{"Default constructed"} {}
+ Noisy(Noisy&& rhs) noexcept : state{"Move constructed"} {
+ rhs.state = "Moved away";
+ }
+ Noisy& operator=(Noisy&& rhs) noexcept {
+ state = "Move assigned";
+ rhs.state = "Moved away";
+ return *this;
+ }
+ Noisy(const Noisy&) : state{"Copy constructed"} {}
+ Noisy& operator=(const Noisy&) {
+ state = "Copy assigned";
+ return *this;
+ }
+
+ std::string state;
+};
+} // Anonymous namespace
+
+TEST_CASE("UniqueFunction", "[common]") {
+ SECTION("Capture reference") {
+ int value = 0;
+ Common::UniqueFunction<void> func = [&value] { value = 5; };
+ func();
+ REQUIRE(value == 5);
+ }
+ SECTION("Capture pointer") {
+ int value = 0;
+ int* pointer = &value;
+ Common::UniqueFunction<void> func = [pointer] { *pointer = 5; };
+ func();
+ REQUIRE(value == 5);
+ }
+ SECTION("Move object") {
+ Noisy noisy;
+ REQUIRE(noisy.state == "Default constructed");
+
+ Common::UniqueFunction<void> func = [noisy = std::move(noisy)] {
+ REQUIRE(noisy.state == "Move constructed");
+ };
+ REQUIRE(noisy.state == "Moved away");
+ func();
+ }
+ SECTION("Move construct function") {
+ int value = 0;
+ Common::UniqueFunction<void> func = [&value] { value = 5; };
+ Common::UniqueFunction<void> new_func = std::move(func);
+ new_func();
+ REQUIRE(value == 5);
+ }
+ SECTION("Move assign function") {
+ int value = 0;
+ Common::UniqueFunction<void> func = [&value] { value = 5; };
+ Common::UniqueFunction<void> new_func;
+ new_func = std::move(func);
+ new_func();
+ REQUIRE(value == 5);
+ }
+ SECTION("Default construct then assign function") {
+ int value = 0;
+ Common::UniqueFunction<void> func;
+ func = [&value] { value = 5; };
+ func();
+ REQUIRE(value == 5);
+ }
+ SECTION("Pass arguments") {
+ int result = 0;
+ Common::UniqueFunction<void, int, int> func = [&result](int a, int b) { result = a + b; };
+ func(5, 4);
+ REQUIRE(result == 9);
+ }
+ SECTION("Pass arguments and return value") {
+ Common::UniqueFunction<int, int, int> func = [](int a, int b) { return a + b; };
+ REQUIRE(func(5, 4) == 9);
+ }
+ SECTION("Destructor") {
+ int num_destroyed = 0;
+ struct Foo {
+ Foo(int* num_) : num{num_} {}
+ Foo(Foo&& rhs) : num{std::exchange(rhs.num, nullptr)} {}
+ Foo(const Foo&) = delete;
+
+ ~Foo() {
+ if (num) {
+ ++*num;
+ }
+ }
+
+ int* num = nullptr;
+ };
+ Foo object{&num_destroyed};
+ {
+ Common::UniqueFunction<void> func = [object = std::move(object)] {};
+ REQUIRE(num_destroyed == 0);
+ }
+ REQUIRE(num_destroyed == 1);
+ }
+}
diff --git a/src/tests/video_core/buffer_base.cpp b/src/tests/video_core/buffer_base.cpp
index edced69bb..9f5a54de4 100644
--- a/src/tests/video_core/buffer_base.cpp
+++ b/src/tests/video_core/buffer_base.cpp
@@ -536,7 +536,7 @@ TEST_CASE("BufferBase: Cached write downloads") {
REQUIRE(rasterizer.Count() == 63);
buffer.MarkRegionAsGpuModified(c + PAGE, PAGE);
int num = 0;
- buffer.ForEachDownloadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
+ buffer.ForEachDownloadRangeAndClear(c, WORD, [&](u64 offset, u64 size) { ++num; });
buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
REQUIRE(num == 0);
REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index f9454bbaa..007ecc13e 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -29,7 +29,6 @@ add_library(video_core STATIC
dirty_flags.h
dma_pusher.cpp
dma_pusher.h
- engines/const_buffer_engine_interface.h
engines/const_buffer_info.h
engines/engine_interface.h
engines/engine_upload.cpp
@@ -44,9 +43,6 @@ add_library(video_core STATIC
engines/maxwell_3d.h
engines/maxwell_dma.cpp
engines/maxwell_dma.h
- engines/shader_bytecode.h
- engines/shader_header.h
- engines/shader_type.h
framebuffer_config.h
macro/macro.cpp
macro/macro.h
@@ -61,8 +57,6 @@ add_library(video_core STATIC
gpu.h
gpu_thread.cpp
gpu_thread.h
- guest_driver.cpp
- guest_driver.h
memory_manager.cpp
memory_manager.h
query_cache.h
@@ -71,26 +65,25 @@ add_library(video_core STATIC
rasterizer_interface.h
renderer_base.cpp
renderer_base.h
- renderer_opengl/gl_arb_decompiler.cpp
- renderer_opengl/gl_arb_decompiler.h
renderer_opengl/gl_buffer_cache.cpp
renderer_opengl/gl_buffer_cache.h
+ renderer_opengl/gl_compute_pipeline.cpp
+ renderer_opengl/gl_compute_pipeline.h
renderer_opengl/gl_device.cpp
renderer_opengl/gl_device.h
renderer_opengl/gl_fence_manager.cpp
renderer_opengl/gl_fence_manager.h
+ renderer_opengl/gl_graphics_pipeline.cpp
+ renderer_opengl/gl_graphics_pipeline.h
renderer_opengl/gl_rasterizer.cpp
renderer_opengl/gl_rasterizer.h
renderer_opengl/gl_resource_manager.cpp
renderer_opengl/gl_resource_manager.h
renderer_opengl/gl_shader_cache.cpp
renderer_opengl/gl_shader_cache.h
- renderer_opengl/gl_shader_decompiler.cpp
- renderer_opengl/gl_shader_decompiler.h
- renderer_opengl/gl_shader_disk_cache.cpp
- renderer_opengl/gl_shader_disk_cache.h
renderer_opengl/gl_shader_manager.cpp
renderer_opengl/gl_shader_manager.h
+ renderer_opengl/gl_shader_context.h
renderer_opengl/gl_shader_util.cpp
renderer_opengl/gl_shader_util.h
renderer_opengl/gl_state_tracker.cpp
@@ -112,6 +105,7 @@ add_library(video_core STATIC
renderer_vulkan/fixed_pipeline_state.h
renderer_vulkan/maxwell_to_vk.cpp
renderer_vulkan/maxwell_to_vk.h
+ renderer_vulkan/pipeline_helper.h
renderer_vulkan/renderer_vulkan.h
renderer_vulkan/renderer_vulkan.cpp
renderer_vulkan/vk_blit_screen.cpp
@@ -138,12 +132,12 @@ add_library(video_core STATIC
renderer_vulkan/vk_query_cache.h
renderer_vulkan/vk_rasterizer.cpp
renderer_vulkan/vk_rasterizer.h
+ renderer_vulkan/vk_render_pass_cache.cpp
+ renderer_vulkan/vk_render_pass_cache.h
renderer_vulkan/vk_resource_pool.cpp
renderer_vulkan/vk_resource_pool.h
renderer_vulkan/vk_scheduler.cpp
renderer_vulkan/vk_scheduler.h
- renderer_vulkan/vk_shader_decompiler.cpp
- renderer_vulkan/vk_shader_decompiler.h
renderer_vulkan/vk_shader_util.cpp
renderer_vulkan/vk_shader_util.h
renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -156,60 +150,12 @@ add_library(video_core STATIC
renderer_vulkan/vk_texture_cache.h
renderer_vulkan/vk_update_descriptor.cpp
renderer_vulkan/vk_update_descriptor.h
+ shader_cache.cpp
shader_cache.h
+ shader_environment.cpp
+ shader_environment.h
shader_notify.cpp
shader_notify.h
- shader/decode/arithmetic.cpp
- shader/decode/arithmetic_immediate.cpp
- shader/decode/bfe.cpp
- shader/decode/bfi.cpp
- shader/decode/shift.cpp
- shader/decode/arithmetic_integer.cpp
- shader/decode/arithmetic_integer_immediate.cpp
- shader/decode/arithmetic_half.cpp
- shader/decode/arithmetic_half_immediate.cpp
- shader/decode/ffma.cpp
- shader/decode/hfma2.cpp
- shader/decode/conversion.cpp
- shader/decode/memory.cpp
- shader/decode/texture.cpp
- shader/decode/image.cpp
- shader/decode/float_set_predicate.cpp
- shader/decode/integer_set_predicate.cpp
- shader/decode/half_set_predicate.cpp
- shader/decode/predicate_set_register.cpp
- shader/decode/predicate_set_predicate.cpp
- shader/decode/register_set_predicate.cpp
- shader/decode/float_set.cpp
- shader/decode/integer_set.cpp
- shader/decode/half_set.cpp
- shader/decode/video.cpp
- shader/decode/warp.cpp
- shader/decode/xmad.cpp
- shader/decode/other.cpp
- shader/ast.cpp
- shader/ast.h
- shader/async_shaders.cpp
- shader/async_shaders.h
- shader/compiler_settings.cpp
- shader/compiler_settings.h
- shader/control_flow.cpp
- shader/control_flow.h
- shader/decode.cpp
- shader/expr.cpp
- shader/expr.h
- shader/memory_util.cpp
- shader/memory_util.h
- shader/node_helper.cpp
- shader/node_helper.h
- shader/node.h
- shader/registry.cpp
- shader/registry.h
- shader/shader_ir.cpp
- shader/shader_ir.h
- shader/track.cpp
- shader/transform_feedback.cpp
- shader/transform_feedback.h
surface.cpp
surface.h
texture_cache/accelerated_swizzle.cpp
@@ -242,6 +188,8 @@ add_library(video_core STATIC
textures/decoders.h
textures/texture.cpp
textures/texture.h
+ transform_feedback.cpp
+ transform_feedback.h
video_core.cpp
video_core.h
vulkan_common/vulkan_debug_callback.cpp
@@ -265,7 +213,7 @@ add_library(video_core STATIC
create_target_directory_groups(video_core)
target_link_libraries(video_core PUBLIC common core)
-target_link_libraries(video_core PRIVATE glad xbyak)
+target_link_libraries(video_core PUBLIC glad shader_recompiler xbyak)
if (YUZU_USE_BUNDLED_FFMPEG AND NOT WIN32)
add_dependencies(video_core ffmpeg-build)
@@ -292,12 +240,12 @@ endif()
if (MSVC)
target_compile_options(video_core PRIVATE
- /we4267 # 'var' : conversion from 'size_t' to 'type', possible loss of data
+ /we4242 # 'identifier': conversion from 'type1' to 'type2', possible loss of data
+ /we4244 # 'conversion': conversion from 'type1' to 'type2', possible loss of data
/we4456 # Declaration of 'identifier' hides previous local declaration
/we4457 # Declaration of 'identifier' hides function parameter
/we4458 # Declaration of 'identifier' hides class member
/we4459 # Declaration of 'identifier' hides global declaration
- /we4715 # 'function' : not all control paths return a value
)
else()
target_compile_options(video_core PRIVATE
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h
index a39505903..c3318095c 100644
--- a/src/video_core/buffer_cache/buffer_base.h
+++ b/src/video_core/buffer_cache/buffer_base.h
@@ -226,19 +226,24 @@ public:
/// Call 'func' for each CPU modified range and unmark those pages as CPU modified
template <typename Func>
void ForEachUploadRange(VAddr query_cpu_range, u64 size, Func&& func) {
- ForEachModifiedRange<Type::CPU>(query_cpu_range, size, func);
+ ForEachModifiedRange<Type::CPU>(query_cpu_range, size, true, func);
}
/// Call 'func' for each GPU modified range and unmark those pages as GPU modified
template <typename Func>
- void ForEachDownloadRange(VAddr query_cpu_range, u64 size, Func&& func) {
- ForEachModifiedRange<Type::GPU>(query_cpu_range, size, func);
+ void ForEachDownloadRange(VAddr query_cpu_range, u64 size, bool clear, Func&& func) {
+ ForEachModifiedRange<Type::GPU>(query_cpu_range, size, clear, func);
+ }
+
+ template <typename Func>
+ void ForEachDownloadRangeAndClear(VAddr query_cpu_range, u64 size, Func&& func) {
+ ForEachModifiedRange<Type::GPU>(query_cpu_range, size, true, func);
}
/// Call 'func' for each GPU modified range and unmark those pages as GPU modified
template <typename Func>
void ForEachDownloadRange(Func&& func) {
- ForEachModifiedRange<Type::GPU>(cpu_addr, SizeBytes(), func);
+ ForEachModifiedRange<Type::GPU>(cpu_addr, SizeBytes(), true, func);
}
/// Mark buffer as picked
@@ -256,6 +261,16 @@ public:
stream_score += score;
}
+ /// Sets the new frame tick
+ void SetFrameTick(u64 new_frame_tick) noexcept {
+ frame_tick = new_frame_tick;
+ }
+
+ /// Returns the last set frame tick
+ [[nodiscard]] u64 FrameTick() const noexcept {
+ return frame_tick;
+ }
+
/// Returns the likeliness of this being a stream buffer
[[nodiscard]] int StreamScore() const noexcept {
return stream_score;
@@ -405,7 +420,7 @@ private:
* @param func Function to call for each turned off region
*/
template <Type type, typename Func>
- void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) {
+ void ForEachModifiedRange(VAddr query_cpu_range, s64 size, bool clear, Func&& func) {
static_assert(type != Type::Untracked);
const s64 difference = query_cpu_range - cpu_addr;
@@ -457,7 +472,9 @@ private:
bits = (bits << left_offset) >> left_offset;
const u64 current_word = state_words[word_index] & bits;
- state_words[word_index] &= ~bits;
+ if (clear) {
+ state_words[word_index] &= ~bits;
+ }
if constexpr (type == Type::CPU) {
const u64 current_bits = untracked_words[word_index] & bits;
@@ -586,6 +603,7 @@ private:
RasterizerInterface* rasterizer = nullptr;
VAddr cpu_addr = 0;
Words words;
+ u64 frame_tick = 0;
BufferFlagBits flags{};
int stream_score = 0;
};
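
A sketch of the reworked download API; InspectThenDownload is a hypothetical helper and Buffer stands for any BufferBase instantiation. Passing clear = false lets a caller inspect GPU-modified ranges without consuming them, while ForEachDownloadRangeAndClear keeps the previous clearing behaviour, as the buffer_base test change above reflects:

    #include "common/common_types.h"

    template <class Buffer>
    void InspectThenDownload(Buffer& buffer, VAddr cpu_addr, u64 size) {
        // Peek at GPU-modified ranges without clearing the tracking bits.
        buffer.ForEachDownloadRange(cpu_addr, size, false, [](u64 offset, u64 bytes) {
            // e.g. accumulate [offset, offset + bytes) for a deferred download
        });
        // Later: visit the same ranges and unmark them, as before.
        buffer.ForEachDownloadRangeAndClear(cpu_addr, size, [](u64 offset, u64 bytes) {
            // e.g. record the copy and unmark the pages as GPU-modified
        });
    }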
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index d371b842f..3b43554f9 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -15,9 +15,11 @@
#include <vector>
#include <boost/container/small_vector.hpp>
+#include <boost/icl/interval_set.hpp>
#include "common/common_types.h"
#include "common/div_ceil.h"
+#include "common/literals.h"
#include "common/microprofile.h"
#include "common/scope_exit.h"
#include "common/settings.h"
@@ -29,6 +31,7 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
+#include "video_core/surface.h"
#include "video_core/texture_cache/slot_vector.h"
#include "video_core/texture_cache/types.h"
@@ -40,15 +43,23 @@ MICROPROFILE_DECLARE(GPU_DownloadMemory);
using BufferId = SlotId;
+using VideoCore::Surface::PixelFormat;
+using namespace Common::Literals;
+
constexpr u32 NUM_VERTEX_BUFFERS = 32;
constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4;
constexpr u32 NUM_GRAPHICS_UNIFORM_BUFFERS = 18;
constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8;
constexpr u32 NUM_STORAGE_BUFFERS = 16;
+constexpr u32 NUM_TEXTURE_BUFFERS = 16;
constexpr u32 NUM_STAGES = 5;
+using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>;
+using ComputeUniformBufferSizes = std::array<u32, NUM_COMPUTE_UNIFORM_BUFFERS>;
+
template <typename P>
class BufferCache {
+
// Page size for caching purposes.
// This is unrelated to the CPU page size and it can be changed as it seems optimal.
static constexpr u32 PAGE_BITS = 16;
@@ -62,14 +73,21 @@ class BufferCache {
static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX;
static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX;
static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS;
+ static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS;
static constexpr BufferId NULL_BUFFER_ID{0};
+ static constexpr u64 EXPECTED_MEMORY = 512_MiB;
+ static constexpr u64 CRITICAL_MEMORY = 1_GiB;
+
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using Runtime = typename P::Runtime;
using Buffer = typename P::Buffer;
+ using IntervalSet = boost::icl::interval_set<VAddr>;
+ using IntervalType = typename IntervalSet::interval_type;
+
struct Empty {};
struct OverlapResult {
@@ -85,6 +103,10 @@ class BufferCache {
BufferId buffer_id;
};
+ struct TextureBufferBinding : Binding {
+ PixelFormat format;
+ };
+
static constexpr Binding NULL_BINDING{
.cpu_addr = 0,
.size = 0,
@@ -92,7 +114,7 @@ class BufferCache {
};
public:
- static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = 4096;
+ static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB);
explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
Tegra::Engines::Maxwell3D& maxwell3d_,
@@ -122,38 +144,63 @@ public:
void BindHostComputeBuffers();
- void SetEnabledUniformBuffers(size_t stage, u32 enabled);
+ void SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask,
+ const UniformBufferSizes* sizes);
- void SetEnabledComputeUniformBuffers(u32 enabled);
+ void SetComputeUniformBufferState(u32 mask, const ComputeUniformBufferSizes* sizes);
void UnbindGraphicsStorageBuffers(size_t stage);
void BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset,
bool is_written);
+ void UnbindGraphicsTextureBuffers(size_t stage);
+
+ void BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, u32 size,
+ PixelFormat format, bool is_written, bool is_image);
+
void UnbindComputeStorageBuffers();
void BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset,
bool is_written);
+ void UnbindComputeTextureBuffers();
+
+ void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format,
+ bool is_written, bool is_image);
+
void FlushCachedWrites();
/// Return true when there are uncommitted buffers to be downloaded
[[nodiscard]] bool HasUncommittedFlushes() const noexcept;
+ void AccumulateFlushes();
+
/// Return true when the caller should wait for async downloads
[[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept;
/// Commit asynchronous downloads
void CommitAsyncFlushes();
+ void CommitAsyncFlushesHigh();
/// Pop asynchronous downloads
void PopAsyncFlushes();
+ bool DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount);
+
+ bool DMAClear(GPUVAddr src_address, u64 amount, u32 value);
+
/// Return true when a CPU region is modified from the GPU
[[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
+ /// Return true when a region is registered on the cache
+ [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size);
+
+ /// Return true when a CPU region is modified from the CPU
+ [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size);
+
std::mutex mutex;
+ Runtime& runtime;
private:
template <typename Func>
@@ -183,11 +230,43 @@ private:
}
}
+ template <typename Func>
+ void ForEachWrittenRange(VAddr cpu_addr, u64 size, Func&& func) {
+ const VAddr start_address = cpu_addr;
+ const VAddr end_address = start_address + size;
+ const VAddr search_base =
+ static_cast<VAddr>(std::min<s64>(0LL, static_cast<s64>(start_address - size)));
+ const IntervalType search_interval{search_base, search_base + 1};
+ auto it = common_ranges.lower_bound(search_interval);
+ if (it == common_ranges.end()) {
+ it = common_ranges.begin();
+ }
+ for (; it != common_ranges.end(); it++) {
+ VAddr inter_addr_end = it->upper();
+ VAddr inter_addr = it->lower();
+ if (inter_addr >= end_address) {
+ break;
+ }
+ if (inter_addr_end <= start_address) {
+ continue;
+ }
+ if (inter_addr_end > end_address) {
+ inter_addr_end = end_address;
+ }
+ if (inter_addr < start_address) {
+ inter_addr = start_address;
+ }
+ func(inter_addr, inter_addr_end);
+ }
+ }
+
static bool IsRangeGranular(VAddr cpu_addr, size_t size) {
return (cpu_addr & ~Core::Memory::PAGE_MASK) ==
((cpu_addr + size) & ~Core::Memory::PAGE_MASK);
}
+ void RunGarbageCollector();
+
void BindHostIndexBuffer();
void BindHostVertexBuffers();
@@ -198,12 +277,16 @@ private:
void BindHostGraphicsStorageBuffers(size_t stage);
+ void BindHostGraphicsTextureBuffers(size_t stage);
+
void BindHostTransformFeedbackBuffers();
void BindHostComputeUniformBuffers();
void BindHostComputeStorageBuffers();
+ void BindHostComputeTextureBuffers();
+
void DoUpdateGraphicsBuffers(bool is_indexed);
void DoUpdateComputeBuffers();
@@ -218,6 +301,8 @@ private:
void UpdateStorageBuffers(size_t stage);
+ void UpdateTextureBuffers(size_t stage);
+
void UpdateTransformFeedbackBuffers();
void UpdateTransformFeedbackBuffer(u32 index);
@@ -226,6 +311,8 @@ private:
void UpdateComputeStorageBuffers();
+ void UpdateComputeTextureBuffers();
+
void MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size);
[[nodiscard]] BufferId FindBuffer(VAddr cpu_addr, u32 size);
@@ -243,6 +330,8 @@ private:
template <bool insert>
void ChangeRegister(BufferId buffer_id);
+ void TouchBuffer(Buffer& buffer) const noexcept;
+
bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size);
bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size);
@@ -255,26 +344,32 @@ private:
void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, std::span<BufferCopy> copies);
- void DeleteBuffer(BufferId buffer_id);
+ void DownloadBufferMemory(Buffer& buffer_id);
+
+ void DownloadBufferMemory(Buffer& buffer_id, VAddr cpu_addr, u64 size);
- void ReplaceBufferDownloads(BufferId old_buffer_id, BufferId new_buffer_id);
+ void DeleteBuffer(BufferId buffer_id);
void NotifyBufferDeletion();
[[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr) const;
+ [[nodiscard]] TextureBufferBinding GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size,
+ PixelFormat format);
+
[[nodiscard]] std::span<const u8> ImmediateBufferWithData(VAddr cpu_addr, size_t size);
[[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity);
[[nodiscard]] bool HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept;
+ void ClearDownload(IntervalType subtract_interval);
+
VideoCore::RasterizerInterface& rasterizer;
Tegra::Engines::Maxwell3D& maxwell3d;
Tegra::Engines::KeplerCompute& kepler_compute;
Tegra::MemoryManager& gpu_memory;
Core::Memory::Memory& cpu_memory;
- Runtime& runtime;
SlotVector<Buffer> slot_buffers;
DelayedDestructionRing<Buffer, 8> delayed_destruction_ring;
@@ -285,20 +380,30 @@ private:
std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers;
std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers;
std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers;
+ std::array<std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS>, NUM_STAGES> texture_buffers;
std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers;
std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers;
std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers;
+ std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS> compute_texture_buffers;
- std::array<u32, NUM_STAGES> enabled_uniform_buffers{};
- u32 enabled_compute_uniform_buffers = 0;
+ std::array<u32, NUM_STAGES> enabled_uniform_buffer_masks{};
+ u32 enabled_compute_uniform_buffer_mask = 0;
+
+ const UniformBufferSizes* uniform_buffer_sizes{};
+ const ComputeUniformBufferSizes* compute_uniform_buffer_sizes{};
std::array<u32, NUM_STAGES> enabled_storage_buffers{};
std::array<u32, NUM_STAGES> written_storage_buffers{};
u32 enabled_compute_storage_buffers = 0;
u32 written_compute_storage_buffers = 0;
- std::array<u32, NUM_STAGES> fast_bound_uniform_buffers{};
+ std::array<u32, NUM_STAGES> enabled_texture_buffers{};
+ std::array<u32, NUM_STAGES> written_texture_buffers{};
+ std::array<u32, NUM_STAGES> image_texture_buffers{};
+ u32 enabled_compute_texture_buffers = 0;
+ u32 written_compute_texture_buffers = 0;
+ u32 image_compute_texture_buffers = 0;
std::array<u32, 16> uniform_cache_hits{};
std::array<u32, 16> uniform_cache_shots{};
@@ -309,16 +414,24 @@ private:
std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty>
dirty_uniform_buffers{};
+ std::conditional_t<IS_OPENGL, std::array<u32, NUM_STAGES>, Empty> fast_bound_uniform_buffers{};
+ std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS,
+ std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>, Empty>
+ uniform_buffer_binding_sizes{};
std::vector<BufferId> cached_write_buffer_ids;
- // TODO: This data structure is not optimal and it should be reworked
- std::vector<BufferId> uncommitted_downloads;
- std::deque<std::vector<BufferId>> committed_downloads;
+ IntervalSet uncommitted_ranges;
+ IntervalSet common_ranges;
+ std::deque<IntervalSet> committed_ranges;
size_t immediate_buffer_capacity = 0;
std::unique_ptr<u8[]> immediate_buffer_alloc;
+ typename SlotVector<Buffer>::Iterator deletion_iterator;
+ u64 frame_tick = 0;
+ u64 total_used_memory = 0;
+
std::array<BufferId, ((1ULL << 39) >> PAGE_BITS)> page_table;
};
@@ -328,10 +441,33 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
Tegra::Engines::KeplerCompute& kepler_compute_,
Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
Runtime& runtime_)
- : rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_},
- gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, runtime{runtime_} {
+ : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
+ kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} {
// Ensure the first slot is used for the null buffer
void(slot_buffers.insert(runtime, NullBufferParams{}));
+ deletion_iterator = slot_buffers.end();
+ common_ranges.clear();
+}
+
+template <class P>
+void BufferCache<P>::RunGarbageCollector() {
+ const bool aggressive_gc = total_used_memory >= CRITICAL_MEMORY;
+ const u64 ticks_to_destroy = aggressive_gc ? 60 : 120;
+ int num_iterations = aggressive_gc ? 64 : 32;
+ for (; num_iterations > 0; --num_iterations) {
+ if (deletion_iterator == slot_buffers.end()) {
+ deletion_iterator = slot_buffers.begin();
+ }
+ ++deletion_iterator;
+ if (deletion_iterator == slot_buffers.end()) {
+ break;
+ }
+ const auto [buffer_id, buffer] = *deletion_iterator;
+ if (buffer->FrameTick() + ticks_to_destroy < frame_tick) {
+ DownloadBufferMemory(*buffer);
+ DeleteBuffer(buffer_id);
+ }
+ }
}
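In plain terms, the constants above give the collector the following behaviour (a reading of the code, not an extra setting):

    // Normal pressure (total_used_memory >= EXPECTED_MEMORY, see TickFrame below):
    //   scan up to 32 slots per frame, evict buffers unused for more than 120 frames.
    // Critical pressure (total_used_memory >= CRITICAL_MEMORY):
    //   scan up to 64 slots per frame, evict buffers unused for more than 60 frames.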
template <class P>
@@ -349,6 +485,10 @@ void BufferCache<P>::TickFrame() {
const bool skip_preferred = hits * 256 < shots * 251;
uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0;
+ if (Settings::values.use_caches_gc.GetValue() && total_used_memory >= EXPECTED_MEMORY) {
+ RunGarbageCollector();
+ }
+ ++frame_tick;
delayed_destruction_ring.Tick();
}
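The integer comparison in the skip-cache heuristic above is simply a division-free hit-rate test:

    // hits * 256 < shots * 251   <=>   hits / shots < 251 / 256 ≈ 0.98
    // When it holds, uniform_buffer_skip_cache_size is set to DEFAULT_SKIP_CACHE_SIZE,
    // otherwise to 0.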
@@ -372,52 +512,102 @@ void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) {
template <class P>
void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) {
ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) {
- boost::container::small_vector<BufferCopy, 1> copies;
- u64 total_size_bytes = 0;
- u64 largest_copy = 0;
- buffer.ForEachDownloadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) {
- copies.push_back(BufferCopy{
- .src_offset = range_offset,
- .dst_offset = total_size_bytes,
- .size = range_size,
- });
- total_size_bytes += range_size;
- largest_copy = std::max(largest_copy, range_size);
- });
- if (total_size_bytes == 0) {
- return;
- }
- MICROPROFILE_SCOPE(GPU_DownloadMemory);
-
- if constexpr (USE_MEMORY_MAPS) {
- auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
- const u8* const mapped_memory = download_staging.mapped_span.data();
- const std::span<BufferCopy> copies_span(copies.data(), copies.data() + copies.size());
- for (BufferCopy& copy : copies) {
- // Modify copies to have the staging offset in mind
- copy.dst_offset += download_staging.offset;
- }
- runtime.CopyBuffer(download_staging.buffer, buffer, copies_span);
- runtime.Finish();
- for (const BufferCopy& copy : copies) {
- const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
- // Undo the modified offset
- const u64 dst_offset = copy.dst_offset - download_staging.offset;
- const u8* copy_mapped_memory = mapped_memory + dst_offset;
- cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size);
- }
- } else {
- const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
- for (const BufferCopy& copy : copies) {
- buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size));
- const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
- cpu_memory.WriteBlockUnsafe(copy_cpu_addr, immediate_buffer.data(), copy.size);
- }
- }
+ DownloadBufferMemory(buffer, cpu_addr, size);
});
}
template <class P>
+void BufferCache<P>::ClearDownload(IntervalType subtract_interval) {
+ uncommitted_ranges.subtract(subtract_interval);
+ for (auto& interval_set : committed_ranges) {
+ interval_set.subtract(subtract_interval);
+ }
+}
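The download bookkeeping above replaces the old per-buffer vectors with interval sets keyed by CPU address. A minimal sketch of the semantics this relies on, assuming the IntervalSet/IntervalType aliases (defined elsewhere in this header) wrap boost::icl and that VAddr is a 64-bit address:

    #include <boost/icl/interval_set.hpp>
    #include <cstdint>

    using VAddr = std::uint64_t;                     // assumption for this sketch
    using IntervalSet = boost::icl::interval_set<VAddr>;
    using IntervalType = IntervalSet::interval_type;

    void IntervalExample() {
        IntervalSet ranges;
        ranges.add(IntervalType{0x1000, 0x3000});      // track [0x1000, 0x3000)
        ranges.subtract(IntervalType{0x2000, 0x2800}); // ClearDownload-style punch-out
        // ranges now holds [0x1000, 0x2000) and [0x2800, 0x3000); adds of
        // overlapping intervals coalesce automatically.
    }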
+
+template <class P>
+bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) {
+ const std::optional<VAddr> cpu_src_address = gpu_memory.GpuToCpuAddress(src_address);
+ const std::optional<VAddr> cpu_dest_address = gpu_memory.GpuToCpuAddress(dest_address);
+ if (!cpu_src_address || !cpu_dest_address) {
+ return false;
+ }
+ const bool source_dirty = IsRegionRegistered(*cpu_src_address, amount);
+ const bool dest_dirty = IsRegionRegistered(*cpu_dest_address, amount);
+ if (!source_dirty && !dest_dirty) {
+ return false;
+ }
+
+ const IntervalType subtract_interval{*cpu_dest_address, *cpu_dest_address + amount};
+ ClearDownload(subtract_interval);
+
+ BufferId buffer_a;
+ BufferId buffer_b;
+ do {
+ has_deleted_buffers = false;
+ buffer_a = FindBuffer(*cpu_src_address, static_cast<u32>(amount));
+ buffer_b = FindBuffer(*cpu_dest_address, static_cast<u32>(amount));
+ } while (has_deleted_buffers);
+ auto& src_buffer = slot_buffers[buffer_a];
+ auto& dest_buffer = slot_buffers[buffer_b];
+ SynchronizeBuffer(src_buffer, *cpu_src_address, static_cast<u32>(amount));
+ SynchronizeBuffer(dest_buffer, *cpu_dest_address, static_cast<u32>(amount));
+ std::array copies{BufferCopy{
+ .src_offset = src_buffer.Offset(*cpu_src_address),
+ .dst_offset = dest_buffer.Offset(*cpu_dest_address),
+ .size = amount,
+ }};
+
+ boost::container::small_vector<IntervalType, 4> tmp_intervals;
+ auto mirror = [&](VAddr base_address, VAddr base_address_end) {
+ const u64 size = base_address_end - base_address;
+ const VAddr diff = base_address - *cpu_src_address;
+ const VAddr new_base_address = *cpu_dest_address + diff;
+ const IntervalType add_interval{new_base_address, new_base_address + size};
+ uncommitted_ranges.add(add_interval);
+ tmp_intervals.push_back(add_interval);
+ };
+ ForEachWrittenRange(*cpu_src_address, amount, mirror);
+ // Subtract before re-adding the mirrored intervals; the order matters for overlapping copies.
+ common_ranges.subtract(subtract_interval);
+ const bool at_least_one_download = !tmp_intervals.empty();
+ for (const IntervalType add_interval : tmp_intervals) {
+ common_ranges.add(add_interval);
+ }
+
+ runtime.CopyBuffer(dest_buffer, src_buffer, copies);
+ if (at_least_one_download) {
+ dest_buffer.MarkRegionAsGpuModified(*cpu_dest_address, amount);
+ }
+ std::vector<u8> tmp_buffer(amount);
+ cpu_memory.ReadBlockUnsafe(*cpu_src_address, tmp_buffer.data(), amount);
+ cpu_memory.WriteBlockUnsafe(*cpu_dest_address, tmp_buffer.data(), amount);
+ return true;
+}
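A worked example of the mirror lambda above, with made-up addresses (the numbers are illustrative only):

    // *cpu_src_address = 0x10000, *cpu_dest_address = 0x50000, amount = 0x1000
    // ForEachWrittenRange reports [0x10200, 0x10300) as GPU-written in the source,
    // so diff = 0x200 and [0x50200, 0x50300) is added to uncommitted_ranges and,
    // after the subtract, to common_ranges; a later flush then writes the copied
    // data back at the destination as well as at the source.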
+
+template <class P>
+bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
+ const std::optional<VAddr> cpu_dst_address = gpu_memory.GpuToCpuAddress(dst_address);
+ if (!cpu_dst_address) {
+ return false;
+ }
+ const bool dest_dirty = IsRegionRegistered(*cpu_dst_address, amount);
+ if (!dest_dirty) {
+ return false;
+ }
+
+ const size_t size = amount * sizeof(u32);
+ const IntervalType subtract_interval{*cpu_dst_address, *cpu_dst_address + size};
+ ClearDownload(subtract_interval);
+ common_ranges.subtract(subtract_interval);
+
+ const BufferId buffer = FindBuffer(*cpu_dst_address, static_cast<u32>(size));
+ auto& dest_buffer = slot_buffers[buffer];
+ const u32 offset = dest_buffer.Offset(*cpu_dst_address);
+ runtime.ClearBuffer(dest_buffer, offset, size, value);
+ return true;
+}
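Note that amount counts 32-bit words (size = amount * sizeof(u32) above). A hypothetical call site, purely for illustration; dst_gpu_addr and the fallback comment are assumptions, not part of this patch:

    // Clearing 0x100 words zeroes 0x400 bytes of the destination buffer.
    if (!buffer_cache.DMAClear(dst_gpu_addr, /*amount=*/0x100, /*value=*/0)) {
        // Destination is not resident in the cache; a caller would fall back to
        // writing guest memory directly (assumption, not shown in this patch).
    }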
+
+template <class P>
void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
u32 size) {
const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
@@ -472,6 +662,7 @@ void BufferCache<P>::BindHostStageBuffers(size_t stage) {
MICROPROFILE_SCOPE(GPU_BindUploadBuffers);
BindHostGraphicsUniformBuffers(stage);
BindHostGraphicsStorageBuffers(stage);
+ BindHostGraphicsTextureBuffers(stage);
}
template <class P>
@@ -479,21 +670,30 @@ void BufferCache<P>::BindHostComputeBuffers() {
MICROPROFILE_SCOPE(GPU_BindUploadBuffers);
BindHostComputeUniformBuffers();
BindHostComputeStorageBuffers();
+ BindHostComputeTextureBuffers();
}
template <class P>
-void BufferCache<P>::SetEnabledUniformBuffers(size_t stage, u32 enabled) {
+void BufferCache<P>::SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask,
+ const UniformBufferSizes* sizes) {
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
- if (enabled_uniform_buffers[stage] != enabled) {
- dirty_uniform_buffers[stage] = ~u32{0};
+ if (enabled_uniform_buffer_masks != mask) {
+ if constexpr (IS_OPENGL) {
+ fast_bound_uniform_buffers.fill(0);
+ }
+ dirty_uniform_buffers.fill(~u32{0});
+ uniform_buffer_binding_sizes.fill({});
}
}
- enabled_uniform_buffers[stage] = enabled;
+ enabled_uniform_buffer_masks = mask;
+ uniform_buffer_sizes = sizes;
}
template <class P>
-void BufferCache<P>::SetEnabledComputeUniformBuffers(u32 enabled) {
- enabled_compute_uniform_buffers = enabled;
+void BufferCache<P>::SetComputeUniformBufferState(u32 mask,
+ const ComputeUniformBufferSizes* sizes) {
+ enabled_compute_uniform_buffer_mask = mask;
+ compute_uniform_buffer_sizes = sizes;
}
template <class P>
@@ -514,9 +714,29 @@ void BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index,
}
template <class P>
+void BufferCache<P>::UnbindGraphicsTextureBuffers(size_t stage) {
+ enabled_texture_buffers[stage] = 0;
+ written_texture_buffers[stage] = 0;
+ image_texture_buffers[stage] = 0;
+}
+
+template <class P>
+void BufferCache<P>::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr,
+ u32 size, PixelFormat format, bool is_written,
+ bool is_image) {
+ enabled_texture_buffers[stage] |= 1U << tbo_index;
+ written_texture_buffers[stage] |= (is_written ? 1U : 0U) << tbo_index;
+ if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
+ image_texture_buffers[stage] |= (is_image ? 1U : 0U) << tbo_index;
+ }
+ texture_buffers[stage][tbo_index] = GetTextureBufferBinding(gpu_addr, size, format);
+}
+
+template <class P>
void BufferCache<P>::UnbindComputeStorageBuffers() {
enabled_compute_storage_buffers = 0;
written_compute_storage_buffers = 0;
+ image_compute_texture_buffers = 0;
}
template <class P>
@@ -534,6 +754,24 @@ void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index,
}
template <class P>
+void BufferCache<P>::UnbindComputeTextureBuffers() {
+ enabled_compute_texture_buffers = 0;
+ written_compute_texture_buffers = 0;
+ image_compute_texture_buffers = 0;
+}
+
+template <class P>
+void BufferCache<P>::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size,
+ PixelFormat format, bool is_written, bool is_image) {
+ enabled_compute_texture_buffers |= 1U << tbo_index;
+ written_compute_texture_buffers |= (is_written ? 1U : 0U) << tbo_index;
+ if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
+ image_compute_texture_buffers |= (is_image ? 1U : 0U) << tbo_index;
+ }
+ compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format);
+}
+
+template <class P>
void BufferCache<P>::FlushCachedWrites() {
for (const BufferId buffer_id : cached_write_buffer_ids) {
slot_buffers[buffer_id].FlushCachedWrites();
@@ -543,29 +781,30 @@ void BufferCache<P>::FlushCachedWrites() {
template <class P>
bool BufferCache<P>::HasUncommittedFlushes() const noexcept {
- return !uncommitted_downloads.empty();
+ return !uncommitted_ranges.empty() || !committed_ranges.empty();
}
template <class P>
-bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept {
- return !committed_downloads.empty() && !committed_downloads.front().empty();
+void BufferCache<P>::AccumulateFlushes() {
+ if (Settings::values.gpu_accuracy.GetValue() != Settings::GPUAccuracy::High) {
+ uncommitted_ranges.clear();
+ return;
+ }
+ if (uncommitted_ranges.empty()) {
+ return;
+ }
+ committed_ranges.emplace_back(std::move(uncommitted_ranges));
}
template <class P>
-void BufferCache<P>::CommitAsyncFlushes() {
- // This is intentionally passing the value by copy
- committed_downloads.push_front(uncommitted_downloads);
- uncommitted_downloads.clear();
+bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept {
+ return false;
}
template <class P>
-void BufferCache<P>::PopAsyncFlushes() {
- if (committed_downloads.empty()) {
- return;
- }
- auto scope_exit_pop_download = detail::ScopeExit([this] { committed_downloads.pop_back(); });
- const std::span<const BufferId> download_ids = committed_downloads.back();
- if (download_ids.empty()) {
+void BufferCache<P>::CommitAsyncFlushesHigh() {
+ AccumulateFlushes();
+ if (committed_ranges.empty()) {
return;
}
MICROPROFILE_SCOPE(GPU_DownloadMemory);
@@ -573,20 +812,42 @@ void BufferCache<P>::PopAsyncFlushes() {
boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads;
u64 total_size_bytes = 0;
u64 largest_copy = 0;
- for (const BufferId buffer_id : download_ids) {
- slot_buffers[buffer_id].ForEachDownloadRange([&](u64 range_offset, u64 range_size) {
- downloads.push_back({
- BufferCopy{
- .src_offset = range_offset,
- .dst_offset = total_size_bytes,
- .size = range_size,
- },
- buffer_id,
+ for (const IntervalSet& intervals : committed_ranges) {
+ for (auto& interval : intervals) {
+ const std::size_t size = interval.upper() - interval.lower();
+ const VAddr cpu_addr = interval.lower();
+ ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
+ buffer.ForEachDownloadRangeAndClear(
+ cpu_addr, size, [&](u64 range_offset, u64 range_size) {
+ const VAddr buffer_addr = buffer.CpuAddr();
+ const auto add_download = [&](VAddr start, VAddr end) {
+ const u64 new_offset = start - buffer_addr;
+ const u64 new_size = end - start;
+ downloads.push_back({
+ BufferCopy{
+ .src_offset = new_offset,
+ .dst_offset = total_size_bytes,
+ .size = new_size,
+ },
+ buffer_id,
+ });
+ // Align up to avoid cache conflicts
+ constexpr u64 align = 256ULL;
+ constexpr u64 mask = ~(align - 1ULL);
+ total_size_bytes += (new_size + align - 1) & mask;
+ largest_copy = std::max(largest_copy, new_size);
+ };
+
+ const VAddr start_address = buffer_addr + range_offset;
+ const VAddr end_address = start_address + range_size;
+ ForEachWrittenRange(start_address, range_size, add_download);
+ const IntervalType subtract_interval{start_address, end_address};
+ common_ranges.subtract(subtract_interval);
+ });
});
- total_size_bytes += range_size;
- largest_copy = std::max(largest_copy, range_size);
- });
+ }
}
+ committed_ranges.clear();
if (downloads.empty()) {
return;
}
@@ -619,6 +880,19 @@ void BufferCache<P>::PopAsyncFlushes() {
}
template <class P>
+void BufferCache<P>::CommitAsyncFlushes() {
+ if (Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High) {
+ CommitAsyncFlushesHigh();
+ } else {
+ uncommitted_ranges.clear();
+ committed_ranges.clear();
+ }
+}
+
+template <class P>
+void BufferCache<P>::PopAsyncFlushes() {}
+
+template <class P>
bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE);
for (u64 page = addr >> PAGE_BITS; page < page_end;) {
@@ -638,13 +912,56 @@ bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
}
template <class P>
+bool BufferCache<P>::IsRegionRegistered(VAddr addr, size_t size) {
+ const VAddr end_addr = addr + size;
+ const u64 page_end = Common::DivCeil(end_addr, PAGE_SIZE);
+ for (u64 page = addr >> PAGE_BITS; page < page_end;) {
+ const BufferId buffer_id = page_table[page];
+ if (!buffer_id) {
+ ++page;
+ continue;
+ }
+ Buffer& buffer = slot_buffers[buffer_id];
+ const VAddr buf_start_addr = buffer.CpuAddr();
+ const VAddr buf_end_addr = buf_start_addr + buffer.SizeBytes();
+ if (buf_start_addr < end_addr && addr < buf_end_addr) {
+ return true;
+ }
+ page = Common::DivCeil(end_addr, PAGE_SIZE);
+ }
+ return false;
+}
+
+template <class P>
+bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) {
+ const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE);
+ for (u64 page = addr >> PAGE_BITS; page < page_end;) {
+ const BufferId image_id = page_table[page];
+ if (!image_id) {
+ ++page;
+ continue;
+ }
+ Buffer& buffer = slot_buffers[image_id];
+ if (buffer.IsRegionCpuModified(addr, size)) {
+ return true;
+ }
+ const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes();
+ page = Common::DivCeil(end_addr, PAGE_SIZE);
+ }
+ return false;
+}
+
+template <class P>
void BufferCache<P>::BindHostIndexBuffer() {
Buffer& buffer = slot_buffers[index_buffer.buffer_id];
+ TouchBuffer(buffer);
const u32 offset = buffer.Offset(index_buffer.cpu_addr);
const u32 size = index_buffer.size;
SynchronizeBuffer(buffer, index_buffer.cpu_addr, size);
if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
- runtime.BindIndexBuffer(buffer, offset, size);
+ const u32 new_offset = offset + maxwell3d.regs.index_array.first *
+ maxwell3d.regs.index_array.FormatSizeInBytes();
+ runtime.BindIndexBuffer(buffer, new_offset, size);
} else {
runtime.BindIndexBuffer(maxwell3d.regs.draw.topology, maxwell3d.regs.index_array.format,
maxwell3d.regs.index_array.first, maxwell3d.regs.index_array.count,
@@ -658,6 +975,7 @@ void BufferCache<P>::BindHostVertexBuffers() {
for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
const Binding& binding = vertex_buffers[index];
Buffer& buffer = slot_buffers[binding.buffer_id];
+ TouchBuffer(buffer);
SynchronizeBuffer(buffer, binding.cpu_addr, binding.size);
if (!flags[Dirty::VertexBuffer0 + index]) {
continue;
@@ -677,7 +995,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) {
dirty = std::exchange(dirty_uniform_buffers[stage], 0);
}
u32 binding_index = 0;
- ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) {
+ ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) {
const bool needs_bind = ((dirty >> index) & 1) != 0;
BindHostGraphicsUniformBuffer(stage, index, binding_index, needs_bind);
if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
@@ -691,8 +1009,9 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
bool needs_bind) {
const Binding& binding = uniform_buffers[stage][index];
const VAddr cpu_addr = binding.cpu_addr;
- const u32 size = binding.size;
+ const u32 size = std::min(binding.size, (*uniform_buffer_sizes)[stage][index]);
Buffer& buffer = slot_buffers[binding.buffer_id];
+ TouchBuffer(buffer);
const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID &&
size <= uniform_buffer_skip_cache_size &&
!buffer.IsRegionGpuModified(cpu_addr, size);
@@ -700,8 +1019,13 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
if constexpr (IS_OPENGL) {
if (runtime.HasFastBufferSubData()) {
// Fast path for Nvidia
- if (!HasFastUniformBufferBound(stage, binding_index)) {
+ const bool should_fast_bind =
+ !HasFastUniformBufferBound(stage, binding_index) ||
+ uniform_buffer_binding_sizes[stage][binding_index] != size;
+ if (should_fast_bind) {
// We only have to bind when the currently bound buffer is not the fast version
+ fast_bound_uniform_buffers[stage] |= 1U << binding_index;
+ uniform_buffer_binding_sizes[stage][binding_index] = size;
runtime.BindFastUniformBuffer(stage, binding_index, size);
}
const auto span = ImmediateBufferWithData(cpu_addr, size);
@@ -709,8 +1033,10 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
return;
}
}
- fast_bound_uniform_buffers[stage] |= 1U << binding_index;
-
+ if constexpr (IS_OPENGL) {
+ fast_bound_uniform_buffers[stage] |= 1U << binding_index;
+ uniform_buffer_binding_sizes[stage][binding_index] = size;
+ }
// Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan
const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size);
cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size);
@@ -723,14 +1049,27 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
}
++uniform_cache_shots[0];
- if (!needs_bind && !HasFastUniformBufferBound(stage, binding_index)) {
- // Skip binding if it's not needed and if the bound buffer is not the fast version
- // This exists to avoid instances where the fast buffer is bound and a GPU write happens
+ // Skip binding if it's not needed and if the bound buffer is not the fast version
+ // This exists to avoid instances where the fast buffer is bound and a GPU write happens
+ needs_bind |= HasFastUniformBufferBound(stage, binding_index);
+ if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
+ needs_bind |= uniform_buffer_binding_sizes[stage][binding_index] != size;
+ }
+ if (!needs_bind) {
return;
}
- fast_bound_uniform_buffers[stage] &= ~(1U << binding_index);
-
const u32 offset = buffer.Offset(cpu_addr);
+ if constexpr (IS_OPENGL) {
+ // Fast buffer will be unbound
+ fast_bound_uniform_buffers[stage] &= ~(1U << binding_index);
+
+ // Mark the index as dirty if offset doesn't match
+ const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset();
+ dirty_uniform_buffers[stage] |= (is_copy_bind ? 1U : 0U) << index;
+ }
+ if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
+ uniform_buffer_binding_sizes[stage][binding_index] = size;
+ }
if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size);
} else {
@@ -744,6 +1083,7 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) {
const Binding& binding = storage_buffers[stage][index];
Buffer& buffer = slot_buffers[binding.buffer_id];
+ TouchBuffer(buffer);
const u32 size = binding.size;
SynchronizeBuffer(buffer, binding.cpu_addr, size);
@@ -759,6 +1099,28 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
}
template <class P>
+void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
+ ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) {
+ const TextureBufferBinding& binding = texture_buffers[stage][index];
+ Buffer& buffer = slot_buffers[binding.buffer_id];
+ const u32 size = binding.size;
+ SynchronizeBuffer(buffer, binding.cpu_addr, size);
+
+ const u32 offset = buffer.Offset(binding.cpu_addr);
+ const PixelFormat format = binding.format;
+ if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
+ if (((image_texture_buffers[stage] >> index) & 1) != 0) {
+ runtime.BindImageBuffer(buffer, offset, size, format);
+ } else {
+ runtime.BindTextureBuffer(buffer, offset, size, format);
+ }
+ } else {
+ runtime.BindTextureBuffer(buffer, offset, size, format);
+ }
+ });
+}
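ForEachEnabledBit above walks the enabled_texture_buffers mask set up in BindGraphicsTextureBuffer. For readers unfamiliar with the helper, a minimal stand-in with the same observable behaviour (a sketch only; the project's own helper is defined earlier in this header and may be implemented differently):

    #include <bit>
    #include <cstdint>

    // Invoke func(index) once per set bit in mask, lowest index first.
    template <typename Func>
    void ForEachEnabledBitSketch(std::uint32_t mask, Func&& func) {
        while (mask != 0) {
            const std::uint32_t index = static_cast<std::uint32_t>(std::countr_zero(mask));
            func(index);
            mask &= mask - 1; // clear the lowest set bit
        }
    }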
+
+template <class P>
void BufferCache<P>::BindHostTransformFeedbackBuffers() {
if (maxwell3d.regs.tfb_enabled == 0) {
return;
@@ -766,6 +1128,7 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() {
for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
const Binding& binding = transform_feedback_buffers[index];
Buffer& buffer = slot_buffers[binding.buffer_id];
+ TouchBuffer(buffer);
const u32 size = binding.size;
SynchronizeBuffer(buffer, binding.cpu_addr, size);
@@ -779,12 +1142,14 @@ void BufferCache<P>::BindHostComputeUniformBuffers() {
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
// Mark all uniform buffers as dirty
dirty_uniform_buffers.fill(~u32{0});
+ fast_bound_uniform_buffers.fill(0);
}
u32 binding_index = 0;
- ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) {
+ ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) {
const Binding& binding = compute_uniform_buffers[index];
Buffer& buffer = slot_buffers[binding.buffer_id];
- const u32 size = binding.size;
+ TouchBuffer(buffer);
+ const u32 size = std::min(binding.size, (*compute_uniform_buffer_sizes)[index]);
SynchronizeBuffer(buffer, binding.cpu_addr, size);
const u32 offset = buffer.Offset(binding.cpu_addr);
@@ -803,6 +1168,7 @@ void BufferCache<P>::BindHostComputeStorageBuffers() {
ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) {
const Binding& binding = compute_storage_buffers[index];
Buffer& buffer = slot_buffers[binding.buffer_id];
+ TouchBuffer(buffer);
const u32 size = binding.size;
SynchronizeBuffer(buffer, binding.cpu_addr, size);
@@ -818,6 +1184,28 @@ void BufferCache<P>::BindHostComputeStorageBuffers() {
}
template <class P>
+void BufferCache<P>::BindHostComputeTextureBuffers() {
+ ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) {
+ const TextureBufferBinding& binding = compute_texture_buffers[index];
+ Buffer& buffer = slot_buffers[binding.buffer_id];
+ const u32 size = binding.size;
+ SynchronizeBuffer(buffer, binding.cpu_addr, size);
+
+ const u32 offset = buffer.Offset(binding.cpu_addr);
+ const PixelFormat format = binding.format;
+ if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
+ if (((image_compute_texture_buffers >> index) & 1) != 0) {
+ runtime.BindImageBuffer(buffer, offset, size, format);
+ } else {
+ runtime.BindTextureBuffer(buffer, offset, size, format);
+ }
+ } else {
+ runtime.BindTextureBuffer(buffer, offset, size, format);
+ }
+ });
+}
+
+template <class P>
void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) {
if (is_indexed) {
UpdateIndexBuffer();
@@ -827,6 +1215,7 @@ void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) {
for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
UpdateUniformBuffers(stage);
UpdateStorageBuffers(stage);
+ UpdateTextureBuffers(stage);
}
}
@@ -834,6 +1223,7 @@ template <class P>
void BufferCache<P>::DoUpdateComputeBuffers() {
UpdateComputeUniformBuffers();
UpdateComputeStorageBuffers();
+ UpdateComputeTextureBuffers();
}
template <class P>
@@ -852,7 +1242,7 @@ void BufferCache<P>::UpdateIndexBuffer() {
const GPUVAddr gpu_addr_end = index_array.EndAddress();
const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin);
const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
- const u32 draw_size = index_array.count * index_array.FormatSizeInBytes();
+ const u32 draw_size = (index_array.count + index_array.first) * index_array.FormatSizeInBytes();
const u32 size = std::min(address_size, draw_size);
if (size == 0 || !cpu_addr) {
index_buffer = NULL_BINDING;
@@ -903,7 +1293,7 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) {
template <class P>
void BufferCache<P>::UpdateUniformBuffers(size_t stage) {
- ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) {
+ ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) {
Binding& binding = uniform_buffers[stage][index];
if (binding.buffer_id) {
// Already updated
@@ -934,6 +1324,18 @@ void BufferCache<P>::UpdateStorageBuffers(size_t stage) {
}
template <class P>
+void BufferCache<P>::UpdateTextureBuffers(size_t stage) {
+ ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) {
+ Binding& binding = texture_buffers[stage][index];
+ binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
+ // Mark buffer as written if needed
+ if (((written_texture_buffers[stage] >> index) & 1) != 0) {
+ MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
+ }
+ });
+}
+
+template <class P>
void BufferCache<P>::UpdateTransformFeedbackBuffers() {
if (maxwell3d.regs.tfb_enabled == 0) {
return;
@@ -964,7 +1366,7 @@ void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) {
template <class P>
void BufferCache<P>::UpdateComputeUniformBuffers() {
- ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) {
+ ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) {
Binding& binding = compute_uniform_buffers[index];
binding = NULL_BINDING;
const auto& launch_desc = kepler_compute.launch_description;
@@ -985,11 +1387,22 @@ void BufferCache<P>::UpdateComputeStorageBuffers() {
ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) {
// Resolve buffer
Binding& binding = compute_storage_buffers[index];
- const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size);
- binding.buffer_id = buffer_id;
+ binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
// Mark as written if needed
if (((written_compute_storage_buffers >> index) & 1) != 0) {
- MarkWrittenBuffer(buffer_id, binding.cpu_addr, binding.size);
+ MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
+ }
+ });
+}
+
+template <class P>
+void BufferCache<P>::UpdateComputeTextureBuffers() {
+ ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) {
+ Binding& binding = compute_texture_buffers[index];
+ binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
+ // Mark as written if needed
+ if (((written_compute_texture_buffers >> index) & 1) != 0) {
+ MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
}
});
}
@@ -999,16 +1412,16 @@ void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 s
Buffer& buffer = slot_buffers[buffer_id];
buffer.MarkRegionAsGpuModified(cpu_addr, size);
- const bool is_accuracy_high = Settings::IsGPULevelHigh();
+ const IntervalType base_interval{cpu_addr, cpu_addr + size};
+ common_ranges.add(base_interval);
+
+ const bool is_accuracy_high =
+ Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High;
const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
- if (!is_accuracy_high || !is_async) {
- return;
- }
- if (std::ranges::find(uncommitted_downloads, buffer_id) != uncommitted_downloads.end()) {
- // Already inserted
+ if (!is_async && !is_accuracy_high) {
return;
}
- uncommitted_downloads.push_back(buffer_id);
+ uncommitted_ranges.add(base_interval);
}
template <class P>
@@ -1092,7 +1505,6 @@ void BufferCache<P>::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id,
if (!copies.empty()) {
runtime.CopyBuffer(slot_buffers[new_buffer_id], overlap, copies);
}
- ReplaceBufferDownloads(overlap_id, new_buffer_id);
DeleteBuffer(overlap_id);
}
@@ -1101,6 +1513,7 @@ BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) {
const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size);
const u32 size = static_cast<u32>(overlap.end - overlap.begin);
const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size);
+ TouchBuffer(slot_buffers[new_buffer_id]);
for (const BufferId overlap_id : overlap.ids) {
JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
}
@@ -1122,8 +1535,14 @@ template <class P>
template <bool insert>
void BufferCache<P>::ChangeRegister(BufferId buffer_id) {
const Buffer& buffer = slot_buffers[buffer_id];
+ const auto size = buffer.SizeBytes();
+ if (insert) {
+ total_used_memory += Common::AlignUp(size, 1024);
+ } else {
+ total_used_memory -= Common::AlignUp(size, 1024);
+ }
const VAddr cpu_addr_begin = buffer.CpuAddr();
- const VAddr cpu_addr_end = cpu_addr_begin + buffer.SizeBytes();
+ const VAddr cpu_addr_end = cpu_addr_begin + size;
const u64 page_begin = cpu_addr_begin / PAGE_SIZE;
const u64 page_end = Common::DivCeil(cpu_addr_end, PAGE_SIZE);
for (u64 page = page_begin; page != page_end; ++page) {
@@ -1136,6 +1555,11 @@ void BufferCache<P>::ChangeRegister(BufferId buffer_id) {
}
template <class P>
+void BufferCache<P>::TouchBuffer(Buffer& buffer) const noexcept {
+ buffer.SetFrameTick(frame_tick);
+}
+
+template <class P>
bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) {
if (buffer.CpuAddr() == 0) {
return true;
@@ -1212,6 +1636,72 @@ void BufferCache<P>::MappedUploadMemory(Buffer& buffer, u64 total_size_bytes,
}
template <class P>
+void BufferCache<P>::DownloadBufferMemory(Buffer& buffer) {
+ DownloadBufferMemory(buffer, buffer.CpuAddr(), buffer.SizeBytes());
+}
+
+template <class P>
+void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 size) {
+ boost::container::small_vector<BufferCopy, 1> copies;
+ u64 total_size_bytes = 0;
+ u64 largest_copy = 0;
+ buffer.ForEachDownloadRangeAndClear(cpu_addr, size, [&](u64 range_offset, u64 range_size) {
+ const VAddr buffer_addr = buffer.CpuAddr();
+ const auto add_download = [&](VAddr start, VAddr end) {
+ const u64 new_offset = start - buffer_addr;
+ const u64 new_size = end - start;
+ copies.push_back(BufferCopy{
+ .src_offset = new_offset,
+ .dst_offset = total_size_bytes,
+ .size = new_size,
+ });
+ // Align up to avoid cache conflicts
+ constexpr u64 align = 256ULL;
+ constexpr u64 mask = ~(align - 1ULL);
+ total_size_bytes += (new_size + align - 1) & mask;
+ largest_copy = std::max(largest_copy, new_size);
+ };
+
+ const VAddr start_address = buffer_addr + range_offset;
+ const VAddr end_address = start_address + range_size;
+ ForEachWrittenRange(start_address, range_size, add_download);
+ const IntervalType subtract_interval{start_address, end_address};
+ ClearDownload(subtract_interval);
+ common_ranges.subtract(subtract_interval);
+ });
+ if (total_size_bytes == 0) {
+ return;
+ }
+ MICROPROFILE_SCOPE(GPU_DownloadMemory);
+
+ if constexpr (USE_MEMORY_MAPS) {
+ auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
+ const u8* const mapped_memory = download_staging.mapped_span.data();
+ const std::span<BufferCopy> copies_span(copies.data(), copies.data() + copies.size());
+ for (BufferCopy& copy : copies) {
+ // Adjust the copies so their destination offsets account for the staging offset
+ copy.dst_offset += download_staging.offset;
+ }
+ runtime.CopyBuffer(download_staging.buffer, buffer, copies_span);
+ runtime.Finish();
+ for (const BufferCopy& copy : copies) {
+ const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
+ // Undo the modified offset
+ const u64 dst_offset = copy.dst_offset - download_staging.offset;
+ const u8* copy_mapped_memory = mapped_memory + dst_offset;
+ cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size);
+ }
+ } else {
+ const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
+ for (const BufferCopy& copy : copies) {
+ buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size));
+ const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
+ cpu_memory.WriteBlockUnsafe(copy_cpu_addr, immediate_buffer.data(), copy.size);
+ }
+ }
+}
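A worked example of the 256-byte align-up used when sizing the staging download (the same arithmetic appears in CommitAsyncFlushesHigh above):

    // new_size = 0x31 bytes:
    //   (0x31 + 0xFF) & ~0xFF = 0x130 & ...F00 = 0x100
    // so consecutive copies are padded out to 256-byte multiples inside the
    // staging allocation, which is what the "avoid cache conflicts" comment means.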
+
+template <class P>
void BufferCache<P>::DeleteBuffer(BufferId buffer_id) {
const auto scalar_replace = [buffer_id](Binding& binding) {
if (binding.buffer_id == buffer_id) {
@@ -1236,26 +1726,16 @@ void BufferCache<P>::DeleteBuffer(BufferId buffer_id) {
Unregister(buffer_id);
delayed_destruction_ring.Push(std::move(slot_buffers[buffer_id]));
+ slot_buffers.erase(buffer_id);
NotifyBufferDeletion();
}
template <class P>
-void BufferCache<P>::ReplaceBufferDownloads(BufferId old_buffer_id, BufferId new_buffer_id) {
- const auto replace = [old_buffer_id, new_buffer_id](std::vector<BufferId>& buffers) {
- std::ranges::replace(buffers, old_buffer_id, new_buffer_id);
- if (auto it = std::ranges::find(buffers, new_buffer_id); it != buffers.end()) {
- buffers.erase(std::remove(it + 1, buffers.end(), new_buffer_id), buffers.end());
- }
- };
- replace(uncommitted_downloads);
- std::ranges::for_each(committed_downloads, replace);
-}
-
-template <class P>
void BufferCache<P>::NotifyBufferDeletion() {
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
dirty_uniform_buffers.fill(~u32{0});
+ uniform_buffer_binding_sizes.fill({});
}
auto& flags = maxwell3d.dirty.flags;
flags[Dirty::IndexBuffer] = true;
@@ -1274,21 +1754,34 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s
if (!cpu_addr || size == 0) {
return NULL_BINDING;
}
- // HACK(Rodrigo): This is the number of bytes bound in host beyond the guest API's range.
- // It exists due to some games like Astral Chain operate out of bounds.
- // Binding the whole map range would be technically correct, but games have large maps that make
- // this approach unaffordable for now.
- static constexpr u32 arbitrary_extra_bytes = 0xc000;
- const u32 bytes_to_map_end = static_cast<u32>(gpu_memory.BytesToMapEnd(gpu_addr));
const Binding binding{
.cpu_addr = *cpu_addr,
- .size = std::min(size + arbitrary_extra_bytes, bytes_to_map_end),
+ .size = size,
.buffer_id = BufferId{},
};
return binding;
}
template <class P>
+typename BufferCache<P>::TextureBufferBinding BufferCache<P>::GetTextureBufferBinding(
+ GPUVAddr gpu_addr, u32 size, PixelFormat format) {
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ TextureBufferBinding binding;
+ if (!cpu_addr || size == 0) {
+ binding.cpu_addr = 0;
+ binding.size = 0;
+ binding.buffer_id = NULL_BUFFER_ID;
+ binding.format = PixelFormat::Invalid;
+ } else {
+ binding.cpu_addr = *cpu_addr;
+ binding.size = size;
+ binding.buffer_id = BufferId{};
+ binding.format = format;
+ }
+ return binding;
+}
+
+template <class P>
std::span<const u8> BufferCache<P>::ImmediateBufferWithData(VAddr cpu_addr, size_t size) {
u8* const base_pointer = cpu_memory.GetPointer(cpu_addr);
if (IsRangeGranular(cpu_addr, size) ||
diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp
index a3fda1094..8b86ad050 100644
--- a/src/video_core/cdma_pusher.cpp
+++ b/src/video_core/cdma_pusher.cpp
@@ -103,8 +103,7 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) {
case ThiMethod::SetMethod1:
LOG_DEBUG(Service_NVDRV, "NVDEC method 0x{:X}",
static_cast<u32>(nvdec_thi_state.method_0));
- nvdec_processor->ProcessMethod(static_cast<Nvdec::Method>(nvdec_thi_state.method_0),
- data);
+ nvdec_processor->ProcessMethod(nvdec_thi_state.method_0, data);
break;
default:
break;
diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp
index d02dc6260..1b4bbc8ac 100644
--- a/src/video_core/command_classes/codecs/codec.cpp
+++ b/src/video_core/command_classes/codecs/codec.cpp
@@ -23,8 +23,8 @@ void AVFrameDeleter(AVFrame* ptr) {
av_free(ptr);
}
-Codec::Codec(GPU& gpu_)
- : gpu(gpu_), h264_decoder(std::make_unique<Decoder::H264>(gpu)),
+Codec::Codec(GPU& gpu_, const NvdecCommon::NvdecRegisters& regs)
+ : gpu(gpu_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(gpu)),
vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {}
Codec::~Codec() {
@@ -43,46 +43,48 @@ Codec::~Codec() {
avcodec_close(av_codec_ctx);
}
+void Codec::Initialize() {
+ AVCodecID codec{AV_CODEC_ID_NONE};
+ switch (current_codec) {
+ case NvdecCommon::VideoCodec::H264:
+ codec = AV_CODEC_ID_H264;
+ break;
+ case NvdecCommon::VideoCodec::Vp9:
+ codec = AV_CODEC_ID_VP9;
+ break;
+ default:
+ return;
+ }
+ av_codec = avcodec_find_decoder(codec);
+ av_codec_ctx = avcodec_alloc_context3(av_codec);
+ av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0);
+
+ // TODO(ameerj): libavcodec gpu hw acceleration
+
+ const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr);
+ if (av_error < 0) {
+ LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed.");
+ avcodec_close(av_codec_ctx);
+ return;
+ }
+ initialized = true;
+ return;
+}
+
void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) {
if (current_codec != codec) {
- LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", static_cast<u32>(codec));
current_codec = codec;
+ LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", GetCurrentCodecName());
}
}
-void Codec::StateWrite(u32 offset, u64 arguments) {
- u8* const state_offset = reinterpret_cast<u8*>(&state) + offset * sizeof(u64);
- std::memcpy(state_offset, &arguments, sizeof(u64));
-}
-
void Codec::Decode() {
- bool is_first_frame = false;
+ const bool is_first_frame = !initialized;
if (!initialized) {
- if (current_codec == NvdecCommon::VideoCodec::H264) {
- av_codec = avcodec_find_decoder(AV_CODEC_ID_H264);
- } else if (current_codec == NvdecCommon::VideoCodec::Vp9) {
- av_codec = avcodec_find_decoder(AV_CODEC_ID_VP9);
- } else {
- LOG_ERROR(Service_NVDRV, "Unknown video codec {}", current_codec);
- return;
- }
-
- av_codec_ctx = avcodec_alloc_context3(av_codec);
- av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0);
-
- // TODO(ameerj): libavcodec gpu hw acceleration
-
- const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr);
- if (av_error < 0) {
- LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed.");
- avcodec_close(av_codec_ctx);
- return;
- }
- initialized = true;
- is_first_frame = true;
+ Initialize();
}
- bool vp9_hidden_frame = false;
+ bool vp9_hidden_frame = false;
AVPacket packet{};
av_init_packet(&packet);
std::vector<u8> frame_data;
@@ -95,7 +97,7 @@ void Codec::Decode() {
}
packet.data = frame_data.data();
- packet.size = static_cast<int>(frame_data.size());
+ packet.size = static_cast<s32>(frame_data.size());
avcodec_send_packet(av_codec_ctx, &packet);
@@ -127,4 +129,21 @@ NvdecCommon::VideoCodec Codec::GetCurrentCodec() const {
return current_codec;
}
+std::string_view Codec::GetCurrentCodecName() const {
+ switch (current_codec) {
+ case NvdecCommon::VideoCodec::None:
+ return "None";
+ case NvdecCommon::VideoCodec::H264:
+ return "H264";
+ case NvdecCommon::VideoCodec::Vp8:
+ return "VP8";
+ case NvdecCommon::VideoCodec::H265:
+ return "H265";
+ case NvdecCommon::VideoCodec::Vp9:
+ return "VP9";
+ default:
+ return "Unknown";
+ }
+}
+
} // namespace Tegra
diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h
index 8a2a6c360..96c823c76 100644
--- a/src/video_core/command_classes/codecs/codec.h
+++ b/src/video_core/command_classes/codecs/codec.h
@@ -34,15 +34,15 @@ class VP9;
class Codec {
public:
- explicit Codec(GPU& gpu);
+ explicit Codec(GPU& gpu, const NvdecCommon::NvdecRegisters& regs);
~Codec();
+ /// Initializes the decoder context for the current target codec
+ void Initialize();
+
/// Sets NVDEC video stream codec
void SetTargetCodec(NvdecCommon::VideoCodec codec);
- /// Populate NvdecRegisters state with argument value at the provided offset
- void StateWrite(u32 offset, u64 arguments);
-
/// Call decoders to construct headers, decode AVFrame with ffmpeg
void Decode();
@@ -51,6 +51,8 @@ public:
/// Returns the value of current_codec
[[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const;
+ /// Returns the name of the current codec
+ [[nodiscard]] std::string_view GetCurrentCodecName() const;
private:
bool initialized{};
@@ -60,10 +62,10 @@ private:
AVCodecContext* av_codec_ctx{nullptr};
GPU& gpu;
+ const NvdecCommon::NvdecRegisters& state;
std::unique_ptr<Decoder::H264> h264_decoder;
std::unique_ptr<Decoder::VP9> vp9_decoder;
- NvdecCommon::NvdecRegisters state{};
std::queue<AVFramePtr> av_frames{};
};
diff --git a/src/video_core/command_classes/codecs/h264.cpp b/src/video_core/command_classes/codecs/h264.cpp
index fea6aed98..5fb6d45ee 100644
--- a/src/video_core/command_classes/codecs/h264.cpp
+++ b/src/video_core/command_classes/codecs/h264.cpp
@@ -45,134 +45,129 @@ H264::~H264() = default;
const std::vector<u8>& H264::ComposeFrameHeader(const NvdecCommon::NvdecRegisters& state,
bool is_first_frame) {
- H264DecoderContext context{};
+ H264DecoderContext context;
gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext));
- const s32 frame_number = static_cast<s32>((context.h264_parameter_set.flags >> 46) & 0x1ffff);
+ const s64 frame_number = context.h264_parameter_set.frame_number.Value();
if (!is_first_frame && frame_number != 0) {
- frame.resize(context.frame_data_size);
-
+ frame.resize(context.stream_len);
gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size());
- } else {
- /// Encode header
- H264BitWriter writer{};
- writer.WriteU(1, 24);
- writer.WriteU(0, 1);
- writer.WriteU(3, 2);
- writer.WriteU(7, 5);
- writer.WriteU(100, 8);
- writer.WriteU(0, 8);
- writer.WriteU(31, 8);
- writer.WriteUe(0);
- const auto chroma_format_idc =
- static_cast<u32>((context.h264_parameter_set.flags >> 12) & 3);
- writer.WriteUe(chroma_format_idc);
- if (chroma_format_idc == 3) {
- writer.WriteBit(false);
- }
-
- writer.WriteUe(0);
- writer.WriteUe(0);
- writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag
- writer.WriteBit(false); // Scaling matrix present flag
-
- const auto order_cnt_type = static_cast<u32>((context.h264_parameter_set.flags >> 14) & 3);
- writer.WriteUe(static_cast<u32>((context.h264_parameter_set.flags >> 8) & 0xf));
- writer.WriteUe(order_cnt_type);
- if (order_cnt_type == 0) {
- writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt);
- } else if (order_cnt_type == 1) {
- writer.WriteBit(context.h264_parameter_set.delta_pic_order_always_zero_flag != 0);
-
- writer.WriteSe(0);
- writer.WriteSe(0);
- writer.WriteUe(0);
- }
-
- const s32 pic_height = context.h264_parameter_set.pic_height_in_map_units /
- (context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2);
+ return frame;
+ }
- writer.WriteUe(16);
+ // Encode header
+ H264BitWriter writer{};
+ writer.WriteU(1, 24);
+ writer.WriteU(0, 1);
+ writer.WriteU(3, 2);
+ writer.WriteU(7, 5);
+ writer.WriteU(100, 8);
+ writer.WriteU(0, 8);
+ writer.WriteU(31, 8);
+ writer.WriteUe(0);
+ const u32 chroma_format_idc =
+ static_cast<u32>(context.h264_parameter_set.chroma_format_idc.Value());
+ writer.WriteUe(chroma_format_idc);
+ if (chroma_format_idc == 3) {
writer.WriteBit(false);
- writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1);
- writer.WriteUe(pic_height - 1);
- writer.WriteBit(context.h264_parameter_set.frame_mbs_only_flag != 0);
-
- if (!context.h264_parameter_set.frame_mbs_only_flag) {
- writer.WriteBit(((context.h264_parameter_set.flags >> 0) & 1) != 0);
- }
+ }
- writer.WriteBit(((context.h264_parameter_set.flags >> 1) & 1) != 0);
- writer.WriteBit(false); // Frame cropping flag
- writer.WriteBit(false); // VUI parameter present flag
+ writer.WriteUe(0);
+ writer.WriteUe(0);
+ writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag
+ writer.WriteBit(false); // Scaling matrix present flag
- writer.End();
+ writer.WriteUe(static_cast<u32>(context.h264_parameter_set.log2_max_frame_num_minus4.Value()));
- // H264 PPS
- writer.WriteU(1, 24);
- writer.WriteU(0, 1);
- writer.WriteU(3, 2);
- writer.WriteU(8, 5);
+ const auto order_cnt_type =
+ static_cast<u32>(context.h264_parameter_set.pic_order_cnt_type.Value());
+ writer.WriteUe(order_cnt_type);
+ if (order_cnt_type == 0) {
+ writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt_lsb_minus4);
+ } else if (order_cnt_type == 1) {
+ writer.WriteBit(context.h264_parameter_set.delta_pic_order_always_zero_flag != 0);
+ writer.WriteSe(0);
+ writer.WriteSe(0);
writer.WriteUe(0);
- writer.WriteUe(0);
+ }
- writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0);
- writer.WriteBit(false);
- writer.WriteUe(0);
- writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active);
- writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active);
- writer.WriteBit(((context.h264_parameter_set.flags >> 2) & 1) != 0);
- writer.WriteU(static_cast<s32>((context.h264_parameter_set.flags >> 32) & 0x3), 2);
- s32 pic_init_qp = static_cast<s32>((context.h264_parameter_set.flags >> 16) & 0x3f);
- pic_init_qp = (pic_init_qp << 26) >> 26;
- writer.WriteSe(pic_init_qp);
- writer.WriteSe(0);
- s32 chroma_qp_index_offset =
- static_cast<s32>((context.h264_parameter_set.flags >> 22) & 0x1f);
- chroma_qp_index_offset = (chroma_qp_index_offset << 27) >> 27;
+ const s32 pic_height = context.h264_parameter_set.frame_height_in_map_units /
+ (context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2);
+
+ writer.WriteUe(16);
+ writer.WriteBit(false);
+ writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1);
+ writer.WriteUe(pic_height - 1);
+ writer.WriteBit(context.h264_parameter_set.frame_mbs_only_flag != 0);
- writer.WriteSe(chroma_qp_index_offset);
- writer.WriteBit(context.h264_parameter_set.deblocking_filter_control_flag != 0);
- writer.WriteBit(((context.h264_parameter_set.flags >> 3) & 1) != 0);
- writer.WriteBit(context.h264_parameter_set.redundant_pic_count_flag != 0);
- writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0);
+ if (!context.h264_parameter_set.frame_mbs_only_flag) {
+ writer.WriteBit(context.h264_parameter_set.flags.mbaff_frame.Value() != 0);
+ }
+ writer.WriteBit(context.h264_parameter_set.flags.direct_8x8_inference.Value() != 0);
+ writer.WriteBit(false); // Frame cropping flag
+ writer.WriteBit(false); // VUI parameter present flag
+
+ writer.End();
+
+ // H264 PPS
+ writer.WriteU(1, 24);
+ writer.WriteU(0, 1);
+ writer.WriteU(3, 2);
+ writer.WriteU(8, 5);
+
+ writer.WriteUe(0);
+ writer.WriteUe(0);
+
+ writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0);
+ writer.WriteBit(false);
+ writer.WriteUe(0);
+ writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active);
+ writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active);
+ writer.WriteBit(context.h264_parameter_set.flags.weighted_pred.Value() != 0);
+ writer.WriteU(static_cast<s32>(context.h264_parameter_set.weighted_bipred_idc.Value()), 2);
+ s32 pic_init_qp = static_cast<s32>(context.h264_parameter_set.pic_init_qp_minus26.Value());
+ writer.WriteSe(pic_init_qp);
+ writer.WriteSe(0);
+ s32 chroma_qp_index_offset =
+ static_cast<s32>(context.h264_parameter_set.chroma_qp_index_offset.Value());
+
+ writer.WriteSe(chroma_qp_index_offset);
+ writer.WriteBit(context.h264_parameter_set.deblocking_filter_control_present_flag != 0);
+ writer.WriteBit(context.h264_parameter_set.flags.constrained_intra_pred.Value() != 0);
+ writer.WriteBit(context.h264_parameter_set.redundant_pic_cnt_present_flag != 0);
+ writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0);
+
+ writer.WriteBit(true);
+
+ for (s32 index = 0; index < 6; index++) {
writer.WriteBit(true);
+ std::span<const u8> matrix{context.weight_scale};
+ writer.WriteScalingList(matrix, index * 16, 16);
+ }
- for (s32 index = 0; index < 6; index++) {
+ if (context.h264_parameter_set.transform_8x8_mode_flag) {
+ for (s32 index = 0; index < 2; index++) {
writer.WriteBit(true);
- const auto matrix_x4 =
- std::vector<u8>(context.scaling_matrix_4.begin(), context.scaling_matrix_4.end());
- writer.WriteScalingList(matrix_x4, index * 16, 16);
- }
-
- if (context.h264_parameter_set.transform_8x8_mode_flag) {
- for (s32 index = 0; index < 2; index++) {
- writer.WriteBit(true);
- const auto matrix_x8 = std::vector<u8>(context.scaling_matrix_8.begin(),
- context.scaling_matrix_8.end());
-
- writer.WriteScalingList(matrix_x8, index * 64, 64);
- }
+ std::span<const u8> matrix{context.weight_scale_8x8};
+ writer.WriteScalingList(matrix, index * 64, 64);
}
+ }
- s32 chroma_qp_index_offset2 =
- static_cast<s32>((context.h264_parameter_set.flags >> 27) & 0x1f);
- chroma_qp_index_offset2 = (chroma_qp_index_offset2 << 27) >> 27;
+ s32 chroma_qp_index_offset2 =
+ static_cast<s32>(context.h264_parameter_set.second_chroma_qp_index_offset.Value());
- writer.WriteSe(chroma_qp_index_offset2);
+ writer.WriteSe(chroma_qp_index_offset2);
- writer.End();
+ writer.End();
- const auto& encoded_header = writer.GetByteArray();
- frame.resize(encoded_header.size() + context.frame_data_size);
- std::memcpy(frame.data(), encoded_header.data(), encoded_header.size());
+ const auto& encoded_header = writer.GetByteArray();
+ frame.resize(encoded_header.size() + context.stream_len);
+ std::memcpy(frame.data(), encoded_header.data(), encoded_header.size());
- gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset,
- frame.data() + encoded_header.size(),
- context.frame_data_size);
- }
+ gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset,
+ frame.data() + encoded_header.size(), context.stream_len);
return frame;
}
@@ -202,7 +197,7 @@ void H264BitWriter::WriteBit(bool state) {
WriteBits(state ? 1 : 0, 1);
}
-void H264BitWriter::WriteScalingList(const std::vector<u8>& list, s32 start, s32 count) {
+void H264BitWriter::WriteScalingList(std::span<const u8> list, s32 start, s32 count) {
std::vector<u8> scan(count);
if (count == 16) {
std::memcpy(scan.data(), zig_zag_scan.data(), scan.size());
diff --git a/src/video_core/command_classes/codecs/h264.h b/src/video_core/command_classes/codecs/h264.h
index 0f3a1d9f3..bfe84a472 100644
--- a/src/video_core/command_classes/codecs/h264.h
+++ b/src/video_core/command_classes/codecs/h264.h
@@ -20,7 +20,9 @@
#pragma once
+#include <span>
#include <vector>
+#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/command_classes/nvdec_common.h"
@@ -48,7 +50,7 @@ public:
/// Based on section 7.3.2.1.1.1 and Table 7-4 in the H.264 specification
/// Writes the scaling matrices of the stream
- void WriteScalingList(const std::vector<u8>& list, s32 start, s32 count);
+ void WriteScalingList(std::span<const u8> list, s32 start, s32 count);
/// Return the bitstream as a vector.
[[nodiscard]] std::vector<u8>& GetByteArray();
@@ -78,40 +80,110 @@ public:
const NvdecCommon::NvdecRegisters& state, bool is_first_frame = false);
private:
+ std::vector<u8> frame;
+ GPU& gpu;
+
struct H264ParameterSet {
- u32 log2_max_pic_order_cnt{};
- u32 delta_pic_order_always_zero_flag{};
- u32 frame_mbs_only_flag{};
- u32 pic_width_in_mbs{};
- u32 pic_height_in_map_units{};
- INSERT_PADDING_WORDS(1);
- u32 entropy_coding_mode_flag{};
- u32 bottom_field_pic_order_flag{};
- u32 num_refidx_l0_default_active{};
- u32 num_refidx_l1_default_active{};
- u32 deblocking_filter_control_flag{};
- u32 redundant_pic_count_flag{};
- u32 transform_8x8_mode_flag{};
- INSERT_PADDING_WORDS(9);
- u64 flags{};
- u32 frame_number{};
- u32 frame_number2{};
+ s32 log2_max_pic_order_cnt_lsb_minus4; ///< 0x00
+ s32 delta_pic_order_always_zero_flag; ///< 0x04
+ s32 frame_mbs_only_flag; ///< 0x08
+ u32 pic_width_in_mbs; ///< 0x0C
+ u32 frame_height_in_map_units; ///< 0x10
+ union { ///< 0x14
+ BitField<0, 2, u32> tile_format;
+ BitField<2, 3, u32> gob_height;
+ };
+ u32 entropy_coding_mode_flag; ///< 0x18
+ s32 pic_order_present_flag; ///< 0x1C
+ s32 num_refidx_l0_default_active; ///< 0x20
+ s32 num_refidx_l1_default_active; ///< 0x24
+ s32 deblocking_filter_control_present_flag; ///< 0x28
+ s32 redundant_pic_cnt_present_flag; ///< 0x2C
+ u32 transform_8x8_mode_flag; ///< 0x30
+ u32 pitch_luma; ///< 0x34
+ u32 pitch_chroma; ///< 0x38
+ u32 luma_top_offset; ///< 0x3C
+ u32 luma_bot_offset; ///< 0x40
+ u32 luma_frame_offset; ///< 0x44
+ u32 chroma_top_offset; ///< 0x48
+ u32 chroma_bot_offset; ///< 0x4C
+ u32 chroma_frame_offset; ///< 0x50
+ u32 hist_buffer_size; ///< 0x54
+ union { ///< 0x58
+ union {
+ BitField<0, 1, u64> mbaff_frame;
+ BitField<1, 1, u64> direct_8x8_inference;
+ BitField<2, 1, u64> weighted_pred;
+ BitField<3, 1, u64> constrained_intra_pred;
+ BitField<4, 1, u64> ref_pic;
+ BitField<5, 1, u64> field_pic;
+ BitField<6, 1, u64> bottom_field;
+ BitField<7, 1, u64> second_field;
+ } flags;
+ BitField<8, 4, u64> log2_max_frame_num_minus4;
+ BitField<12, 2, u64> chroma_format_idc;
+ BitField<14, 2, u64> pic_order_cnt_type;
+ BitField<16, 6, s64> pic_init_qp_minus26;
+ BitField<22, 5, s64> chroma_qp_index_offset;
+ BitField<27, 5, s64> second_chroma_qp_index_offset;
+ BitField<32, 2, u64> weighted_bipred_idc;
+ BitField<34, 7, u64> curr_pic_idx;
+ BitField<41, 5, u64> curr_col_idx;
+ BitField<46, 16, u64> frame_number;
+ BitField<62, 1, u64> frame_surfaces;
+ BitField<63, 1, u64> output_memory_layout;
+ };
};
- static_assert(sizeof(H264ParameterSet) == 0x68, "H264ParameterSet is an invalid size");
+ static_assert(sizeof(H264ParameterSet) == 0x60, "H264ParameterSet is an invalid size");
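For readers new to the BitField helper from common/bit_field.h (assumed semantics: BitField<Position, Bits, T>::Value() extracts Bits bits starting at bit Position), the frame_number access used in ComposeFrameHeader now reads:

    // bits 46..61 of the 64-bit word at offset 0x58:
    //   context.h264_parameter_set.frame_number.Value()  ==  (word >> 46) & 0xFFFF
    // which replaces the hand-written shift/mask on the removed `u64 flags` field.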
struct H264DecoderContext {
- INSERT_PADDING_BYTES(0x48);
- u32 frame_data_size{};
- INSERT_PADDING_BYTES(0xc);
- H264ParameterSet h264_parameter_set{};
- INSERT_PADDING_BYTES(0x100);
- std::array<u8, 0x60> scaling_matrix_4;
- std::array<u8, 0x80> scaling_matrix_8;
+ INSERT_PADDING_WORDS_NOINIT(18); ///< 0x0000
+ u32 stream_len; ///< 0x0048
+ INSERT_PADDING_WORDS_NOINIT(3); ///< 0x004C
+ H264ParameterSet h264_parameter_set; ///< 0x0058
+ INSERT_PADDING_WORDS_NOINIT(66); ///< 0x00B8
+ std::array<u8, 0x60> weight_scale; ///< 0x01C0
+ std::array<u8, 0x80> weight_scale_8x8; ///< 0x0220
};
- static_assert(sizeof(H264DecoderContext) == 0x2a0, "H264DecoderContext is an invalid size");
-
- std::vector<u8> frame;
- GPU& gpu;
+ static_assert(sizeof(H264DecoderContext) == 0x2A0, "H264DecoderContext is an invalid size");
+
+#define ASSERT_POSITION(field_name, position) \
+ static_assert(offsetof(H264ParameterSet, field_name) == position, \
+ "Field " #field_name " has invalid position")
+
+ ASSERT_POSITION(log2_max_pic_order_cnt_lsb_minus4, 0x00);
+ ASSERT_POSITION(delta_pic_order_always_zero_flag, 0x04);
+ ASSERT_POSITION(frame_mbs_only_flag, 0x08);
+ ASSERT_POSITION(pic_width_in_mbs, 0x0C);
+ ASSERT_POSITION(frame_height_in_map_units, 0x10);
+ ASSERT_POSITION(tile_format, 0x14);
+ ASSERT_POSITION(entropy_coding_mode_flag, 0x18);
+ ASSERT_POSITION(pic_order_present_flag, 0x1C);
+ ASSERT_POSITION(num_refidx_l0_default_active, 0x20);
+ ASSERT_POSITION(num_refidx_l1_default_active, 0x24);
+ ASSERT_POSITION(deblocking_filter_control_present_flag, 0x28);
+ ASSERT_POSITION(redundant_pic_cnt_present_flag, 0x2C);
+ ASSERT_POSITION(transform_8x8_mode_flag, 0x30);
+ ASSERT_POSITION(pitch_luma, 0x34);
+ ASSERT_POSITION(pitch_chroma, 0x38);
+ ASSERT_POSITION(luma_top_offset, 0x3C);
+ ASSERT_POSITION(luma_bot_offset, 0x40);
+ ASSERT_POSITION(luma_frame_offset, 0x44);
+ ASSERT_POSITION(chroma_top_offset, 0x48);
+ ASSERT_POSITION(chroma_bot_offset, 0x4C);
+ ASSERT_POSITION(chroma_frame_offset, 0x50);
+ ASSERT_POSITION(hist_buffer_size, 0x54);
+ ASSERT_POSITION(flags, 0x58);
+#undef ASSERT_POSITION
+
+#define ASSERT_POSITION(field_name, position) \
+ static_assert(offsetof(H264DecoderContext, field_name) == position, \
+ "Field " #field_name " has invalid position")
+
+ ASSERT_POSITION(stream_len, 0x48);
+ ASSERT_POSITION(h264_parameter_set, 0x58);
+ ASSERT_POSITION(weight_scale, 0x1C0);
+#undef ASSERT_POSITION
};
} // namespace Decoder
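Note on the layout checks above: the explicit offset comments and ASSERT_POSITION asserts exist because these structures are filled verbatim from guest memory, so the host-side layout has to match the hardware layout byte for byte. A minimal sketch of the idiom, using a made-up GuestLayout struct rather than anything in this patch:

    struct GuestLayout {
        u32 first;                      ///< 0x00
        INSERT_PADDING_WORDS_NOINIT(1); ///< 0x04
        u64 second;                     ///< 0x08
    };
    static_assert(offsetof(GuestLayout, second) == 0x08, "Field second has invalid position");
    // The decoder then reads such a struct straight out of GPU memory, as vp9.cpp below does:
    // gpu.MemoryManager().ReadBlock(offset, &layout, sizeof(GuestLayout));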
diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp
index 29bb31418..902bc2a98 100644
--- a/src/video_core/command_classes/codecs/vp9.cpp
+++ b/src/video_core/command_classes/codecs/vp9.cpp
@@ -354,7 +354,7 @@ void VP9::WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_
}
Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) {
- PictureInfo picture_info{};
+ PictureInfo picture_info;
gpu.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo));
Vp9PictureInfo vp9_info = picture_info.Convert();
@@ -370,7 +370,7 @@ Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state)
}
void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) {
- EntropyProbs entropy{};
+ EntropyProbs entropy;
gpu.MemoryManager().ReadBlock(offset, &entropy, sizeof(EntropyProbs));
entropy.Convert(dst);
}
diff --git a/src/video_core/command_classes/codecs/vp9_types.h b/src/video_core/command_classes/codecs/vp9_types.h
index 139501a1c..2da14f3ca 100644
--- a/src/video_core/command_classes/codecs/vp9_types.h
+++ b/src/video_core/command_classes/codecs/vp9_types.h
@@ -15,10 +15,10 @@ class GPU;
namespace Decoder {
struct Vp9FrameDimensions {
- s16 width{};
- s16 height{};
- s16 luma_pitch{};
- s16 chroma_pitch{};
+ s16 width;
+ s16 height;
+ s16 luma_pitch;
+ s16 chroma_pitch;
};
static_assert(sizeof(Vp9FrameDimensions) == 0x8, "Vp9 Vp9FrameDimensions is an invalid size");
@@ -49,87 +49,87 @@ enum class TxMode {
};
struct Segmentation {
- u8 enabled{};
- u8 update_map{};
- u8 temporal_update{};
- u8 abs_delta{};
- std::array<u32, 8> feature_mask{};
- std::array<std::array<s16, 4>, 8> feature_data{};
+ u8 enabled;
+ u8 update_map;
+ u8 temporal_update;
+ u8 abs_delta;
+ std::array<u32, 8> feature_mask;
+ std::array<std::array<s16, 4>, 8> feature_data;
};
static_assert(sizeof(Segmentation) == 0x64, "Segmentation is an invalid size");
struct LoopFilter {
- u8 mode_ref_delta_enabled{};
- std::array<s8, 4> ref_deltas{};
- std::array<s8, 2> mode_deltas{};
+ u8 mode_ref_delta_enabled;
+ std::array<s8, 4> ref_deltas;
+ std::array<s8, 2> mode_deltas;
};
static_assert(sizeof(LoopFilter) == 0x7, "LoopFilter is an invalid size");
struct Vp9EntropyProbs {
- std::array<u8, 36> y_mode_prob{};
- std::array<u8, 64> partition_prob{};
- std::array<u8, 1728> coef_probs{};
- std::array<u8, 8> switchable_interp_prob{};
- std::array<u8, 28> inter_mode_prob{};
- std::array<u8, 4> intra_inter_prob{};
- std::array<u8, 5> comp_inter_prob{};
- std::array<u8, 10> single_ref_prob{};
- std::array<u8, 5> comp_ref_prob{};
- std::array<u8, 6> tx_32x32_prob{};
- std::array<u8, 4> tx_16x16_prob{};
- std::array<u8, 2> tx_8x8_prob{};
- std::array<u8, 3> skip_probs{};
- std::array<u8, 3> joints{};
- std::array<u8, 2> sign{};
- std::array<u8, 20> classes{};
- std::array<u8, 2> class_0{};
- std::array<u8, 20> prob_bits{};
- std::array<u8, 12> class_0_fr{};
- std::array<u8, 6> fr{};
- std::array<u8, 2> class_0_hp{};
- std::array<u8, 2> high_precision{};
+ std::array<u8, 36> y_mode_prob; ///< 0x0000
+ std::array<u8, 64> partition_prob; ///< 0x0024
+ std::array<u8, 1728> coef_probs; ///< 0x0064
+ std::array<u8, 8> switchable_interp_prob; ///< 0x0724
+ std::array<u8, 28> inter_mode_prob; ///< 0x072C
+ std::array<u8, 4> intra_inter_prob; ///< 0x0748
+ std::array<u8, 5> comp_inter_prob; ///< 0x074C
+ std::array<u8, 10> single_ref_prob; ///< 0x0751
+ std::array<u8, 5> comp_ref_prob; ///< 0x075B
+ std::array<u8, 6> tx_32x32_prob; ///< 0x0760
+ std::array<u8, 4> tx_16x16_prob; ///< 0x0766
+ std::array<u8, 2> tx_8x8_prob; ///< 0x076A
+ std::array<u8, 3> skip_probs; ///< 0x076C
+ std::array<u8, 3> joints; ///< 0x076F
+ std::array<u8, 2> sign; ///< 0x0772
+ std::array<u8, 20> classes; ///< 0x0774
+ std::array<u8, 2> class_0; ///< 0x0788
+ std::array<u8, 20> prob_bits; ///< 0x078A
+ std::array<u8, 12> class_0_fr; ///< 0x079E
+ std::array<u8, 6> fr; ///< 0x07AA
+ std::array<u8, 2> class_0_hp; ///< 0x07B0
+ std::array<u8, 2> high_precision; ///< 0x07B2
};
static_assert(sizeof(Vp9EntropyProbs) == 0x7B4, "Vp9EntropyProbs is an invalid size");
struct Vp9PictureInfo {
- bool is_key_frame{};
- bool intra_only{};
- bool last_frame_was_key{};
- bool frame_size_changed{};
- bool error_resilient_mode{};
- bool last_frame_shown{};
- bool show_frame{};
- std::array<s8, 4> ref_frame_sign_bias{};
- s32 base_q_index{};
- s32 y_dc_delta_q{};
- s32 uv_dc_delta_q{};
- s32 uv_ac_delta_q{};
- bool lossless{};
- s32 transform_mode{};
- bool allow_high_precision_mv{};
- s32 interp_filter{};
- s32 reference_mode{};
- s8 comp_fixed_ref{};
- std::array<s8, 2> comp_var_ref{};
- s32 log2_tile_cols{};
- s32 log2_tile_rows{};
- bool segment_enabled{};
- bool segment_map_update{};
- bool segment_map_temporal_update{};
- s32 segment_abs_delta{};
- std::array<u32, 8> segment_feature_enable{};
- std::array<std::array<s16, 4>, 8> segment_feature_data{};
- bool mode_ref_delta_enabled{};
- bool use_prev_in_find_mv_refs{};
- std::array<s8, 4> ref_deltas{};
- std::array<s8, 2> mode_deltas{};
- Vp9EntropyProbs entropy{};
- Vp9FrameDimensions frame_size{};
- u8 first_level{};
- u8 sharpness_level{};
- u32 bitstream_size{};
- std::array<u64, 4> frame_offsets{};
- std::array<bool, 4> refresh_frame{};
+ bool is_key_frame;
+ bool intra_only;
+ bool last_frame_was_key;
+ bool frame_size_changed;
+ bool error_resilient_mode;
+ bool last_frame_shown;
+ bool show_frame;
+ std::array<s8, 4> ref_frame_sign_bias;
+ s32 base_q_index;
+ s32 y_dc_delta_q;
+ s32 uv_dc_delta_q;
+ s32 uv_ac_delta_q;
+ bool lossless;
+ s32 transform_mode;
+ bool allow_high_precision_mv;
+ s32 interp_filter;
+ s32 reference_mode;
+ s8 comp_fixed_ref;
+ std::array<s8, 2> comp_var_ref;
+ s32 log2_tile_cols;
+ s32 log2_tile_rows;
+ bool segment_enabled;
+ bool segment_map_update;
+ bool segment_map_temporal_update;
+ s32 segment_abs_delta;
+ std::array<u32, 8> segment_feature_enable;
+ std::array<std::array<s16, 4>, 8> segment_feature_data;
+ bool mode_ref_delta_enabled;
+ bool use_prev_in_find_mv_refs;
+ std::array<s8, 4> ref_deltas;
+ std::array<s8, 2> mode_deltas;
+ Vp9EntropyProbs entropy;
+ Vp9FrameDimensions frame_size;
+ u8 first_level;
+ u8 sharpness_level;
+ u32 bitstream_size;
+ std::array<u64, 4> frame_offsets;
+ std::array<bool, 4> refresh_frame;
};
struct Vp9FrameContainer {
@@ -138,35 +138,35 @@ struct Vp9FrameContainer {
};
struct PictureInfo {
- INSERT_PADDING_WORDS(12);
- u32 bitstream_size{};
- INSERT_PADDING_WORDS(5);
- Vp9FrameDimensions last_frame_size{};
- Vp9FrameDimensions golden_frame_size{};
- Vp9FrameDimensions alt_frame_size{};
- Vp9FrameDimensions current_frame_size{};
- u32 vp9_flags{};
- std::array<s8, 4> ref_frame_sign_bias{};
- u8 first_level{};
- u8 sharpness_level{};
- u8 base_q_index{};
- u8 y_dc_delta_q{};
- u8 uv_ac_delta_q{};
- u8 uv_dc_delta_q{};
- u8 lossless{};
- u8 tx_mode{};
- u8 allow_high_precision_mv{};
- u8 interp_filter{};
- u8 reference_mode{};
- s8 comp_fixed_ref{};
- std::array<s8, 2> comp_var_ref{};
- u8 log2_tile_cols{};
- u8 log2_tile_rows{};
- Segmentation segmentation{};
- LoopFilter loop_filter{};
- INSERT_PADDING_BYTES(5);
- u32 surface_params{};
- INSERT_PADDING_WORDS(3);
+ INSERT_PADDING_WORDS_NOINIT(12); ///< 0x00
+ u32 bitstream_size; ///< 0x30
+ INSERT_PADDING_WORDS_NOINIT(5); ///< 0x34
+ Vp9FrameDimensions last_frame_size; ///< 0x48
+ Vp9FrameDimensions golden_frame_size; ///< 0x50
+ Vp9FrameDimensions alt_frame_size; ///< 0x58
+ Vp9FrameDimensions current_frame_size; ///< 0x60
+ u32 vp9_flags; ///< 0x68
+ std::array<s8, 4> ref_frame_sign_bias; ///< 0x6C
+ u8 first_level; ///< 0x70
+ u8 sharpness_level; ///< 0x71
+ u8 base_q_index; ///< 0x72
+ u8 y_dc_delta_q; ///< 0x73
+ u8 uv_ac_delta_q; ///< 0x74
+ u8 uv_dc_delta_q; ///< 0x75
+ u8 lossless; ///< 0x76
+ u8 tx_mode; ///< 0x77
+ u8 allow_high_precision_mv; ///< 0x78
+ u8 interp_filter; ///< 0x79
+ u8 reference_mode; ///< 0x7A
+ s8 comp_fixed_ref; ///< 0x7B
+ std::array<s8, 2> comp_var_ref; ///< 0x7C
+ u8 log2_tile_cols; ///< 0x7E
+ u8 log2_tile_rows; ///< 0x7F
+ Segmentation segmentation; ///< 0x80
+ LoopFilter loop_filter; ///< 0xE4
+ INSERT_PADDING_BYTES_NOINIT(5); ///< 0xEB
+ u32 surface_params; ///< 0xF0
+ INSERT_PADDING_WORDS_NOINIT(3); ///< 0xF4
[[nodiscard]] Vp9PictureInfo Convert() const {
return {
@@ -176,6 +176,7 @@ struct PictureInfo {
.frame_size_changed = (vp9_flags & FrameFlags::FrameSizeChanged) != 0,
.error_resilient_mode = (vp9_flags & FrameFlags::ErrorResilientMode) != 0,
.last_frame_shown = (vp9_flags & FrameFlags::LastShowFrame) != 0,
+ .show_frame = false,
.ref_frame_sign_bias = ref_frame_sign_bias,
.base_q_index = base_q_index,
.y_dc_delta_q = y_dc_delta_q,
@@ -204,45 +205,48 @@ struct PictureInfo {
!(vp9_flags == (FrameFlags::LastFrameIsKeyFrame)),
.ref_deltas = loop_filter.ref_deltas,
.mode_deltas = loop_filter.mode_deltas,
+ .entropy{},
.frame_size = current_frame_size,
.first_level = first_level,
.sharpness_level = sharpness_level,
.bitstream_size = bitstream_size,
+ .frame_offsets{},
+ .refresh_frame{},
};
}
};
static_assert(sizeof(PictureInfo) == 0x100, "PictureInfo is an invalid size");
struct EntropyProbs {
- INSERT_PADDING_BYTES(1024);
- std::array<u8, 28> inter_mode_prob{};
- std::array<u8, 4> intra_inter_prob{};
- INSERT_PADDING_BYTES(80);
- std::array<u8, 2> tx_8x8_prob{};
- std::array<u8, 4> tx_16x16_prob{};
- std::array<u8, 6> tx_32x32_prob{};
- std::array<u8, 4> y_mode_prob_e8{};
- std::array<std::array<u8, 8>, 4> y_mode_prob_e0e7{};
- INSERT_PADDING_BYTES(64);
- std::array<u8, 64> partition_prob{};
- INSERT_PADDING_BYTES(10);
- std::array<u8, 8> switchable_interp_prob{};
- std::array<u8, 5> comp_inter_prob{};
- std::array<u8, 3> skip_probs{};
- INSERT_PADDING_BYTES(1);
- std::array<u8, 3> joints{};
- std::array<u8, 2> sign{};
- std::array<u8, 2> class_0{};
- std::array<u8, 6> fr{};
- std::array<u8, 2> class_0_hp{};
- std::array<u8, 2> high_precision{};
- std::array<u8, 20> classes{};
- std::array<u8, 12> class_0_fr{};
- std::array<u8, 20> pred_bits{};
- std::array<u8, 10> single_ref_prob{};
- std::array<u8, 5> comp_ref_prob{};
- INSERT_PADDING_BYTES(17);
- std::array<u8, 2304> coef_probs{};
+ INSERT_PADDING_BYTES_NOINIT(1024); ///< 0x0000
+ std::array<u8, 28> inter_mode_prob; ///< 0x0400
+ std::array<u8, 4> intra_inter_prob; ///< 0x041C
+ INSERT_PADDING_BYTES_NOINIT(80); ///< 0x0420
+ std::array<u8, 2> tx_8x8_prob; ///< 0x0470
+ std::array<u8, 4> tx_16x16_prob; ///< 0x0472
+ std::array<u8, 6> tx_32x32_prob; ///< 0x0476
+ std::array<u8, 4> y_mode_prob_e8; ///< 0x047C
+ std::array<std::array<u8, 8>, 4> y_mode_prob_e0e7; ///< 0x0480
+ INSERT_PADDING_BYTES_NOINIT(64); ///< 0x04A0
+ std::array<u8, 64> partition_prob; ///< 0x04E0
+ INSERT_PADDING_BYTES_NOINIT(10); ///< 0x0520
+ std::array<u8, 8> switchable_interp_prob; ///< 0x052A
+ std::array<u8, 5> comp_inter_prob; ///< 0x0532
+ std::array<u8, 3> skip_probs; ///< 0x0537
+ INSERT_PADDING_BYTES_NOINIT(1); ///< 0x053A
+ std::array<u8, 3> joints; ///< 0x053B
+ std::array<u8, 2> sign; ///< 0x053E
+ std::array<u8, 2> class_0; ///< 0x0540
+ std::array<u8, 6> fr; ///< 0x0542
+ std::array<u8, 2> class_0_hp; ///< 0x0548
+ std::array<u8, 2> high_precision; ///< 0x054A
+ std::array<u8, 20> classes; ///< 0x054C
+ std::array<u8, 12> class_0_fr; ///< 0x0560
+ std::array<u8, 20> pred_bits; ///< 0x056C
+ std::array<u8, 10> single_ref_prob; ///< 0x0580
+ std::array<u8, 5> comp_ref_prob; ///< 0x058A
+ INSERT_PADDING_BYTES_NOINIT(17); ///< 0x058F
+ std::array<u8, 2304> coef_probs; ///< 0x05A0
void Convert(Vp9EntropyProbs& fc) {
fc.inter_mode_prob = inter_mode_prob;
@@ -293,10 +297,45 @@ struct RefPoolElement {
};
struct FrameContexts {
- s64 from{};
- bool adapted{};
- Vp9EntropyProbs probs{};
+ s64 from;
+ bool adapted;
+ Vp9EntropyProbs probs;
};
+#define ASSERT_POSITION(field_name, position) \
+ static_assert(offsetof(Vp9EntropyProbs, field_name) == position, \
+ "Field " #field_name " has invalid position")
+
+ASSERT_POSITION(partition_prob, 0x0024);
+ASSERT_POSITION(switchable_interp_prob, 0x0724);
+ASSERT_POSITION(sign, 0x0772);
+ASSERT_POSITION(class_0_fr, 0x079E);
+ASSERT_POSITION(high_precision, 0x07B2);
+#undef ASSERT_POSITION
+
+#define ASSERT_POSITION(field_name, position) \
+ static_assert(offsetof(PictureInfo, field_name) == position, \
+ "Field " #field_name " has invalid position")
+
+ASSERT_POSITION(bitstream_size, 0x30);
+ASSERT_POSITION(last_frame_size, 0x48);
+ASSERT_POSITION(first_level, 0x70);
+ASSERT_POSITION(segmentation, 0x80);
+ASSERT_POSITION(loop_filter, 0xE4);
+ASSERT_POSITION(surface_params, 0xF0);
+#undef ASSERT_POSITION
+
+#define ASSERT_POSITION(field_name, position) \
+ static_assert(offsetof(EntropyProbs, field_name) == position, \
+ "Field " #field_name " has invalid position")
+
+ASSERT_POSITION(inter_mode_prob, 0x400);
+ASSERT_POSITION(tx_8x8_prob, 0x470);
+ASSERT_POSITION(partition_prob, 0x4E0);
+ASSERT_POSITION(class_0, 0x540);
+ASSERT_POSITION(class_0_fr, 0x560);
+ASSERT_POSITION(coef_probs, 0x5A0);
+#undef ASSERT_POSITION
+
}; // namespace Decoder
}; // namespace Tegra
diff --git a/src/video_core/command_classes/nvdec.cpp b/src/video_core/command_classes/nvdec.cpp
index e4f919afd..b5e3b70fc 100644
--- a/src/video_core/command_classes/nvdec.cpp
+++ b/src/video_core/command_classes/nvdec.cpp
@@ -8,22 +8,21 @@
namespace Tegra {
-Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), codec(std::make_unique<Codec>(gpu)) {}
+#define NVDEC_REG_INDEX(field_name) \
+ (offsetof(NvdecCommon::NvdecRegisters, field_name) / sizeof(u64))
+
+Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), state{}, codec(std::make_unique<Codec>(gpu, state)) {}
Nvdec::~Nvdec() = default;
-void Nvdec::ProcessMethod(Method method, u32 argument) {
- if (method == Method::SetVideoCodec) {
- codec->StateWrite(static_cast<u32>(method), argument);
- } else {
- codec->StateWrite(static_cast<u32>(method), static_cast<u64>(argument) << 8);
- }
+void Nvdec::ProcessMethod(u32 method, u32 argument) {
+ state.reg_array[method] = static_cast<u64>(argument) << 8;
switch (method) {
- case Method::SetVideoCodec:
+ case NVDEC_REG_INDEX(set_codec_id):
codec->SetTargetCodec(static_cast<NvdecCommon::VideoCodec>(argument));
break;
- case Method::Execute:
+ case NVDEC_REG_INDEX(execute):
Execute();
break;
}
diff --git a/src/video_core/command_classes/nvdec.h b/src/video_core/command_classes/nvdec.h
index e66be80b8..6e1da0b04 100644
--- a/src/video_core/command_classes/nvdec.h
+++ b/src/video_core/command_classes/nvdec.h
@@ -14,16 +14,11 @@ class GPU;
class Nvdec {
public:
- enum class Method : u32 {
- SetVideoCodec = 0x80,
- Execute = 0xc0,
- };
-
explicit Nvdec(GPU& gpu);
~Nvdec();
/// Writes the method into the state, invoking Execute() if it is encountered
- void ProcessMethod(Method method, u32 argument);
+ void ProcessMethod(u32 method, u32 argument);
/// Return most recently decoded frame
[[nodiscard]] AVFramePtr GetFrame();
@@ -33,6 +28,7 @@ private:
void Execute();
GPU& gpu;
+ NvdecCommon::NvdecRegisters state;
std::unique_ptr<Codec> codec;
};
} // namespace Tegra
diff --git a/src/video_core/command_classes/nvdec_common.h b/src/video_core/command_classes/nvdec_common.h
index 01b5e086d..6a24e00a0 100644
--- a/src/video_core/command_classes/nvdec_common.h
+++ b/src/video_core/command_classes/nvdec_common.h
@@ -4,40 +4,13 @@
#pragma once
+#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
namespace Tegra::NvdecCommon {
-struct NvdecRegisters {
- INSERT_PADDING_WORDS(256);
- u64 set_codec_id{};
- INSERT_PADDING_WORDS(254);
- u64 set_platform_id{};
- u64 picture_info_offset{};
- u64 frame_bitstream_offset{};
- u64 frame_number{};
- u64 h264_slice_data_offsets{};
- u64 h264_mv_dump_offset{};
- INSERT_PADDING_WORDS(6);
- u64 frame_stats_offset{};
- u64 h264_last_surface_luma_offset{};
- u64 h264_last_surface_chroma_offset{};
- std::array<u64, 17> surface_luma_offset{};
- std::array<u64, 17> surface_chroma_offset{};
- INSERT_PADDING_WORDS(132);
- u64 vp9_entropy_probs_offset{};
- u64 vp9_backward_updates_offset{};
- u64 vp9_last_frame_segmap_offset{};
- u64 vp9_curr_frame_segmap_offset{};
- INSERT_PADDING_WORDS(2);
- u64 vp9_last_frame_mvs_offset{};
- u64 vp9_curr_frame_mvs_offset{};
- INSERT_PADDING_WORDS(2);
-};
-static_assert(sizeof(NvdecRegisters) == (0xBC0), "NvdecRegisters is incorrect size");
-
-enum class VideoCodec : u32 {
+enum class VideoCodec : u64 {
None = 0x0,
H264 = 0x3,
Vp8 = 0x5,
@@ -45,4 +18,76 @@ enum class VideoCodec : u32 {
Vp9 = 0x9,
};
+// NVDEC should use a 32-bit address space, but is mapped to 64-bit;
+// the sizes here are doubled to compensate for that.
+struct NvdecRegisters {
+ static constexpr std::size_t NUM_REGS = 0x178;
+
+ union {
+ struct {
+ INSERT_PADDING_WORDS_NOINIT(256); ///< 0x0000
+ VideoCodec set_codec_id; ///< 0x0400
+ INSERT_PADDING_WORDS_NOINIT(126); ///< 0x0408
+ u64 execute; ///< 0x0600
+ INSERT_PADDING_WORDS_NOINIT(126); ///< 0x0608
+ struct { ///< 0x0800
+ union {
+ BitField<0, 3, VideoCodec> codec;
+ BitField<4, 1, u64> gp_timer_on;
+ BitField<13, 1, u64> mb_timer_on;
+ BitField<14, 1, u64> intra_frame_pslc;
+ BitField<17, 1, u64> all_intra_frame;
+ };
+ } control_params;
+ u64 picture_info_offset; ///< 0x0808
+ u64 frame_bitstream_offset; ///< 0x0810
+ u64 frame_number; ///< 0x0818
+ u64 h264_slice_data_offsets; ///< 0x0820
+ u64 h264_mv_dump_offset; ///< 0x0828
+ INSERT_PADDING_WORDS_NOINIT(6); ///< 0x0830
+ u64 frame_stats_offset; ///< 0x0848
+ u64 h264_last_surface_luma_offset; ///< 0x0850
+ u64 h264_last_surface_chroma_offset; ///< 0x0858
+ std::array<u64, 17> surface_luma_offset; ///< 0x0860
+ std::array<u64, 17> surface_chroma_offset; ///< 0x08E8
+ INSERT_PADDING_WORDS_NOINIT(132); ///< 0x0970
+ u64 vp9_entropy_probs_offset; ///< 0x0B80
+ u64 vp9_backward_updates_offset; ///< 0x0B88
+ u64 vp9_last_frame_segmap_offset; ///< 0x0B90
+ u64 vp9_curr_frame_segmap_offset; ///< 0x0B98
+ INSERT_PADDING_WORDS_NOINIT(2); ///< 0x0BA0
+ u64 vp9_last_frame_mvs_offset; ///< 0x0BA8
+ u64 vp9_curr_frame_mvs_offset; ///< 0x0BB0
+ INSERT_PADDING_WORDS_NOINIT(2); ///< 0x0BB8
+ };
+ std::array<u64, NUM_REGS> reg_array;
+ };
+};
+static_assert(sizeof(NvdecRegisters) == (0xBC0), "NvdecRegisters is incorrect size");
+
+#define ASSERT_REG_POSITION(field_name, position) \
+ static_assert(offsetof(NvdecRegisters, field_name) == position * sizeof(u64), \
+ "Field " #field_name " has invalid position")
+
+ASSERT_REG_POSITION(set_codec_id, 0x80);
+ASSERT_REG_POSITION(execute, 0xC0);
+ASSERT_REG_POSITION(control_params, 0x100);
+ASSERT_REG_POSITION(picture_info_offset, 0x101);
+ASSERT_REG_POSITION(frame_bitstream_offset, 0x102);
+ASSERT_REG_POSITION(frame_number, 0x103);
+ASSERT_REG_POSITION(h264_slice_data_offsets, 0x104);
+ASSERT_REG_POSITION(frame_stats_offset, 0x109);
+ASSERT_REG_POSITION(h264_last_surface_luma_offset, 0x10A);
+ASSERT_REG_POSITION(h264_last_surface_chroma_offset, 0x10B);
+ASSERT_REG_POSITION(surface_luma_offset, 0x10C);
+ASSERT_REG_POSITION(surface_chroma_offset, 0x11D);
+ASSERT_REG_POSITION(vp9_entropy_probs_offset, 0x170);
+ASSERT_REG_POSITION(vp9_backward_updates_offset, 0x171);
+ASSERT_REG_POSITION(vp9_last_frame_segmap_offset, 0x172);
+ASSERT_REG_POSITION(vp9_curr_frame_segmap_offset, 0x173);
+ASSERT_REG_POSITION(vp9_last_frame_mvs_offset, 0x175);
+ASSERT_REG_POSITION(vp9_curr_frame_mvs_offset, 0x176);
+
+#undef ASSERT_REG_POSITION
+
} // namespace Tegra::NvdecCommon
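The union above overlays the named NVDEC registers on reg_array, which is what lets Nvdec::ProcessMethod (in nvdec.cpp above) write by raw method index while Codec reads the named fields. A minimal usage sketch, with illustrative values only:

    using Tegra::NvdecCommon::NvdecRegisters;
    using Tegra::NvdecCommon::VideoCodec;

    NvdecRegisters regs{};
    // Same computation as NVDEC_REG_INDEX(set_codec_id): byte offset 0x400 / sizeof(u64) == 0x80.
    constexpr std::size_t codec_reg = offsetof(NvdecRegisters, set_codec_id) / sizeof(u64);
    regs.reg_array[codec_reg] = static_cast<u64>(VideoCodec::H264);
    // The named field aliases the same storage, so regs.set_codec_id now reads as VideoCodec::H264.
    // (ProcessMethod itself stores method arguments shifted left by 8; the direct write here is
    // only meant to show the union aliasing.)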
diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp
index 0a8b82f2b..ffb7c82a1 100644
--- a/src/video_core/command_classes/vic.cpp
+++ b/src/video_core/command_classes/vic.cpp
@@ -3,7 +3,21 @@
// Refer to the license.txt file included.
#include <array>
+
+extern "C" {
+#if defined(__GNUC__) || defined(__clang__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
+#include <libswscale/swscale.h>
+#if defined(__GNUC__) || defined(__clang__)
+#pragma GCC diagnostic pop
+#endif
+}
+
#include "common/assert.h"
+#include "common/logging/log.h"
+
#include "video_core/command_classes/nvdec.h"
#include "video_core/command_classes/vic.h"
#include "video_core/engines/maxwell_3d.h"
@@ -11,10 +25,6 @@
#include "video_core/memory_manager.h"
#include "video_core/textures/decoders.h"
-extern "C" {
-#include <libswscale/swscale.h>
-}
-
namespace Tegra {
Vic::Vic(GPU& gpu_, std::shared_ptr<Nvdec> nvdec_processor_)
@@ -119,28 +129,27 @@ void Vic::Execute() {
const std::size_t surface_width = config.surface_width_minus1 + 1;
const std::size_t surface_height = config.surface_height_minus1 + 1;
- const std::size_t half_width = surface_width / 2;
- const std::size_t half_height = config.surface_height_minus1 / 2;
+ const auto frame_width = std::min(surface_width, static_cast<size_t>(frame->width));
+ const auto frame_height = std::min(surface_height, static_cast<size_t>(frame->height));
+ const std::size_t half_width = frame_width / 2;
+ const std::size_t half_height = frame_height / 2;
const std::size_t aligned_width = (surface_width + 0xff) & ~0xff;
const auto* luma_ptr = frame->data[0];
const auto* chroma_b_ptr = frame->data[1];
const auto* chroma_r_ptr = frame->data[2];
- const auto stride = frame->linesize[0];
- const auto half_stride = frame->linesize[1];
+ const auto stride = static_cast<size_t>(frame->linesize[0]);
+ const auto half_stride = static_cast<size_t>(frame->linesize[1]);
luma_buffer.resize(aligned_width * surface_height);
- chroma_buffer.resize(aligned_width * half_height);
+ chroma_buffer.resize(aligned_width * surface_height / 2);
// Populate luma buffer
- for (std::size_t y = 0; y < surface_height - 1; ++y) {
+ for (std::size_t y = 0; y < frame_height; ++y) {
const std::size_t src = y * stride;
const std::size_t dst = y * aligned_width;
-
- const std::size_t size = surface_width;
-
- for (std::size_t offset = 0; offset < size; ++offset) {
- luma_buffer[dst + offset] = luma_ptr[src + offset];
+ for (std::size_t x = 0; x < frame_width; ++x) {
+ luma_buffer[dst + x] = luma_ptr[src + x];
}
}
gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(),
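As a worked example of the loop above: with surface_width = 1920 the destination pitch is aligned_width = (1920 + 0xff) & ~0xff = 2048 bytes, so row y is read starting at luma_ptr[y * stride] and written starting at luma_buffer[y * 2048], copying frame_width pixels per row. Clamping frame_width and frame_height to the smaller of the surface and decoded-frame dimensions keeps the copy inside both buffers when they disagree.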
diff --git a/src/video_core/dirty_flags.cpp b/src/video_core/dirty_flags.cpp
index 7149af290..b1be065c3 100644
--- a/src/video_core/dirty_flags.cpp
+++ b/src/video_core/dirty_flags.cpp
@@ -58,6 +58,11 @@ void SetupDirtyRenderTargets(Maxwell3D::DirtyState::Tables& tables) {
FillBlock(table, OFF(zeta), NUM(zeta), flag);
}
}
+
+void SetupDirtyShaders(Maxwell3D::DirtyState::Tables& tables) {
+ FillBlock(tables[0], OFF(shader_config[0]),
+ NUM(shader_config[0]) * Maxwell3D::Regs::MaxShaderProgram, Shaders);
+}
} // Anonymous namespace
void SetupDirtyFlags(Maxwell3D::DirtyState::Tables& tables) {
@@ -65,6 +70,7 @@ void SetupDirtyFlags(Maxwell3D::DirtyState::Tables& tables) {
SetupIndexBuffer(tables);
SetupDirtyDescriptors(tables);
SetupDirtyRenderTargets(tables);
+ SetupDirtyShaders(tables);
}
} // namespace VideoCommon::Dirty
diff --git a/src/video_core/dirty_flags.h b/src/video_core/dirty_flags.h
index 702688ace..504465d3f 100644
--- a/src/video_core/dirty_flags.h
+++ b/src/video_core/dirty_flags.h
@@ -36,6 +36,8 @@ enum : u8 {
IndexBuffer,
+ Shaders,
+
LastCommonEntry,
};
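A hedged sketch of how a backend would consume the new Shaders flag, assuming the maxwell3d.dirty.flags bitset that the tables in dirty_flags.cpp feed (the consuming rasterizer code is not part of this hunk):

    auto& flags = maxwell3d.dirty.flags;
    if (flags[VideoCommon::Dirty::Shaders]) {
        flags[VideoCommon::Dirty::Shaders] = false;
        // shader_config[] changed since the last draw: re-read the guest shader
        // configuration and rebind the pipeline before drawing.
    }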
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 8b33c04ab..8d28bd884 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -4,6 +4,7 @@
#include "common/cityhash.h"
#include "common/microprofile.h"
+#include "common/settings.h"
#include "core/core.h"
#include "core/memory.h"
#include "video_core/dma_pusher.h"
@@ -76,8 +77,13 @@ bool DmaPusher::Step() {
// Push buffer non-empty, read a word
command_headers.resize(command_list_header.size);
- gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(),
- command_list_header.size * sizeof(u32));
+ if (Settings::IsGPULevelHigh()) {
+ gpu.MemoryManager().ReadBlock(dma_get, command_headers.data(),
+ command_list_header.size * sizeof(u32));
+ } else {
+ gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(),
+ command_list_header.size * sizeof(u32));
+ }
}
for (std::size_t index = 0; index < command_headers.size();) {
const CommandHeader& command_header = command_headers[index];
diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h
deleted file mode 100644
index f46e81bb7..000000000
--- a/src/video_core/engines/const_buffer_engine_interface.h
+++ /dev/null
@@ -1,103 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <type_traits>
-#include "common/bit_field.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/engines/shader_type.h"
-#include "video_core/guest_driver.h"
-#include "video_core/textures/texture.h"
-
-namespace Tegra::Engines {
-
-struct SamplerDescriptor {
- union {
- u32 raw = 0;
- BitField<0, 2, Tegra::Shader::TextureType> texture_type;
- BitField<2, 3, Tegra::Texture::ComponentType> r_type;
- BitField<5, 1, u32> is_array;
- BitField<6, 1, u32> is_buffer;
- BitField<7, 1, u32> is_shadow;
- BitField<8, 3, Tegra::Texture::ComponentType> g_type;
- BitField<11, 3, Tegra::Texture::ComponentType> b_type;
- BitField<14, 3, Tegra::Texture::ComponentType> a_type;
- BitField<17, 7, Tegra::Texture::TextureFormat> format;
- };
-
- bool operator==(const SamplerDescriptor& rhs) const noexcept {
- return raw == rhs.raw;
- }
-
- bool operator!=(const SamplerDescriptor& rhs) const noexcept {
- return !operator==(rhs);
- }
-
- static SamplerDescriptor FromTIC(const Tegra::Texture::TICEntry& tic) {
- using Tegra::Shader::TextureType;
- SamplerDescriptor result;
-
- result.format.Assign(tic.format.Value());
- result.r_type.Assign(tic.r_type.Value());
- result.g_type.Assign(tic.g_type.Value());
- result.b_type.Assign(tic.b_type.Value());
- result.a_type.Assign(tic.a_type.Value());
-
- switch (tic.texture_type.Value()) {
- case Tegra::Texture::TextureType::Texture1D:
- result.texture_type.Assign(TextureType::Texture1D);
- return result;
- case Tegra::Texture::TextureType::Texture2D:
- result.texture_type.Assign(TextureType::Texture2D);
- return result;
- case Tegra::Texture::TextureType::Texture3D:
- result.texture_type.Assign(TextureType::Texture3D);
- return result;
- case Tegra::Texture::TextureType::TextureCubemap:
- result.texture_type.Assign(TextureType::TextureCube);
- return result;
- case Tegra::Texture::TextureType::Texture1DArray:
- result.texture_type.Assign(TextureType::Texture1D);
- result.is_array.Assign(1);
- return result;
- case Tegra::Texture::TextureType::Texture2DArray:
- result.texture_type.Assign(TextureType::Texture2D);
- result.is_array.Assign(1);
- return result;
- case Tegra::Texture::TextureType::Texture1DBuffer:
- result.texture_type.Assign(TextureType::Texture1D);
- result.is_buffer.Assign(1);
- return result;
- case Tegra::Texture::TextureType::Texture2DNoMipmap:
- result.texture_type.Assign(TextureType::Texture2D);
- return result;
- case Tegra::Texture::TextureType::TextureCubeArray:
- result.texture_type.Assign(TextureType::TextureCube);
- result.is_array.Assign(1);
- return result;
- default:
- result.texture_type.Assign(TextureType::Texture2D);
- return result;
- }
- }
-};
-static_assert(std::is_trivially_copyable_v<SamplerDescriptor>);
-
-class ConstBufferEngineInterface {
-public:
- virtual ~ConstBufferEngineInterface() = default;
- virtual u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const = 0;
- virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0;
- virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
- u64 offset) const = 0;
- virtual SamplerDescriptor AccessSampler(u32 handle) const = 0;
- virtual u32 GetBoundBuffer() const = 0;
-
- virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0;
- virtual const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const = 0;
-};
-
-} // namespace Tegra::Engines
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 0f640fdae..f26530ede 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -7,6 +7,10 @@
#include "video_core/engines/fermi_2d.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
+#include "video_core/surface.h"
+
+using VideoCore::Surface::BytesPerBlock;
+using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
namespace Tegra::Engines {
@@ -49,7 +53,7 @@ void Fermi2D::Blit() {
UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled");
const auto& args = regs.pixels_from_memory;
- const Config config{
+ Config config{
.operation = regs.operation,
.filter = args.sample_mode.filter,
.dst_x0 = args.dst_x0,
@@ -61,7 +65,21 @@ void Fermi2D::Blit() {
.src_x1 = static_cast<s32>((args.du_dx * args.dst_width + args.src_x0) >> 32),
.src_y1 = static_cast<s32>((args.dv_dy * args.dst_height + args.src_y0) >> 32),
};
- if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, config)) {
+ Surface src = regs.src;
+ const auto bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format));
+ const auto need_align_to_pitch =
+ src.linear == Tegra::Engines::Fermi2D::MemoryLayout::Pitch &&
+ static_cast<s32>(src.width) == config.src_x1 &&
+ config.src_x1 > static_cast<s32>(src.pitch / bytes_per_pixel) && config.src_x0 > 0;
+ if (need_align_to_pitch) {
+ auto address = src.Address() + config.src_x0 * bytes_per_pixel;
+ src.addr_upper = static_cast<u32>(address >> 32);
+ src.addr_lower = static_cast<u32>(address);
+ src.width -= config.src_x0;
+ config.src_x1 -= config.src_x0;
+ config.src_x0 = 0;
+ }
+ if (!rasterizer->AccelerateSurfaceCopy(src, regs.dst, config)) {
UNIMPLEMENTED();
}
}
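A worked example of the rebasing above, with illustrative numbers: for bytes_per_pixel = 4, src.pitch = 256 (64 texels per row), src.width = 96, config.src_x0 = 32 and config.src_x1 = 96, the source window overruns the pitch (96 > 64), so the blit advances the source address by 32 * 4 = 128 bytes and shrinks the window to src.width = 64, src_x1 = 64, src_x0 = 0, which again fits within a single pitch row.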
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index a4170ffff..d76c5ed56 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -299,7 +299,7 @@ public:
};
private:
- VideoCore::RasterizerInterface* rasterizer;
+ VideoCore::RasterizerInterface* rasterizer = nullptr;
/// Performs the copy from the source surface to the destination surface as configured in the
/// registers.
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index a9b75091e..492b4c5a3 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -8,7 +8,6 @@
#include "core/core.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
-#include "video_core/engines/shader_type.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_base.h"
@@ -57,53 +56,11 @@ void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amoun
}
}
-u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const {
- ASSERT(stage == ShaderType::Compute);
- const auto& buffer = launch_description.const_buffer_config[const_buffer];
- u32 result;
- std::memcpy(&result, memory_manager.GetPointer(buffer.Address() + offset), sizeof(u32));
- return result;
-}
-
-SamplerDescriptor KeplerCompute::AccessBoundSampler(ShaderType stage, u64 offset) const {
- return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle));
-}
-
-SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 const_buffer,
- u64 offset) const {
- ASSERT(stage == ShaderType::Compute);
- const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer];
- const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset;
- return AccessSampler(memory_manager.Read<u32>(tex_info_address));
-}
-
-SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const {
- const Texture::TextureHandle tex_handle{handle};
- const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id);
- const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id);
-
- SamplerDescriptor result = SamplerDescriptor::FromTIC(tic);
- result.is_shadow.Assign(tsc.depth_compare_enabled.Value());
- return result;
-}
-
-VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() {
- return rasterizer->AccessGuestDriverProfile();
-}
-
-const VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() const {
- return rasterizer->AccessGuestDriverProfile();
-}
-
void KeplerCompute::ProcessLaunch() {
const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,
LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32));
-
- const GPUVAddr code_addr = regs.code_loc.Address() + launch_description.program_start;
- LOG_TRACE(HW_GPU, "Compute invocation launched at address 0x{:016x}", code_addr);
-
- rasterizer->DispatchCompute(code_addr);
+ rasterizer->DispatchCompute();
}
Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const {
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 7c40cba38..f8b8d06ac 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -10,10 +10,8 @@
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
-#include "video_core/engines/const_buffer_engine_interface.h"
#include "video_core/engines/engine_interface.h"
#include "video_core/engines/engine_upload.h"
-#include "video_core/engines/shader_type.h"
#include "video_core/gpu.h"
#include "video_core/textures/texture.h"
@@ -40,7 +38,7 @@ namespace Tegra::Engines {
#define KEPLER_COMPUTE_REG_INDEX(field_name) \
(offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32))
-class KeplerCompute final : public ConstBufferEngineInterface, public EngineInterface {
+class KeplerCompute final : public EngineInterface {
public:
explicit KeplerCompute(Core::System& system, MemoryManager& memory_manager);
~KeplerCompute();
@@ -209,23 +207,6 @@ public:
void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
u32 methods_pending) override;
- u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
-
- SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;
-
- SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
- u64 offset) const override;
-
- SamplerDescriptor AccessSampler(u32 handle) const override;
-
- u32 GetBoundBuffer() const override {
- return regs.tex_cb_index;
- }
-
- VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override;
-
- const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override;
-
private:
void ProcessLaunch();
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index aab6b8f7a..b18b8a02a 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -8,7 +8,6 @@
#include "core/core.h"
#include "core/core_timing.h"
#include "video_core/engines/maxwell_3d.h"
-#include "video_core/engines/shader_type.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
@@ -670,42 +669,4 @@ void Maxwell3D::ProcessClearBuffers() {
rasterizer->Clear();
}
-u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const {
- ASSERT(stage != ShaderType::Compute);
- const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)];
- const auto& buffer = shader_stage.const_buffers[const_buffer];
- return memory_manager.Read<u32>(buffer.address + offset);
-}
-
-SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const {
- return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle));
-}
-
-SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_buffer,
- u64 offset) const {
- ASSERT(stage != ShaderType::Compute);
- const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
- const auto& tex_info_buffer = shader.const_buffers[const_buffer];
- const GPUVAddr tex_info_address = tex_info_buffer.address + offset;
- return AccessSampler(memory_manager.Read<u32>(tex_info_address));
-}
-
-SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const {
- const Texture::TextureHandle tex_handle{handle};
- const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id);
- const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id);
-
- SamplerDescriptor result = SamplerDescriptor::FromTIC(tic);
- result.is_shadow.Assign(tsc.depth_compare_enabled.Value());
- return result;
-}
-
-VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() {
- return rasterizer->AccessGuestDriverProfile();
-}
-
-const VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() const {
- return rasterizer->AccessGuestDriverProfile();
-}
-
} // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index ffed42a29..1aa43523a 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -17,11 +17,9 @@
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/math_util.h"
-#include "video_core/engines/const_buffer_engine_interface.h"
#include "video_core/engines/const_buffer_info.h"
#include "video_core/engines/engine_interface.h"
#include "video_core/engines/engine_upload.h"
-#include "video_core/engines/shader_type.h"
#include "video_core/gpu.h"
#include "video_core/macro/macro.h"
#include "video_core/textures/texture.h"
@@ -49,7 +47,7 @@ namespace Tegra::Engines {
#define MAXWELL3D_REG_INDEX(field_name) \
(offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32))
-class Maxwell3D final : public ConstBufferEngineInterface, public EngineInterface {
+class Maxwell3D final : public EngineInterface {
public:
explicit Maxwell3D(Core::System& system, MemoryManager& memory_manager);
~Maxwell3D();
@@ -242,6 +240,7 @@ public:
return 4;
default:
UNREACHABLE();
+ return 1;
}
}
@@ -306,10 +305,6 @@ public:
return (type == Type::SignedNorm) || (type == Type::UnsignedNorm);
}
- bool IsConstant() const {
- return constant;
- }
-
bool IsValid() const {
return size != Size::Invalid;
}
@@ -911,7 +906,11 @@ public:
u32 fill_rectangle;
- INSERT_PADDING_WORDS_NOINIT(0x8);
+ INSERT_PADDING_WORDS_NOINIT(0x2);
+
+ u32 conservative_raster_enable;
+
+ INSERT_PADDING_WORDS_NOINIT(0x5);
std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format;
@@ -958,7 +957,11 @@ public:
SamplerIndex sampler_index;
- INSERT_PADDING_WORDS_NOINIT(0x25);
+ INSERT_PADDING_WORDS_NOINIT(0x2);
+
+ std::array<u32, 8> gp_passthrough_mask;
+
+ INSERT_PADDING_WORDS_NOINIT(0x1B);
u32 depth_test_enable;
@@ -1151,7 +1154,11 @@ public:
u32 index;
} primitive_restart;
- INSERT_PADDING_WORDS_NOINIT(0x5F);
+ INSERT_PADDING_WORDS_NOINIT(0xE);
+
+ u32 provoking_vertex_last;
+
+ INSERT_PADDING_WORDS_NOINIT(0x50);
struct {
u32 start_addr_high;
@@ -1423,23 +1430,6 @@ public:
void FlushMMEInlineDraw();
- u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
-
- SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;
-
- SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
- u64 offset) const override;
-
- SamplerDescriptor AccessSampler(u32 handle) const override;
-
- u32 GetBoundBuffer() const override {
- return regs.tex_cb_index;
- }
-
- VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override;
-
- const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override;
-
bool ShouldExecute() const {
return execute_on;
}
@@ -1629,6 +1619,7 @@ ASSERT_REG_POSITION(zeta, 0x3F8);
ASSERT_REG_POSITION(render_area, 0x3FD);
ASSERT_REG_POSITION(clear_flags, 0x43E);
ASSERT_REG_POSITION(fill_rectangle, 0x44F);
+ASSERT_REG_POSITION(conservative_raster_enable, 0x452);
ASSERT_REG_POSITION(vertex_attrib_format, 0x458);
ASSERT_REG_POSITION(multisample_sample_locations, 0x478);
ASSERT_REG_POSITION(multisample_coverage_to_color, 0x47E);
@@ -1637,6 +1628,7 @@ ASSERT_REG_POSITION(zeta_width, 0x48a);
ASSERT_REG_POSITION(zeta_height, 0x48b);
ASSERT_REG_POSITION(zeta_depth, 0x48c);
ASSERT_REG_POSITION(sampler_index, 0x48D);
+ASSERT_REG_POSITION(gp_passthrough_mask, 0x490);
ASSERT_REG_POSITION(depth_test_enable, 0x4B3);
ASSERT_REG_POSITION(independent_blend_enable, 0x4B9);
ASSERT_REG_POSITION(depth_write_enabled, 0x4BA);
@@ -1689,6 +1681,7 @@ ASSERT_REG_POSITION(point_coord_replace, 0x581);
ASSERT_REG_POSITION(code_address, 0x582);
ASSERT_REG_POSITION(draw, 0x585);
ASSERT_REG_POSITION(primitive_restart, 0x591);
+ASSERT_REG_POSITION(provoking_vertex_last, 0x5A1);
ASSERT_REG_POSITION(index_array, 0x5F2);
ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F);
ASSERT_REG_POSITION(instanced_arrays, 0x620);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 2ee980bab..c7ec1eac9 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -4,6 +4,7 @@
#include "common/assert.h"
#include "common/logging/log.h"
+#include "common/microprofile.h"
#include "common/settings.h"
#include "core/core.h"
#include "video_core/engines/maxwell_3d.h"
@@ -12,6 +13,9 @@
#include "video_core/renderer_base.h"
#include "video_core/textures/decoders.h"
+MICROPROFILE_DECLARE(GPU_DMAEngine);
+MICROPROFILE_DEFINE(GPU_DMAEngine, "GPU", "DMA Engine", MP_RGB(224, 224, 128));
+
namespace Tegra::Engines {
using namespace Texture;
@@ -21,6 +25,10 @@ MaxwellDMA::MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_)
MaxwellDMA::~MaxwellDMA() = default;
+void MaxwellDMA::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
+ rasterizer = rasterizer_;
+}
+
void MaxwellDMA::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
ASSERT_MSG(method < NUM_REGS, "Invalid MaxwellDMA register");
@@ -39,12 +47,12 @@ void MaxwellDMA::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
}
void MaxwellDMA::Launch() {
+ MICROPROFILE_SCOPE(GPU_DMAEngine);
LOG_TRACE(Render_OpenGL, "DMA copy 0x{:x} -> 0x{:x}", static_cast<GPUVAddr>(regs.offset_in),
static_cast<GPUVAddr>(regs.offset_out));
// TODO(Subv): Perform more research and implement all features of this engine.
const LaunchDMA& launch = regs.launch_dma;
- ASSERT(launch.remap_enable == 0);
ASSERT(launch.semaphore_type == LaunchDMA::SemaphoreType::NONE);
ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE);
ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED);
@@ -77,11 +85,31 @@ void MaxwellDMA::CopyPitchToPitch() {
// When the `multi_line_enable` bit is disabled, the copy is performed as if we were copying a 1D
// buffer of length `line_length_in`.
// Otherwise we copy a 2D image of dimensions (line_length_in, line_count).
+ auto& accelerate = rasterizer->AccessAccelerateDMA();
if (!regs.launch_dma.multi_line_enable) {
- memory_manager.CopyBlock(regs.offset_out, regs.offset_in, regs.line_length_in);
+ const bool is_buffer_clear = regs.launch_dma.remap_enable != 0 &&
+ regs.remap_const.dst_x == RemapConst::Swizzle::CONST_A;
+ // TODO: allow multisized components.
+ if (is_buffer_clear) {
+ ASSERT(regs.remap_const.component_size_minus_one == 3);
+ accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value);
+ std::vector<u32> tmp_buffer(regs.line_length_in, regs.remap_consta_value);
+ memory_manager.WriteBlockUnsafe(regs.offset_out,
+ reinterpret_cast<u8*>(tmp_buffer.data()),
+ regs.line_length_in * sizeof(u32));
+ return;
+ }
+ UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0);
+ if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) {
+ std::vector<u8> tmp_buffer(regs.line_length_in);
+ memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(), regs.line_length_in);
+ memory_manager.WriteBlock(regs.offset_out, tmp_buffer.data(), regs.line_length_in);
+ }
return;
}
+ UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0);
+
// Perform a line-by-line copy.
// We're going to take a subrect of size (line_length_in, line_count) from the source rectangle.
// There is no need to manually flush/invalidate the regions because CopyBlock does that for us.
@@ -99,12 +127,14 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
// Optimized path for micro copies.
const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count;
- if (dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X) {
+ if (dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X &&
+ regs.src_params.height > GOB_SIZE_Y) {
FastCopyBlockLinearToPitch();
return;
}
// Deswizzle the input and copy it over.
+ UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0);
const u32 bytes_per_pixel = regs.pitch_out / regs.line_length_in;
const Parameters& src_params = regs.src_params;
const u32 width = src_params.width;
@@ -134,6 +164,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
void MaxwellDMA::CopyPitchToBlockLinear() {
UNIMPLEMENTED_IF_MSG(regs.dst_params.block_size.width != 0, "Block width is not one");
+ UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0);
const auto& dst_params = regs.dst_params;
const u32 bytes_per_pixel = regs.pitch_in / regs.line_length_in;
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index c77f02a22..9e457ae16 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -21,8 +21,20 @@ namespace Tegra {
class MemoryManager;
}
+namespace VideoCore {
+class RasterizerInterface;
+}
+
namespace Tegra::Engines {
+class AccelerateDMAInterface {
+public:
+ /// Copies amount bytes from src_address to dest_address, returning true if the copy was accelerated.
+ virtual bool BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) = 0;
+
+ virtual bool BufferClear(GPUVAddr src_address, u64 amount, u32 value) = 0;
+};
+
/**
* This engine is known as gk104_copy. Documentation can be found in:
* https://github.com/NVIDIA/open-gpu-doc/blob/master/classes/dma-copy/clb0b5.h
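A minimal sketch of an implementation of the new interface; NullAccelerateDMA is a made-up name, not part of this patch. Returning false from BufferCopy makes CopyPitchToPitch (in maxwell_dma.cpp above) fall back to the MemoryManager read/write path:

    class NullAccelerateDMA final : public Tegra::Engines::AccelerateDMAInterface {
    public:
        bool BufferCopy(GPUVAddr, GPUVAddr, u64) override {
            return false; // nothing accelerated; the engine copies through MemoryManager instead
        }
        bool BufferClear(GPUVAddr, u64, u32) override {
            return false;
        }
    };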
@@ -187,6 +199,8 @@ public:
};
static_assert(sizeof(RemapConst) == 12);
+ void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
+
explicit MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_);
~MaxwellDMA() override;
@@ -213,6 +227,7 @@ private:
Core::System& system;
MemoryManager& memory_manager;
+ VideoCore::RasterizerInterface* rasterizer = nullptr;
std::vector<u8> read_buffer;
std::vector<u8> write_buffer;
@@ -240,7 +255,9 @@ private:
u32 pitch_out;
u32 line_length_in;
u32 line_count;
- u32 reserved06[0xb8];
+ u32 reserved06[0xb6];
+ u32 remap_consta_value;
+ u32 remap_constb_value;
RemapConst remap_const;
Parameters dst_params;
u32 reserved07[0x1];
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
deleted file mode 100644
index 8b45f1b62..000000000
--- a/src/video_core/engines/shader_bytecode.h
+++ /dev/null
@@ -1,2298 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <array>
-#include <bitset>
-#include <optional>
-#include <tuple>
-#include <vector>
-
-#include "common/assert.h"
-#include "common/bit_field.h"
-#include "common/common_types.h"
-
-namespace Tegra::Shader {
-
-struct Register {
- /// Number of registers
- static constexpr std::size_t NumRegisters = 256;
-
- /// Register 255 is special cased to always be 0
- static constexpr std::size_t ZeroIndex = 255;
-
- enum class Size : u64 {
- Byte = 0,
- Short = 1,
- Word = 2,
- Long = 3,
- };
-
- constexpr Register() = default;
-
- constexpr Register(u64 value_) : value(value_) {}
-
- [[nodiscard]] constexpr operator u64() const {
- return value;
- }
-
- template <typename T>
- [[nodiscard]] constexpr u64 operator-(const T& oth) const {
- return value - oth;
- }
-
- template <typename T>
- [[nodiscard]] constexpr u64 operator&(const T& oth) const {
- return value & oth;
- }
-
- [[nodiscard]] constexpr u64 operator&(const Register& oth) const {
- return value & oth.value;
- }
-
- [[nodiscard]] constexpr u64 operator~() const {
- return ~value;
- }
-
- [[nodiscard]] u64 GetSwizzledIndex(u64 elem) const {
- elem = (value + elem) & 3;
- return (value & ~3) + elem;
- }
-
-private:
- u64 value{};
-};
-
-enum class AttributeSize : u64 {
- Word = 0,
- DoubleWord = 1,
- TripleWord = 2,
- QuadWord = 3,
-};
-
-union Attribute {
- Attribute() = default;
-
- constexpr explicit Attribute(u64 value_) : value(value_) {}
-
- enum class Index : u64 {
- LayerViewportPointSize = 6,
- Position = 7,
- Attribute_0 = 8,
- Attribute_31 = 39,
- FrontColor = 40,
- FrontSecondaryColor = 41,
- BackColor = 42,
- BackSecondaryColor = 43,
- ClipDistances0123 = 44,
- ClipDistances4567 = 45,
- PointCoord = 46,
- // This attribute contains a tuple of (~, ~, InstanceId, VertexId) when inside a vertex
- // shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval
- // shader.
- TessCoordInstanceIDVertexID = 47,
- TexCoord_0 = 48,
- TexCoord_7 = 55,
- // This attribute contains a tuple of (Unk, Unk, Unk, gl_FrontFacing) when inside a fragment
- // shader. It is unknown what the other values contain.
- FrontFacing = 63,
- };
-
- union {
- BitField<20, 10, u64> immediate;
- BitField<22, 2, u64> element;
- BitField<24, 6, Index> index;
- BitField<31, 1, u64> patch;
- BitField<47, 3, AttributeSize> size;
-
- [[nodiscard]] bool IsPhysical() const {
- return patch == 0 && element == 0 && static_cast<u64>(index.Value()) == 0;
- }
- } fmt20;
-
- union {
- BitField<30, 2, u64> element;
- BitField<32, 6, Index> index;
- } fmt28;
-
- BitField<39, 8, u64> reg;
- u64 value{};
-};
-
-union Sampler {
- Sampler() = default;
-
- constexpr explicit Sampler(u64 value_) : value(value_) {}
-
- enum class Index : u64 {
- Sampler_0 = 8,
- };
-
- BitField<36, 13, Index> index;
- u64 value{};
-};
-
-union Image {
- Image() = default;
-
- constexpr explicit Image(u64 value_) : value{value_} {}
-
- BitField<36, 13, u64> index;
- u64 value;
-};
-
-} // namespace Tegra::Shader
-
-namespace std {
-
-// TODO(bunnei): The below is forbidden by the C++ standard, but works fine. See #330.
-template <>
-struct make_unsigned<Tegra::Shader::Attribute> {
- using type = Tegra::Shader::Attribute;
-};
-
-template <>
-struct make_unsigned<Tegra::Shader::Register> {
- using type = Tegra::Shader::Register;
-};
-
-} // namespace std
-
-namespace Tegra::Shader {
-
-enum class Pred : u64 {
- UnusedIndex = 0x7,
- NeverExecute = 0xF,
-};
-
-enum class PredCondition : u64 {
- F = 0, // Always false
- LT = 1, // Ordered less than
- EQ = 2, // Ordered equal
- LE = 3, // Ordered less than or equal
- GT = 4, // Ordered greater than
- NE = 5, // Ordered not equal
- GE = 6, // Ordered greater than or equal
- NUM = 7, // Ordered
- NAN_ = 8, // Unordered
- LTU = 9, // Unordered less than
- EQU = 10, // Unordered equal
- LEU = 11, // Unordered less than or equal
- GTU = 12, // Unordered greater than
- NEU = 13, // Unordered not equal
- GEU = 14, // Unordered greater than or equal
- T = 15, // Always true
-};
-
-enum class PredOperation : u64 {
- And = 0,
- Or = 1,
- Xor = 2,
-};
-
-enum class LogicOperation : u64 {
- And = 0,
- Or = 1,
- Xor = 2,
- PassB = 3,
-};
-
-enum class SubOp : u64 {
- Cos = 0x0,
- Sin = 0x1,
- Ex2 = 0x2,
- Lg2 = 0x3,
- Rcp = 0x4,
- Rsq = 0x5,
- Sqrt = 0x8,
-};
-
-enum class F2iRoundingOp : u64 {
- RoundEven = 0,
- Floor = 1,
- Ceil = 2,
- Trunc = 3,
-};
-
-enum class F2fRoundingOp : u64 {
- None = 0,
- Pass = 3,
- Round = 8,
- Floor = 9,
- Ceil = 10,
- Trunc = 11,
-};
-
-enum class AtomicOp : u64 {
- Add = 0,
- Min = 1,
- Max = 2,
- Inc = 3,
- Dec = 4,
- And = 5,
- Or = 6,
- Xor = 7,
- Exch = 8,
- SafeAdd = 10,
-};
-
-enum class GlobalAtomicType : u64 {
- U32 = 0,
- S32 = 1,
- U64 = 2,
- F32_FTZ_RN = 3,
- F16x2_FTZ_RN = 4,
- S64 = 5,
-};
-
-enum class UniformType : u64 {
- UnsignedByte = 0,
- SignedByte = 1,
- UnsignedShort = 2,
- SignedShort = 3,
- Single = 4,
- Double = 5,
- Quad = 6,
- UnsignedQuad = 7,
-};
-
-enum class StoreType : u64 {
- Unsigned8 = 0,
- Signed8 = 1,
- Unsigned16 = 2,
- Signed16 = 3,
- Bits32 = 4,
- Bits64 = 5,
- Bits128 = 6,
-};
-
-enum class AtomicType : u64 {
- U32 = 0,
- S32 = 1,
- U64 = 2,
- S64 = 3,
-};
-
-enum class IMinMaxExchange : u64 {
- None = 0,
- XLo = 1,
- XMed = 2,
- XHi = 3,
-};
-
-enum class VideoType : u64 {
- Size16_Low = 0,
- Size16_High = 1,
- Size32 = 2,
- Invalid = 3,
-};
-
-enum class VmadShr : u64 {
- Shr7 = 1,
- Shr15 = 2,
-};
-
-enum class VmnmxType : u64 {
- Bits8,
- Bits16,
- Bits32,
-};
-
-enum class VmnmxOperation : u64 {
- Mrg_16H = 0,
- Mrg_16L = 1,
- Mrg_8B0 = 2,
- Mrg_8B2 = 3,
- Acc = 4,
- Min = 5,
- Max = 6,
- Nop = 7,
-};
-
-enum class XmadMode : u64 {
- None = 0,
- CLo = 1,
- CHi = 2,
- CSfu = 3,
- CBcc = 4,
-};
-
-enum class IAdd3Mode : u64 {
- None = 0,
- RightShift = 1,
- LeftShift = 2,
-};
-
-enum class IAdd3Height : u64 {
- None = 0,
- LowerHalfWord = 1,
- UpperHalfWord = 2,
-};
-
-enum class FlowCondition : u64 {
- Always = 0xF,
- Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for?
-};
-
-enum class ConditionCode : u64 {
- F = 0,
- LT = 1,
- EQ = 2,
- LE = 3,
- GT = 4,
- NE = 5,
- GE = 6,
- Num = 7,
- Nan = 8,
- LTU = 9,
- EQU = 10,
- LEU = 11,
- GTU = 12,
- NEU = 13,
- GEU = 14,
- T = 15,
- OFF = 16,
- LO = 17,
- SFF = 18,
- LS = 19,
- HI = 20,
- SFT = 21,
- HS = 22,
- OFT = 23,
- CSM_TA = 24,
- CSM_TR = 25,
- CSM_MX = 26,
- FCSM_TA = 27,
- FCSM_TR = 28,
- FCSM_MX = 29,
- RLE = 30,
- RGT = 31,
-};
-
-enum class PredicateResultMode : u64 {
- None = 0x0,
- NotZero = 0x3,
-};
-
-enum class TextureType : u64 {
- Texture1D = 0,
- Texture2D = 1,
- Texture3D = 2,
- TextureCube = 3,
-};
-
-enum class TextureQueryType : u64 {
- Dimension = 1,
- TextureType = 2,
- SamplePosition = 5,
- Filter = 16,
- LevelOfDetail = 18,
- Wrap = 20,
- BorderColor = 22,
-};
-
-enum class TextureProcessMode : u64 {
- None = 0,
- LZ = 1, // Load LOD of zero.
- LB = 2, // Load Bias.
- LL = 3, // Load LOD.
- LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB.
- LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL.
-};
-
-enum class TextureMiscMode : u64 {
- DC,
- AOFFI, // Uses Offset
- NDV,
- NODEP,
- MZ,
- PTP,
-};
-
-enum class SurfaceDataMode : u64 {
- P = 0,
- D_BA = 1,
-};
-
-enum class OutOfBoundsStore : u64 {
- Ignore = 0,
- Clamp = 1,
- Trap = 2,
-};
-
-enum class ImageType : u64 {
- Texture1D = 0,
- TextureBuffer = 1,
- Texture1DArray = 2,
- Texture2D = 3,
- Texture2DArray = 4,
- Texture3D = 5,
-};
-
-enum class IsberdMode : u64 {
- None = 0,
- Patch = 1,
- Prim = 2,
- Attr = 3,
-};
-
-enum class IsberdShift : u64 { None = 0, U16 = 1, B32 = 2 };
-
-enum class MembarType : u64 {
- CTA = 0,
- GL = 1,
- SYS = 2,
- VC = 3,
-};
-
-enum class MembarUnknown : u64 { Default = 0, IVALLD = 1, IVALLT = 2, IVALLTD = 3 };
-
-enum class HalfType : u64 {
- H0_H1 = 0,
- F32 = 1,
- H0_H0 = 2,
- H1_H1 = 3,
-};
-
-enum class HalfMerge : u64 {
- H0_H1 = 0,
- F32 = 1,
- Mrg_H0 = 2,
- Mrg_H1 = 3,
-};
-
-enum class HalfPrecision : u64 {
- None = 0,
- FTZ = 1,
- FMZ = 2,
-};
-
-enum class R2pMode : u64 {
- Pr = 0,
- Cc = 1,
-};
-
-enum class IpaInterpMode : u64 {
- Pass = 0,
- Multiply = 1,
- Constant = 2,
- Sc = 3,
-};
-
-enum class IpaSampleMode : u64 {
- Default = 0,
- Centroid = 1,
- Offset = 2,
-};
-
-enum class LmemLoadCacheManagement : u64 {
- Default = 0,
- LU = 1,
- CI = 2,
- CV = 3,
-};
-
-enum class StoreCacheManagement : u64 {
- Default = 0,
- CG = 1,
- CS = 2,
- WT = 3,
-};
-
-struct IpaMode {
- IpaInterpMode interpolation_mode;
- IpaSampleMode sampling_mode;
-
- [[nodiscard]] bool operator==(const IpaMode& a) const {
- return std::tie(interpolation_mode, sampling_mode) ==
- std::tie(a.interpolation_mode, a.sampling_mode);
- }
- [[nodiscard]] bool operator!=(const IpaMode& a) const {
- return !operator==(a);
- }
- [[nodiscard]] bool operator<(const IpaMode& a) const {
- return std::tie(interpolation_mode, sampling_mode) <
- std::tie(a.interpolation_mode, a.sampling_mode);
- }
-};
-
-enum class SystemVariable : u64 {
- LaneId = 0x00,
- VirtCfg = 0x02,
- VirtId = 0x03,
- Pm0 = 0x04,
- Pm1 = 0x05,
- Pm2 = 0x06,
- Pm3 = 0x07,
- Pm4 = 0x08,
- Pm5 = 0x09,
- Pm6 = 0x0a,
- Pm7 = 0x0b,
- OrderingTicket = 0x0f,
- PrimType = 0x10,
- InvocationId = 0x11,
- Ydirection = 0x12,
- ThreadKill = 0x13,
- ShaderType = 0x14,
- DirectBeWriteAddressLow = 0x15,
- DirectBeWriteAddressHigh = 0x16,
- DirectBeWriteEnabled = 0x17,
- MachineId0 = 0x18,
- MachineId1 = 0x19,
- MachineId2 = 0x1a,
- MachineId3 = 0x1b,
- Affinity = 0x1c,
- InvocationInfo = 0x1d,
- WscaleFactorXY = 0x1e,
- WscaleFactorZ = 0x1f,
- Tid = 0x20,
- TidX = 0x21,
- TidY = 0x22,
- TidZ = 0x23,
- CtaParam = 0x24,
- CtaIdX = 0x25,
- CtaIdY = 0x26,
- CtaIdZ = 0x27,
- NtId = 0x28,
- CirQueueIncrMinusOne = 0x29,
- Nlatc = 0x2a,
- SmSpaVersion = 0x2c,
- MultiPassShaderInfo = 0x2d,
- LwinHi = 0x2e,
- SwinHi = 0x2f,
- SwinLo = 0x30,
- SwinSz = 0x31,
- SmemSz = 0x32,
- SmemBanks = 0x33,
- LwinLo = 0x34,
- LwinSz = 0x35,
- LmemLosz = 0x36,
- LmemHioff = 0x37,
- EqMask = 0x38,
- LtMask = 0x39,
- LeMask = 0x3a,
- GtMask = 0x3b,
- GeMask = 0x3c,
- RegAlloc = 0x3d,
- CtxAddr = 0x3e, // .fmask = F_SM50
- BarrierAlloc = 0x3e, // .fmask = F_SM60
- GlobalErrorStatus = 0x40,
- WarpErrorStatus = 0x42,
- WarpErrorStatusClear = 0x43,
- PmHi0 = 0x48,
- PmHi1 = 0x49,
- PmHi2 = 0x4a,
- PmHi3 = 0x4b,
- PmHi4 = 0x4c,
- PmHi5 = 0x4d,
- PmHi6 = 0x4e,
- PmHi7 = 0x4f,
- ClockLo = 0x50,
- ClockHi = 0x51,
- GlobalTimerLo = 0x52,
- GlobalTimerHi = 0x53,
- HwTaskId = 0x60,
- CircularQueueEntryIndex = 0x61,
- CircularQueueEntryAddressLow = 0x62,
- CircularQueueEntryAddressHigh = 0x63,
-};
-
-enum class PhysicalAttributeDirection : u64 {
- Input = 0,
- Output = 1,
-};
-
-enum class VoteOperation : u64 {
- All = 0, // allThreadsNV
- Any = 1, // anyThreadNV
- Eq = 2, // allThreadsEqualNV
-};
-
-enum class ImageAtomicOperationType : u64 {
- U32 = 0,
- S32 = 1,
- U64 = 2,
- F32 = 3,
- S64 = 5,
- SD32 = 6,
- SD64 = 7,
-};
-
-enum class ImageAtomicOperation : u64 {
- Add = 0,
- Min = 1,
- Max = 2,
- Inc = 3,
- Dec = 4,
- And = 5,
- Or = 6,
- Xor = 7,
- Exch = 8,
-};
-
-enum class ShuffleOperation : u64 {
- Idx = 0, // shuffleNV
- Up = 1, // shuffleUpNV
- Down = 2, // shuffleDownNV
- Bfly = 3, // shuffleXorNV
-};
-
-enum class ShfType : u64 {
- Bits32 = 0,
- U64 = 2,
- S64 = 3,
-};
-
-enum class ShfXmode : u64 {
- None = 0,
- HI = 1,
- X = 2,
- XHI = 3,
-};
-
-union Instruction {
- constexpr Instruction& operator=(const Instruction& instr) {
- value = instr.value;
- return *this;
- }
-
- constexpr Instruction(u64 value_) : value{value_} {}
- constexpr Instruction(const Instruction& instr) : value(instr.value) {}
-
- [[nodiscard]] constexpr bool Bit(u64 offset) const {
- return ((value >> offset) & 1) != 0;
- }
-
- BitField<0, 8, Register> gpr0;
- BitField<8, 8, Register> gpr8;
- union {
- BitField<16, 4, Pred> full_pred;
- BitField<16, 3, u64> pred_index;
- } pred;
- BitField<19, 1, u64> negate_pred;
- BitField<20, 8, Register> gpr20;
- BitField<20, 4, SubOp> sub_op;
- BitField<28, 8, Register> gpr28;
- BitField<39, 8, Register> gpr39;
- BitField<48, 16, u64> opcode;
-
- union {
- BitField<8, 5, ConditionCode> cc;
- BitField<13, 1, u64> trigger;
- } nop;
-
- union {
- BitField<48, 2, VoteOperation> operation;
- BitField<45, 3, u64> dest_pred;
- BitField<39, 3, u64> value;
- BitField<42, 1, u64> negate_value;
- } vote;
-
- union {
- BitField<30, 2, ShuffleOperation> operation;
- BitField<48, 3, u64> pred48;
- BitField<28, 1, u64> is_index_imm;
- BitField<29, 1, u64> is_mask_imm;
- BitField<20, 5, u64> index_imm;
- BitField<34, 13, u64> mask_imm;
- } shfl;
-
- union {
- BitField<44, 1, u64> ftz;
- BitField<39, 2, u64> tab5cb8_2;
- BitField<38, 1, u64> ndv;
- BitField<47, 1, u64> cc;
- BitField<28, 8, u64> swizzle;
- } fswzadd;
-
- union {
- BitField<8, 8, Register> gpr;
- BitField<20, 24, s64> offset;
- } gmem;
-
- union {
- BitField<20, 16, u64> imm20_16;
- BitField<20, 19, u64> imm20_19;
- BitField<20, 32, s64> imm20_32;
- BitField<45, 1, u64> negate_b;
- BitField<46, 1, u64> abs_a;
- BitField<48, 1, u64> negate_a;
- BitField<49, 1, u64> abs_b;
- BitField<50, 1, u64> saturate_d;
- BitField<56, 1, u64> negate_imm;
-
- union {
- BitField<39, 3, u64> pred;
- BitField<42, 1, u64> negate_pred;
- } fmnmx;
-
- union {
- BitField<39, 1, u64> invert_a;
- BitField<40, 1, u64> invert_b;
- BitField<41, 2, LogicOperation> operation;
- BitField<44, 2, PredicateResultMode> pred_result_mode;
- BitField<48, 3, Pred> pred48;
- } lop;
-
- union {
- BitField<53, 2, LogicOperation> operation;
- BitField<55, 1, u64> invert_a;
- BitField<56, 1, u64> invert_b;
- } lop32i;
-
- union {
- BitField<28, 8, u64> imm_lut28;
- BitField<48, 8, u64> imm_lut48;
-
- [[nodiscard]] u32 GetImmLut28() const {
- return static_cast<u32>(imm_lut28);
- }
-
- [[nodiscard]] u32 GetImmLut48() const {
- return static_cast<u32>(imm_lut48);
- }
- } lop3;
-
- [[nodiscard]] u16 GetImm20_16() const {
- return static_cast<u16>(imm20_16);
- }
-
- [[nodiscard]] u32 GetImm20_19() const {
- u32 imm{static_cast<u32>(imm20_19)};
- imm <<= 12;
- imm |= negate_imm ? 0x80000000 : 0;
- return imm;
- }
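- // Illustrative note (added for clarity, not in the original source): with negate_imm == 0
- // and imm20_19 == 0x3F800, this returns 0x3F800000, the bit pattern of 1.0f; the encoding
- // stores bits 30..12 of the float plus a separate sign flag.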
-
- [[nodiscard]] u32 GetImm20_32() const {
- return static_cast<u32>(imm20_32);
- }
-
- [[nodiscard]] s32 GetSignedImm20_20() const {
- const auto immediate = static_cast<u32>(imm20_19 | (negate_imm << 19));
- // Sign extend the 20-bit value.
- const auto mask = 1U << (20 - 1);
- return static_cast<s32>((immediate ^ mask) - mask);
- }
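- // Illustrative note (added for clarity): the (x ^ mask) - mask idiom sign-extends the
- // 20-bit value, so an immediate of 0x80000 becomes -524288 while 0x7FFFF stays 524287.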
- } alu;
-
- union {
- BitField<38, 1, u64> idx;
- BitField<51, 1, u64> saturate;
- BitField<52, 2, IpaSampleMode> sample_mode;
- BitField<54, 2, IpaInterpMode> interp_mode;
- } ipa;
-
- union {
- BitField<39, 2, u64> tab5cb8_2;
- BitField<41, 3, u64> postfactor;
- BitField<44, 2, u64> tab5c68_0;
- BitField<48, 1, u64> negate_b;
- } fmul;
-
- union {
- BitField<55, 1, u64> saturate;
- } fmul32;
-
- union {
- BitField<52, 1, u64> generates_cc;
- } op_32;
-
- union {
- BitField<48, 1, u64> is_signed;
- } shift;
-
- union {
- BitField<39, 1, u64> wrap;
- } shr;
-
- union {
- BitField<37, 2, ShfType> type;
- BitField<48, 2, ShfXmode> xmode;
- BitField<50, 1, u64> wrap;
- BitField<20, 6, u64> immediate;
- } shf;
-
- union {
- BitField<39, 5, u64> shift_amount;
- BitField<48, 1, u64> negate_b;
- BitField<49, 1, u64> negate_a;
- } alu_integer;
-
- union {
- BitField<43, 1, u64> x;
- } iadd;
-
- union {
- BitField<39, 1, u64> ftz;
- BitField<32, 1, u64> saturate;
- BitField<49, 2, HalfMerge> merge;
-
- BitField<44, 1, u64> abs_a;
- BitField<47, 2, HalfType> type_a;
-
- BitField<30, 1, u64> abs_b;
- BitField<28, 2, HalfType> type_b;
-
- BitField<35, 2, HalfType> type_c;
- } alu_half;
-
- union {
- BitField<39, 2, HalfPrecision> precision;
- BitField<39, 1, u64> ftz;
- BitField<52, 1, u64> saturate;
- BitField<49, 2, HalfMerge> merge;
-
- BitField<43, 1, u64> negate_a;
- BitField<44, 1, u64> abs_a;
- BitField<47, 2, HalfType> type_a;
- } alu_half_imm;
-
- union {
- BitField<29, 1, u64> first_negate;
- BitField<20, 9, u64> first;
-
- BitField<56, 1, u64> second_negate;
- BitField<30, 9, u64> second;
-
- [[nodiscard]] u32 PackImmediates() const {
- // Immediates are half floats stored shifted right by 6 bits; undo the shift to repack them.
- constexpr u32 imm_shift = 6;
- return static_cast<u32>((first << imm_shift) | (second << (16 + imm_shift)));
- }
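- // Illustrative note (added for clarity): first == 0x0F0 repacks to 0x3C00 (1.0 in FP16)
- // in the low half of the result; the first_negate/second_negate flags are encoded separately.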
- } half_imm;
-
- union {
- union {
- BitField<37, 2, HalfPrecision> precision;
- BitField<32, 1, u64> saturate;
-
- BitField<31, 1, u64> negate_b;
- BitField<30, 1, u64> negate_c;
- BitField<35, 2, HalfType> type_c;
- } rr;
-
- BitField<57, 2, HalfPrecision> precision;
- BitField<52, 1, u64> saturate;
-
- BitField<49, 2, HalfMerge> merge;
-
- BitField<47, 2, HalfType> type_a;
-
- BitField<56, 1, u64> negate_b;
- BitField<28, 2, HalfType> type_b;
-
- BitField<51, 1, u64> negate_c;
- BitField<53, 2, HalfType> type_reg39;
- } hfma2;
-
- union {
- BitField<40, 1, u64> invert;
- } popc;
-
- union {
- BitField<41, 1, u64> sh;
- BitField<40, 1, u64> invert;
- BitField<48, 1, u64> is_signed;
- } flo;
-
- union {
- BitField<39, 3, u64> pred;
- BitField<42, 1, u64> neg_pred;
- } sel;
-
- union {
- BitField<39, 3, u64> pred;
- BitField<42, 1, u64> negate_pred;
- BitField<43, 2, IMinMaxExchange> exchange;
- BitField<48, 1, u64> is_signed;
- } imnmx;
-
- union {
- BitField<31, 2, IAdd3Height> height_c;
- BitField<33, 2, IAdd3Height> height_b;
- BitField<35, 2, IAdd3Height> height_a;
- BitField<37, 2, IAdd3Mode> mode;
- BitField<49, 1, u64> neg_c;
- BitField<50, 1, u64> neg_b;
- BitField<51, 1, u64> neg_a;
- } iadd3;
-
- union {
- BitField<54, 1, u64> saturate;
- BitField<56, 1, u64> negate_a;
- } iadd32i;
-
- union {
- BitField<53, 1, u64> negate_b;
- BitField<54, 1, u64> abs_a;
- BitField<56, 1, u64> negate_a;
- BitField<57, 1, u64> abs_b;
- } fadd32i;
-
- union {
- BitField<40, 1, u64> brev;
- BitField<47, 1, u64> rd_cc;
- BitField<48, 1, u64> is_signed;
- } bfe;
-
- union {
- BitField<48, 3, u64> pred48;
-
- union {
- BitField<20, 20, u64> entry_a;
- BitField<39, 5, u64> entry_b;
- BitField<45, 1, u64> neg;
- BitField<46, 1, u64> uses_cc;
- } imm;
-
- union {
- BitField<20, 14, u64> cb_index;
- BitField<34, 5, u64> cb_offset;
- BitField<56, 1, u64> neg;
- BitField<57, 1, u64> uses_cc;
- } hi;
-
- union {
- BitField<20, 14, u64> cb_index;
- BitField<34, 5, u64> cb_offset;
- BitField<39, 5, u64> entry_a;
- BitField<45, 1, u64> neg;
- BitField<46, 1, u64> uses_cc;
- } rz;
-
- union {
- BitField<39, 5, u64> entry_a;
- BitField<45, 1, u64> neg;
- BitField<46, 1, u64> uses_cc;
- } r1;
-
- union {
- BitField<28, 8, u64> entry_a;
- BitField<37, 1, u64> neg;
- BitField<38, 1, u64> uses_cc;
- } r2;
-
- } lea;
-
- union {
- BitField<0, 5, FlowCondition> cond;
- } flow;
-
- union {
- BitField<47, 1, u64> cc;
- BitField<48, 1, u64> negate_b;
- BitField<49, 1, u64> negate_c;
- BitField<51, 2, u64> tab5980_1;
- BitField<53, 2, u64> tab5980_0;
- } ffma;
-
- union {
- BitField<48, 3, UniformType> type;
- BitField<44, 2, u64> unknown;
- } ld_c;
-
- union {
- BitField<48, 3, StoreType> type;
- } ldst_sl;
-
- union {
- BitField<44, 2, u64> unknown;
- } ld_l;
-
- union {
- BitField<44, 2, StoreCacheManagement> cache_management;
- } st_l;
-
- union {
- BitField<48, 3, UniformType> type;
- BitField<46, 2, u64> cache_mode;
- } ldg;
-
- union {
- BitField<48, 3, UniformType> type;
- BitField<46, 2, u64> cache_mode;
- } stg;
-
- union {
- BitField<23, 3, AtomicOp> operation;
- BitField<48, 1, u64> extended;
- BitField<20, 3, GlobalAtomicType> type;
- } red;
-
- union {
- BitField<52, 4, AtomicOp> operation;
- BitField<49, 3, GlobalAtomicType> type;
- BitField<28, 20, s64> offset;
- } atom;
-
- union {
- BitField<52, 4, AtomicOp> operation;
- BitField<28, 2, AtomicType> type;
- BitField<30, 22, s64> offset;
-
- [[nodiscard]] s32 GetImmediateOffset() const {
- return static_cast<s32>(offset << 2);
- }
- } atoms;
-
- union {
- BitField<32, 1, PhysicalAttributeDirection> direction;
- BitField<47, 3, AttributeSize> size;
- BitField<20, 11, u64> address;
- } al2p;
-
- union {
- BitField<53, 3, UniformType> type;
- BitField<52, 1, u64> extended;
- } generic;
-
- union {
- BitField<0, 3, u64> pred0;
- BitField<3, 3, u64> pred3;
- BitField<6, 1, u64> neg_b;
- BitField<7, 1, u64> abs_a;
- BitField<39, 3, u64> pred39;
- BitField<42, 1, u64> neg_pred;
- BitField<43, 1, u64> neg_a;
- BitField<44, 1, u64> abs_b;
- BitField<45, 2, PredOperation> op;
- BitField<47, 1, u64> ftz;
- BitField<48, 4, PredCondition> cond;
- } fsetp;
-
- union {
- BitField<0, 3, u64> pred0;
- BitField<3, 3, u64> pred3;
- BitField<39, 3, u64> pred39;
- BitField<42, 1, u64> neg_pred;
- BitField<45, 2, PredOperation> op;
- BitField<48, 1, u64> is_signed;
- BitField<49, 3, PredCondition> cond;
- } isetp;
-
- union {
- BitField<48, 1, u64> is_signed;
- BitField<49, 3, PredCondition> cond;
- } icmp;
-
- union {
- BitField<0, 3, u64> pred0;
- BitField<3, 3, u64> pred3;
- BitField<12, 3, u64> pred12;
- BitField<15, 1, u64> neg_pred12;
- BitField<24, 2, PredOperation> cond;
- BitField<29, 3, u64> pred29;
- BitField<32, 1, u64> neg_pred29;
- BitField<39, 3, u64> pred39;
- BitField<42, 1, u64> neg_pred39;
- BitField<45, 2, PredOperation> op;
- } psetp;
-
- union {
- BitField<43, 4, PredCondition> cond;
- BitField<45, 2, PredOperation> op;
- BitField<3, 3, u64> pred3;
- BitField<0, 3, u64> pred0;
- BitField<39, 3, u64> pred39;
- } vsetp;
-
- union {
- BitField<12, 3, u64> pred12;
- BitField<15, 1, u64> neg_pred12;
- BitField<24, 2, PredOperation> cond;
- BitField<29, 3, u64> pred29;
- BitField<32, 1, u64> neg_pred29;
- BitField<39, 3, u64> pred39;
- BitField<42, 1, u64> neg_pred39;
- BitField<44, 1, u64> bf;
- BitField<45, 2, PredOperation> op;
- } pset;
-
- union {
- BitField<0, 3, u64> pred0;
- BitField<3, 3, u64> pred3;
- BitField<8, 5, ConditionCode> cc; // flag in cc
- BitField<39, 3, u64> pred39;
- BitField<42, 1, u64> neg_pred39;
- BitField<45, 4, PredOperation> op; // op with pred39
- } csetp;
-
- union {
- BitField<6, 1, u64> ftz;
- BitField<45, 2, PredOperation> op;
- BitField<3, 3, u64> pred3;
- BitField<0, 3, u64> pred0;
- BitField<43, 1, u64> negate_a;
- BitField<44, 1, u64> abs_a;
- BitField<47, 2, HalfType> type_a;
- union {
- BitField<35, 4, PredCondition> cond;
- BitField<49, 1, u64> h_and;
- BitField<31, 1, u64> negate_b;
- BitField<30, 1, u64> abs_b;
- BitField<28, 2, HalfType> type_b;
- } reg;
- union {
- BitField<56, 1, u64> negate_b;
- BitField<54, 1, u64> abs_b;
- } cbuf;
- union {
- BitField<49, 4, PredCondition> cond;
- BitField<53, 1, u64> h_and;
- } cbuf_and_imm;
- BitField<42, 1, u64> neg_pred;
- BitField<39, 3, u64> pred39;
- } hsetp2;
-
- union {
- BitField<40, 1, R2pMode> mode;
- BitField<41, 2, u64> byte;
- BitField<20, 7, u64> immediate_mask;
- } p2r_r2p;
-
- union {
- BitField<39, 3, u64> pred39;
- BitField<42, 1, u64> neg_pred;
- BitField<43, 1, u64> neg_a;
- BitField<44, 1, u64> abs_b;
- BitField<45, 2, PredOperation> op;
- BitField<48, 4, PredCondition> cond;
- BitField<52, 1, u64> bf;
- BitField<53, 1, u64> neg_b;
- BitField<54, 1, u64> abs_a;
- BitField<55, 1, u64> ftz;
- } fset;
-
- union {
- BitField<47, 1, u64> ftz;
- BitField<48, 4, PredCondition> cond;
- } fcmp;
-
- union {
- BitField<49, 1, u64> bf;
- BitField<35, 3, PredCondition> cond;
- BitField<50, 1, u64> ftz;
- BitField<45, 2, PredOperation> op;
- BitField<43, 1, u64> negate_a;
- BitField<44, 1, u64> abs_a;
- BitField<47, 2, HalfType> type_a;
- BitField<31, 1, u64> negate_b;
- BitField<30, 1, u64> abs_b;
- BitField<28, 2, HalfType> type_b;
- BitField<42, 1, u64> neg_pred;
- BitField<39, 3, u64> pred39;
- } hset2;
-
- union {
- BitField<39, 3, u64> pred39;
- BitField<42, 1, u64> neg_pred;
- BitField<44, 1, u64> bf;
- BitField<45, 2, PredOperation> op;
- BitField<48, 1, u64> is_signed;
- BitField<49, 3, PredCondition> cond;
- } iset;
-
- union {
- BitField<45, 1, u64> negate_a;
- BitField<49, 1, u64> abs_a;
- BitField<10, 2, Register::Size> src_size;
- BitField<13, 1, u64> is_input_signed;
- BitField<8, 2, Register::Size> dst_size;
- BitField<12, 1, u64> is_output_signed;
-
- union {
- BitField<39, 2, u64> tab5cb8_2;
- } i2f;
-
- union {
- BitField<39, 2, F2iRoundingOp> rounding;
- } f2i;
-
- union {
- BitField<39, 4, u64> rounding;
- // H0, H1 extract for F16 missing
- BitField<41, 1, u64> selector; // Guessed, as some games set it. TODO: reverse engineer this value.
- [[nodiscard]] F2fRoundingOp GetRoundingMode() const {
- constexpr u64 rounding_mask = 0x0B;
- return static_cast<F2fRoundingOp>(rounding.Value() & rounding_mask);
- }
- } f2f;
-
- union {
- BitField<41, 2, u64> selector;
- } int_src;
-
- union {
- BitField<41, 1, u64> selector;
- } float_src;
- } conversion;
-
- union {
- BitField<28, 1, u64> array;
- BitField<29, 2, TextureType> texture_type;
- BitField<31, 4, u64> component_mask;
- BitField<49, 1, u64> nodep_flag;
- BitField<50, 1, u64> dc_flag;
- BitField<54, 1, u64> aoffi_flag;
- BitField<55, 3, TextureProcessMode> process_mode;
-
- [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
- return ((1ULL << component) & component_mask) != 0;
- }
-
- [[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
- return process_mode;
- }
-
- [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
- switch (mode) {
- case TextureMiscMode::DC:
- return dc_flag != 0;
- case TextureMiscMode::NODEP:
- return nodep_flag != 0;
- case TextureMiscMode::AOFFI:
- return aoffi_flag != 0;
- default:
- break;
- }
- return false;
- }
- } tex;
-
- union {
- BitField<28, 1, u64> array;
- BitField<29, 2, TextureType> texture_type;
- BitField<31, 4, u64> component_mask;
- BitField<49, 1, u64> nodep_flag;
- BitField<50, 1, u64> dc_flag;
- BitField<36, 1, u64> aoffi_flag;
- BitField<37, 3, TextureProcessMode> process_mode;
-
- [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
- return ((1ULL << component) & component_mask) != 0;
- }
-
- [[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
- return process_mode;
- }
-
- [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
- switch (mode) {
- case TextureMiscMode::DC:
- return dc_flag != 0;
- case TextureMiscMode::NODEP:
- return nodep_flag != 0;
- case TextureMiscMode::AOFFI:
- return aoffi_flag != 0;
- default:
- break;
- }
- return false;
- }
- } tex_b;
-
- union {
- BitField<22, 6, TextureQueryType> query_type;
- BitField<31, 4, u64> component_mask;
- BitField<49, 1, u64> nodep_flag;
-
- [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
- switch (mode) {
- case TextureMiscMode::NODEP:
- return nodep_flag != 0;
- default:
- break;
- }
- return false;
- }
-
- [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
- return ((1ULL << component) & component_mask) != 0;
- }
- } txq;
-
- union {
- BitField<28, 1, u64> array;
- BitField<29, 2, TextureType> texture_type;
- BitField<31, 4, u64> component_mask;
- BitField<35, 1, u64> ndv_flag;
- BitField<49, 1, u64> nodep_flag;
-
- [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
- return ((1ULL << component) & component_mask) != 0;
- }
-
- [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
- switch (mode) {
- case TextureMiscMode::NDV:
- return (ndv_flag != 0);
- case TextureMiscMode::NODEP:
- return (nodep_flag != 0);
- default:
- break;
- }
- return false;
- }
- } tmml;
-
- union {
- BitField<28, 1, u64> array;
- BitField<29, 2, TextureType> texture_type;
- BitField<35, 1, u64> ndv_flag;
- BitField<49, 1, u64> nodep_flag;
- BitField<50, 1, u64> dc_flag;
- BitField<54, 2, u64> offset_mode;
- BitField<56, 2, u64> component;
-
- [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
- switch (mode) {
- case TextureMiscMode::NDV:
- return ndv_flag != 0;
- case TextureMiscMode::NODEP:
- return nodep_flag != 0;
- case TextureMiscMode::DC:
- return dc_flag != 0;
- case TextureMiscMode::AOFFI:
- return offset_mode == 1;
- case TextureMiscMode::PTP:
- return offset_mode == 2;
- default:
- break;
- }
- return false;
- }
- } tld4;
-
- union {
- BitField<35, 1, u64> ndv_flag;
- BitField<49, 1, u64> nodep_flag;
- BitField<50, 1, u64> dc_flag;
- BitField<33, 2, u64> offset_mode;
- BitField<37, 2, u64> component;
-
- [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
- switch (mode) {
- case TextureMiscMode::NDV:
- return ndv_flag != 0;
- case TextureMiscMode::NODEP:
- return nodep_flag != 0;
- case TextureMiscMode::DC:
- return dc_flag != 0;
- case TextureMiscMode::AOFFI:
- return offset_mode == 1;
- case TextureMiscMode::PTP:
- return offset_mode == 2;
- default:
- break;
- }
- return false;
- }
- } tld4_b;
-
- union {
- BitField<49, 1, u64> nodep_flag;
- BitField<50, 1, u64> dc_flag;
- BitField<51, 1, u64> aoffi_flag;
- BitField<52, 2, u64> component;
- BitField<55, 1, u64> fp16_flag;
-
- [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
- switch (mode) {
- case TextureMiscMode::DC:
- return dc_flag != 0;
- case TextureMiscMode::NODEP:
- return nodep_flag != 0;
- case TextureMiscMode::AOFFI:
- return aoffi_flag != 0;
- default:
- break;
- }
- return false;
- }
- } tld4s;
-
- union {
- BitField<0, 8, Register> gpr0;
- BitField<28, 8, Register> gpr28;
- BitField<49, 1, u64> nodep_flag;
- BitField<50, 3, u64> component_mask_selector;
- BitField<53, 4, u64> texture_info;
- BitField<59, 1, u64> fp32_flag;
-
- [[nodiscard]] TextureType GetTextureType() const {
- // The TEXS instruction has a weird encoding for the texture type.
- if (texture_info == 0) {
- return TextureType::Texture1D;
- }
- if (texture_info >= 1 && texture_info <= 9) {
- return TextureType::Texture2D;
- }
- if (texture_info >= 10 && texture_info <= 11) {
- return TextureType::Texture3D;
- }
- if (texture_info >= 12 && texture_info <= 13) {
- return TextureType::TextureCube;
- }
-
- LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", texture_info.Value());
- UNREACHABLE();
- return TextureType::Texture1D;
- }
-
- [[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
- switch (texture_info) {
- case 0:
- case 2:
- case 6:
- case 8:
- case 9:
- case 11:
- return TextureProcessMode::LZ;
- case 3:
- case 5:
- case 13:
- return TextureProcessMode::LL;
- default:
- break;
- }
- return TextureProcessMode::None;
- }
-
- [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
- switch (mode) {
- case TextureMiscMode::DC:
- return (texture_info >= 4 && texture_info <= 6) || texture_info == 9;
- case TextureMiscMode::NODEP:
- return nodep_flag != 0;
- default:
- break;
- }
- return false;
- }
-
- [[nodiscard]] bool IsArrayTexture() const {
- // TEXS only supports Texture2D arrays.
- return texture_info >= 7 && texture_info <= 9;
- }
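- // Illustrative note (added for clarity): texture_info == 9 decodes to an array
- // Texture2D fetch with TextureProcessMode::LZ and depth compare (DC) enabled.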
-
- [[nodiscard]] bool HasTwoDestinations() const {
- return gpr28.Value() != Register::ZeroIndex;
- }
-
- [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
- static constexpr std::array<std::array<u32, 8>, 4> mask_lut{{
- {},
- {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc},
- {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc},
- {0x7, 0xb, 0xd, 0xe, 0xf},
- }};
-
- std::size_t index{gpr0.Value() != Register::ZeroIndex ? 1U : 0U};
- index |= gpr28.Value() != Register::ZeroIndex ? 2 : 0;
-
- u32 mask = mask_lut[index][component_mask_selector];
- // A mask of 0 means this instruction uses an unimplemented mask.
- ASSERT(mask != 0);
- return ((1ull << component) & mask) != 0;
- }
- } texs;
-
- union {
- BitField<28, 1, u64> is_array;
- BitField<29, 2, TextureType> texture_type;
- BitField<35, 1, u64> aoffi;
- BitField<49, 1, u64> nodep_flag;
- BitField<50, 1, u64> ms; // Multisample?
- BitField<54, 1, u64> cl;
- BitField<55, 1, u64> process_mode;
-
- [[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
- return process_mode == 0 ? TextureProcessMode::LZ : TextureProcessMode::LL;
- }
- } tld;
-
- union {
- BitField<49, 1, u64> nodep_flag;
- BitField<53, 4, u64> texture_info;
- BitField<59, 1, u64> fp32_flag;
-
- [[nodiscard]] TextureType GetTextureType() const {
- // The TLDS instruction has a weird encoding for the texture type.
- if (texture_info <= 1) {
- return TextureType::Texture1D;
- }
- if (texture_info == 2 || texture_info == 8 || texture_info == 12 ||
- (texture_info >= 4 && texture_info <= 6)) {
- return TextureType::Texture2D;
- }
- if (texture_info == 7) {
- return TextureType::Texture3D;
- }
-
- LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", texture_info.Value());
- UNREACHABLE();
- return TextureType::Texture1D;
- }
-
- [[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
- if (texture_info == 1 || texture_info == 5 || texture_info == 12) {
- return TextureProcessMode::LL;
- }
- return TextureProcessMode::LZ;
- }
-
- [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
- switch (mode) {
- case TextureMiscMode::AOFFI:
- return texture_info == 12 || texture_info == 4;
- case TextureMiscMode::MZ:
- return texture_info == 5;
- case TextureMiscMode::NODEP:
- return nodep_flag != 0;
- default:
- break;
- }
- return false;
- }
-
- [[nodiscard]] bool IsArrayTexture() const {
- // TLDS only supports Texture2D arrays.
- return texture_info == 8;
- }
- } tlds;
-
- union {
- BitField<28, 1, u64> is_array;
- BitField<29, 2, TextureType> texture_type;
- BitField<35, 1, u64> aoffi_flag;
- BitField<49, 1, u64> nodep_flag;
-
- [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
- switch (mode) {
- case TextureMiscMode::AOFFI:
- return aoffi_flag != 0;
- case TextureMiscMode::NODEP:
- return nodep_flag != 0;
- default:
- break;
- }
- return false;
- }
-
- } txd;
-
- union {
- BitField<24, 2, StoreCacheManagement> cache_management;
- BitField<33, 3, ImageType> image_type;
- BitField<49, 2, OutOfBoundsStore> out_of_bounds_store;
- BitField<51, 1, u64> is_immediate;
- BitField<52, 1, SurfaceDataMode> mode;
-
- BitField<20, 3, StoreType> store_data_layout;
- BitField<20, 4, u64> component_mask_selector;
-
- [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
- ASSERT(mode == SurfaceDataMode::P);
- constexpr u8 R = 0b0001;
- constexpr u8 G = 0b0010;
- constexpr u8 B = 0b0100;
- constexpr u8 A = 0b1000;
- constexpr std::array<u8, 16> mask = {
- 0, (R), (G), (R | G), (B), (R | B),
- (G | B), (R | G | B), (A), (R | A), (G | A), (R | G | A),
- (B | A), (R | B | A), (G | B | A), (R | G | B | A)};
- return std::bitset<4>{mask.at(component_mask_selector)}.test(component);
- }
-
- [[nodiscard]] StoreType GetStoreDataLayout() const {
- ASSERT(mode == SurfaceDataMode::D_BA);
- return store_data_layout;
- }
- } suldst;
-
- union {
- BitField<28, 1, u64> is_ba;
- BitField<51, 3, ImageAtomicOperationType> operation_type;
- BitField<33, 3, ImageType> image_type;
- BitField<29, 4, ImageAtomicOperation> operation;
- BitField<49, 2, OutOfBoundsStore> out_of_bounds_store;
- } suatom_d;
-
- union {
- BitField<20, 24, u64> target;
- BitField<5, 1, u64> constant_buffer;
-
- [[nodiscard]] s32 GetBranchTarget() const {
- // Sign extend the branch target offset
- const auto mask = 1U << (24 - 1);
- const auto target_value = static_cast<u32>(target);
- constexpr auto instruction_size = static_cast<s32>(sizeof(Instruction));
-
- // The branch offset is relative to the next instruction and is stored in bytes, so
- // divide it by the size of an instruction and add 1 to it.
- return static_cast<s32>((target_value ^ mask) - mask) / instruction_size + 1;
- }
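- // Illustrative note (added for clarity): with sizeof(Instruction) == 8, a raw target of
- // 0xFFFFF0 sign-extends to -16 bytes, giving -16 / 8 + 1 = -1 instructions.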
- } bra;
-
- union {
- BitField<20, 24, u64> target;
- BitField<5, 1, u64> constant_buffer;
-
- [[nodiscard]] s32 GetBranchExtend() const {
- // Sign extend the branch target offset
- const auto mask = 1U << (24 - 1);
- const auto target_value = static_cast<u32>(target);
- constexpr auto instruction_size = static_cast<s32>(sizeof(Instruction));
-
- // The branch offset is relative to the next instruction and is stored in bytes, so
- // divide it by the size of an instruction and add 1 to it.
- return static_cast<s32>((target_value ^ mask) - mask) / instruction_size + 1;
- }
- } brx;
-
- union {
- BitField<39, 1, u64> emit; // EmitVertex
- BitField<40, 1, u64> cut; // EndPrimitive
- } out;
-
- union {
- BitField<31, 1, u64> skew;
- BitField<32, 1, u64> o;
- BitField<33, 2, IsberdMode> mode;
- BitField<47, 2, IsberdShift> shift;
- } isberd;
-
- union {
- BitField<8, 2, MembarType> type;
- BitField<0, 2, MembarUnknown> unknown;
- } membar;
-
- union {
- BitField<48, 1, u64> signed_a;
- BitField<38, 1, u64> is_byte_chunk_a;
- BitField<36, 2, VideoType> type_a;
- BitField<36, 2, u64> byte_height_a;
-
- BitField<49, 1, u64> signed_b;
- BitField<50, 1, u64> use_register_b;
- BitField<30, 1, u64> is_byte_chunk_b;
- BitField<28, 2, VideoType> type_b;
- BitField<28, 2, u64> byte_height_b;
- } video;
-
- union {
- BitField<51, 2, VmadShr> shr;
- BitField<55, 1, u64> saturate; // Saturates the result (a * b + c)
- BitField<47, 1, u64> cc;
- } vmad;
-
- union {
- BitField<54, 1, u64> is_dest_signed;
- BitField<48, 1, u64> is_src_a_signed;
- BitField<49, 1, u64> is_src_b_signed;
- BitField<37, 2, u64> src_format_a;
- BitField<29, 2, u64> src_format_b;
- BitField<56, 1, u64> mx;
- BitField<55, 1, u64> sat;
- BitField<36, 2, u64> selector_a;
- BitField<28, 2, u64> selector_b;
- BitField<50, 1, u64> is_op_b_register;
- BitField<51, 3, VmnmxOperation> operation;
-
- [[nodiscard]] VmnmxType SourceFormatA() const {
- switch (src_format_a) {
- case 0b11:
- return VmnmxType::Bits32;
- case 0b10:
- return VmnmxType::Bits16;
- default:
- return VmnmxType::Bits8;
- }
- }
-
- [[nodiscard]] VmnmxType SourceFormatB() const {
- switch (src_format_b) {
- case 0b11:
- return VmnmxType::Bits32;
- case 0b10:
- return VmnmxType::Bits16;
- default:
- return VmnmxType::Bits8;
- }
- }
- } vmnmx;
-
- union {
- BitField<20, 16, u64> imm20_16;
- BitField<35, 1, u64> high_b_rr; // used on RR
- BitField<36, 1, u64> product_shift_left;
- BitField<37, 1, u64> merge_37;
- BitField<48, 1, u64> sign_a;
- BitField<49, 1, u64> sign_b;
- BitField<50, 2, XmadMode> mode_cbf; // used by CR, RC
- BitField<50, 3, XmadMode> mode;
- BitField<52, 1, u64> high_b;
- BitField<53, 1, u64> high_a;
- BitField<55, 1, u64> product_shift_left_second; // used on CR
- BitField<56, 1, u64> merge_56;
- } xmad;
-
- union {
- BitField<20, 14, u64> shifted_offset;
- BitField<34, 5, u64> index;
-
- [[nodiscard]] u64 GetOffset() const {
- return shifted_offset * 4;
- }
- } cbuf34;
-
- union {
- BitField<20, 16, s64> offset;
- BitField<36, 5, u64> index;
-
- [[nodiscard]] s64 GetOffset() const {
- return offset;
- }
- } cbuf36;
-
- // Unsure about the size of this one.
- // It's always used with a gpr0, so any size should be fine.
- BitField<20, 8, SystemVariable> sys20;
-
- BitField<47, 1, u64> generates_cc;
- BitField<61, 1, u64> is_b_imm;
- BitField<60, 1, u64> is_b_gpr;
- BitField<59, 1, u64> is_c_gpr;
- BitField<20, 24, s64> smem_imm;
- BitField<0, 5, ConditionCode> flow_condition_code;
-
- Attribute attribute;
- Sampler sampler;
- Image image;
-
- u64 value;
-};
-static_assert(sizeof(Instruction) == 0x8, "Incorrect structure size");
-static_assert(std::is_standard_layout_v<Instruction>, "Instruction is not standard layout");
-
-class OpCode {
-public:
- enum class Id {
- KIL,
- SSY,
- SYNC,
- BRK,
- DEPBAR,
- VOTE,
- VOTE_VTG,
- SHFL,
- FSWZADD,
- BFE_C,
- BFE_R,
- BFE_IMM,
- BFI_RC,
- BFI_IMM_R,
- BRA,
- BRX,
- PBK,
- LD_A,
- LD_L,
- LD_S,
- LD_C,
- LD, // Load from generic memory
- LDG, // Load from global memory
- ST_A,
- ST_L,
- ST_S,
- ST, // Store in generic memory
- STG, // Store in global memory
- RED, // Reduction operation
- ATOM, // Atomic operation on global memory
- ATOMS, // Atomic operation on shared memory
- AL2P, // Transforms attribute memory into physical memory
- TEX,
- TEX_B, // Texture Load Bindless
- TXQ, // Texture Query
- TXQ_B, // Texture Query Bindless
- TEXS, // Texture Fetch with scalar/non-vec4 source/destinations
- TLD, // Texture Load
- TLDS, // Texture Load with scalar/non-vec4 source/destinations
- TLD4, // Texture Gather 4
- TLD4_B, // Texture Gather 4 Bindless
- TLD4S, // Texture Load 4 with scalar/non-vec4 source/destinations
- TMML_B, // Texture Mip Map Level
- TMML, // Texture Mip Map Level
- TXD, // Texture Gradient/Load with Derivatives
- TXD_B, // Texture Gradient/Load with Derivatives Bindless
- SUST, // Surface Store
- SULD, // Surface Load
- SUATOM, // Surface Atomic Operation
- EXIT,
- NOP,
- IPA,
- OUT_R, // Emit vertex/primitive
- ISBERD,
- BAR,
- MEMBAR,
- VMAD,
- VSETP,
- VMNMX,
- FFMA_IMM, // Fused Multiply and Add
- FFMA_CR,
- FFMA_RC,
- FFMA_RR,
- FADD_C,
- FADD_R,
- FADD_IMM,
- FADD32I,
- FMUL_C,
- FMUL_R,
- FMUL_IMM,
- FMUL32_IMM,
- IADD_C,
- IADD_R,
- IADD_IMM,
- IADD3_C, // Add 3 Integers
- IADD3_R,
- IADD3_IMM,
- IADD32I,
- ISCADD_C, // Scale and Add
- ISCADD_R,
- ISCADD_IMM,
- FLO_R,
- FLO_C,
- FLO_IMM,
- LEA_R1,
- LEA_R2,
- LEA_RZ,
- LEA_IMM,
- LEA_HI,
- HADD2_C,
- HADD2_R,
- HADD2_IMM,
- HMUL2_C,
- HMUL2_R,
- HMUL2_IMM,
- HFMA2_CR,
- HFMA2_RC,
- HFMA2_RR,
- HFMA2_IMM_R,
- HSETP2_C,
- HSETP2_R,
- HSETP2_IMM,
- HSET2_C,
- HSET2_R,
- HSET2_IMM,
- POPC_C,
- POPC_R,
- POPC_IMM,
- SEL_C,
- SEL_R,
- SEL_IMM,
- ICMP_RC,
- ICMP_R,
- ICMP_CR,
- ICMP_IMM,
- FCMP_RR,
- FCMP_RC,
- FCMP_IMMR,
- MUFU, // Multi-Function Operator
- RRO_C, // Range Reduction Operator
- RRO_R,
- RRO_IMM,
- F2F_C,
- F2F_R,
- F2F_IMM,
- F2I_C,
- F2I_R,
- F2I_IMM,
- I2F_C,
- I2F_R,
- I2F_IMM,
- I2I_C,
- I2I_R,
- I2I_IMM,
- LOP_C,
- LOP_R,
- LOP_IMM,
- LOP32I,
- LOP3_C,
- LOP3_R,
- LOP3_IMM,
- MOV_C,
- MOV_R,
- MOV_IMM,
- S2R,
- MOV32_IMM,
- SHL_C,
- SHL_R,
- SHL_IMM,
- SHR_C,
- SHR_R,
- SHR_IMM,
- SHF_RIGHT_R,
- SHF_RIGHT_IMM,
- SHF_LEFT_R,
- SHF_LEFT_IMM,
- FMNMX_C,
- FMNMX_R,
- FMNMX_IMM,
- IMNMX_C,
- IMNMX_R,
- IMNMX_IMM,
- FSETP_C, // Set Predicate
- FSETP_R,
- FSETP_IMM,
- FSET_C,
- FSET_R,
- FSET_IMM,
- ISETP_C,
- ISETP_IMM,
- ISETP_R,
- ISET_R,
- ISET_C,
- ISET_IMM,
- PSETP,
- PSET,
- CSETP,
- R2P_IMM,
- P2R_IMM,
- XMAD_IMM,
- XMAD_CR,
- XMAD_RC,
- XMAD_RR,
- };
-
- enum class Type {
- Trivial,
- Arithmetic,
- ArithmeticImmediate,
- ArithmeticInteger,
- ArithmeticIntegerImmediate,
- ArithmeticHalf,
- ArithmeticHalfImmediate,
- Bfe,
- Bfi,
- Shift,
- Ffma,
- Hfma2,
- Flow,
- Synch,
- Warp,
- Memory,
- Texture,
- Image,
- FloatSet,
- FloatSetPredicate,
- IntegerSet,
- IntegerSetPredicate,
- HalfSet,
- HalfSetPredicate,
- PredicateSetPredicate,
- PredicateSetRegister,
- RegisterSetPredicate,
- Conversion,
- Video,
- Xmad,
- Unknown,
- };
-
- /// Returns whether an opcode has an execution predicate field or not (i.e., whether it can be
- /// conditionally executed).
- [[nodiscard]] static bool IsPredicatedInstruction(Id opcode) {
- // TODO(Subv): Add the rest of unpredicated instructions.
- return opcode != Id::SSY && opcode != Id::PBK;
- }
-
- class Matcher {
- public:
- constexpr Matcher(const char* const name_, u16 mask_, u16 expected_, Id id_, Type type_)
- : name{name_}, mask{mask_}, expected{expected_}, id{id_}, type{type_} {}
-
- [[nodiscard]] constexpr const char* GetName() const {
- return name;
- }
-
- [[nodiscard]] constexpr u16 GetMask() const {
- return mask;
- }
-
- [[nodiscard]] constexpr Id GetId() const {
- return id;
- }
-
- [[nodiscard]] constexpr Type GetType() const {
- return type;
- }
-
- /**
- * Tests to see if the given instruction is the instruction this matcher represents.
- * @param instruction The instruction to test
- * @returns true if the given instruction matches.
- */
- [[nodiscard]] constexpr bool Matches(u16 instruction) const {
- return (instruction & mask) == expected;
- }
-
- private:
- const char* name;
- u16 mask;
- u16 expected;
- Id id;
- Type type;
- };
-
- using DecodeResult = std::optional<std::reference_wrapper<const Matcher>>;
- [[nodiscard]] static DecodeResult Decode(Instruction instr) {
- static const auto table{GetDecodeTable()};
-
- const auto matches_instruction = [instr](const auto& matcher) {
- return matcher.Matches(static_cast<u16>(instr.opcode));
- };
-
- auto iter = std::find_if(table.begin(), table.end(), matches_instruction);
- return iter != table.end() ? std::optional<std::reference_wrapper<const Matcher>>(*iter)
- : std::nullopt;
- }
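- // Illustrative usage sketch (hypothetical caller, added for clarity):
- // const Instruction instr{raw_code};
- // if (const auto matcher = OpCode::Decode(instr)) {
- // LOG_DEBUG(HW_GPU, "Decoded {}", matcher->get().GetName());
- // }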
-
-private:
- struct Detail {
- private:
- static constexpr std::size_t opcode_bitsize = 16;
-
- /**
- * Generates the mask and the expected value after masking from a given bitstring.
- * A '0' in a bitstring indicates that a zero must be present at that bit position.
- * A '1' in a bitstring indicates that a one must be present at that bit position.
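- * For example (worked example added for clarity), the FADD_R pattern "0101110001011---"
- * used in the decode table yields a mask of 0xFFF8 and an expected value of 0x5C58;
- * '-' positions are treated as don't-care bits.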
- */
- [[nodiscard]] static constexpr auto GetMaskAndExpect(const char* const bitstring) {
- u16 mask = 0, expect = 0;
- for (std::size_t i = 0; i < opcode_bitsize; i++) {
- const std::size_t bit_position = opcode_bitsize - i - 1;
- switch (bitstring[i]) {
- case '0':
- mask |= static_cast<u16>(1U << bit_position);
- break;
- case '1':
- expect |= static_cast<u16>(1U << bit_position);
- mask |= static_cast<u16>(1U << bit_position);
- break;
- default:
- // Ignore
- break;
- }
- }
- return std::make_pair(mask, expect);
- }
-
- public:
- /// Creates a matcher that can match and parse instructions based on bitstring.
- [[nodiscard]] static constexpr auto GetMatcher(const char* const bitstring, Id op,
- Type type, const char* const name) {
- const auto [mask, expected] = GetMaskAndExpect(bitstring);
- return Matcher(name, mask, expected, op, type);
- }
- };
-
- [[nodiscard]] static std::vector<Matcher> GetDecodeTable() {
- std::vector<Matcher> table = {
-#define INST(bitstring, op, type, name) Detail::GetMatcher(bitstring, op, type, name)
- INST("111000110011----", Id::KIL, Type::Flow, "KIL"),
- INST("111000101001----", Id::SSY, Type::Flow, "SSY"),
- INST("111000101010----", Id::PBK, Type::Flow, "PBK"),
- INST("111000100100----", Id::BRA, Type::Flow, "BRA"),
- INST("111000100101----", Id::BRX, Type::Flow, "BRX"),
- INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"),
- INST("111000110100----", Id::BRK, Type::Flow, "BRK"),
- INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"),
- INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),
- INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"),
- INST("0101000011100---", Id::VOTE_VTG, Type::Warp, "VOTE_VTG"),
- INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"),
- INST("0101000011111---", Id::FSWZADD, Type::Warp, "FSWZADD"),
- INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
- INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"),
- INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"),
- INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"),
- INST("100-------------", Id::LD, Type::Memory, "LD"),
- INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
- INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"),
- INST("1110111101011---", Id::ST_S, Type::Memory, "ST_S"),
- INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
- INST("101-------------", Id::ST, Type::Memory, "ST"),
- INST("1110111011011---", Id::STG, Type::Memory, "STG"),
- INST("1110101111111---", Id::RED, Type::Memory, "RED"),
- INST("11101101--------", Id::ATOM, Type::Memory, "ATOM"),
- INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"),
- INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
- INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
- INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"),
- INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"),
- INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"),
- INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"),
- INST("11011100--11----", Id::TLD, Type::Texture, "TLD"),
- INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"),
- INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
- INST("1101111011111---", Id::TLD4_B, Type::Texture, "TLD4_B"),
- INST("11011111-0------", Id::TLD4S, Type::Texture, "TLD4S"),
- INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
- INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
- INST("11011110011110--", Id::TXD_B, Type::Texture, "TXD_B"),
- INST("11011110001110--", Id::TXD, Type::Texture, "TXD"),
- INST("11101011001-----", Id::SUST, Type::Image, "SUST"),
- INST("11101011000-----", Id::SULD, Type::Image, "SULD"),
- INST("1110101000------", Id::SUATOM, Type::Image, "SUATOM_D"),
- INST("0101000010110---", Id::NOP, Type::Trivial, "NOP"),
- INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
- INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
- INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"),
- INST("1111000010101---", Id::BAR, Type::Trivial, "BAR"),
- INST("1110111110011---", Id::MEMBAR, Type::Trivial, "MEMBAR"),
- INST("01011111--------", Id::VMAD, Type::Video, "VMAD"),
- INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"),
- INST("0011101---------", Id::VMNMX, Type::Video, "VMNMX"),
- INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"),
- INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"),
- INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"),
- INST("010110011-------", Id::FFMA_RR, Type::Ffma, "FFMA_RR"),
- INST("0100110001011---", Id::FADD_C, Type::Arithmetic, "FADD_C"),
- INST("0101110001011---", Id::FADD_R, Type::Arithmetic, "FADD_R"),
- INST("0011100-01011---", Id::FADD_IMM, Type::Arithmetic, "FADD_IMM"),
- INST("000010----------", Id::FADD32I, Type::ArithmeticImmediate, "FADD32I"),
- INST("0100110001101---", Id::FMUL_C, Type::Arithmetic, "FMUL_C"),
- INST("0101110001101---", Id::FMUL_R, Type::Arithmetic, "FMUL_R"),
- INST("0011100-01101---", Id::FMUL_IMM, Type::Arithmetic, "FMUL_IMM"),
- INST("00011110--------", Id::FMUL32_IMM, Type::ArithmeticImmediate, "FMUL32_IMM"),
- INST("0100110000010---", Id::IADD_C, Type::ArithmeticInteger, "IADD_C"),
- INST("0101110000010---", Id::IADD_R, Type::ArithmeticInteger, "IADD_R"),
- INST("0011100-00010---", Id::IADD_IMM, Type::ArithmeticInteger, "IADD_IMM"),
- INST("010011001100----", Id::IADD3_C, Type::ArithmeticInteger, "IADD3_C"),
- INST("010111001100----", Id::IADD3_R, Type::ArithmeticInteger, "IADD3_R"),
- INST("0011100-1100----", Id::IADD3_IMM, Type::ArithmeticInteger, "IADD3_IMM"),
- INST("0001110---------", Id::IADD32I, Type::ArithmeticIntegerImmediate, "IADD32I"),
- INST("0100110000011---", Id::ISCADD_C, Type::ArithmeticInteger, "ISCADD_C"),
- INST("0101110000011---", Id::ISCADD_R, Type::ArithmeticInteger, "ISCADD_R"),
- INST("0011100-00011---", Id::ISCADD_IMM, Type::ArithmeticInteger, "ISCADD_IMM"),
- INST("0100110000001---", Id::POPC_C, Type::ArithmeticInteger, "POPC_C"),
- INST("0101110000001---", Id::POPC_R, Type::ArithmeticInteger, "POPC_R"),
- INST("0011100-00001---", Id::POPC_IMM, Type::ArithmeticInteger, "POPC_IMM"),
- INST("0100110010100---", Id::SEL_C, Type::ArithmeticInteger, "SEL_C"),
- INST("0101110010100---", Id::SEL_R, Type::ArithmeticInteger, "SEL_R"),
- INST("0011100-10100---", Id::SEL_IMM, Type::ArithmeticInteger, "SEL_IMM"),
- INST("010100110100----", Id::ICMP_RC, Type::ArithmeticInteger, "ICMP_RC"),
- INST("010110110100----", Id::ICMP_R, Type::ArithmeticInteger, "ICMP_R"),
- INST("010010110100----", Id::ICMP_CR, Type::ArithmeticInteger, "ICMP_CR"),
- INST("0011011-0100----", Id::ICMP_IMM, Type::ArithmeticInteger, "ICMP_IMM"),
- INST("0101110000110---", Id::FLO_R, Type::ArithmeticInteger, "FLO_R"),
- INST("0100110000110---", Id::FLO_C, Type::ArithmeticInteger, "FLO_C"),
- INST("0011100-00110---", Id::FLO_IMM, Type::ArithmeticInteger, "FLO_IMM"),
- INST("0101101111011---", Id::LEA_R2, Type::ArithmeticInteger, "LEA_R2"),
- INST("0101101111010---", Id::LEA_R1, Type::ArithmeticInteger, "LEA_R1"),
- INST("001101101101----", Id::LEA_IMM, Type::ArithmeticInteger, "LEA_IMM"),
- INST("010010111101----", Id::LEA_RZ, Type::ArithmeticInteger, "LEA_RZ"),
- INST("00011000--------", Id::LEA_HI, Type::ArithmeticInteger, "LEA_HI"),
- INST("0111101-1-------", Id::HADD2_C, Type::ArithmeticHalf, "HADD2_C"),
- INST("0101110100010---", Id::HADD2_R, Type::ArithmeticHalf, "HADD2_R"),
- INST("0111101-0-------", Id::HADD2_IMM, Type::ArithmeticHalfImmediate, "HADD2_IMM"),
- INST("0111100-1-------", Id::HMUL2_C, Type::ArithmeticHalf, "HMUL2_C"),
- INST("0101110100001---", Id::HMUL2_R, Type::ArithmeticHalf, "HMUL2_R"),
- INST("0111100-0-------", Id::HMUL2_IMM, Type::ArithmeticHalfImmediate, "HMUL2_IMM"),
- INST("01110---1-------", Id::HFMA2_CR, Type::Hfma2, "HFMA2_CR"),
- INST("01100---1-------", Id::HFMA2_RC, Type::Hfma2, "HFMA2_RC"),
- INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"),
- INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"),
- INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"),
- INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"),
- INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"),
- INST("0111110-1-------", Id::HSET2_C, Type::HalfSet, "HSET2_C"),
- INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"),
- INST("0111110-0-------", Id::HSET2_IMM, Type::HalfSet, "HSET2_IMM"),
- INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"),
- INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"),
- INST("0011011-1010----", Id::FCMP_IMMR, Type::Arithmetic, "FCMP_IMMR"),
- INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
- INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
- INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"),
- INST("0011100-10010---", Id::RRO_IMM, Type::Arithmetic, "RRO_IMM"),
- INST("0100110010101---", Id::F2F_C, Type::Conversion, "F2F_C"),
- INST("0101110010101---", Id::F2F_R, Type::Conversion, "F2F_R"),
- INST("0011100-10101---", Id::F2F_IMM, Type::Conversion, "F2F_IMM"),
- INST("0100110010110---", Id::F2I_C, Type::Conversion, "F2I_C"),
- INST("0101110010110---", Id::F2I_R, Type::Conversion, "F2I_R"),
- INST("0011100-10110---", Id::F2I_IMM, Type::Conversion, "F2I_IMM"),
- INST("0100110010011---", Id::MOV_C, Type::Arithmetic, "MOV_C"),
- INST("0101110010011---", Id::MOV_R, Type::Arithmetic, "MOV_R"),
- INST("0011100-10011---", Id::MOV_IMM, Type::Arithmetic, "MOV_IMM"),
- INST("1111000011001---", Id::S2R, Type::Trivial, "S2R"),
- INST("000000010000----", Id::MOV32_IMM, Type::ArithmeticImmediate, "MOV32_IMM"),
- INST("0100110001100---", Id::FMNMX_C, Type::Arithmetic, "FMNMX_C"),
- INST("0101110001100---", Id::FMNMX_R, Type::Arithmetic, "FMNMX_R"),
- INST("0011100-01100---", Id::FMNMX_IMM, Type::Arithmetic, "FMNMX_IMM"),
- INST("0100110000100---", Id::IMNMX_C, Type::ArithmeticInteger, "IMNMX_C"),
- INST("0101110000100---", Id::IMNMX_R, Type::ArithmeticInteger, "IMNMX_R"),
- INST("0011100-00100---", Id::IMNMX_IMM, Type::ArithmeticInteger, "IMNMX_IMM"),
- INST("0100110000000---", Id::BFE_C, Type::Bfe, "BFE_C"),
- INST("0101110000000---", Id::BFE_R, Type::Bfe, "BFE_R"),
- INST("0011100-00000---", Id::BFE_IMM, Type::Bfe, "BFE_IMM"),
- INST("0101001111110---", Id::BFI_RC, Type::Bfi, "BFI_RC"),
- INST("0011011-11110---", Id::BFI_IMM_R, Type::Bfi, "BFI_IMM_R"),
- INST("0100110001000---", Id::LOP_C, Type::ArithmeticInteger, "LOP_C"),
- INST("0101110001000---", Id::LOP_R, Type::ArithmeticInteger, "LOP_R"),
- INST("0011100-01000---", Id::LOP_IMM, Type::ArithmeticInteger, "LOP_IMM"),
- INST("000001----------", Id::LOP32I, Type::ArithmeticIntegerImmediate, "LOP32I"),
- INST("0000001---------", Id::LOP3_C, Type::ArithmeticInteger, "LOP3_C"),
- INST("0101101111100---", Id::LOP3_R, Type::ArithmeticInteger, "LOP3_R"),
- INST("0011110---------", Id::LOP3_IMM, Type::ArithmeticInteger, "LOP3_IMM"),
- INST("0100110001001---", Id::SHL_C, Type::Shift, "SHL_C"),
- INST("0101110001001---", Id::SHL_R, Type::Shift, "SHL_R"),
- INST("0011100-01001---", Id::SHL_IMM, Type::Shift, "SHL_IMM"),
- INST("0100110000101---", Id::SHR_C, Type::Shift, "SHR_C"),
- INST("0101110000101---", Id::SHR_R, Type::Shift, "SHR_R"),
- INST("0011100-00101---", Id::SHR_IMM, Type::Shift, "SHR_IMM"),
- INST("0101110011111---", Id::SHF_RIGHT_R, Type::Shift, "SHF_RIGHT_R"),
- INST("0011100-11111---", Id::SHF_RIGHT_IMM, Type::Shift, "SHF_RIGHT_IMM"),
- INST("0101101111111---", Id::SHF_LEFT_R, Type::Shift, "SHF_LEFT_R"),
- INST("0011011-11111---", Id::SHF_LEFT_IMM, Type::Shift, "SHF_LEFT_IMM"),
- INST("0100110011100---", Id::I2I_C, Type::Conversion, "I2I_C"),
- INST("0101110011100---", Id::I2I_R, Type::Conversion, "I2I_R"),
- INST("0011100-11100---", Id::I2I_IMM, Type::Conversion, "I2I_IMM"),
- INST("0100110010111---", Id::I2F_C, Type::Conversion, "I2F_C"),
- INST("0101110010111---", Id::I2F_R, Type::Conversion, "I2F_R"),
- INST("0011100-10111---", Id::I2F_IMM, Type::Conversion, "I2F_IMM"),
- INST("01011000--------", Id::FSET_R, Type::FloatSet, "FSET_R"),
- INST("0100100---------", Id::FSET_C, Type::FloatSet, "FSET_C"),
- INST("0011000---------", Id::FSET_IMM, Type::FloatSet, "FSET_IMM"),
- INST("010010111011----", Id::FSETP_C, Type::FloatSetPredicate, "FSETP_C"),
- INST("010110111011----", Id::FSETP_R, Type::FloatSetPredicate, "FSETP_R"),
- INST("0011011-1011----", Id::FSETP_IMM, Type::FloatSetPredicate, "FSETP_IMM"),
- INST("010010110110----", Id::ISETP_C, Type::IntegerSetPredicate, "ISETP_C"),
- INST("010110110110----", Id::ISETP_R, Type::IntegerSetPredicate, "ISETP_R"),
- INST("0011011-0110----", Id::ISETP_IMM, Type::IntegerSetPredicate, "ISETP_IMM"),
- INST("010110110101----", Id::ISET_R, Type::IntegerSet, "ISET_R"),
- INST("010010110101----", Id::ISET_C, Type::IntegerSet, "ISET_C"),
- INST("0011011-0101----", Id::ISET_IMM, Type::IntegerSet, "ISET_IMM"),
- INST("0101000010001---", Id::PSET, Type::PredicateSetRegister, "PSET"),
- INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"),
- INST("010100001010----", Id::CSETP, Type::PredicateSetPredicate, "CSETP"),
- INST("0011100-11110---", Id::R2P_IMM, Type::RegisterSetPredicate, "R2P_IMM"),
- INST("0011100-11101---", Id::P2R_IMM, Type::RegisterSetPredicate, "P2R_IMM"),
- INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"),
- INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"),
- INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"),
- INST("0101101100------", Id::XMAD_RR, Type::Xmad, "XMAD_RR"),
- };
-#undef INST
- std::stable_sort(table.begin(), table.end(), [](const auto& a, const auto& b) {
- // If a matcher has more bits in its mask it is more specific, so it
- // should come first.
- return std::bitset<16>(a.GetMask()).count() > std::bitset<16>(b.GetMask()).count();
- });
-
- return table;
- }
-};
-
-} // namespace Tegra::Shader
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h
deleted file mode 100644
index e0d7b89c5..000000000
--- a/src/video_core/engines/shader_header.h
+++ /dev/null
@@ -1,158 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <array>
-#include <optional>
-
-#include "common/bit_field.h"
-#include "common/common_funcs.h"
-#include "common/common_types.h"
-
-namespace Tegra::Shader {
-
-enum class OutputTopology : u32 {
- PointList = 1,
- LineStrip = 6,
- TriangleStrip = 7,
-};
-
-enum class PixelImap : u8 {
- Unused = 0,
- Constant = 1,
- Perspective = 2,
- ScreenLinear = 3,
-};
-
-// Documentation in:
-// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html
-struct Header {
- union {
- BitField<0, 5, u32> sph_type;
- BitField<5, 5, u32> version;
- BitField<10, 4, u32> shader_type;
- BitField<14, 1, u32> mrt_enable;
- BitField<15, 1, u32> kills_pixels;
- BitField<16, 1, u32> does_global_store;
- BitField<17, 4, u32> sass_version;
- BitField<21, 5, u32> reserved;
- BitField<26, 1, u32> does_load_or_store;
- BitField<27, 1, u32> does_fp64;
- BitField<28, 4, u32> stream_out_mask;
- } common0;
-
- union {
- BitField<0, 24, u32> shader_local_memory_low_size;
- BitField<24, 8, u32> per_patch_attribute_count;
- } common1;
-
- union {
- BitField<0, 24, u32> shader_local_memory_high_size;
- BitField<24, 8, u32> threads_per_input_primitive;
- } common2;
-
- union {
- BitField<0, 24, u32> shader_local_memory_crs_size;
- BitField<24, 4, OutputTopology> output_topology;
- BitField<28, 4, u32> reserved;
- } common3;
-
- union {
- BitField<0, 12, u32> max_output_vertices;
- BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders.
- BitField<20, 4, u32> reserved;
- BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders.
- } common4;
-
- union {
- struct {
- INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA
- INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB
- INSERT_PADDING_BYTES_NOINIT(16); // ImapGenericVector[32]
- INSERT_PADDING_BYTES_NOINIT(2); // ImapColor
- union {
- BitField<0, 8, u16> clip_distances;
- BitField<8, 1, u16> point_sprite_s;
- BitField<9, 1, u16> point_sprite_t;
- BitField<10, 1, u16> fog_coordinate;
- BitField<12, 1, u16> tessellation_eval_point_u;
- BitField<13, 1, u16> tessellation_eval_point_v;
- BitField<14, 1, u16> instance_id;
- BitField<15, 1, u16> vertex_id;
- };
- INSERT_PADDING_BYTES_NOINIT(5); // ImapFixedFncTexture[10]
- INSERT_PADDING_BYTES_NOINIT(1); // ImapReserved
- INSERT_PADDING_BYTES_NOINIT(3); // OmapSystemValuesA
- INSERT_PADDING_BYTES_NOINIT(1); // OmapSystemValuesB
- INSERT_PADDING_BYTES_NOINIT(16); // OmapGenericVector[32]
- INSERT_PADDING_BYTES_NOINIT(2); // OmapColor
- INSERT_PADDING_BYTES_NOINIT(2); // OmapSystemValuesC
- INSERT_PADDING_BYTES_NOINIT(5); // OmapFixedFncTexture[10]
- INSERT_PADDING_BYTES_NOINIT(1); // OmapReserved
- } vtg;
-
- struct {
- INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA
- INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB
-
- union {
- BitField<0, 2, PixelImap> x;
- BitField<2, 2, PixelImap> y;
- BitField<4, 2, PixelImap> z;
- BitField<6, 2, PixelImap> w;
- u8 raw;
- } imap_generic_vector[32];
-
- INSERT_PADDING_BYTES_NOINIT(2); // ImapColor
- INSERT_PADDING_BYTES_NOINIT(2); // ImapSystemValuesC
- INSERT_PADDING_BYTES_NOINIT(10); // ImapFixedFncTexture[10]
- INSERT_PADDING_BYTES_NOINIT(2); // ImapReserved
-
- struct {
- u32 target;
- union {
- BitField<0, 1, u32> sample_mask;
- BitField<1, 1, u32> depth;
- BitField<2, 30, u32> reserved;
- };
- } omap;
-
- bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const {
- const u32 bit = render_target * 4 + component;
- return omap.target & (1 << bit);
- }
-
- PixelImap GetPixelImap(u32 attribute) const {
- const auto get_index = [this, attribute](u32 index) {
- return static_cast<PixelImap>(
- (imap_generic_vector[attribute].raw >> (index * 2)) & 3);
- };
-
- std::optional<PixelImap> result;
- for (u32 component = 0; component < 4; ++component) {
- const PixelImap index = get_index(component);
- if (index == PixelImap::Unused) {
- continue;
- }
- if (result && result != index) {
- LOG_CRITICAL(HW_GPU, "Generic attribute conflict in interpolation mode");
- }
- result = index;
- }
- return result.value_or(PixelImap::Unused);
- }
- } ps;
-
- std::array<u32, 0xF> raw;
- };
-
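- // Note (added for clarity, not in the original header): combines the 24-bit low size
- // from common1 with the high size from common2, e.g. low = 0x1000 and high = 0x1
- // yield 0x1001000.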
- u64 GetLocalMemorySize() const {
- return (common1.shader_local_memory_low_size |
- (common2.shader_local_memory_high_size << 24));
- }
-};
-static_assert(sizeof(Header) == 0x50, "Incorrect structure size");
-
-} // namespace Tegra::Shader
diff --git a/src/video_core/engines/shader_type.h b/src/video_core/engines/shader_type.h
deleted file mode 100644
index 49ce5cde5..000000000
--- a/src/video_core/engines/shader_type.h
+++ /dev/null
@@ -1,21 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include "common/common_types.h"
-
-namespace Tegra::Engines {
-
-enum class ShaderType : u32 {
- Vertex = 0,
- TesselationControl = 1,
- TesselationEval = 2,
- Geometry = 3,
- Fragment = 4,
- Compute = 5,
-};
-static constexpr std::size_t MaxShaderTypes = 6;
-
-} // namespace Tegra::Engines
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
index f055b61e9..34dc6c596 100644
--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -8,6 +8,7 @@
#include <queue>
#include "common/common_types.h"
+#include "common/settings.h"
#include "core/core.h"
#include "video_core/delayed_destruction_ring.h"
#include "video_core/gpu.h"
@@ -53,6 +54,12 @@ public:
delayed_destruction_ring.Tick();
}
+ // Unlike other fences, this one doesn't queue a fence; it only accumulates pending buffer cache flushes.
+ void SignalOrdering() {
+ std::scoped_lock lock{buffer_cache.mutex};
+ buffer_cache.AccumulateFlushes();
+ }
+
void SignalSemaphore(GPUVAddr addr, u32 value) {
TryReleasePendingFences();
const bool should_flush = ShouldFlush();
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 35cc561be..ff024f530 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -50,6 +50,7 @@ void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) {
maxwell_3d->BindRasterizer(rasterizer);
fermi_2d->BindRasterizer(rasterizer);
kepler_compute->BindRasterizer(rasterizer);
+ maxwell_dma->BindRasterizer(rasterizer);
}
Engines::Maxwell3D& GPU::Maxwell3D() {
@@ -268,11 +269,13 @@ void GPU::CallPullerMethod(const MethodCall& method_call) {
case BufferMethods::SemaphoreAddressHigh:
case BufferMethods::SemaphoreAddressLow:
case BufferMethods::SemaphoreSequence:
- case BufferMethods::RefCnt:
case BufferMethods::UnkCacheFlush:
case BufferMethods::WrcacheFlush:
case BufferMethods::FenceValue:
break;
+ case BufferMethods::RefCnt:
+ rasterizer->SignalReference();
+ break;
case BufferMethods::FenceAction:
ProcessFenceActionMethod();
break;
diff --git a/src/video_core/guest_driver.cpp b/src/video_core/guest_driver.cpp
deleted file mode 100644
index f058f2744..000000000
--- a/src/video_core/guest_driver.cpp
+++ /dev/null
@@ -1,37 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <limits>
-#include <vector>
-
-#include "common/common_types.h"
-#include "video_core/guest_driver.h"
-
-namespace VideoCore {
-
-void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32> bound_offsets) {
- if (texture_handler_size) {
- return;
- }
- const std::size_t size = bound_offsets.size();
- if (size < 2) {
- return;
- }
- std::sort(bound_offsets.begin(), bound_offsets.end(), std::less{});
- u32 min_val = std::numeric_limits<u32>::max();
- for (std::size_t i = 1; i < size; ++i) {
- if (bound_offsets[i] == bound_offsets[i - 1]) {
- continue;
- }
- const u32 new_min = bound_offsets[i] - bound_offsets[i - 1];
- min_val = std::min(min_val, new_min);
- }
- if (min_val > 2) {
- return;
- }
- texture_handler_size = min_texture_handler_size * min_val;
-}
-
-} // namespace VideoCore
diff --git a/src/video_core/guest_driver.h b/src/video_core/guest_driver.h
deleted file mode 100644
index 21e569ba1..000000000
--- a/src/video_core/guest_driver.h
+++ /dev/null
@@ -1,46 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <optional>
-#include <vector>
-
-#include "common/common_types.h"
-
-namespace VideoCore {
-
-/**
- * The GuestDriverProfile class is used to learn about the GPU drivers behavior and collect
- * information necessary for impossible to avoid HLE methods like shader tracks as they are
- * Entscheidungsproblems.
- */
-class GuestDriverProfile {
-public:
- explicit GuestDriverProfile() = default;
- explicit GuestDriverProfile(std::optional<u32> texture_handler_size_)
- : texture_handler_size{texture_handler_size_} {}
-
- void DeduceTextureHandlerSize(std::vector<u32> bound_offsets);
-
- u32 GetTextureHandlerSize() const {
- return texture_handler_size.value_or(default_texture_handler_size);
- }
-
- bool IsTextureHandlerSizeKnown() const {
- return texture_handler_size.has_value();
- }
-
-private:
- // Minimum size of texture handler any driver can use.
- static constexpr u32 min_texture_handler_size = 4;
-
- // This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily use 4 bytes instead.
- // Thus, certain drivers may squish the size.
- static constexpr u32 default_texture_handler_size = 8;
-
- std::optional<u32> texture_handler_size = default_texture_handler_size;
-};
-
-} // namespace VideoCore
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index 2208e1922..c9cff7450 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -18,7 +18,10 @@ set(SHADER_FILES
vulkan_uint8.comp
)
-find_program(GLSLANGVALIDATOR "glslangValidator" REQUIRED)
+find_program(GLSLANGVALIDATOR "glslangValidator")
+if ("${GLSLANGVALIDATOR}" STREQUAL "GLSLANGVALIDATOR-NOTFOUND")
+ message(FATAL_ERROR "Required program `glslangValidator` not found.")
+endif()
set(GLSL_FLAGS "")
set(QUIET_FLAG "--quiet")
diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp
index eaba1b103..c37f15bfd 100644
--- a/src/video_core/host_shaders/astc_decoder.comp
+++ b/src/video_core/host_shaders/astc_decoder.comp
@@ -11,12 +11,8 @@
#define UNIFORM(n)
#define BINDING_INPUT_BUFFER 0
#define BINDING_ENC_BUFFER 1
-#define BINDING_6_TO_8_BUFFER 2
-#define BINDING_7_TO_8_BUFFER 3
-#define BINDING_8_TO_8_BUFFER 4
-#define BINDING_BYTE_TO_16_BUFFER 5
-#define BINDING_SWIZZLE_BUFFER 6
-#define BINDING_OUTPUT_IMAGE 7
+#define BINDING_SWIZZLE_BUFFER 2
+#define BINDING_OUTPUT_IMAGE 3
#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
@@ -26,10 +22,6 @@
#define BINDING_SWIZZLE_BUFFER 0
#define BINDING_INPUT_BUFFER 1
#define BINDING_ENC_BUFFER 2
-#define BINDING_6_TO_8_BUFFER 3
-#define BINDING_7_TO_8_BUFFER 4
-#define BINDING_8_TO_8_BUFFER 5
-#define BINDING_BYTE_TO_16_BUFFER 6
#define BINDING_OUTPUT_IMAGE 0
#endif
@@ -76,19 +68,6 @@ layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU32 {
layout(binding = BINDING_ENC_BUFFER, std430) readonly buffer EncodingsValues {
EncodingData encoding_values[];
};
-// ASTC Precompiled tables
-layout(binding = BINDING_6_TO_8_BUFFER, std430) readonly buffer REPLICATE_6_BIT_TO_8 {
- uint REPLICATE_6_BIT_TO_8_TABLE[];
-};
-layout(binding = BINDING_7_TO_8_BUFFER, std430) readonly buffer REPLICATE_7_BIT_TO_8 {
- uint REPLICATE_7_BIT_TO_8_TABLE[];
-};
-layout(binding = BINDING_8_TO_8_BUFFER, std430) readonly buffer REPLICATE_8_BIT_TO_8 {
- uint REPLICATE_8_BIT_TO_8_TABLE[];
-};
-layout(binding = BINDING_BYTE_TO_16_BUFFER, std430) readonly buffer REPLICATE_BYTE_TO_16 {
- uint REPLICATE_BYTE_TO_16_TABLE[];
-};
layout(binding = BINDING_OUTPUT_IMAGE, rgba8) uniform writeonly image2DArray dest_image;
@@ -139,6 +118,19 @@ const uint REPLICATE_4_BIT_TO_6_TABLE[16] =
const uint REPLICATE_5_BIT_TO_6_TABLE[32] =
uint[](0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 33, 35, 37, 39, 41, 43, 45,
47, 49, 51, 53, 55, 57, 59, 61, 63);
+const uint REPLICATE_6_BIT_TO_8_TABLE[64] =
+ uint[](0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 65, 69, 73, 77, 81, 85, 89,
+ 93, 97, 101, 105, 109, 113, 117, 121, 125, 130, 134, 138, 142, 146, 150, 154, 158, 162,
+ 166, 170, 174, 178, 182, 186, 190, 195, 199, 203, 207, 211, 215, 219, 223, 227, 231, 235,
+ 239, 243, 247, 251, 255);
+const uint REPLICATE_7_BIT_TO_8_TABLE[128] =
+ uint[](0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44,
+ 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88,
+ 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126,
+ 129, 131, 133, 135, 137, 139, 141, 143, 145, 147, 149, 151, 153, 155, 157, 159, 161, 163,
+ 165, 167, 169, 171, 173, 175, 177, 179, 181, 183, 185, 187, 189, 191, 193, 195, 197, 199,
+ 201, 203, 205, 207, 209, 211, 213, 215, 217, 219, 221, 223, 225, 227, 229, 231, 233, 235,
+ 237, 239, 241, 243, 245, 247, 249, 251, 253, 255);
// Input ASTC texture globals
uint current_index = 0;
@@ -207,8 +199,7 @@ uint Replicate(uint val, uint num_bits, uint to_bit) {
}
uvec4 ReplicateByteTo16(uvec4 value) {
- return uvec4(REPLICATE_BYTE_TO_16_TABLE[value.x], REPLICATE_BYTE_TO_16_TABLE[value.y],
- REPLICATE_BYTE_TO_16_TABLE[value.z], REPLICATE_BYTE_TO_16_TABLE[value.w]);
+ return value * 0x101;
}
uint ReplicateBitTo7(uint value) {
@@ -236,7 +227,7 @@ uint FastReplicateTo8(uint value, uint num_bits) {
case 7:
return REPLICATE_7_BIT_TO_8_TABLE[value];
case 8:
- return REPLICATE_8_BIT_TO_8_TABLE[value];
+ return value;
}
return Replicate(value, num_bits, 8);
}
@@ -1327,6 +1318,9 @@ void main() {
offset += swizzle;
const ivec3 coord = ivec3(gl_GlobalInvocationID * uvec3(block_dims, 1));
+ if (any(greaterThanEqual(coord, imageSize(dest_image)))) {
+ return;
+ }
uint block_index =
pos.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + pos.y * gl_WorkGroupSize.x + pos.x;
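The ReplicateByteTo16 change above replaces a lookup-table read with a multiply: replicating an 8-bit value into both halves of a 16-bit value is the same as multiplying it by 0x101. A minimal, standalone C++ check of that identity (illustrative only, not part of the patch; the comment on what the removed table held is an assumption):

#include <cassert>
#include <cstdint>

int main() {
    for (std::uint32_t v = 0; v <= 0xFF; ++v) {
        const std::uint32_t replicated = (v << 8) | v; // presumably what REPLICATE_BYTE_TO_16_TABLE encoded
        const std::uint32_t multiplied = v * 0x101;    // what the shader now computes inline
        assert(replicated == multiplied);              // holds for every byte value
    }
    return 0;
}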
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 7124c755c..882eff880 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -69,11 +69,15 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
} else {
UNREACHABLE_MSG("Unmapping non-existent GPU address=0x{:x}", gpu_addr);
}
- // Flush and invalidate through the GPU interface, to be asynchronous if possible.
- const std::optional<VAddr> cpu_addr = GpuToCpuAddress(gpu_addr);
- ASSERT(cpu_addr);
+ const auto submapped_ranges = GetSubmappedRange(gpu_addr, size);
- rasterizer->UnmapMemory(*cpu_addr, size);
+ for (const auto& map : submapped_ranges) {
+ // Flush and invalidate through the GPU interface, to be asynchronous if possible.
+ const std::optional<VAddr> cpu_addr = GpuToCpuAddress(map.first);
+ ASSERT(cpu_addr);
+
+ rasterizer->UnmapMemory(*cpu_addr, map.second);
+ }
UpdateRange(gpu_addr, PageEntry::State::Unmapped, size);
}
@@ -127,8 +131,14 @@ void MemoryManager::SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::s
//// Lock the new page
// TryLockPage(page_entry, size);
+ auto& current_page = page_table[PageEntryIndex(gpu_addr)];
+
+ if ((!current_page.IsValid() && page_entry.IsValid()) ||
+ current_page.ToAddress() != page_entry.ToAddress()) {
+ rasterizer->ModifyGPUMemory(gpu_addr, size);
+ }
- page_table[PageEntryIndex(gpu_addr)] = page_entry;
+ current_page = page_entry;
}
std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size_t align,
@@ -174,6 +184,19 @@ std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const {
return page_entry.ToAddress() + (gpu_addr & page_mask);
}
+std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr, std::size_t size) const {
+ size_t page_index{addr >> page_bits};
+ const size_t page_last{(addr + size + page_size - 1) >> page_bits};
+ while (page_index < page_last) {
+ const auto page_addr{GpuToCpuAddress(page_index << page_bits)};
+ if (page_addr && *page_addr != 0) {
+ return page_addr;
+ }
+ ++page_index;
+ }
+ return std::nullopt;
+}
+
template <typename T>
T MemoryManager::Read(GPUVAddr addr) const {
if (auto page_pointer{GetPointer(addr)}; page_pointer) {
@@ -370,4 +393,79 @@ bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const {
return page <= Core::Memory::PAGE_SIZE;
}
+bool MemoryManager::IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const {
+ size_t page_index{gpu_addr >> page_bits};
+ const size_t page_last{(gpu_addr + size + page_size - 1) >> page_bits};
+ std::optional<VAddr> old_page_addr{};
+ while (page_index != page_last) {
+ const auto page_addr{GpuToCpuAddress(page_index << page_bits)};
+ if (!page_addr || *page_addr == 0) {
+ return false;
+ }
+ if (old_page_addr) {
+ if (*old_page_addr + page_size != *page_addr) {
+ return false;
+ }
+ }
+ old_page_addr = page_addr;
+ ++page_index;
+ }
+ return true;
+}
+
+bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) const {
+ size_t page_index{gpu_addr >> page_bits};
+ const size_t page_last{(gpu_addr + size + page_size - 1) >> page_bits};
+ while (page_index < page_last) {
+ if (!page_table[page_index].IsValid() || page_table[page_index].ToAddress() == 0) {
+ return false;
+ }
+ ++page_index;
+ }
+ return true;
+}
+
+std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
+ GPUVAddr gpu_addr, std::size_t size) const {
+ std::vector<std::pair<GPUVAddr, std::size_t>> result{};
+ size_t page_index{gpu_addr >> page_bits};
+ size_t remaining_size{size};
+ size_t page_offset{gpu_addr & page_mask};
+ std::optional<std::pair<GPUVAddr, std::size_t>> last_segment{};
+ std::optional<VAddr> old_page_addr{};
+ const auto extend_size = [this, &last_segment, &page_index](std::size_t bytes) {
+ if (!last_segment) {
+ GPUVAddr new_base_addr = page_index << page_bits;
+ last_segment = {new_base_addr, bytes};
+ } else {
+ last_segment->second += bytes;
+ }
+ };
+ const auto split = [this, &last_segment, &result] {
+ if (last_segment) {
+ result.push_back(*last_segment);
+ last_segment = std::nullopt;
+ }
+ };
+ while (remaining_size > 0) {
+ const size_t num_bytes{std::min(page_size - page_offset, remaining_size)};
+ const auto page_addr{GpuToCpuAddress(page_index << page_bits)};
+ if (!page_addr) {
+ split();
+ } else if (old_page_addr) {
+ if (*old_page_addr + page_size != *page_addr) {
+ split();
+ }
+ extend_size(num_bytes);
+ } else {
+ extend_size(num_bytes);
+ }
+ ++page_index;
+ page_offset = 0;
+ remaining_size -= num_bytes;
+ }
+ split();
+ return result;
+}
+
} // namespace Tegra
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index b3538d503..99d13e7f6 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -76,6 +76,8 @@ public:
[[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const;
+ [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr, std::size_t size) const;
+
template <typename T>
[[nodiscard]] T Read(GPUVAddr addr) const;
@@ -112,10 +114,28 @@ public:
void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
/**
- * IsGranularRange checks if a gpu region can be simply read with a pointer.
+ * Checks if a gpu region can be simply read with a pointer.
*/
[[nodiscard]] bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const;
+ /**
+ * Checks if a gpu region is mapped by a single range of cpu addresses.
+ */
+ [[nodiscard]] bool IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const;
+
+ /**
+ * Checks if a gpu region is mapped entirely.
+ */
+ [[nodiscard]] bool IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) const;
+
+ /**
+ * Returns a vector with all the subranges of cpu addresses mapped beneath the region.
+ * If the region is continuous, a single pair will be returned. If it is unmapped, an empty
+ * vector will be returned.
+ */
+ std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr,
+ std::size_t size) const;
+
[[nodiscard]] GPUVAddr Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size);
[[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align);
[[nodiscard]] GPUVAddr MapAllocate32(VAddr cpu_addr, std::size_t size);
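As a rough sketch of how the new GetSubmappedRange query is meant to be consumed (mirroring the Unmap loop in memory_manager.cpp above; the helper below is hypothetical and not part of the patch):

#include <optional>

#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"

// Hypothetical helper: flush every CPU-backed subrange of a possibly partially mapped GPU region.
void FlushMappedSubranges(Tegra::MemoryManager& memory_manager,
                          VideoCore::RasterizerInterface& rasterizer, GPUVAddr gpu_addr,
                          std::size_t size) {
    for (const auto& [sub_gpu_addr, sub_size] : memory_manager.GetSubmappedRange(gpu_addr, size)) {
        const std::optional<VAddr> cpu_addr = memory_manager.GpuToCpuAddress(sub_gpu_addr);
        if (cpu_addr) {
            rasterizer.FlushAndInvalidateRegion(*cpu_addr, sub_size);
        }
    }
}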
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index f968b5b16..b094fc064 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -4,18 +4,20 @@
#pragma once
-#include <atomic>
#include <functional>
#include <optional>
#include <span>
+#include <stop_token>
#include "common/common_types.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/gpu.h"
-#include "video_core/guest_driver.h"
namespace Tegra {
class MemoryManager;
+namespace Engines {
+class AccelerateDMAInterface;
}
+} // namespace Tegra
namespace VideoCore {
@@ -42,7 +44,7 @@ public:
virtual void Clear() = 0;
/// Dispatches a compute shader invocation
- virtual void DispatchCompute(GPUVAddr code_addr) = 0;
+ virtual void DispatchCompute() = 0;
/// Resets the counter of a query
virtual void ResetCounter(QueryType type) = 0;
@@ -63,6 +65,9 @@ public:
/// Signal a GPU based syncpoint as a fence
virtual void SignalSyncPoint(u32 value) = 0;
+ /// Signal a GPU-based reference as a synchronization point
+ virtual void SignalReference() = 0;
+
/// Release all pending fences.
virtual void ReleaseFences() = 0;
@@ -87,6 +92,9 @@ public:
/// Unmap memory range
virtual void UnmapMemory(VAddr addr, u64 size) = 0;
+ /// Remap a GPU memory range. This means the backing memory underneath has changed
+ virtual void ModifyGPUMemory(GPUVAddr addr, u64 size) = 0;
+
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
/// and invalidated
virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
@@ -113,6 +121,8 @@ public:
return false;
}
+ [[nodiscard]] virtual Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() = 0;
+
/// Attempt to use a faster method to display the framebuffer to screen
[[nodiscard]] virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config,
VAddr framebuffer_addr, u32 pixel_stride) {
@@ -123,20 +133,7 @@ public:
virtual void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {}
/// Initialize disk cached resources for the game being emulated
- virtual void LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading,
+ virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
const DiskResourceLoadCallback& callback) {}
-
- /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver.
- [[nodiscard]] GuestDriverProfile& AccessGuestDriverProfile() {
- return guest_driver_profile;
- }
-
- /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver.
- [[nodiscard]] const GuestDriverProfile& AccessGuestDriverProfile() const {
- return guest_driver_profile;
- }
-
-private:
- GuestDriverProfile guest_driver_profile{};
};
} // namespace VideoCore
diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp
index c9a360aaf..3ea72fda9 100644
--- a/src/video_core/renderer_base.cpp
+++ b/src/video_core/renderer_base.cpp
@@ -19,9 +19,6 @@ RendererBase::~RendererBase() = default;
void RendererBase::RefreshBaseSettings() {
UpdateCurrentFramebufferLayout();
-
- renderer_settings.use_framelimiter = Settings::values.use_frame_limit.GetValue();
- renderer_settings.set_background_color = true;
}
void RendererBase::UpdateCurrentFramebufferLayout() {
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h
index 320ee8d30..22b80c328 100644
--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -21,9 +21,6 @@ class GraphicsContext;
namespace VideoCore {
struct RendererSettings {
- std::atomic_bool use_framelimiter{false};
- std::atomic_bool set_background_color{false};
-
// Screenshot
std::atomic<bool> screenshot_requested{false};
void* screenshot_bits{};
@@ -42,6 +39,8 @@ public:
[[nodiscard]] virtual RasterizerInterface* ReadRasterizer() = 0;
+ [[nodiscard]] virtual std::string GetDeviceVendor() const = 0;
+
// Getter/setter functions:
// ------------------------
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
deleted file mode 100644
index 3e4d88c30..000000000
--- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
+++ /dev/null
@@ -1,2124 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <array>
-#include <cstddef>
-#include <string>
-#include <string_view>
-#include <utility>
-#include <variant>
-
-#include <fmt/format.h>
-
-#include "common/alignment.h"
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/renderer_opengl/gl_arb_decompiler.h"
-#include "video_core/renderer_opengl/gl_device.h"
-#include "video_core/shader/registry.h"
-#include "video_core/shader/shader_ir.h"
-
-// Predicates in the decompiled code follow the convention that -1 means true and 0 means false.
-// GLASM lacks booleans, so they have to be implemented as integers.
-// Using -1 for true is useful because both CMP.S and NOT.U can negate it, and CMP.S can be used to
-// select between two values, because -1 will be evaluated as true and 0 as false.
-
-namespace OpenGL {
-
-namespace {
-
-using Tegra::Engines::ShaderType;
-using Tegra::Shader::Attribute;
-using Tegra::Shader::PixelImap;
-using Tegra::Shader::Register;
-using namespace VideoCommon::Shader;
-using Operation = const OperationNode&;
-
-constexpr std::array INTERNAL_FLAG_NAMES = {"ZERO", "SIGN", "CARRY", "OVERFLOW"};
-
-char Swizzle(std::size_t component) {
- static constexpr std::string_view SWIZZLE{"xyzw"};
- return SWIZZLE.at(component);
-}
-
-constexpr bool IsGenericAttribute(Attribute::Index index) {
- return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31;
-}
-
-u32 GetGenericAttributeIndex(Attribute::Index index) {
- ASSERT(IsGenericAttribute(index));
- return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
-}
-
-std::string_view Modifiers(Operation operation) {
- const auto meta = std::get_if<MetaArithmetic>(&operation.GetMeta());
- if (meta && meta->precise) {
- return ".PREC";
- }
- return "";
-}
-
-std::string_view GetInputFlags(PixelImap attribute) {
- switch (attribute) {
- case PixelImap::Perspective:
- return "";
- case PixelImap::Constant:
- return "FLAT ";
- case PixelImap::ScreenLinear:
- return "NOPERSPECTIVE ";
- case PixelImap::Unused:
- break;
- }
- UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute);
- return {};
-}
-
-std::string_view ImageType(Tegra::Shader::ImageType image_type) {
- switch (image_type) {
- case Tegra::Shader::ImageType::Texture1D:
- return "1D";
- case Tegra::Shader::ImageType::TextureBuffer:
- return "BUFFER";
- case Tegra::Shader::ImageType::Texture1DArray:
- return "ARRAY1D";
- case Tegra::Shader::ImageType::Texture2D:
- return "2D";
- case Tegra::Shader::ImageType::Texture2DArray:
- return "ARRAY2D";
- case Tegra::Shader::ImageType::Texture3D:
- return "3D";
- }
- UNREACHABLE();
- return {};
-}
-
-std::string_view StackName(MetaStackClass stack) {
- switch (stack) {
- case MetaStackClass::Ssy:
- return "SSY";
- case MetaStackClass::Pbk:
- return "PBK";
- }
- UNREACHABLE();
- return "";
-};
-
-std::string_view PrimitiveDescription(Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology topology) {
- switch (topology) {
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Points:
- return "POINTS";
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Lines:
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStrip:
- return "LINES";
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency:
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency:
- return "LINES_ADJACENCY";
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Triangles:
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStrip:
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleFan:
- return "TRIANGLES";
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency:
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency:
- return "TRIANGLES_ADJACENCY";
- default:
- UNIMPLEMENTED_MSG("topology={}", topology);
- return "POINTS";
- }
-}
-
-std::string_view TopologyName(Tegra::Shader::OutputTopology topology) {
- switch (topology) {
- case Tegra::Shader::OutputTopology::PointList:
- return "POINTS";
- case Tegra::Shader::OutputTopology::LineStrip:
- return "LINE_STRIP";
- case Tegra::Shader::OutputTopology::TriangleStrip:
- return "TRIANGLE_STRIP";
- default:
- UNIMPLEMENTED_MSG("Unknown output topology: {}", topology);
- return "points";
- }
-}
-
-std::string_view StageInputName(ShaderType stage) {
- switch (stage) {
- case ShaderType::Vertex:
- case ShaderType::Geometry:
- return "vertex";
- case ShaderType::Fragment:
- return "fragment";
- case ShaderType::Compute:
- return "invocation";
- default:
- UNREACHABLE();
- return "";
- }
-}
-
-std::string TextureType(const MetaTexture& meta) {
- if (meta.sampler.is_buffer) {
- return "BUFFER";
- }
- std::string type;
- if (meta.sampler.is_shadow) {
- type += "SHADOW";
- }
- if (meta.sampler.is_array) {
- type += "ARRAY";
- }
- type += [&meta] {
- switch (meta.sampler.type) {
- case Tegra::Shader::TextureType::Texture1D:
- return "1D";
- case Tegra::Shader::TextureType::Texture2D:
- return "2D";
- case Tegra::Shader::TextureType::Texture3D:
- return "3D";
- case Tegra::Shader::TextureType::TextureCube:
- return "CUBE";
- }
- UNREACHABLE();
- return "2D";
- }();
- return type;
-}
-
-class ARBDecompiler final {
-public:
- explicit ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_,
- ShaderType stage_, std::string_view identifier);
-
- std::string Code() const {
- return shader_source;
- }
-
-private:
- void DefineGlobalMemory();
-
- void DeclareHeader();
- void DeclareVertex();
- void DeclareGeometry();
- void DeclareFragment();
- void DeclareCompute();
- void DeclareInputAttributes();
- void DeclareOutputAttributes();
- void DeclareLocalMemory();
- void DeclareGlobalMemory();
- void DeclareConstantBuffers();
- void DeclareRegisters();
- void DeclareTemporaries();
- void DeclarePredicates();
- void DeclareInternalFlags();
-
- void InitializeVariables();
-
- void DecompileAST();
- void DecompileBranchMode();
-
- void VisitAST(const ASTNode& node);
- std::string VisitExpression(const Expr& node);
-
- void VisitBlock(const NodeBlock& bb);
-
- std::string Visit(const Node& node);
-
- std::tuple<std::string, std::string, std::size_t> BuildCoords(Operation);
- std::string BuildAoffi(Operation);
- std::string GlobalMemoryPointer(const GmemNode& gmem);
- void Exit();
-
- std::string Assign(Operation);
- std::string Select(Operation);
- std::string FClamp(Operation);
- std::string FCastHalf0(Operation);
- std::string FCastHalf1(Operation);
- std::string FSqrt(Operation);
- std::string FSwizzleAdd(Operation);
- std::string HAdd2(Operation);
- std::string HMul2(Operation);
- std::string HFma2(Operation);
- std::string HAbsolute(Operation);
- std::string HNegate(Operation);
- std::string HClamp(Operation);
- std::string HCastFloat(Operation);
- std::string HUnpack(Operation);
- std::string HMergeF32(Operation);
- std::string HMergeH0(Operation);
- std::string HMergeH1(Operation);
- std::string HPack2(Operation);
- std::string LogicalAssign(Operation);
- std::string LogicalPick2(Operation);
- std::string LogicalAnd2(Operation);
- std::string FloatOrdered(Operation);
- std::string FloatUnordered(Operation);
- std::string LogicalAddCarry(Operation);
- std::string Texture(Operation);
- std::string TextureGather(Operation);
- std::string TextureQueryDimensions(Operation);
- std::string TextureQueryLod(Operation);
- std::string TexelFetch(Operation);
- std::string TextureGradient(Operation);
- std::string ImageLoad(Operation);
- std::string ImageStore(Operation);
- std::string Branch(Operation);
- std::string BranchIndirect(Operation);
- std::string PushFlowStack(Operation);
- std::string PopFlowStack(Operation);
- std::string Exit(Operation);
- std::string Discard(Operation);
- std::string EmitVertex(Operation);
- std::string EndPrimitive(Operation);
- std::string InvocationId(Operation);
- std::string YNegate(Operation);
- std::string ThreadId(Operation);
- std::string ShuffleIndexed(Operation);
- std::string Barrier(Operation);
- std::string MemoryBarrierGroup(Operation);
- std::string MemoryBarrierGlobal(Operation);
-
- template <const std::string_view& op>
- std::string Unary(Operation operation) {
- std::string temporary = AllocTemporary();
- AddLine("{}{} {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]));
- return temporary;
- }
-
- template <const std::string_view& op>
- std::string Binary(Operation operation) {
- std::string temporary = AllocTemporary();
- AddLine("{}{} {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]),
- Visit(operation[1]));
- return temporary;
- }
-
- template <const std::string_view& op>
- std::string Trinary(Operation operation) {
- std::string temporary = AllocTemporary();
- AddLine("{}{} {}, {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]),
- Visit(operation[1]), Visit(operation[2]));
- return temporary;
- }
-
- template <const std::string_view& op, bool unordered>
- std::string FloatComparison(Operation operation) {
- std::string temporary = AllocTemporary();
- AddLine("TRUNC.U.CC RC.x, {};", Binary<op>(operation));
- AddLine("MOV.S {}, 0;", temporary);
- AddLine("MOV.S {} (NE.x), -1;", temporary);
-
- const std::string op_a = Visit(operation[0]);
- const std::string op_b = Visit(operation[1]);
- if constexpr (unordered) {
- AddLine("SNE.F RC.x, {}, {};", op_a, op_a);
- AddLine("TRUNC.U.CC RC.x, RC.x;");
- AddLine("MOV.S {} (NE.x), -1;", temporary);
- AddLine("SNE.F RC.x, {}, {};", op_b, op_b);
- AddLine("TRUNC.U.CC RC.x, RC.x;");
- AddLine("MOV.S {} (NE.x), -1;", temporary);
- } else if (op == SNE_F) {
- AddLine("SNE.F RC.x, {}, {};", op_a, op_a);
- AddLine("TRUNC.U.CC RC.x, RC.x;");
- AddLine("MOV.S {} (NE.x), 0;", temporary);
- AddLine("SNE.F RC.x, {}, {};", op_b, op_b);
- AddLine("TRUNC.U.CC RC.x, RC.x;");
- AddLine("MOV.S {} (NE.x), 0;", temporary);
- }
- return temporary;
- }
-
- template <const std::string_view& op, bool is_nan>
- std::string HalfComparison(Operation operation) {
- std::string tmp1 = AllocVectorTemporary();
- const std::string tmp2 = AllocVectorTemporary();
- const std::string op_a = Visit(operation[0]);
- const std::string op_b = Visit(operation[1]);
- AddLine("UP2H.F {}, {};", tmp1, op_a);
- AddLine("UP2H.F {}, {};", tmp2, op_b);
- AddLine("{} {}, {}, {};", op, tmp1, tmp1, tmp2);
- AddLine("TRUNC.U.CC RC.xy, {};", tmp1);
- AddLine("MOV.S {}.xy, {{0, 0, 0, 0}};", tmp1);
- AddLine("MOV.S {}.x (NE.x), -1;", tmp1);
- AddLine("MOV.S {}.y (NE.y), -1;", tmp1);
- if constexpr (is_nan) {
- AddLine("MOVC.F RC.x, {};", op_a);
- AddLine("MOV.S {}.x (NAN.x), -1;", tmp1);
- AddLine("MOVC.F RC.x, {};", op_b);
- AddLine("MOV.S {}.y (NAN.x), -1;", tmp1);
- }
- return tmp1;
- }
-
- template <const std::string_view& op, const std::string_view& type>
- std::string AtomicImage(Operation operation) {
- const auto& meta = std::get<MetaImage>(operation.GetMeta());
- const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
- const std::size_t num_coords = operation.GetOperandsCount();
- const std::size_t num_values = meta.values.size();
-
- const std::string coord = AllocVectorTemporary();
- const std::string value = AllocVectorTemporary();
- for (std::size_t i = 0; i < num_coords; ++i) {
- AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i]));
- }
- for (std::size_t i = 0; i < num_values; ++i) {
- AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i]));
- }
-
- AddLine("ATOMIM.{}.{} {}.x, {}, {}, image[{}], {};", op, type, coord, value, coord,
- image_id, ImageType(meta.image.type));
- return fmt::format("{}.x", coord);
- }
-
- template <const std::string_view& op, const std::string_view& type>
- std::string Atomic(Operation operation) {
- std::string temporary = AllocTemporary();
- std::string address;
- std::string_view opname;
- bool robust = false;
- if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) {
- address = GlobalMemoryPointer(*gmem);
- opname = "ATOM";
- robust = true;
- } else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
- address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress()));
- opname = "ATOMS";
- } else {
- UNREACHABLE();
- return "{0, 0, 0, 0}";
- }
- if (robust) {
- AddLine("IF NE.x;");
- }
- AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address);
- if (robust) {
- AddLine("ELSE;");
- AddLine("MOV.S {}, 0;", temporary);
- AddLine("ENDIF;");
- }
- return temporary;
- }
-
- template <char type>
- std::string Negate(Operation operation) {
- std::string temporary = AllocTemporary();
- if constexpr (type == 'F') {
- AddLine("MOV.F32 {}, -{};", temporary, Visit(operation[0]));
- } else {
- AddLine("MOV.{} {}, -{};", type, temporary, Visit(operation[0]));
- }
- return temporary;
- }
-
- template <char type>
- std::string Absolute(Operation operation) {
- std::string temporary = AllocTemporary();
- AddLine("MOV.{} {}, |{}|;", type, temporary, Visit(operation[0]));
- return temporary;
- }
-
- template <char type>
- std::string BitfieldInsert(Operation operation) {
- const std::string temporary = AllocVectorTemporary();
- AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[3]));
- AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[2]));
- AddLine("BFI.{} {}.x, {}, {}, {};", type, temporary, temporary, Visit(operation[1]),
- Visit(operation[0]));
- return fmt::format("{}.x", temporary);
- }
-
- template <char type>
- std::string BitfieldExtract(Operation operation) {
- const std::string temporary = AllocVectorTemporary();
- AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[2]));
- AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[1]));
- AddLine("BFE.{} {}.x, {}, {};", type, temporary, temporary, Visit(operation[0]));
- return fmt::format("{}.x", temporary);
- }
-
- template <char swizzle>
- std::string LocalInvocationId(Operation) {
- return fmt::format("invocation.localid.{}", swizzle);
- }
-
- template <char swizzle>
- std::string WorkGroupId(Operation) {
- return fmt::format("invocation.groupid.{}", swizzle);
- }
-
- template <char c1, char c2>
- std::string ThreadMask(Operation) {
- return fmt::format("{}.thread{}{}mask", StageInputName(stage), c1, c2);
- }
-
- template <typename... Args>
- void AddExpression(std::string_view text, Args&&... args) {
- shader_source += fmt::format(text, std::forward<Args>(args)...);
- }
-
- template <typename... Args>
- void AddLine(std::string_view text, Args&&... args) {
- AddExpression(text, std::forward<Args>(args)...);
- shader_source += '\n';
- }
-
- std::string AllocLongVectorTemporary() {
- max_long_temporaries = std::max(max_long_temporaries, num_long_temporaries + 1);
- return fmt::format("L{}", num_long_temporaries++);
- }
-
- std::string AllocLongTemporary() {
- return fmt::format("{}.x", AllocLongVectorTemporary());
- }
-
- std::string AllocVectorTemporary() {
- max_temporaries = std::max(max_temporaries, num_temporaries + 1);
- return fmt::format("T{}", num_temporaries++);
- }
-
- std::string AllocTemporary() {
- return fmt::format("{}.x", AllocVectorTemporary());
- }
-
- void ResetTemporaries() noexcept {
- num_temporaries = 0;
- num_long_temporaries = 0;
- }
-
- const Device& device;
- const ShaderIR& ir;
- const Registry& registry;
- const ShaderType stage;
-
- std::size_t num_temporaries = 0;
- std::size_t max_temporaries = 0;
-
- std::size_t num_long_temporaries = 0;
- std::size_t max_long_temporaries = 0;
-
- std::map<GlobalMemoryBase, u32> global_memory_names;
-
- std::string shader_source;
-
- static constexpr std::string_view ADD_F32 = "ADD.F32";
- static constexpr std::string_view ADD_S = "ADD.S";
- static constexpr std::string_view ADD_U = "ADD.U";
- static constexpr std::string_view MUL_F32 = "MUL.F32";
- static constexpr std::string_view MUL_S = "MUL.S";
- static constexpr std::string_view MUL_U = "MUL.U";
- static constexpr std::string_view DIV_F32 = "DIV.F32";
- static constexpr std::string_view DIV_S = "DIV.S";
- static constexpr std::string_view DIV_U = "DIV.U";
- static constexpr std::string_view MAD_F32 = "MAD.F32";
- static constexpr std::string_view RSQ_F32 = "RSQ.F32";
- static constexpr std::string_view COS_F32 = "COS.F32";
- static constexpr std::string_view SIN_F32 = "SIN.F32";
- static constexpr std::string_view EX2_F32 = "EX2.F32";
- static constexpr std::string_view LG2_F32 = "LG2.F32";
- static constexpr std::string_view SLT_F = "SLT.F32";
- static constexpr std::string_view SLT_S = "SLT.S";
- static constexpr std::string_view SLT_U = "SLT.U";
- static constexpr std::string_view SEQ_F = "SEQ.F32";
- static constexpr std::string_view SEQ_S = "SEQ.S";
- static constexpr std::string_view SEQ_U = "SEQ.U";
- static constexpr std::string_view SLE_F = "SLE.F32";
- static constexpr std::string_view SLE_S = "SLE.S";
- static constexpr std::string_view SLE_U = "SLE.U";
- static constexpr std::string_view SGT_F = "SGT.F32";
- static constexpr std::string_view SGT_S = "SGT.S";
- static constexpr std::string_view SGT_U = "SGT.U";
- static constexpr std::string_view SNE_F = "SNE.F32";
- static constexpr std::string_view SNE_S = "SNE.S";
- static constexpr std::string_view SNE_U = "SNE.U";
- static constexpr std::string_view SGE_F = "SGE.F32";
- static constexpr std::string_view SGE_S = "SGE.S";
- static constexpr std::string_view SGE_U = "SGE.U";
- static constexpr std::string_view AND_S = "AND.S";
- static constexpr std::string_view AND_U = "AND.U";
- static constexpr std::string_view TRUNC_F = "TRUNC.F";
- static constexpr std::string_view TRUNC_S = "TRUNC.S";
- static constexpr std::string_view TRUNC_U = "TRUNC.U";
- static constexpr std::string_view SHL_S = "SHL.S";
- static constexpr std::string_view SHL_U = "SHL.U";
- static constexpr std::string_view SHR_S = "SHR.S";
- static constexpr std::string_view SHR_U = "SHR.U";
- static constexpr std::string_view OR_S = "OR.S";
- static constexpr std::string_view OR_U = "OR.U";
- static constexpr std::string_view XOR_S = "XOR.S";
- static constexpr std::string_view XOR_U = "XOR.U";
- static constexpr std::string_view NOT_S = "NOT.S";
- static constexpr std::string_view NOT_U = "NOT.U";
- static constexpr std::string_view BTC_S = "BTC.S";
- static constexpr std::string_view BTC_U = "BTC.U";
- static constexpr std::string_view BTFM_S = "BTFM.S";
- static constexpr std::string_view BTFM_U = "BTFM.U";
- static constexpr std::string_view ROUND_F = "ROUND.F";
- static constexpr std::string_view CEIL_F = "CEIL.F";
- static constexpr std::string_view FLR_F = "FLR.F";
- static constexpr std::string_view I2F_S = "I2F.S";
- static constexpr std::string_view I2F_U = "I2F.U";
- static constexpr std::string_view MIN_F = "MIN.F";
- static constexpr std::string_view MIN_S = "MIN.S";
- static constexpr std::string_view MIN_U = "MIN.U";
- static constexpr std::string_view MAX_F = "MAX.F";
- static constexpr std::string_view MAX_S = "MAX.S";
- static constexpr std::string_view MAX_U = "MAX.U";
- static constexpr std::string_view MOV_U = "MOV.U";
- static constexpr std::string_view TGBALLOT_U = "TGBALLOT.U";
- static constexpr std::string_view TGALL_U = "TGALL.U";
- static constexpr std::string_view TGANY_U = "TGANY.U";
- static constexpr std::string_view TGEQ_U = "TGEQ.U";
- static constexpr std::string_view EXCH = "EXCH";
- static constexpr std::string_view ADD = "ADD";
- static constexpr std::string_view MIN = "MIN";
- static constexpr std::string_view MAX = "MAX";
- static constexpr std::string_view AND = "AND";
- static constexpr std::string_view OR = "OR";
- static constexpr std::string_view XOR = "XOR";
- static constexpr std::string_view U32 = "U32";
- static constexpr std::string_view S32 = "S32";
-
- static constexpr std::size_t NUM_ENTRIES = static_cast<std::size_t>(OperationCode::Amount);
- using DecompilerType = std::string (ARBDecompiler::*)(Operation);
- static constexpr std::array<DecompilerType, NUM_ENTRIES> OPERATION_DECOMPILERS = {
- &ARBDecompiler::Assign,
-
- &ARBDecompiler::Select,
-
- &ARBDecompiler::Binary<ADD_F32>,
- &ARBDecompiler::Binary<MUL_F32>,
- &ARBDecompiler::Binary<DIV_F32>,
- &ARBDecompiler::Trinary<MAD_F32>,
- &ARBDecompiler::Negate<'F'>,
- &ARBDecompiler::Absolute<'F'>,
- &ARBDecompiler::FClamp,
- &ARBDecompiler::FCastHalf0,
- &ARBDecompiler::FCastHalf1,
- &ARBDecompiler::Binary<MIN_F>,
- &ARBDecompiler::Binary<MAX_F>,
- &ARBDecompiler::Unary<COS_F32>,
- &ARBDecompiler::Unary<SIN_F32>,
- &ARBDecompiler::Unary<EX2_F32>,
- &ARBDecompiler::Unary<LG2_F32>,
- &ARBDecompiler::Unary<RSQ_F32>,
- &ARBDecompiler::FSqrt,
- &ARBDecompiler::Unary<ROUND_F>,
- &ARBDecompiler::Unary<FLR_F>,
- &ARBDecompiler::Unary<CEIL_F>,
- &ARBDecompiler::Unary<TRUNC_F>,
- &ARBDecompiler::Unary<I2F_S>,
- &ARBDecompiler::Unary<I2F_U>,
- &ARBDecompiler::FSwizzleAdd,
-
- &ARBDecompiler::Binary<ADD_S>,
- &ARBDecompiler::Binary<MUL_S>,
- &ARBDecompiler::Binary<DIV_S>,
- &ARBDecompiler::Negate<'S'>,
- &ARBDecompiler::Absolute<'S'>,
- &ARBDecompiler::Binary<MIN_S>,
- &ARBDecompiler::Binary<MAX_S>,
-
- &ARBDecompiler::Unary<TRUNC_S>,
- &ARBDecompiler::Unary<MOV_U>,
- &ARBDecompiler::Binary<SHL_S>,
- &ARBDecompiler::Binary<SHR_U>,
- &ARBDecompiler::Binary<SHR_S>,
- &ARBDecompiler::Binary<AND_S>,
- &ARBDecompiler::Binary<OR_S>,
- &ARBDecompiler::Binary<XOR_S>,
- &ARBDecompiler::Unary<NOT_S>,
- &ARBDecompiler::BitfieldInsert<'S'>,
- &ARBDecompiler::BitfieldExtract<'S'>,
- &ARBDecompiler::Unary<BTC_S>,
- &ARBDecompiler::Unary<BTFM_S>,
-
- &ARBDecompiler::Binary<ADD_U>,
- &ARBDecompiler::Binary<MUL_U>,
- &ARBDecompiler::Binary<DIV_U>,
- &ARBDecompiler::Binary<MIN_U>,
- &ARBDecompiler::Binary<MAX_U>,
- &ARBDecompiler::Unary<TRUNC_U>,
- &ARBDecompiler::Unary<MOV_U>,
- &ARBDecompiler::Binary<SHL_U>,
- &ARBDecompiler::Binary<SHR_U>,
- &ARBDecompiler::Binary<SHR_U>,
- &ARBDecompiler::Binary<AND_U>,
- &ARBDecompiler::Binary<OR_U>,
- &ARBDecompiler::Binary<XOR_U>,
- &ARBDecompiler::Unary<NOT_U>,
- &ARBDecompiler::BitfieldInsert<'U'>,
- &ARBDecompiler::BitfieldExtract<'U'>,
- &ARBDecompiler::Unary<BTC_U>,
- &ARBDecompiler::Unary<BTFM_U>,
-
- &ARBDecompiler::HAdd2,
- &ARBDecompiler::HMul2,
- &ARBDecompiler::HFma2,
- &ARBDecompiler::HAbsolute,
- &ARBDecompiler::HNegate,
- &ARBDecompiler::HClamp,
- &ARBDecompiler::HCastFloat,
- &ARBDecompiler::HUnpack,
- &ARBDecompiler::HMergeF32,
- &ARBDecompiler::HMergeH0,
- &ARBDecompiler::HMergeH1,
- &ARBDecompiler::HPack2,
-
- &ARBDecompiler::LogicalAssign,
- &ARBDecompiler::Binary<AND_U>,
- &ARBDecompiler::Binary<OR_U>,
- &ARBDecompiler::Binary<XOR_U>,
- &ARBDecompiler::Unary<NOT_U>,
- &ARBDecompiler::LogicalPick2,
- &ARBDecompiler::LogicalAnd2,
-
- &ARBDecompiler::FloatComparison<SLT_F, false>,
- &ARBDecompiler::FloatComparison<SEQ_F, false>,
- &ARBDecompiler::FloatComparison<SLE_F, false>,
- &ARBDecompiler::FloatComparison<SGT_F, false>,
- &ARBDecompiler::FloatComparison<SNE_F, false>,
- &ARBDecompiler::FloatComparison<SGE_F, false>,
- &ARBDecompiler::FloatOrdered,
- &ARBDecompiler::FloatUnordered,
- &ARBDecompiler::FloatComparison<SLT_F, true>,
- &ARBDecompiler::FloatComparison<SEQ_F, true>,
- &ARBDecompiler::FloatComparison<SLE_F, true>,
- &ARBDecompiler::FloatComparison<SGT_F, true>,
- &ARBDecompiler::FloatComparison<SNE_F, true>,
- &ARBDecompiler::FloatComparison<SGE_F, true>,
-
- &ARBDecompiler::Binary<SLT_S>,
- &ARBDecompiler::Binary<SEQ_S>,
- &ARBDecompiler::Binary<SLE_S>,
- &ARBDecompiler::Binary<SGT_S>,
- &ARBDecompiler::Binary<SNE_S>,
- &ARBDecompiler::Binary<SGE_S>,
-
- &ARBDecompiler::Binary<SLT_U>,
- &ARBDecompiler::Binary<SEQ_U>,
- &ARBDecompiler::Binary<SLE_U>,
- &ARBDecompiler::Binary<SGT_U>,
- &ARBDecompiler::Binary<SNE_U>,
- &ARBDecompiler::Binary<SGE_U>,
-
- &ARBDecompiler::LogicalAddCarry,
-
- &ARBDecompiler::HalfComparison<SLT_F, false>,
- &ARBDecompiler::HalfComparison<SEQ_F, false>,
- &ARBDecompiler::HalfComparison<SLE_F, false>,
- &ARBDecompiler::HalfComparison<SGT_F, false>,
- &ARBDecompiler::HalfComparison<SNE_F, false>,
- &ARBDecompiler::HalfComparison<SGE_F, false>,
- &ARBDecompiler::HalfComparison<SLT_F, true>,
- &ARBDecompiler::HalfComparison<SEQ_F, true>,
- &ARBDecompiler::HalfComparison<SLE_F, true>,
- &ARBDecompiler::HalfComparison<SGT_F, true>,
- &ARBDecompiler::HalfComparison<SNE_F, true>,
- &ARBDecompiler::HalfComparison<SGE_F, true>,
-
- &ARBDecompiler::Texture,
- &ARBDecompiler::Texture,
- &ARBDecompiler::TextureGather,
- &ARBDecompiler::TextureQueryDimensions,
- &ARBDecompiler::TextureQueryLod,
- &ARBDecompiler::TexelFetch,
- &ARBDecompiler::TextureGradient,
-
- &ARBDecompiler::ImageLoad,
- &ARBDecompiler::ImageStore,
-
- &ARBDecompiler::AtomicImage<ADD, U32>,
- &ARBDecompiler::AtomicImage<AND, U32>,
- &ARBDecompiler::AtomicImage<OR, U32>,
- &ARBDecompiler::AtomicImage<XOR, U32>,
- &ARBDecompiler::AtomicImage<EXCH, U32>,
-
- &ARBDecompiler::Atomic<EXCH, U32>,
- &ARBDecompiler::Atomic<ADD, U32>,
- &ARBDecompiler::Atomic<MIN, U32>,
- &ARBDecompiler::Atomic<MAX, U32>,
- &ARBDecompiler::Atomic<AND, U32>,
- &ARBDecompiler::Atomic<OR, U32>,
- &ARBDecompiler::Atomic<XOR, U32>,
-
- &ARBDecompiler::Atomic<EXCH, S32>,
- &ARBDecompiler::Atomic<ADD, S32>,
- &ARBDecompiler::Atomic<MIN, S32>,
- &ARBDecompiler::Atomic<MAX, S32>,
- &ARBDecompiler::Atomic<AND, S32>,
- &ARBDecompiler::Atomic<OR, S32>,
- &ARBDecompiler::Atomic<XOR, S32>,
-
- &ARBDecompiler::Atomic<ADD, U32>,
- &ARBDecompiler::Atomic<MIN, U32>,
- &ARBDecompiler::Atomic<MAX, U32>,
- &ARBDecompiler::Atomic<AND, U32>,
- &ARBDecompiler::Atomic<OR, U32>,
- &ARBDecompiler::Atomic<XOR, U32>,
-
- &ARBDecompiler::Atomic<ADD, S32>,
- &ARBDecompiler::Atomic<MIN, S32>,
- &ARBDecompiler::Atomic<MAX, S32>,
- &ARBDecompiler::Atomic<AND, S32>,
- &ARBDecompiler::Atomic<OR, S32>,
- &ARBDecompiler::Atomic<XOR, S32>,
-
- &ARBDecompiler::Branch,
- &ARBDecompiler::BranchIndirect,
- &ARBDecompiler::PushFlowStack,
- &ARBDecompiler::PopFlowStack,
- &ARBDecompiler::Exit,
- &ARBDecompiler::Discard,
-
- &ARBDecompiler::EmitVertex,
- &ARBDecompiler::EndPrimitive,
-
- &ARBDecompiler::InvocationId,
- &ARBDecompiler::YNegate,
- &ARBDecompiler::LocalInvocationId<'x'>,
- &ARBDecompiler::LocalInvocationId<'y'>,
- &ARBDecompiler::LocalInvocationId<'z'>,
- &ARBDecompiler::WorkGroupId<'x'>,
- &ARBDecompiler::WorkGroupId<'y'>,
- &ARBDecompiler::WorkGroupId<'z'>,
-
- &ARBDecompiler::Unary<TGBALLOT_U>,
- &ARBDecompiler::Unary<TGALL_U>,
- &ARBDecompiler::Unary<TGANY_U>,
- &ARBDecompiler::Unary<TGEQ_U>,
-
- &ARBDecompiler::ThreadId,
- &ARBDecompiler::ThreadMask<'e', 'q'>,
- &ARBDecompiler::ThreadMask<'g', 'e'>,
- &ARBDecompiler::ThreadMask<'g', 't'>,
- &ARBDecompiler::ThreadMask<'l', 'e'>,
- &ARBDecompiler::ThreadMask<'l', 't'>,
- &ARBDecompiler::ShuffleIndexed,
-
- &ARBDecompiler::Barrier,
- &ARBDecompiler::MemoryBarrierGroup,
- &ARBDecompiler::MemoryBarrierGlobal,
- };
-};
-
-ARBDecompiler::ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_,
- ShaderType stage_, std::string_view identifier)
- : device{device_}, ir{ir_}, registry{registry_}, stage{stage_} {
- DefineGlobalMemory();
-
- AddLine("TEMP RC;");
- AddLine("TEMP FSWZA[4];");
- AddLine("TEMP FSWZB[4];");
- if (ir.IsDecompiled()) {
- DecompileAST();
- } else {
- DecompileBranchMode();
- }
- AddLine("END");
-
- const std::string code = std::move(shader_source);
- DeclareHeader();
- DeclareVertex();
- DeclareGeometry();
- DeclareFragment();
- DeclareCompute();
- DeclareInputAttributes();
- DeclareOutputAttributes();
- DeclareLocalMemory();
- DeclareGlobalMemory();
- DeclareConstantBuffers();
- DeclareRegisters();
- DeclareTemporaries();
- DeclarePredicates();
- DeclareInternalFlags();
-
- shader_source += code;
-}
-
-std::string_view HeaderStageName(ShaderType stage) {
- switch (stage) {
- case ShaderType::Vertex:
- return "vp";
- case ShaderType::Geometry:
- return "gp";
- case ShaderType::Fragment:
- return "fp";
- case ShaderType::Compute:
- return "cp";
- default:
- UNREACHABLE();
- return "";
- }
-}
-
-void ARBDecompiler::DefineGlobalMemory() {
- u32 binding = 0;
- for (const auto& pair : ir.GetGlobalMemory()) {
- const GlobalMemoryBase base = pair.first;
- global_memory_names.emplace(base, binding);
- ++binding;
- }
-}
-
-void ARBDecompiler::DeclareHeader() {
- AddLine("!!NV{}5.0", HeaderStageName(stage));
- // Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D
- AddLine("OPTION NV_internal;");
- AddLine("OPTION NV_gpu_program_fp64;");
- AddLine("OPTION NV_shader_thread_group;");
- if (ir.UsesWarps() && device.HasWarpIntrinsics()) {
- AddLine("OPTION NV_shader_thread_shuffle;");
- }
- if (stage == ShaderType::Vertex) {
- if (device.HasNvViewportArray2()) {
- AddLine("OPTION NV_viewport_array2;");
- }
- }
- if (stage == ShaderType::Fragment) {
- AddLine("OPTION ARB_draw_buffers;");
- }
- if (device.HasImageLoadFormatted()) {
- AddLine("OPTION EXT_shader_image_load_formatted;");
- }
-}
-
-void ARBDecompiler::DeclareVertex() {
- if (stage != ShaderType::Vertex) {
- return;
- }
- AddLine("OUTPUT result_clip[] = {{ result.clip[0..7] }};");
-}
-
-void ARBDecompiler::DeclareGeometry() {
- if (stage != ShaderType::Geometry) {
- return;
- }
- const auto& info = registry.GetGraphicsInfo();
- const auto& header = ir.GetHeader();
- AddLine("PRIMITIVE_IN {};", PrimitiveDescription(info.primitive_topology));
- AddLine("PRIMITIVE_OUT {};", TopologyName(header.common3.output_topology));
- AddLine("VERTICES_OUT {};", header.common4.max_output_vertices.Value());
- AddLine("ATTRIB vertex_position = vertex.position;");
-}
-
-void ARBDecompiler::DeclareFragment() {
- if (stage != ShaderType::Fragment) {
- return;
- }
- AddLine("OUTPUT result_color7 = result.color[7];");
- AddLine("OUTPUT result_color6 = result.color[6];");
- AddLine("OUTPUT result_color5 = result.color[5];");
- AddLine("OUTPUT result_color4 = result.color[4];");
- AddLine("OUTPUT result_color3 = result.color[3];");
- AddLine("OUTPUT result_color2 = result.color[2];");
- AddLine("OUTPUT result_color1 = result.color[1];");
- AddLine("OUTPUT result_color0 = result.color;");
-}
-
-void ARBDecompiler::DeclareCompute() {
- if (stage != ShaderType::Compute) {
- return;
- }
- const ComputeInfo& info = registry.GetComputeInfo();
- AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1],
- info.workgroup_size[2]);
- if (info.shared_memory_size_in_words == 0) {
- return;
- }
- const u32 limit = device.GetMaxComputeSharedMemorySize();
- u32 size_in_bytes = info.shared_memory_size_in_words * 4;
- if (size_in_bytes > limit) {
- LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}",
- size_in_bytes, limit);
- size_in_bytes = limit;
- }
-
- AddLine("SHARED_MEMORY {};", size_in_bytes);
- AddLine("SHARED shared_mem[] = {{program.sharedmem}};");
-}
-
-void ARBDecompiler::DeclareInputAttributes() {
- if (stage == ShaderType::Compute) {
- return;
- }
- const std::string_view stage_name = StageInputName(stage);
- for (const auto attribute : ir.GetInputAttributes()) {
- if (!IsGenericAttribute(attribute)) {
- continue;
- }
- const u32 index = GetGenericAttributeIndex(attribute);
-
- std::string_view suffix;
- if (stage == ShaderType::Fragment) {
- const auto input_mode{ir.GetHeader().ps.GetPixelImap(index)};
- if (input_mode == PixelImap::Unused) {
- return;
- }
- suffix = GetInputFlags(input_mode);
- }
- AddLine("{}ATTRIB in_attr{}[] = {{ {}.attrib[{}..{}] }};", suffix, index, stage_name, index,
- index);
- }
-}
-
-void ARBDecompiler::DeclareOutputAttributes() {
- if (stage == ShaderType::Compute) {
- return;
- }
- for (const auto attribute : ir.GetOutputAttributes()) {
- if (!IsGenericAttribute(attribute)) {
- continue;
- }
- const u32 index = GetGenericAttributeIndex(attribute);
- AddLine("OUTPUT out_attr{}[] = {{ result.attrib[{}..{}] }};", index, index, index);
- }
-}
-
-void ARBDecompiler::DeclareLocalMemory() {
- u64 size = 0;
- if (stage == ShaderType::Compute) {
- size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL;
- } else {
- size = ir.GetHeader().GetLocalMemorySize();
- }
- if (size == 0) {
- return;
- }
- const u64 element_count = Common::AlignUp(size, 4) / 4;
- AddLine("TEMP lmem[{}];", element_count);
-}
-
-void ARBDecompiler::DeclareGlobalMemory() {
- const size_t num_entries = ir.GetGlobalMemory().size();
- if (num_entries > 0) {
- AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_entries, num_entries - 1);
- }
-}
-
-void ARBDecompiler::DeclareConstantBuffers() {
- u32 binding = 0;
- for (const auto& cbuf : ir.GetConstantBuffers()) {
- AddLine("CBUFFER cbuf{}[] = {{ program.buffer[{}] }};", cbuf.first, binding);
- ++binding;
- }
-}
-
-void ARBDecompiler::DeclareRegisters() {
- for (const u32 gpr : ir.GetRegisters()) {
- AddLine("TEMP R{};", gpr);
- }
-}
-
-void ARBDecompiler::DeclareTemporaries() {
- for (std::size_t i = 0; i < max_temporaries; ++i) {
- AddLine("TEMP T{};", i);
- }
- for (std::size_t i = 0; i < max_long_temporaries; ++i) {
- AddLine("LONG TEMP L{};", i);
- }
-}
-
-void ARBDecompiler::DeclarePredicates() {
- for (const Tegra::Shader::Pred pred : ir.GetPredicates()) {
- AddLine("TEMP P{};", static_cast<u64>(pred));
- }
-}
-
-void ARBDecompiler::DeclareInternalFlags() {
- for (const char* name : INTERNAL_FLAG_NAMES) {
- AddLine("TEMP {};", name);
- }
-}
-
-void ARBDecompiler::InitializeVariables() {
- AddLine("MOV.F32 FSWZA[0], -1;");
- AddLine("MOV.F32 FSWZA[1], 1;");
- AddLine("MOV.F32 FSWZA[2], -1;");
- AddLine("MOV.F32 FSWZA[3], 0;");
- AddLine("MOV.F32 FSWZB[0], -1;");
- AddLine("MOV.F32 FSWZB[1], -1;");
- AddLine("MOV.F32 FSWZB[2], 1;");
- AddLine("MOV.F32 FSWZB[3], -1;");
-
- if (stage == ShaderType::Vertex || stage == ShaderType::Geometry) {
- AddLine("MOV.F result.position, {{0, 0, 0, 1}};");
- }
- for (const auto attribute : ir.GetOutputAttributes()) {
- if (!IsGenericAttribute(attribute)) {
- continue;
- }
- const u32 index = GetGenericAttributeIndex(attribute);
- AddLine("MOV.F result.attrib[{}], {{0, 0, 0, 1}};", index);
- }
- for (const u32 gpr : ir.GetRegisters()) {
- AddLine("MOV.F R{}, {{0, 0, 0, 0}};", gpr);
- }
- for (const Tegra::Shader::Pred pred : ir.GetPredicates()) {
- AddLine("MOV.U P{}, {{0, 0, 0, 0}};", static_cast<u64>(pred));
- }
-}
-
-void ARBDecompiler::DecompileAST() {
- const u32 num_flow_variables = ir.GetASTNumVariables();
- for (u32 i = 0; i < num_flow_variables; ++i) {
- AddLine("TEMP F{};", i);
- }
- for (u32 i = 0; i < num_flow_variables; ++i) {
- AddLine("MOV.U F{}, {{0, 0, 0, 0}};", i);
- }
-
- InitializeVariables();
-
- VisitAST(ir.GetASTProgram());
-}
-
-void ARBDecompiler::DecompileBranchMode() {
- static constexpr u32 FLOW_STACK_SIZE = 20;
- if (!ir.IsFlowStackDisabled()) {
- AddLine("TEMP SSY[{}];", FLOW_STACK_SIZE);
- AddLine("TEMP PBK[{}];", FLOW_STACK_SIZE);
- AddLine("TEMP SSY_TOP;");
- AddLine("TEMP PBK_TOP;");
- }
-
- AddLine("TEMP PC;");
-
- if (!ir.IsFlowStackDisabled()) {
- AddLine("MOV.U SSY_TOP.x, 0;");
- AddLine("MOV.U PBK_TOP.x, 0;");
- }
-
- InitializeVariables();
-
- const auto basic_block_end = ir.GetBasicBlocks().end();
- auto basic_block_it = ir.GetBasicBlocks().begin();
- const u32 first_address = basic_block_it->first;
- AddLine("MOV.U PC.x, {};", first_address);
-
- AddLine("REP;");
-
- std::size_t num_blocks = 0;
- while (basic_block_it != basic_block_end) {
- const auto& [address, bb] = *basic_block_it;
- ++num_blocks;
-
- AddLine("SEQ.S.CC RC.x, PC.x, {};", address);
- AddLine("IF NE.x;");
-
- VisitBlock(bb);
-
- ++basic_block_it;
-
- if (basic_block_it != basic_block_end) {
- const auto op = std::get_if<OperationNode>(&*bb[bb.size() - 1]);
- if (!op || op->GetCode() != OperationCode::Branch) {
- const u32 next_address = basic_block_it->first;
- AddLine("MOV.U PC.x, {};", next_address);
- AddLine("CONT;");
- }
- }
-
- AddLine("ELSE;");
- }
- AddLine("RET;");
- while (num_blocks--) {
- AddLine("ENDIF;");
- }
-
- AddLine("ENDREP;");
-}
-
-void ARBDecompiler::VisitAST(const ASTNode& node) {
- if (const auto ast = std::get_if<ASTProgram>(&*node->GetInnerData())) {
- for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
- VisitAST(current);
- }
- } else if (const auto if_then = std::get_if<ASTIfThen>(&*node->GetInnerData())) {
- const std::string condition = VisitExpression(if_then->condition);
- ResetTemporaries();
-
- AddLine("MOVC.U RC.x, {};", condition);
- AddLine("IF NE.x;");
- for (ASTNode current = if_then->nodes.GetFirst(); current; current = current->GetNext()) {
- VisitAST(current);
- }
- AddLine("ENDIF;");
- } else if (const auto if_else = std::get_if<ASTIfElse>(&*node->GetInnerData())) {
- AddLine("ELSE;");
- for (ASTNode current = if_else->nodes.GetFirst(); current; current = current->GetNext()) {
- VisitAST(current);
- }
- } else if (const auto decoded = std::get_if<ASTBlockDecoded>(&*node->GetInnerData())) {
- VisitBlock(decoded->nodes);
- } else if (const auto var_set = std::get_if<ASTVarSet>(&*node->GetInnerData())) {
- AddLine("MOV.U F{}, {};", var_set->index, VisitExpression(var_set->condition));
- ResetTemporaries();
- } else if (const auto do_while = std::get_if<ASTDoWhile>(&*node->GetInnerData())) {
- const std::string condition = VisitExpression(do_while->condition);
- ResetTemporaries();
- AddLine("REP;");
- for (ASTNode current = do_while->nodes.GetFirst(); current; current = current->GetNext()) {
- VisitAST(current);
- }
- AddLine("MOVC.U RC.x, {};", condition);
- AddLine("BRK (NE.x);");
- AddLine("ENDREP;");
- } else if (const auto ast_return = std::get_if<ASTReturn>(&*node->GetInnerData())) {
- const bool is_true = ExprIsTrue(ast_return->condition);
- if (!is_true) {
- AddLine("MOVC.U RC.x, {};", VisitExpression(ast_return->condition));
- AddLine("IF NE.x;");
- ResetTemporaries();
- }
- if (ast_return->kills) {
- AddLine("KIL TR;");
- } else {
- Exit();
- }
- if (!is_true) {
- AddLine("ENDIF;");
- }
- } else if (const auto ast_break = std::get_if<ASTBreak>(&*node->GetInnerData())) {
- if (ExprIsTrue(ast_break->condition)) {
- AddLine("BRK;");
- } else {
- AddLine("MOVC.U RC.x, {};", VisitExpression(ast_break->condition));
- AddLine("BRK (NE.x);");
- ResetTemporaries();
- }
- } else if (std::holds_alternative<ASTLabel>(*node->GetInnerData())) {
- // Nothing to do
- } else {
- UNREACHABLE();
- }
-}
-
-std::string ARBDecompiler::VisitExpression(const Expr& node) {
- if (const auto expr = std::get_if<ExprAnd>(&*node)) {
- std::string result = AllocTemporary();
- AddLine("AND.U {}, {}, {};", result, VisitExpression(expr->operand1),
- VisitExpression(expr->operand2));
- return result;
- }
- if (const auto expr = std::get_if<ExprOr>(&*node)) {
- std::string result = AllocTemporary();
- AddLine("OR.U {}, {}, {};", result, VisitExpression(expr->operand1),
- VisitExpression(expr->operand2));
- return result;
- }
- if (const auto expr = std::get_if<ExprNot>(&*node)) {
- std::string result = AllocTemporary();
- AddLine("CMP.S {}, {}, 0, -1;", result, VisitExpression(expr->operand1));
- return result;
- }
- if (const auto expr = std::get_if<ExprPredicate>(&*node)) {
- return fmt::format("P{}.x", static_cast<u64>(expr->predicate));
- }
- if (const auto expr = std::get_if<ExprCondCode>(&*node)) {
- return Visit(ir.GetConditionCode(expr->cc));
- }
- if (const auto expr = std::get_if<ExprVar>(&*node)) {
- return fmt::format("F{}.x", expr->var_index);
- }
- if (const auto expr = std::get_if<ExprBoolean>(&*node)) {
- return expr->value ? "0xffffffff" : "0";
- }
- if (const auto expr = std::get_if<ExprGprEqual>(&*node)) {
- std::string result = AllocTemporary();
- AddLine("SEQ.U {}, R{}.x, {};", result, expr->gpr, expr->value);
- return result;
- }
- UNREACHABLE();
- return "0";
-}
-
-void ARBDecompiler::VisitBlock(const NodeBlock& bb) {
- for (const auto& node : bb) {
- Visit(node);
- }
-}
-
-std::string ARBDecompiler::Visit(const Node& node) {
- if (const auto operation = std::get_if<OperationNode>(&*node)) {
- if (const auto amend_index = operation->GetAmendIndex()) {
- Visit(ir.GetAmendNode(*amend_index));
- }
- const std::size_t index = static_cast<std::size_t>(operation->GetCode());
- if (index >= OPERATION_DECOMPILERS.size()) {
- UNREACHABLE_MSG("Out of bounds operation: {}", index);
- return {};
- }
- const auto decompiler = OPERATION_DECOMPILERS[index];
- if (decompiler == nullptr) {
- UNREACHABLE_MSG("Undefined operation: {}", index);
- return {};
- }
- return (this->*decompiler)(*operation);
- }
-
- if (const auto gpr = std::get_if<GprNode>(&*node)) {
- const u32 index = gpr->GetIndex();
- if (index == Register::ZeroIndex) {
- return "{0, 0, 0, 0}.x";
- }
- return fmt::format("R{}.x", index);
- }
-
- if (const auto cv = std::get_if<CustomVarNode>(&*node)) {
- return fmt::format("CV{}.x", cv->GetIndex());
- }
-
- if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
- std::string temporary = AllocTemporary();
- AddLine("MOV.U {}, {};", temporary, immediate->GetValue());
- return temporary;
- }
-
- if (const auto predicate = std::get_if<PredicateNode>(&*node)) {
- std::string temporary = AllocTemporary();
- switch (const auto index = predicate->GetIndex(); index) {
- case Tegra::Shader::Pred::UnusedIndex:
- AddLine("MOV.S {}, -1;", temporary);
- break;
- case Tegra::Shader::Pred::NeverExecute:
- AddLine("MOV.S {}, 0;", temporary);
- break;
- default:
- AddLine("MOV.S {}, P{}.x;", temporary, static_cast<u64>(index));
- break;
- }
- if (predicate->IsNegated()) {
- AddLine("CMP.S {}, {}, 0, -1;", temporary, temporary);
- }
- return temporary;
- }
-
- if (const auto abuf = std::get_if<AbufNode>(&*node)) {
- if (abuf->IsPhysicalBuffer()) {
- UNIMPLEMENTED_MSG("Physical buffers are not implemented");
- return "{0, 0, 0, 0}.x";
- }
-
- const Attribute::Index index = abuf->GetIndex();
- const u32 element = abuf->GetElement();
- const char swizzle = Swizzle(element);
- switch (index) {
- case Attribute::Index::Position: {
- if (stage == ShaderType::Geometry) {
- return fmt::format("{}_position[{}].{}", StageInputName(stage),
- Visit(abuf->GetBuffer()), swizzle);
- } else {
- return fmt::format("{}.position.{}", StageInputName(stage), swizzle);
- }
- }
- case Attribute::Index::TessCoordInstanceIDVertexID:
- ASSERT(stage == ShaderType::Vertex);
- switch (element) {
- case 2:
- return "vertex.instance";
- case 3:
- return "vertex.id";
- }
- UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element);
- break;
- case Attribute::Index::PointCoord:
- switch (element) {
- case 0:
- return "fragment.pointcoord.x";
- case 1:
- return "fragment.pointcoord.y";
- }
- UNIMPLEMENTED();
- break;
- case Attribute::Index::FrontFacing: {
- ASSERT(stage == ShaderType::Fragment);
- ASSERT(element == 3);
- const std::string temporary = AllocVectorTemporary();
- AddLine("SGT.S RC.x, fragment.facing, {{0, 0, 0, 0}};");
- AddLine("MOV.U.CC RC.x, -RC;");
- AddLine("MOV.S {}.x, 0;", temporary);
- AddLine("MOV.S {}.x (NE.x), -1;", temporary);
- return fmt::format("{}.x", temporary);
- }
- default:
- if (IsGenericAttribute(index)) {
- if (stage == ShaderType::Geometry) {
- return fmt::format("in_attr{}[{}][0].{}", GetGenericAttributeIndex(index),
- Visit(abuf->GetBuffer()), swizzle);
- } else {
- return fmt::format("{}.attrib[{}].{}", StageInputName(stage),
- GetGenericAttributeIndex(index), swizzle);
- }
- }
- UNIMPLEMENTED_MSG("Unimplemented input attribute={}", index);
- break;
- }
- return "{0, 0, 0, 0}.x";
- }
-
- if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
- std::string offset_string;
- const auto& offset = cbuf->GetOffset();
- if (const auto imm = std::get_if<ImmediateNode>(&*offset)) {
- offset_string = std::to_string(imm->GetValue());
- } else {
- offset_string = Visit(offset);
- }
- std::string temporary = AllocTemporary();
- AddLine("LDC.F32 {}, cbuf{}[{}];", temporary, cbuf->GetIndex(), offset_string);
- return temporary;
- }
-
- if (const auto gmem = std::get_if<GmemNode>(&*node)) {
- std::string temporary = AllocTemporary();
- AddLine("MOV {}, 0;", temporary);
- AddLine("LOAD.U32 {} (NE.x), {};", temporary, GlobalMemoryPointer(*gmem));
- return temporary;
- }
-
- if (const auto lmem = std::get_if<LmemNode>(&*node)) {
- std::string temporary = Visit(lmem->GetAddress());
- AddLine("SHR.U {}, {}, 2;", temporary, temporary);
- AddLine("MOV.U {}, lmem[{}].x;", temporary, temporary);
- return temporary;
- }
-
- if (const auto smem = std::get_if<SmemNode>(&*node)) {
- std::string temporary = Visit(smem->GetAddress());
- AddLine("LDS.U32 {}, shared_mem[{}];", temporary, temporary);
- return temporary;
- }
-
- if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
- const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag());
- return fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]);
- }
-
- if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
- if (const auto amend_index = conditional->GetAmendIndex()) {
- Visit(ir.GetAmendNode(*amend_index));
- }
- AddLine("MOVC.U RC.x, {};", Visit(conditional->GetCondition()));
- AddLine("IF NE.x;");
- VisitBlock(conditional->GetCode());
- AddLine("ENDIF;");
- return {};
- }
-
- if ([[maybe_unused]] const auto cmt = std::get_if<CommentNode>(&*node)) {
- // Uncommenting this will generate invalid code. GLASM lacks comments.
- // AddLine("// {}", cmt->GetText());
- return {};
- }
-
- UNIMPLEMENTED();
- return {};
-}
-
-std::tuple<std::string, std::string, std::size_t> ARBDecompiler::BuildCoords(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- UNIMPLEMENTED_IF(meta.sampler.is_indexed);
-
- const bool is_extended = meta.sampler.is_shadow && meta.sampler.is_array &&
- meta.sampler.type == Tegra::Shader::TextureType::TextureCube;
- const std::size_t count = operation.GetOperandsCount();
- std::string temporary = AllocVectorTemporary();
- std::size_t i = 0;
- for (; i < count; ++i) {
- AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
- }
- if (meta.sampler.is_array) {
- AddLine("I2F.S {}.{}, {};", temporary, Swizzle(i), Visit(meta.array));
- ++i;
- }
- if (meta.sampler.is_shadow) {
- std::string compare = Visit(meta.depth_compare);
- if (is_extended) {
- ASSERT(i == 4);
- std::string extra_coord = AllocVectorTemporary();
- AddLine("MOV.F {}.x, {};", extra_coord, compare);
- return {fmt::format("{}, {}", temporary, extra_coord), extra_coord, 0};
- }
- AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), compare);
- ++i;
- }
- return {temporary, temporary, i};
-}
-
-std::string ARBDecompiler::BuildAoffi(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- if (meta.aoffi.empty()) {
- return {};
- }
- const std::string temporary = AllocVectorTemporary();
- std::size_t i = 0;
- for (auto& node : meta.aoffi) {
- AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i++), Visit(node));
- }
- return fmt::format(", offset({})", temporary);
-}
-
-std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) {
- // Read a bindless SSBO, return its address and set CC accordingly
- // address = c[binding].xy
- // length = c[binding].z
- const u32 binding = global_memory_names.at(gmem.GetDescriptor());
-
- const std::string pointer = AllocLongVectorTemporary();
- std::string temporary = AllocTemporary();
-
- AddLine("PK64.U {}, c[{}];", pointer, binding);
- AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()),
- Visit(gmem.GetBaseAddress()));
- AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary);
- AddLine("ADD.U64 {}.x, {}.x, {}.z;", pointer, pointer, pointer);
- // Compare offset to length and set CC
- AddLine("SLT.U.CC RC.x, {}, c[{}].z;", temporary, binding);
- return fmt::format("{}.x", pointer);
-}
-
-void ARBDecompiler::Exit() {
- if (stage != ShaderType::Fragment) {
- AddLine("RET;");
- return;
- }
-
- const auto safe_get_register = [this](u32 reg) -> std::string {
- if (ir.GetRegisters().contains(reg)) {
- return fmt::format("R{}.x", reg);
- }
- return "{0, 0, 0, 0}.x";
- };
-
- const auto& header = ir.GetHeader();
- u32 current_reg = 0;
- for (u32 rt = 0; rt < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; ++rt) {
- for (u32 component = 0; component < 4; ++component) {
- if (!header.ps.IsColorComponentOutputEnabled(rt, component)) {
- continue;
- }
- AddLine("MOV.F result_color{}.{}, {};", rt, Swizzle(component),
- safe_get_register(current_reg));
- ++current_reg;
- }
- }
- if (header.ps.omap.depth) {
- AddLine("MOV.F result.depth.z, {};", safe_get_register(current_reg + 1));
- }
-
- AddLine("RET;");
-}
-
-std::string ARBDecompiler::Assign(Operation operation) {
- const Node& dest = operation[0];
- const Node& src = operation[1];
-
- std::string dest_name;
- if (const auto gpr = std::get_if<GprNode>(&*dest)) {
- if (gpr->GetIndex() == Register::ZeroIndex) {
- // Writing to Register::ZeroIndex is a no op
- return {};
- }
- dest_name = fmt::format("R{}.x", gpr->GetIndex());
- } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) {
- const u32 element = abuf->GetElement();
- const char swizzle = Swizzle(element);
- switch (const Attribute::Index index = abuf->GetIndex()) {
- case Attribute::Index::Position:
- dest_name = fmt::format("result.position.{}", swizzle);
- break;
- case Attribute::Index::LayerViewportPointSize:
- switch (element) {
- case 0:
- UNIMPLEMENTED();
- return {};
- case 1:
- case 2:
- if (!device.HasNvViewportArray2()) {
- LOG_ERROR(
- Render_OpenGL,
- "NV_viewport_array2 is missing. Maxwell gen 2 or better is required.");
- return {};
- }
- dest_name = element == 1 ? "result.layer.x" : "result.viewport.x";
- break;
- case 3:
- dest_name = "result.pointsize.x";
- break;
- }
- break;
- case Attribute::Index::ClipDistances0123:
- dest_name = fmt::format("result.clip[{}].x", element);
- break;
- case Attribute::Index::ClipDistances4567:
- dest_name = fmt::format("result.clip[{}].x", element + 4);
- break;
- default:
- if (!IsGenericAttribute(index)) {
- UNREACHABLE();
- return {};
- }
- dest_name =
- fmt::format("result.attrib[{}].{}", GetGenericAttributeIndex(index), swizzle);
- break;
- }
- } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
- const std::string address = Visit(lmem->GetAddress());
- AddLine("SHR.U {}, {}, 2;", address, address);
- dest_name = fmt::format("lmem[{}].x", address);
- } else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
- AddLine("STS.U32 {}, shared_mem[{}];", Visit(src), Visit(smem->GetAddress()));
- ResetTemporaries();
- return {};
- } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
- AddLine("IF NE.x;");
- AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem));
- AddLine("ENDIF;");
- ResetTemporaries();
- return {};
- } else {
- UNREACHABLE();
- ResetTemporaries();
- return {};
- }
-
- AddLine("MOV.U {}, {};", dest_name, Visit(src));
- ResetTemporaries();
- return {};
-}
-
-std::string ARBDecompiler::Select(Operation operation) {
- std::string temporary = AllocTemporary();
- AddLine("CMP.S {}, {}, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]),
- Visit(operation[2]));
- return temporary;
-}
-
-std::string ARBDecompiler::FClamp(Operation operation) {
- // 1.0f in hex, replace with std::bit_cast on C++20
- static constexpr u32 POSITIVE_ONE = 0x3f800000;
-
- std::string temporary = AllocTemporary();
- const Node& value = operation[0];
- const Node& low = operation[1];
- const Node& high = operation[2];
- const auto* const imm_low = std::get_if<ImmediateNode>(&*low);
- const auto* const imm_high = std::get_if<ImmediateNode>(&*high);
- if (imm_low && imm_high && imm_low->GetValue() == 0 && imm_high->GetValue() == POSITIVE_ONE) {
- AddLine("MOV.F32.SAT {}, {};", temporary, Visit(value));
- } else {
- AddLine("MIN.F {}, {}, {};", temporary, Visit(value), Visit(high));
- AddLine("MAX.F {}, {}, {};", temporary, temporary, Visit(low));
- }
- return temporary;
-}
-
-std::string ARBDecompiler::FCastHalf0(Operation operation) {
- const std::string temporary = AllocVectorTemporary();
- AddLine("UP2H.F {}.x, {};", temporary, Visit(operation[0]));
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::FCastHalf1(Operation operation) {
- const std::string temporary = AllocVectorTemporary();
- AddLine("UP2H.F {}.y, {};", temporary, Visit(operation[0]));
- AddLine("MOV {}.x, {}.y;", temporary, temporary);
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::FSqrt(Operation operation) {
- std::string temporary = AllocTemporary();
- AddLine("RSQ.F32 {}, {};", temporary, Visit(operation[0]));
- AddLine("RCP.F32 {}, {};", temporary, temporary);
- return temporary;
-}
-
-std::string ARBDecompiler::FSwizzleAdd(Operation operation) {
- const std::string temporary = AllocVectorTemporary();
- if (!device.HasWarpIntrinsics()) {
- LOG_ERROR(Render_OpenGL,
- "NV_shader_thread_shuffle is missing. Kepler or better is required.");
- AddLine("ADD.F {}.x, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]));
- return fmt::format("{}.x", temporary);
- }
-
- AddLine("AND.U {}.z, {}.threadid, 3;", temporary, StageInputName(stage));
- AddLine("SHL.U {}.z, {}.z, 1;", temporary, temporary);
- AddLine("SHR.U {}.z, {}, {}.z;", temporary, Visit(operation[2]), temporary);
- AddLine("AND.U {}.z, {}.z, 3;", temporary, temporary);
- AddLine("MUL.F32 {}.x, {}, FSWZA[{}.z];", temporary, Visit(operation[0]), temporary);
- AddLine("MUL.F32 {}.y, {}, FSWZB[{}.z];", temporary, Visit(operation[1]), temporary);
- AddLine("ADD.F32 {}.x, {}.x, {}.y;", temporary, temporary, temporary);
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::HAdd2(Operation operation) {
- const std::string tmp1 = AllocVectorTemporary();
- const std::string tmp2 = AllocVectorTemporary();
- AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
- AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1]));
- AddLine("ADD.F16 {}, {}, {};", tmp1, tmp1, tmp2);
- AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
- return fmt::format("{}.x", tmp1);
-}
-
-std::string ARBDecompiler::HMul2(Operation operation) {
- const std::string tmp1 = AllocVectorTemporary();
- const std::string tmp2 = AllocVectorTemporary();
- AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
- AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1]));
- AddLine("MUL.F16 {}, {}, {};", tmp1, tmp1, tmp2);
- AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
- return fmt::format("{}.x", tmp1);
-}
-
-std::string ARBDecompiler::HFma2(Operation operation) {
- const std::string tmp1 = AllocVectorTemporary();
- const std::string tmp2 = AllocVectorTemporary();
- const std::string tmp3 = AllocVectorTemporary();
- AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
- AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1]));
- AddLine("UP2H.F {}.xy, {};", tmp3, Visit(operation[2]));
- AddLine("MAD.F16 {}, {}, {}, {};", tmp1, tmp1, tmp2, tmp3);
- AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
- return fmt::format("{}.x", tmp1);
-}
-
-std::string ARBDecompiler::HAbsolute(Operation operation) {
- const std::string temporary = AllocVectorTemporary();
- AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
- AddLine("PK2H.F {}.x, |{}|;", temporary, temporary);
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::HNegate(Operation operation) {
- const std::string temporary = AllocVectorTemporary();
- AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
- AddLine("MOVC.S RC.x, {};", Visit(operation[1]));
- AddLine("MOV.F {}.x (NE.x), -{}.x;", temporary, temporary);
- AddLine("MOVC.S RC.x, {};", Visit(operation[2]));
- AddLine("MOV.F {}.y (NE.x), -{}.y;", temporary, temporary);
- AddLine("PK2H.F {}.x, {};", temporary, temporary);
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::HClamp(Operation operation) {
- const std::string tmp1 = AllocVectorTemporary();
- const std::string tmp2 = AllocVectorTemporary();
- AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
- AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[1]));
- AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2);
- AddLine("MAX.F {}, {}, {};", tmp1, tmp1, tmp2);
- AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[2]));
- AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2);
- AddLine("MIN.F {}, {}, {};", tmp1, tmp1, tmp2);
- AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
- return fmt::format("{}.x", tmp1);
-}
-
-std::string ARBDecompiler::HCastFloat(Operation operation) {
- const std::string temporary = AllocVectorTemporary();
- AddLine("MOV.F {}.y, {{0, 0, 0, 0}};", temporary);
- AddLine("MOV.F {}.x, {};", temporary, Visit(operation[0]));
- AddLine("PK2H.F {}.x, {};", temporary, temporary);
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::HUnpack(Operation operation) {
- std::string operand = Visit(operation[0]);
- switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) {
- case Tegra::Shader::HalfType::H0_H1:
- return operand;
- case Tegra::Shader::HalfType::F32: {
- const std::string temporary = AllocVectorTemporary();
- AddLine("MOV.U {}.x, {};", temporary, operand);
- AddLine("MOV.U {}.y, {}.x;", temporary, temporary);
- AddLine("PK2H.F {}.x, {};", temporary, temporary);
- return fmt::format("{}.x", temporary);
- }
- case Tegra::Shader::HalfType::H0_H0: {
- const std::string temporary = AllocVectorTemporary();
- AddLine("UP2H.F {}.xy, {};", temporary, operand);
- AddLine("MOV.U {}.y, {}.x;", temporary, temporary);
- AddLine("PK2H.F {}.x, {};", temporary, temporary);
- return fmt::format("{}.x", temporary);
- }
- case Tegra::Shader::HalfType::H1_H1: {
- const std::string temporary = AllocVectorTemporary();
- AddLine("UP2H.F {}.xy, {};", temporary, operand);
- AddLine("MOV.U {}.x, {}.y;", temporary, temporary);
- AddLine("PK2H.F {}.x, {};", temporary, temporary);
- return fmt::format("{}.x", temporary);
- }
- }
- UNREACHABLE();
- return "{0, 0, 0, 0}.x";
-}
-
-std::string ARBDecompiler::HMergeF32(Operation operation) {
- const std::string temporary = AllocVectorTemporary();
- AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::HMergeH0(Operation operation) {
- const std::string temporary = AllocVectorTemporary();
- AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
- AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1]));
- AddLine("MOV.U {}.x, {}.z;", temporary, temporary);
- AddLine("PK2H.F {}.x, {};", temporary, temporary);
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::HMergeH1(Operation operation) {
- const std::string temporary = AllocVectorTemporary();
- AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
- AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1]));
- AddLine("MOV.U {}.y, {}.w;", temporary, temporary);
- AddLine("PK2H.F {}.x, {};", temporary, temporary);
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::HPack2(Operation operation) {
- const std::string temporary = AllocVectorTemporary();
- AddLine("MOV.U {}.x, {};", temporary, Visit(operation[0]));
- AddLine("MOV.U {}.y, {};", temporary, Visit(operation[1]));
- AddLine("PK2H.F {}.x, {};", temporary, temporary);
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::LogicalAssign(Operation operation) {
- const Node& dest = operation[0];
- const Node& src = operation[1];
-
- std::string target;
-
- if (const auto pred = std::get_if<PredicateNode>(&*dest)) {
- ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment");
-
- const Tegra::Shader::Pred index = pred->GetIndex();
- switch (index) {
- case Tegra::Shader::Pred::NeverExecute:
- case Tegra::Shader::Pred::UnusedIndex:
- // Writing to these predicates is a no-op
- return {};
- }
- target = fmt::format("P{}.x", static_cast<u64>(index));
- } else if (const auto internal_flag = std::get_if<InternalFlagNode>(&*dest)) {
- const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag());
- target = fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]);
- } else {
- UNREACHABLE();
- ResetTemporaries();
- return {};
- }
-
- AddLine("MOV.U {}, {};", target, Visit(src));
- ResetTemporaries();
- return {};
-}
-
-std::string ARBDecompiler::LogicalPick2(Operation operation) {
- std::string temporary = AllocTemporary();
- const u32 index = std::get<ImmediateNode>(*operation[1]).GetValue();
- AddLine("MOV.U {}, {}.{};", temporary, Visit(operation[0]), Swizzle(index));
- return temporary;
-}
-
-std::string ARBDecompiler::LogicalAnd2(Operation operation) {
- std::string temporary = AllocTemporary();
- const std::string op = Visit(operation[0]);
- AddLine("AND.U {}, {}.x, {}.y;", temporary, op, op);
- return temporary;
-}
-
-std::string ARBDecompiler::FloatOrdered(Operation operation) {
- std::string temporary = AllocTemporary();
- AddLine("MOVC.F32 RC.x, {};", Visit(operation[0]));
- AddLine("MOVC.F32 RC.y, {};", Visit(operation[1]));
- AddLine("MOV.S {}, -1;", temporary);
- AddLine("MOV.S {} (NAN.x), 0;", temporary);
- AddLine("MOV.S {} (NAN.y), 0;", temporary);
- return temporary;
-}
-
-std::string ARBDecompiler::FloatUnordered(Operation operation) {
- std::string temporary = AllocTemporary();
- AddLine("MOVC.F32 RC.x, {};", Visit(operation[0]));
- AddLine("MOVC.F32 RC.y, {};", Visit(operation[1]));
- AddLine("MOV.S {}, 0;", temporary);
- AddLine("MOV.S {} (NAN.x), -1;", temporary);
- AddLine("MOV.S {} (NAN.y), -1;", temporary);
- return temporary;
-}
-
-std::string ARBDecompiler::LogicalAddCarry(Operation operation) {
- std::string temporary = AllocTemporary();
- AddLine("ADDC.U RC, {}, {};", Visit(operation[0]), Visit(operation[1]));
- AddLine("MOV.S {}, 0;", temporary);
- AddLine("IF CF.x;");
- AddLine("MOV.S {}, -1;", temporary);
- AddLine("ENDIF;");
- return temporary;
-}
-
-std::string ARBDecompiler::Texture(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
- const auto [coords, temporary, swizzle] = BuildCoords(operation);
-
- std::string_view opcode = "TEX";
- std::string extra;
- if (meta.bias) {
- ASSERT(!meta.lod);
- opcode = "TXB";
-
- if (swizzle < 4) {
- AddLine("MOV.F {}.w, {};", temporary, Visit(meta.bias));
- } else {
- const std::string bias = AllocTemporary();
- AddLine("MOV.F {}, {};", bias, Visit(meta.bias));
- extra = fmt::format(" {},", bias);
- }
- }
- if (meta.lod) {
- ASSERT(!meta.bias);
- opcode = "TXL";
-
- if (swizzle < 4) {
- AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod));
- } else {
- const std::string lod = AllocTemporary();
- AddLine("MOV.F {}, {};", lod, Visit(meta.lod));
- extra = fmt::format(" {},", lod);
- }
- }
-
- AddLine("{}.F {}, {},{} texture[{}], {}{};", opcode, temporary, coords, extra, sampler_id,
- TextureType(meta), BuildAoffi(operation));
- AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::TextureGather(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
- const auto [coords, temporary, swizzle] = BuildCoords(operation);
-
- std::string comp;
- if (!meta.sampler.is_shadow) {
- const auto& immediate = std::get<ImmediateNode>(*meta.component);
- comp = fmt::format(".{}", Swizzle(immediate.GetValue()));
- }
-
- AddLine("TXG.F {}, {}, texture[{}]{}, {}{};", temporary, temporary, sampler_id, comp,
- TextureType(meta), BuildAoffi(operation));
- AddLine("MOV.U {}.x, {}.{};", temporary, coords, Swizzle(meta.element));
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::TextureQueryDimensions(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- const std::string temporary = AllocVectorTemporary();
- const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
-
- ASSERT(!meta.sampler.is_array);
-
- const std::string lod = operation.GetOperandsCount() > 0 ? Visit(operation[0]) : "0";
- AddLine("TXQ {}, {}, texture[{}], {};", temporary, lod, sampler_id, TextureType(meta));
- AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::TextureQueryLod(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- const std::string temporary = AllocVectorTemporary();
- const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
-
- ASSERT(!meta.sampler.is_array);
-
- const std::size_t count = operation.GetOperandsCount();
- for (std::size_t i = 0; i < count; ++i) {
- AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
- }
- AddLine("LOD.F {}, {}, texture[{}], {};", temporary, temporary, sampler_id, TextureType(meta));
- AddLine("MUL.F32 {}, {}, {{256, 256, 0, 0}};", temporary, temporary);
- AddLine("TRUNC.S {}, {};", temporary, temporary);
- AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::TexelFetch(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
- const auto [coords, temporary, swizzle] = BuildCoords(operation);
-
- if (!meta.sampler.is_buffer) {
- ASSERT(swizzle < 4);
- AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod));
- }
- AddLine("TXF.F {}, {}, texture[{}], {}{};", temporary, coords, sampler_id, TextureType(meta),
- BuildAoffi(operation));
- AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::TextureGradient(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
- const std::string ddx = AllocVectorTemporary();
- const std::string ddy = AllocVectorTemporary();
- const std::string coord = std::get<1>(BuildCoords(operation));
-
- const std::size_t num_components = meta.derivates.size() / 2;
- for (std::size_t index = 0; index < num_components; ++index) {
- const char swizzle = Swizzle(index);
- AddLine("MOV.F {}.{}, {};", ddx, swizzle, Visit(meta.derivates[index * 2]));
- AddLine("MOV.F {}.{}, {};", ddy, swizzle, Visit(meta.derivates[index * 2 + 1]));
- }
-
- const std::string_view result = coord;
- AddLine("TXD.F {}, {}, {}, {}, texture[{}], {}{};", result, coord, ddx, ddy, sampler_id,
- TextureType(meta), BuildAoffi(operation));
- AddLine("MOV.F {}.x, {}.{};", result, result, Swizzle(meta.element));
- return fmt::format("{}.x", result);
-}
-
-std::string ARBDecompiler::ImageLoad(Operation operation) {
- const auto& meta = std::get<MetaImage>(operation.GetMeta());
- const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
- const std::size_t count = operation.GetOperandsCount();
- const std::string_view type = ImageType(meta.image.type);
-
- const std::string temporary = AllocVectorTemporary();
- for (std::size_t i = 0; i < count; ++i) {
- AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
- }
- AddLine("LOADIM.F {}, {}, image[{}], {};", temporary, temporary, image_id, type);
- AddLine("MOV.F {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::ImageStore(Operation operation) {
- const auto& meta = std::get<MetaImage>(operation.GetMeta());
- const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
- const std::size_t num_coords = operation.GetOperandsCount();
- const std::size_t num_values = meta.values.size();
- const std::string_view type = ImageType(meta.image.type);
-
- const std::string coord = AllocVectorTemporary();
- const std::string value = AllocVectorTemporary();
- for (std::size_t i = 0; i < num_coords; ++i) {
- AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i]));
- }
- for (std::size_t i = 0; i < num_values; ++i) {
- AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i]));
- }
- AddLine("STOREIM.F image[{}], {}, {}, {};", image_id, value, coord, type);
- return {};
-}
-
-std::string ARBDecompiler::Branch(Operation operation) {
- const auto target = std::get<ImmediateNode>(*operation[0]);
- AddLine("MOV.U PC.x, {};", target.GetValue());
- AddLine("CONT;");
- return {};
-}
-
-std::string ARBDecompiler::BranchIndirect(Operation operation) {
- AddLine("MOV.U PC.x, {};", Visit(operation[0]));
- AddLine("CONT;");
- return {};
-}
-
-std::string ARBDecompiler::PushFlowStack(Operation operation) {
- const auto stack = std::get<MetaStackClass>(operation.GetMeta());
- const u32 target = std::get<ImmediateNode>(*operation[0]).GetValue();
- const std::string_view stack_name = StackName(stack);
- AddLine("MOV.U {}[{}_TOP.x].x, {};", stack_name, stack_name, target);
- AddLine("ADD.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name);
- return {};
-}
-
-std::string ARBDecompiler::PopFlowStack(Operation operation) {
- const auto stack = std::get<MetaStackClass>(operation.GetMeta());
- const std::string_view stack_name = StackName(stack);
- AddLine("SUB.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name);
- AddLine("MOV.U PC.x, {}[{}_TOP.x].x;", stack_name, stack_name);
- AddLine("CONT;");
- return {};
-}
-
-std::string ARBDecompiler::Exit(Operation) {
- Exit();
- return {};
-}
-
-std::string ARBDecompiler::Discard(Operation) {
- AddLine("KIL TR;");
- return {};
-}
-
-std::string ARBDecompiler::EmitVertex(Operation) {
- AddLine("EMIT;");
- return {};
-}
-
-std::string ARBDecompiler::EndPrimitive(Operation) {
- AddLine("ENDPRIM;");
- return {};
-}
-
-std::string ARBDecompiler::InvocationId(Operation) {
- return "primitive.invocation";
-}
-
-std::string ARBDecompiler::YNegate(Operation) {
- LOG_WARNING(Render_OpenGL, "(STUBBED)");
- std::string temporary = AllocTemporary();
- AddLine("MOV.F {}, 1;", temporary);
- return temporary;
-}
-
-std::string ARBDecompiler::ThreadId(Operation) {
- return fmt::format("{}.threadid", StageInputName(stage));
-}
-
-std::string ARBDecompiler::ShuffleIndexed(Operation operation) {
- if (!device.HasWarpIntrinsics()) {
- LOG_ERROR(Render_OpenGL,
- "NV_shader_thread_shuffle is missing. Kepler or better is required.");
- return Visit(operation[0]);
- }
- const std::string temporary = AllocVectorTemporary();
- AddLine("SHFIDX.U {}, {}, {}, {{31, 0, 0, 0}};", temporary, Visit(operation[0]),
- Visit(operation[1]));
- AddLine("MOV.U {}.x, {}.y;", temporary, temporary);
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::Barrier(Operation) {
- AddLine("BAR;");
- return {};
-}
-
-std::string ARBDecompiler::MemoryBarrierGroup(Operation) {
- AddLine("MEMBAR.CTA;");
- return {};
-}
-
-std::string ARBDecompiler::MemoryBarrierGlobal(Operation) {
- AddLine("MEMBAR;");
- return {};
-}
-
-} // Anonymous namespace
-
-std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
- const VideoCommon::Shader::Registry& registry,
- Tegra::Engines::ShaderType stage, std::string_view identifier) {
- return ARBDecompiler(device, ir, registry, stage, identifier).Code();
-}
-
-} // namespace OpenGL
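
The removed decompiler above walks a std::variant-based expression tree with chained std::get_if tests and emits one line of NV assembly per node. A minimal standalone sketch of that dispatch pattern follows; the node types, the ExprData alias, and the fixed T0 temporary (standing in for AllocTemporary) are illustrative, not yuzu's API.

#include <cstdio>
#include <memory>
#include <string>
#include <variant>

struct ExprBoolean;
struct ExprVar;
struct ExprNot;
using ExprData = std::variant<ExprBoolean, ExprVar, ExprNot>;
using Expr = std::shared_ptr<ExprData>; // nodes are heap-allocated and may be shared

struct ExprBoolean { bool value; };
struct ExprVar { unsigned var_index; };
struct ExprNot { Expr operand; };

std::string VisitExpression(const Expr& node) {
    if (const auto* expr = std::get_if<ExprBoolean>(&*node)) {
        return expr->value ? "0xffffffff" : "0";
    }
    if (const auto* expr = std::get_if<ExprVar>(&*node)) {
        return "F" + std::to_string(expr->var_index) + ".x";
    }
    if (const auto* expr = std::get_if<ExprNot>(&*node)) {
        // Invert the operand with a compare, mirroring the CMP.S line emitted above.
        const std::string inner = VisitExpression(expr->operand);
        std::printf("CMP.S T0, %s, 0, -1;\n", inner.c_str());
        return "T0";
    }
    return "0"; // unreachable for well-formed trees
}

int main() {
    const Expr var = std::make_shared<ExprData>(ExprVar{3});
    const Expr tree = std::make_shared<ExprData>(ExprNot{var});
    std::printf("result = %s\n", VisitExpression(tree).c_str());
}
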
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.h b/src/video_core/renderer_opengl/gl_arb_decompiler.h
deleted file mode 100644
index 6afc87220..000000000
--- a/src/video_core/renderer_opengl/gl_arb_decompiler.h
+++ /dev/null
@@ -1,29 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <string>
-#include <string_view>
-
-#include "common/common_types.h"
-
-namespace Tegra::Engines {
-enum class ShaderType : u32;
-}
-
-namespace VideoCommon::Shader {
-class ShaderIR;
-class Registry;
-} // namespace VideoCommon::Shader
-
-namespace OpenGL {
-
-class Device;
-
-std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
- const VideoCommon::Shader::Registry& registry,
- Tegra::Engines::ShaderType stage, std::string_view identifier);
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index c225d1fc9..07a995f7d 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -2,14 +2,18 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <algorithm>
#include <span>
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_device.h"
+#include "video_core/renderer_opengl/maxwell_to_gl.h"
namespace OpenGL {
namespace {
+using VideoCore::Surface::PixelFormat;
+
struct BindlessSSBO {
GLuint64EXT address;
GLsizei length;
@@ -21,6 +25,25 @@ constexpr std::array PROGRAM_LUT{
GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
};
+
+[[nodiscard]] GLenum GetTextureBufferFormat(GLenum gl_format) {
+ switch (gl_format) {
+ case GL_RGBA8_SNORM:
+ return GL_RGBA8;
+ case GL_R8_SNORM:
+ return GL_R8;
+ case GL_RGBA16_SNORM:
+ return GL_RGBA16;
+ case GL_R16_SNORM:
+ return GL_R16;
+ case GL_RG16_SNORM:
+ return GL_RG16;
+ case GL_RG8_SNORM:
+ return GL_RG8;
+ default:
+ return gl_format;
+ }
+}
} // Anonymous namespace
Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
@@ -62,6 +85,30 @@ void Buffer::MakeResident(GLenum access) noexcept {
glMakeNamedBufferResidentNV(buffer.handle, access);
}
+GLuint Buffer::View(u32 offset, u32 size, PixelFormat format) {
+ const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) {
+ return offset == view.offset && size == view.size && format == view.format;
+ })};
+ if (it != views.end()) {
+ return it->texture.handle;
+ }
+ OGLTexture texture;
+ texture.Create(GL_TEXTURE_BUFFER);
+ const GLenum gl_format{MaxwellToGL::GetFormatTuple(format).internal_format};
+ const GLenum texture_format{GetTextureBufferFormat(gl_format)};
+ if (texture_format != gl_format) {
+ LOG_WARNING(Render_OpenGL, "Emulating SNORM texture buffer with UNORM.");
+ }
+ glTextureBufferRange(texture.handle, texture_format, buffer.handle, offset, size);
+ views.push_back({
+ .offset = offset,
+ .size = size,
+ .format = format,
+ .texture = std::move(texture),
+ });
+ return views.back().texture.handle;
+}
+
BufferCacheRuntime::BufferCacheRuntime(const Device& device_)
: device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()},
use_assembly_shaders{device.UseAssemblyShaders()},
@@ -98,6 +145,12 @@ void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
}
}
+void BufferCacheRuntime::ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value) {
+ glClearNamedBufferSubData(dest_buffer.Handle(), GL_R32UI, static_cast<GLintptr>(offset),
+ static_cast<GLsizeiptr>(size / sizeof(u32)), GL_RED, GL_UNSIGNED_INT,
+ &value);
+}
+
void BufferCacheRuntime::BindIndexBuffer(Buffer& buffer, u32 offset, u32 size) {
if (has_unified_vertex_buffers) {
buffer.MakeResident(GL_READ_ONLY);
@@ -138,7 +191,7 @@ void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buff
glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0,
static_cast<GLsizeiptr>(size));
} else {
- const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
+ const GLuint base_binding = graphics_base_uniform_bindings[stage];
const GLuint binding = base_binding + binding_index;
glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer.Handle(),
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
@@ -165,7 +218,12 @@ void BufferCacheRuntime::BindComputeUniformBuffer(u32 binding_index, Buffer& buf
void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer,
u32 offset, u32 size, bool is_written) {
- if (use_assembly_shaders) {
+ if (use_storage_buffers) {
+ const GLuint base_binding = graphics_base_storage_bindings[stage];
+ const GLuint binding = base_binding + binding_index;
+ glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(),
+ static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
+ } else {
const BindlessSSBO ssbo{
.address = buffer.HostGpuAddr() + offset,
.length = static_cast<GLsizei>(size),
@@ -174,17 +232,19 @@ void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buff
buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1,
reinterpret_cast<const GLuint*>(&ssbo));
- } else {
- const GLuint base_binding = device.GetBaseBindings(stage).shader_storage_buffer;
- const GLuint binding = base_binding + binding_index;
- glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(),
- static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
}
}
void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset,
u32 size, bool is_written) {
- if (use_assembly_shaders) {
+ if (use_storage_buffers) {
+ if (size != 0) {
+ glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(),
+ static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
+ } else {
+ glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0);
+ }
+ } else {
const BindlessSSBO ssbo{
.address = buffer.HostGpuAddr() + offset,
.length = static_cast<GLsizei>(size),
@@ -193,11 +253,6 @@ void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buf
buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1,
reinterpret_cast<const GLuint*>(&ssbo));
- } else if (size == 0) {
- glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0);
- } else {
- glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(),
- static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
}
}
@@ -207,4 +262,13 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer,
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
}
+void BufferCacheRuntime::BindTextureBuffer(Buffer& buffer, u32 offset, u32 size,
+ PixelFormat format) {
+ *texture_handles++ = buffer.View(offset, size, format);
+}
+
+void BufferCacheRuntime::BindImageBuffer(Buffer& buffer, u32 offset, u32 size, PixelFormat format) {
+ *image_handles++ = buffer.View(offset, size, format);
+}
+
} // namespace OpenGL
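
Buffer::View in the hunk above looks up a cached texture-buffer view by its (offset, size, format) triple and creates one only on a miss. A compact sketch of that lookup-or-create pattern, with the GL texture creation replaced by a hypothetical handle counter:

#include <algorithm>
#include <cstdio>
#include <vector>

using Handle = unsigned; // stand-in for GLuint

struct CachedView {
    unsigned offset;
    unsigned size;
    int format; // stand-in for VideoCore::Surface::PixelFormat
    Handle handle;
};

class ViewCache {
public:
    Handle View(unsigned offset, unsigned size, int format) {
        const auto it = std::find_if(views.begin(), views.end(), [&](const CachedView& view) {
            return offset == view.offset && size == view.size && format == view.format;
        });
        if (it != views.end()) {
            return it->handle; // cache hit: reuse the existing view
        }
        const Handle handle = next_handle++; // stand-in for creating a GL texture buffer view
        views.push_back({offset, size, format, handle});
        return handle;
    }

private:
    std::vector<CachedView> views;
    Handle next_handle = 1;
};

int main() {
    ViewCache cache;
    const Handle a = cache.View(0, 256, 7);
    const Handle b = cache.View(0, 256, 7);   // same triple: reuses the first view
    const Handle c = cache.View(256, 256, 7); // different offset: creates a second view
    std::printf("%u %u %u\n", a, b, c); // prints "1 1 2"
}
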
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index d8b20a9af..060d36427 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -32,6 +32,8 @@ public:
void MakeResident(GLenum access) noexcept;
+ [[nodiscard]] GLuint View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format);
+
[[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept {
return address;
}
@@ -41,9 +43,17 @@ public:
}
private:
+ struct BufferView {
+ u32 offset;
+ u32 size;
+ VideoCore::Surface::PixelFormat format;
+ OGLTexture texture;
+ };
+
GLuint64EXT address = 0;
OGLBuffer buffer;
GLenum current_residency_access = GL_NONE;
+ std::vector<BufferView> views;
};
class BufferCacheRuntime {
@@ -57,6 +67,8 @@ public:
void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
std::span<const VideoCommon::BufferCopy> copies);
+ void ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value);
+
void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size);
void BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size, u32 stride);
@@ -73,17 +85,21 @@ public:
void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size);
+ void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size,
+ VideoCore::Surface::PixelFormat format);
+
+ void BindImageBuffer(Buffer& buffer, u32 offset, u32 size,
+ VideoCore::Surface::PixelFormat format);
+
void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) {
+ const GLuint handle = fast_uniforms[stage][binding_index].handle;
+ const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size);
if (use_assembly_shaders) {
- const GLuint handle = fast_uniforms[stage][binding_index].handle;
- const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size);
glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size);
} else {
- const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
+ const GLuint base_binding = graphics_base_uniform_bindings[stage];
const GLuint binding = base_binding + binding_index;
- glBindBufferRange(GL_UNIFORM_BUFFER, binding,
- fast_uniforms[stage][binding_index].handle, 0,
- static_cast<GLsizeiptr>(size));
+ glBindBufferRange(GL_UNIFORM_BUFFER, binding, handle, 0, gl_size);
}
}
@@ -101,7 +117,7 @@ public:
std::span<u8> BindMappedUniformBuffer(size_t stage, u32 binding_index, u32 size) noexcept {
const auto [mapped_span, offset] = stream_buffer->Request(static_cast<size_t>(size));
- const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
+ const GLuint base_binding = graphics_base_uniform_bindings[stage];
const GLuint binding = base_binding + binding_index;
glBindBufferRange(GL_UNIFORM_BUFFER, binding, stream_buffer->Handle(),
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
@@ -116,6 +132,27 @@ public:
return has_fast_buffer_sub_data;
}
+ [[nodiscard]] bool SupportsNonZeroUniformOffset() const noexcept {
+ return !use_assembly_shaders;
+ }
+
+ void SetBaseUniformBindings(const std::array<GLuint, 5>& bindings) {
+ graphics_base_uniform_bindings = bindings;
+ }
+
+ void SetBaseStorageBindings(const std::array<GLuint, 5>& bindings) {
+ graphics_base_storage_bindings = bindings;
+ }
+
+ void SetImagePointers(GLuint* texture_handles_, GLuint* image_handles_) {
+ texture_handles = texture_handles_;
+ image_handles = image_handles_;
+ }
+
+ void SetEnableStorageBuffers(bool use_storage_buffers_) {
+ use_storage_buffers = use_storage_buffers_;
+ }
+
private:
static constexpr std::array PABO_LUT{
GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
@@ -129,8 +166,15 @@ private:
bool use_assembly_shaders = false;
bool has_unified_vertex_buffers = false;
+ bool use_storage_buffers = false;
+
u32 max_attributes = 0;
+ std::array<GLuint, 5> graphics_base_uniform_bindings{};
+ std::array<GLuint, 5> graphics_base_storage_bindings{};
+ GLuint* texture_handles = nullptr;
+ GLuint* image_handles = nullptr;
+
std::optional<StreamBuffer> stream_buffer;
std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>,
@@ -154,6 +198,7 @@ struct BufferCacheParams {
static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true;
static constexpr bool NEEDS_BIND_STORAGE_INDEX = true;
static constexpr bool USE_MEMORY_MAPS = false;
+ static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true;
};
using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
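
The runtime in this header does not own the texture/image binding arrays: the pipeline hands it raw pointers via SetImagePointers, and each BindTextureBuffer/BindImageBuffer call writes the next view handle through a post-incremented pointer. A small sketch of that hand-off, with plain unsigned handles standing in for GLuint:

#include <array>
#include <cstdio>

class Runtime {
public:
    void SetImagePointers(unsigned* texture_handles_, unsigned* image_handles_) {
        texture_handles = texture_handles_;
        image_handles = image_handles_;
    }
    void BindTextureBuffer(unsigned view_handle) {
        *texture_handles++ = view_handle; // append into the caller's array
    }
    void BindImageBuffer(unsigned view_handle) {
        *image_handles++ = view_handle;
    }

private:
    unsigned* texture_handles = nullptr;
    unsigned* image_handles = nullptr;
};

int main() {
    std::array<unsigned, 4> textures{};
    std::array<unsigned, 4> images{};
    Runtime runtime;
    runtime.SetImagePointers(textures.data(), images.data());
    runtime.BindTextureBuffer(11);
    runtime.BindTextureBuffer(12);
    runtime.BindImageBuffer(21);
    std::printf("%u %u %u\n", textures[0], textures[1], images[0]); // prints "11 12 21"
}
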
diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
new file mode 100644
index 000000000..aa1cc592f
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
@@ -0,0 +1,209 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cstring>
+
+#include "common/cityhash.h"
+#include "common/settings.h" // for enum class Settings::ShaderBackend
+#include "video_core/renderer_opengl/gl_compute_pipeline.h"
+#include "video_core/renderer_opengl/gl_shader_manager.h"
+#include "video_core/renderer_opengl/gl_shader_util.h"
+
+namespace OpenGL {
+
+using Shader::ImageBufferDescriptor;
+using Tegra::Texture::TexturePair;
+using VideoCommon::ImageId;
+
+constexpr u32 MAX_TEXTURES = 64;
+constexpr u32 MAX_IMAGES = 16;
+
+template <typename Range>
+u32 AccumulateCount(const Range& range) {
+ u32 num{};
+ for (const auto& desc : range) {
+ num += desc.count;
+ }
+ return num;
+}
+
+size_t ComputePipelineKey::Hash() const noexcept {
+ return static_cast<size_t>(
+ Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this));
+}
+
+bool ComputePipelineKey::operator==(const ComputePipelineKey& rhs) const noexcept {
+ return std::memcmp(this, &rhs, sizeof *this) == 0;
+}
+
+ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cache_,
+ BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_,
+ Tegra::Engines::KeplerCompute& kepler_compute_,
+ ProgramManager& program_manager_, const Shader::Info& info_,
+ std::string code, std::vector<u32> code_v)
+ : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_},
+ kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_} {
+ switch (device.GetShaderBackend()) {
+ case Settings::ShaderBackend::GLSL:
+ source_program = CreateProgram(code, GL_COMPUTE_SHADER);
+ break;
+ case Settings::ShaderBackend::GLASM:
+ assembly_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV);
+ break;
+ case Settings::ShaderBackend::SPIRV:
+ source_program = CreateProgram(code_v, GL_COMPUTE_SHADER);
+ break;
+ }
+ std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(),
+ uniform_buffer_sizes.begin());
+
+ num_texture_buffers = AccumulateCount(info.texture_buffer_descriptors);
+ num_image_buffers = AccumulateCount(info.image_buffer_descriptors);
+
+ const u32 num_textures{num_texture_buffers + AccumulateCount(info.texture_descriptors)};
+ ASSERT(num_textures <= MAX_TEXTURES);
+
+ const u32 num_images{num_image_buffers + AccumulateCount(info.image_descriptors)};
+ ASSERT(num_images <= MAX_IMAGES);
+
+ const bool is_glasm{assembly_program.handle != 0};
+ const u32 num_storage_buffers{AccumulateCount(info.storage_buffers_descriptors)};
+ use_storage_buffers =
+ !is_glasm || num_storage_buffers < device.GetMaxGLASMStorageBufferBlocks();
+ writes_global_memory = !use_storage_buffers &&
+ std::ranges::any_of(info.storage_buffers_descriptors,
+ [](const auto& desc) { return desc.is_written; });
+}
+
+void ComputePipeline::Configure() {
+ buffer_cache.SetComputeUniformBufferState(info.constant_buffer_mask, &uniform_buffer_sizes);
+ buffer_cache.UnbindComputeStorageBuffers();
+ size_t ssbo_index{};
+ for (const auto& desc : info.storage_buffers_descriptors) {
+ ASSERT(desc.count == 1);
+ buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset,
+ desc.is_written);
+ ++ssbo_index;
+ }
+ texture_cache.SynchronizeComputeDescriptors();
+
+ std::array<ImageViewId, MAX_TEXTURES + MAX_IMAGES> image_view_ids;
+ boost::container::static_vector<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices;
+ std::array<GLuint, MAX_TEXTURES> samplers;
+ std::array<GLuint, MAX_TEXTURES> textures;
+ std::array<GLuint, MAX_IMAGES> images;
+ GLsizei sampler_binding{};
+ GLsizei texture_binding{};
+ GLsizei image_binding{};
+
+ const auto& qmd{kepler_compute.launch_description};
+ const auto& cbufs{qmd.const_buffer_config};
+ const bool via_header_index{qmd.linked_tsc != 0};
+ const auto read_handle{[&](const auto& desc, u32 index) {
+ ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0);
+ const u32 index_offset{index << desc.size_shift};
+ const u32 offset{desc.cbuf_offset + index_offset};
+ const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + offset};
+ if constexpr (std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> ||
+ std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) {
+ if (desc.has_secondary) {
+ ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0);
+ const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset};
+ const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() +
+ secondary_offset};
+ const u32 lhs_raw{gpu_memory.Read<u32>(addr)};
+ const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)};
+ return TexturePair(lhs_raw | rhs_raw, via_header_index);
+ }
+ }
+ return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
+ }};
+ const auto add_image{[&](const auto& desc) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ const auto handle{read_handle(desc, index)};
+ image_view_indices.push_back(handle.first);
+ }
+ }};
+ for (const auto& desc : info.texture_buffer_descriptors) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ const auto handle{read_handle(desc, index)};
+ image_view_indices.push_back(handle.first);
+ samplers[sampler_binding++] = 0;
+ }
+ }
+ std::ranges::for_each(info.image_buffer_descriptors, add_image);
+ for (const auto& desc : info.texture_descriptors) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ const auto handle{read_handle(desc, index)};
+ image_view_indices.push_back(handle.first);
+
+ Sampler* const sampler = texture_cache.GetComputeSampler(handle.second);
+ samplers[sampler_binding++] = sampler->Handle();
+ }
+ }
+ std::ranges::for_each(info.image_descriptors, add_image);
+
+ const std::span indices_span(image_view_indices.data(), image_view_indices.size());
+ texture_cache.FillComputeImageViews(indices_span, image_view_ids);
+
+ if (assembly_program.handle != 0) {
+ program_manager.BindComputeAssemblyProgram(assembly_program.handle);
+ } else {
+ program_manager.BindComputeProgram(source_program.handle);
+ }
+ buffer_cache.UnbindComputeTextureBuffers();
+ size_t texbuf_index{};
+ const auto add_buffer{[&](const auto& desc) {
+ constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
+ for (u32 i = 0; i < desc.count; ++i) {
+ bool is_written{false};
+ if constexpr (is_image) {
+ is_written = desc.is_written;
+ }
+ ImageView& image_view{texture_cache.GetImageView(image_view_ids[texbuf_index])};
+ buffer_cache.BindComputeTextureBuffer(texbuf_index, image_view.GpuAddr(),
+ image_view.BufferSize(), image_view.format,
+ is_written, is_image);
+ ++texbuf_index;
+ }
+ }};
+ std::ranges::for_each(info.texture_buffer_descriptors, add_buffer);
+ std::ranges::for_each(info.image_buffer_descriptors, add_buffer);
+
+ buffer_cache.UpdateComputeBuffers();
+
+ buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers);
+ buffer_cache.runtime.SetImagePointers(textures.data(), images.data());
+ buffer_cache.BindHostComputeBuffers();
+
+ const ImageId* views_it{image_view_ids.data() + num_texture_buffers + num_image_buffers};
+ texture_binding += num_texture_buffers;
+ image_binding += num_image_buffers;
+
+ for (const auto& desc : info.texture_descriptors) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
+ textures[texture_binding++] = image_view.Handle(desc.type);
+ }
+ }
+ for (const auto& desc : info.image_descriptors) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
+ if (desc.is_written) {
+ texture_cache.MarkModification(image_view.image_id);
+ }
+ images[image_binding++] = image_view.StorageView(desc.type, desc.format);
+ }
+ }
+ if (texture_binding != 0) {
+ ASSERT(texture_binding == sampler_binding);
+ glBindTextures(0, texture_binding, textures.data());
+ glBindSamplers(0, sampler_binding, samplers.data());
+ }
+ if (image_binding != 0) {
+ glBindImageTextures(0, image_binding, images.data());
+ }
+}
+
+} // namespace OpenGL
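
ComputePipelineKey above is hashed over its raw bytes and compared with memcmp, which is only sound because the key has no padding (gl_compute_pipeline.h asserts unique object representations). A self-contained sketch of the same idea; FNV-1a is used here as a stand-in because Common::CityHash64 is yuzu-internal:

#include <array>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <type_traits>

struct Key {
    std::uint64_t unique_hash;
    std::uint32_t shared_memory_size;
    std::array<std::uint32_t, 3> workgroup_size;
};
// Byte-wise hashing and comparison require that no padding bits exist:
static_assert(std::has_unique_object_representations_v<Key>);

std::size_t HashKey(const Key& key) noexcept {
    std::uint64_t hash = 0xcbf29ce484222325ULL; // FNV-1a offset basis
    unsigned char bytes[sizeof(Key)];
    std::memcpy(bytes, &key, sizeof(Key));
    for (const unsigned char byte : bytes) {
        hash = (hash ^ byte) * 0x100000001b3ULL; // FNV-1a prime
    }
    return static_cast<std::size_t>(hash);
}

bool KeyEqual(const Key& lhs, const Key& rhs) noexcept {
    return std::memcmp(&lhs, &rhs, sizeof(Key)) == 0;
}

int main() {
    const Key a{1, 0x400, {8, 8, 1}};
    const Key b{1, 0x400, {8, 8, 1}};
    std::printf("%d %zu\n", KeyEqual(a, b), HashKey(a)); // equal keys hash identically
}
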
diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h
new file mode 100644
index 000000000..50c676365
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h
@@ -0,0 +1,93 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <type_traits>
+#include <utility>
+
+#include "common/common_types.h"
+#include "shader_recompiler/shader_info.h"
+#include "video_core/renderer_opengl/gl_buffer_cache.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/gl_texture_cache.h"
+
+namespace Tegra {
+class MemoryManager;
+}
+
+namespace Tegra::Engines {
+class KeplerCompute;
+}
+
+namespace Shader {
+struct Info;
+}
+
+namespace OpenGL {
+
+class Device;
+class ProgramManager;
+
+struct ComputePipelineKey {
+ u64 unique_hash;
+ u32 shared_memory_size;
+ std::array<u32, 3> workgroup_size;
+
+ size_t Hash() const noexcept;
+
+ bool operator==(const ComputePipelineKey&) const noexcept;
+
+ bool operator!=(const ComputePipelineKey& rhs) const noexcept {
+ return !operator==(rhs);
+ }
+};
+static_assert(std::has_unique_object_representations_v<ComputePipelineKey>);
+static_assert(std::is_trivially_copyable_v<ComputePipelineKey>);
+static_assert(std::is_trivially_constructible_v<ComputePipelineKey>);
+
+class ComputePipeline {
+public:
+ explicit ComputePipeline(const Device& device, TextureCache& texture_cache_,
+ BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_,
+ Tegra::Engines::KeplerCompute& kepler_compute_,
+ ProgramManager& program_manager_, const Shader::Info& info_,
+ std::string code, std::vector<u32> code_v);
+
+ void Configure();
+
+ [[nodiscard]] bool WritesGlobalMemory() const noexcept {
+ return writes_global_memory;
+ }
+
+private:
+ TextureCache& texture_cache;
+ BufferCache& buffer_cache;
+ Tegra::MemoryManager& gpu_memory;
+ Tegra::Engines::KeplerCompute& kepler_compute;
+ ProgramManager& program_manager;
+
+ Shader::Info info;
+ OGLProgram source_program;
+ OGLAssemblyProgram assembly_program;
+ VideoCommon::ComputeUniformBufferSizes uniform_buffer_sizes{};
+
+ u32 num_texture_buffers{};
+ u32 num_image_buffers{};
+
+ bool use_storage_buffers{};
+ bool writes_global_memory{};
+};
+
+} // namespace OpenGL
+
+namespace std {
+template <>
+struct hash<OpenGL::ComputePipelineKey> {
+ size_t operator()(const OpenGL::ComputePipelineKey& k) const noexcept {
+ return k.Hash();
+ }
+};
+} // namespace std
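
The std::hash specialization above lets ComputePipelineKey index standard containers directly. A usage sketch with a hypothetical, simplified key type:

#include <cstdio>
#include <unordered_map>

struct PipelineKey {
    unsigned unique_hash;
};

inline bool operator==(const PipelineKey& lhs, const PipelineKey& rhs) noexcept {
    return lhs.unique_hash == rhs.unique_hash;
}

namespace std {
template <>
struct hash<PipelineKey> {
    size_t operator()(const PipelineKey& key) const noexcept {
        return key.unique_hash; // trivial for the sketch; the real code forwards to Key::Hash()
    }
};
} // namespace std

int main() {
    std::unordered_map<PipelineKey, int> pipelines;
    pipelines.emplace(PipelineKey{42}, 7);
    std::printf("%d\n", pipelines.at(PipelineKey{42})); // prints "7"
}
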
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 3f4532ca7..9692b8e94 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -17,39 +17,17 @@
#include "common/logging/log.h"
#include "common/scope_exit.h"
#include "common/settings.h"
+#include "shader_recompiler/stage.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
namespace {
-// One uniform block is reserved for emulation purposes
-constexpr u32 ReservedUniformBlocks = 1;
-
-constexpr u32 NumStages = 5;
-
constexpr std::array LIMIT_UBOS = {
GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS,
GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS,
GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS,
};
-constexpr std::array LIMIT_SSBOS = {
- GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS,
- GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS,
- GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS,
-};
-constexpr std::array LIMIT_SAMPLERS = {
- GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS,
- GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
- GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS,
- GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
- GL_MAX_TEXTURE_IMAGE_UNITS,
- GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS,
-};
-constexpr std::array LIMIT_IMAGES = {
- GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS,
- GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS,
- GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS,
-};
template <typename T>
T GetInteger(GLenum pname) {
@@ -82,81 +60,18 @@ bool HasExtension(std::span<const std::string_view> extensions, std::string_view
return std::ranges::find(extensions, extension) != extensions.end();
}
-u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) {
- ASSERT(num >= amount);
- if (limit) {
- amount = std::min(amount, GetInteger<u32>(*limit));
- }
- num -= amount;
- return std::exchange(base, base + amount);
-}
-
-std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept {
- std::array<u32, Tegra::Engines::MaxShaderTypes> max;
- std::ranges::transform(LIMIT_UBOS, max.begin(),
- [](GLenum pname) { return GetInteger<u32>(pname); });
+std::array<u32, Shader::MaxStageTypes> BuildMaxUniformBuffers() noexcept {
+ std::array<u32, Shader::MaxStageTypes> max;
+ std::ranges::transform(LIMIT_UBOS, max.begin(), &GetInteger<u32>);
return max;
}
-std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindings() noexcept {
- std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> bindings;
-
- static constexpr std::array<std::size_t, 5> stage_swizzle{0, 1, 2, 3, 4};
- const u32 total_ubos = GetInteger<u32>(GL_MAX_UNIFORM_BUFFER_BINDINGS);
- const u32 total_ssbos = GetInteger<u32>(GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS);
- const u32 total_samplers = GetInteger<u32>(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS);
-
- u32 num_ubos = total_ubos - ReservedUniformBlocks;
- u32 num_ssbos = total_ssbos;
- u32 num_samplers = total_samplers;
-
- u32 base_ubo = ReservedUniformBlocks;
- u32 base_ssbo = 0;
- u32 base_samplers = 0;
-
- for (std::size_t i = 0; i < NumStages; ++i) {
- const std::size_t stage = stage_swizzle[i];
- bindings[stage] = {
- Extract(base_ubo, num_ubos, total_ubos / NumStages, LIMIT_UBOS[stage]),
- Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LIMIT_SSBOS[stage]),
- Extract(base_samplers, num_samplers, total_samplers / NumStages,
- LIMIT_SAMPLERS[stage])};
- }
-
- u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS);
- u32 base_images = 0;
-
- // GL_MAX_IMAGE_UNITS is guaranteed by the spec to have a minimum value of 8.
- // Due to the limitation of GL_MAX_IMAGE_UNITS, reserve at least 4 image bindings on the
- // fragment stage, and at least 1 for the rest of the stages.
- // So far games are observed to use 1 image binding on vertex and 4 on fragment stages.
-
- // Reserve at least 4 image bindings on the fragment stage.
- bindings[4].image =
- Extract(base_images, num_images, std::max(4U, num_images / NumStages), LIMIT_IMAGES[4]);
-
- // This is guaranteed to be at least 1.
- const u32 total_extracted_images = num_images / (NumStages - 1);
-
- // Reserve the other image bindings.
- for (std::size_t i = 0; i < NumStages; ++i) {
- const std::size_t stage = stage_swizzle[i];
- if (stage == 4) {
- continue;
- }
- bindings[stage].image =
- Extract(base_images, num_images, total_extracted_images, LIMIT_IMAGES[stage]);
- }
-
- // Compute doesn't care about any of this.
- bindings[5] = {0, 0, 0, 0};
-
- return bindings;
-}
-
bool IsASTCSupported() {
- static constexpr std::array targets = {GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY};
- static constexpr std::array formats = {
+ static constexpr std::array targets{
+ GL_TEXTURE_2D,
+ GL_TEXTURE_2D_ARRAY,
+ };
+ static constexpr std::array formats{
GL_COMPRESSED_RGBA_ASTC_4x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x4_KHR,
GL_COMPRESSED_RGBA_ASTC_5x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x5_KHR,
GL_COMPRESSED_RGBA_ASTC_6x6_KHR, GL_COMPRESSED_RGBA_ASTC_8x5_KHR,
@@ -172,11 +87,10 @@ bool IsASTCSupported() {
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR,
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR,
};
- static constexpr std::array required_support = {
+ static constexpr std::array required_support{
GL_VERTEX_TEXTURE, GL_TESS_CONTROL_TEXTURE, GL_TESS_EVALUATION_TEXTURE,
GL_GEOMETRY_TEXTURE, GL_FRAGMENT_TEXTURE, GL_COMPUTE_TEXTURE,
};
-
for (const GLenum target : targets) {
for (const GLenum format : formats) {
for (const GLenum support : required_support) {
@@ -202,13 +116,13 @@ Device::Device() {
LOG_ERROR(Render_OpenGL, "OpenGL 4.6 is not available");
throw std::runtime_error{"Insufficient version"};
}
- const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
+ vendor_name = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION));
const std::vector extensions = GetExtensions();
- const bool is_nvidia = vendor == "NVIDIA Corporation";
- const bool is_amd = vendor == "ATI Technologies Inc.";
- const bool is_intel = vendor == "Intel";
+ const bool is_nvidia = vendor_name == "NVIDIA Corporation";
+ const bool is_amd = vendor_name == "ATI Technologies Inc.";
+ const bool is_intel = vendor_name == "Intel";
#ifdef __unix__
const bool is_linux = true;
@@ -223,14 +137,13 @@ Device::Device() {
"Beta driver 443.24 is known to have issues. There might be performance issues.");
disable_fast_buffer_sub_data = true;
}
-
max_uniform_buffers = BuildMaxUniformBuffers();
- base_bindings = BuildBaseBindings();
uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE);
+ max_glasm_storage_buffer_blocks = GetInteger<u32>(GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS);
has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group &&
GLAD_GL_NV_shader_thread_shuffle;
has_shader_ballot = GLAD_GL_ARB_shader_ballot;
@@ -243,18 +156,30 @@ Device::Device() {
has_precise_bug = TestPreciseBug();
has_broken_texture_view_formats = is_amd || (!is_linux && is_intel);
has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
+ has_derivative_control = GLAD_GL_ARB_derivative_control;
has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory;
has_debugging_tool_attached = IsDebugToolAttached(extensions);
has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float");
+ has_geometry_shader_passthrough = GLAD_GL_NV_geometry_shader_passthrough;
+ has_nv_gpu_shader_5 = GLAD_GL_NV_gpu_shader5;
+ has_shader_int64 = HasExtension(extensions, "GL_ARB_gpu_shader_int64");
+ has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float;
+ has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2;
+ warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel;
+ need_fastmath_off = is_nvidia;
// At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive
// uniform buffers as "push constants"
has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data;
- use_assembly_shaders = Settings::values.use_assembly_shaders.GetValue() &&
+ shader_backend = Settings::values.shader_backend.GetValue();
+ use_assembly_shaders = shader_backend == Settings::ShaderBackend::GLASM &&
GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 &&
GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2;
-
+ if (shader_backend == Settings::ShaderBackend::GLASM && !use_assembly_shaders) {
+ LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported");
+ shader_backend = Settings::ShaderBackend::GLSL;
+ }
// Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation.
use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() &&
!(is_amd || (is_intel && !is_linux));
@@ -265,30 +190,59 @@ Device::Device() {
LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug);
LOG_INFO(Render_OpenGL, "Renderer_BrokenTextureViewFormats: {}",
has_broken_texture_view_formats);
-
- if (Settings::values.use_assembly_shaders.GetValue() && !use_assembly_shaders) {
- LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported");
- }
-
if (Settings::values.use_asynchronous_shaders.GetValue() && !use_asynchronous_shaders) {
LOG_WARNING(Render_OpenGL, "Asynchronous shader compilation enabled but not supported");
}
}
-Device::Device(std::nullptr_t) {
- max_uniform_buffers.fill(std::numeric_limits<u32>::max());
- uniform_buffer_alignment = 4;
- shader_storage_alignment = 4;
- max_vertex_attributes = 16;
- max_varyings = 15;
- max_compute_shared_memory_size = 0x10000;
- has_warp_intrinsics = true;
- has_shader_ballot = true;
- has_vertex_viewport_layer = true;
- has_image_load_formatted = true;
- has_texture_shadow_lod = true;
- has_variable_aoffi = true;
- has_depth_buffer_float = true;
+std::string Device::GetVendorName() const {
+ if (vendor_name == "NVIDIA Corporation") {
+ return "NVIDIA";
+ }
+ if (vendor_name == "ATI Technologies Inc.") {
+ return "AMD";
+ }
+ if (vendor_name == "Intel") {
+ // For Mesa, `Intel` is an overloaded vendor string that could mean crocus or iris.
+ // Simply return `INTEL` for those as well as the Windows driver.
+ return "INTEL";
+ }
+ if (vendor_name == "Intel Open Source Technology Center") {
+ return "I965";
+ }
+ if (vendor_name == "Mesa Project") {
+ return "I915";
+ }
+ if (vendor_name == "Mesa/X.org") {
+ // This vendor string is overloaded between llvmpipe, softpipe, and virgl, so just return
+ // MESA instead of one of those driver names.
+ return "MESA";
+ }
+ if (vendor_name == "AMD") {
+ return "RADEONSI";
+ }
+ if (vendor_name == "nouveau") {
+ return "NOUVEAU";
+ }
+ if (vendor_name == "X.Org") {
+ return "R600";
+ }
+ if (vendor_name == "Collabora Ltd") {
+ return "ZINK";
+ }
+ if (vendor_name == "Intel Corporation") {
+ return "OPENSWR";
+ }
+ if (vendor_name == "Microsoft Corporation") {
+ return "D3D12";
+ }
+ if (vendor_name == "NVIDIA") {
+ // Mesa's tegra driver reports `NVIDIA`. It is only present in this list because the default
+ // strategy would otherwise have returned `NVIDIA` for this driver, the same result as the
+ // proprietary driver.
+ return "TEGRA";
+ }
+ return vendor_name;
}
bool Device::TestVariableAoffi() {
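
Editor's note: Device::GetVendorName() collapses the raw GL_VENDOR string into a short driver tag, disambiguating Mesa's overloaded vendor strings where possible. A standalone sketch of the same table-lookup idea follows, not part of the patch; it covers only a few of the strings handled above and the function name is illustrative.

    // Sketch: map a GL_VENDOR string to a short driver tag, mirroring the approach of
    // Device::GetVendorName(). Only a subset of the real table is reproduced here.
    #include <string>
    #include <string_view>
    #include <unordered_map>

    std::string ShortVendorName(std::string_view vendor) {
        static const std::unordered_map<std::string_view, std::string> table{
            {"NVIDIA Corporation", "NVIDIA"},
            {"ATI Technologies Inc.", "AMD"},
            {"Intel", "INTEL"},
            {"Mesa/X.org", "MESA"},
        };
        const auto it = table.find(vendor);
        // Unknown vendors fall back to the raw string, as the real implementation does.
        return it == table.end() ? std::string{vendor} : it->second;
    }
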
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index f24bd0c7b..ee992aed4 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -6,32 +6,22 @@
#include <cstddef>
#include "common/common_types.h"
-#include "video_core/engines/shader_type.h"
+#include "shader_recompiler/stage.h"
+
+namespace Settings {
+enum class ShaderBackend : u32;
+};
namespace OpenGL {
class Device {
public:
- struct BaseBindings {
- u32 uniform_buffer{};
- u32 shader_storage_buffer{};
- u32 sampler{};
- u32 image{};
- };
-
explicit Device();
- explicit Device(std::nullptr_t);
- u32 GetMaxUniformBuffers(Tegra::Engines::ShaderType shader_type) const noexcept {
- return max_uniform_buffers[static_cast<std::size_t>(shader_type)];
- }
+ [[nodiscard]] std::string GetVendorName() const;
- const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept {
- return base_bindings[stage_index];
- }
-
- const BaseBindings& GetBaseBindings(Tegra::Engines::ShaderType shader_type) const noexcept {
- return GetBaseBindings(static_cast<std::size_t>(shader_type));
+ u32 GetMaxUniformBuffers(Shader::Stage stage) const noexcept {
+ return max_uniform_buffers[static_cast<size_t>(stage)];
}
size_t GetUniformBufferAlignment() const {
@@ -54,6 +44,10 @@ public:
return max_compute_shared_memory_size;
}
+ u32 GetMaxGLASMStorageBufferBlocks() const {
+ return max_glasm_storage_buffer_blocks;
+ }
+
bool HasWarpIntrinsics() const {
return has_warp_intrinsics;
}
@@ -106,6 +100,10 @@ public:
return has_nv_viewport_array2;
}
+ bool HasDerivativeControl() const {
+ return has_derivative_control;
+ }
+
bool HasDebuggingToolAttached() const {
return has_debugging_tool_attached;
}
@@ -126,17 +124,52 @@ public:
return has_depth_buffer_float;
}
+ bool HasGeometryShaderPassthrough() const {
+ return has_geometry_shader_passthrough;
+ }
+
+ bool HasNvGpuShader5() const {
+ return has_nv_gpu_shader_5;
+ }
+
+ bool HasShaderInt64() const {
+ return has_shader_int64;
+ }
+
+ bool HasAmdShaderHalfFloat() const {
+ return has_amd_shader_half_float;
+ }
+
+ bool HasSparseTexture2() const {
+ return has_sparse_texture_2;
+ }
+
+ bool IsWarpSizePotentiallyLargerThanGuest() const {
+ return warp_size_potentially_larger_than_guest;
+ }
+
+ bool NeedsFastmathOff() const {
+ return need_fastmath_off;
+ }
+
+ Settings::ShaderBackend GetShaderBackend() const {
+ return shader_backend;
+ }
+
private:
static bool TestVariableAoffi();
static bool TestPreciseBug();
- std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{};
- std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{};
+ std::array<u32, Shader::MaxStageTypes> max_uniform_buffers{};
size_t uniform_buffer_alignment{};
size_t shader_storage_alignment{};
u32 max_vertex_attributes{};
u32 max_varyings{};
u32 max_compute_shared_memory_size{};
+ u32 max_glasm_storage_buffer_blocks{};
+
+ Settings::ShaderBackend shader_backend{};
+
bool has_warp_intrinsics{};
bool has_shader_ballot{};
bool has_vertex_viewport_layer{};
@@ -150,11 +183,21 @@ private:
bool has_broken_texture_view_formats{};
bool has_fast_buffer_sub_data{};
bool has_nv_viewport_array2{};
+ bool has_derivative_control{};
bool has_debugging_tool_attached{};
bool use_assembly_shaders{};
bool use_asynchronous_shaders{};
bool use_driver_cache{};
bool has_depth_buffer_float{};
+ bool has_geometry_shader_passthrough{};
+ bool has_nv_gpu_shader_5{};
+ bool has_shader_int64{};
+ bool has_amd_shader_half_float{};
+ bool has_sparse_texture_2{};
+ bool warp_size_potentially_larger_than_guest{};
+ bool need_fastmath_off{};
+
+ std::string vendor_name;
};
} // namespace OpenGL
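
Editor's note: the new shader_backend member stores the effective backend chosen in Device::Device(): a requested GLASM backend falls back to GLSL when the required NV assembly-program extensions are missing. A condensed sketch of that decision follows, not part of the patch; plain booleans stand in for the GLAD extension flags and the enum is a local stand-in for Settings::ShaderBackend.

    // Sketch of the fallback performed in Device::Device(). The booleans stand in for the
    // GLAD_GL_NV_* extension queries; the enum mirrors Settings::ShaderBackend in shape only.
    enum class ShaderBackend { GLSL, GLASM, SPIRV };

    ShaderBackend ResolveShaderBackend(ShaderBackend requested, bool nv_gpu_program5,
                                       bool nv_compute_program5, bool nv_transform_feedback,
                                       bool nv_transform_feedback2) {
        const bool assembly_supported = nv_gpu_program5 && nv_compute_program5 &&
                                        nv_transform_feedback && nv_transform_feedback2;
        if (requested == ShaderBackend::GLASM && !assembly_supported) {
            // The real code logs "Assembly shaders enabled but not supported" before falling back.
            return ShaderBackend::GLSL;
        }
        return requested;
    }
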
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
new file mode 100644
index 000000000..fac0034fb
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
@@ -0,0 +1,572 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <string>
+#include <vector>
+
+#include "common/settings.h" // for enum class Settings::ShaderBackend
+#include "common/thread_worker.h"
+#include "shader_recompiler/shader_info.h"
+#include "video_core/renderer_opengl/gl_graphics_pipeline.h"
+#include "video_core/renderer_opengl/gl_shader_manager.h"
+#include "video_core/renderer_opengl/gl_shader_util.h"
+#include "video_core/renderer_opengl/gl_state_tracker.h"
+#include "video_core/shader_notify.h"
+#include "video_core/texture_cache/texture_cache.h"
+
+#if defined(_MSC_VER) && defined(NDEBUG)
+#define LAMBDA_FORCEINLINE [[msvc::forceinline]]
+#else
+#define LAMBDA_FORCEINLINE
+#endif
+
+namespace OpenGL {
+namespace {
+using Shader::ImageBufferDescriptor;
+using Shader::ImageDescriptor;
+using Shader::TextureBufferDescriptor;
+using Shader::TextureDescriptor;
+using Tegra::Texture::TexturePair;
+using VideoCommon::ImageId;
+
+constexpr u32 MAX_TEXTURES = 64;
+constexpr u32 MAX_IMAGES = 8;
+
+template <typename Range>
+u32 AccumulateCount(const Range& range) {
+ u32 num{};
+ for (const auto& desc : range) {
+ num += desc.count;
+ }
+ return num;
+}
+
+GLenum Stage(size_t stage_index) {
+ switch (stage_index) {
+ case 0:
+ return GL_VERTEX_SHADER;
+ case 1:
+ return GL_TESS_CONTROL_SHADER;
+ case 2:
+ return GL_TESS_EVALUATION_SHADER;
+ case 3:
+ return GL_GEOMETRY_SHADER;
+ case 4:
+ return GL_FRAGMENT_SHADER;
+ }
+ UNREACHABLE_MSG("{}", stage_index);
+ return GL_NONE;
+}
+
+GLenum AssemblyStage(size_t stage_index) {
+ switch (stage_index) {
+ case 0:
+ return GL_VERTEX_PROGRAM_NV;
+ case 1:
+ return GL_TESS_CONTROL_PROGRAM_NV;
+ case 2:
+ return GL_TESS_EVALUATION_PROGRAM_NV;
+ case 3:
+ return GL_GEOMETRY_PROGRAM_NV;
+ case 4:
+ return GL_FRAGMENT_PROGRAM_NV;
+ }
+ UNREACHABLE_MSG("{}", stage_index);
+ return GL_NONE;
+}
+
+/// Translates hardware transform feedback indices
+/// @param location Hardware location
+/// @return Pair of ARB_transform_feedback3 token stream first and third arguments
+/// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt
+std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) {
+ const u8 index = location / 4;
+ if (index >= 8 && index <= 39) {
+ return {GL_GENERIC_ATTRIB_NV, index - 8};
+ }
+ if (index >= 48 && index <= 55) {
+ return {GL_TEXTURE_COORD_NV, index - 48};
+ }
+ switch (index) {
+ case 7:
+ return {GL_POSITION, 0};
+ case 40:
+ return {GL_PRIMARY_COLOR_NV, 0};
+ case 41:
+ return {GL_SECONDARY_COLOR_NV, 0};
+ case 42:
+ return {GL_BACK_PRIMARY_COLOR_NV, 0};
+ case 43:
+ return {GL_BACK_SECONDARY_COLOR_NV, 0};
+ }
+ UNIMPLEMENTED_MSG("index={}", index);
+ return {GL_POSITION, 0};
+}
+
+template <typename Spec>
+bool Passes(const std::array<Shader::Info, 5>& stage_infos, u32 enabled_mask) {
+ for (size_t stage = 0; stage < stage_infos.size(); ++stage) {
+ if (!Spec::enabled_stages[stage] && ((enabled_mask >> stage) & 1) != 0) {
+ return false;
+ }
+ const auto& info{stage_infos[stage]};
+ if constexpr (!Spec::has_storage_buffers) {
+ if (!info.storage_buffers_descriptors.empty()) {
+ return false;
+ }
+ }
+ if constexpr (!Spec::has_texture_buffers) {
+ if (!info.texture_buffer_descriptors.empty()) {
+ return false;
+ }
+ }
+ if constexpr (!Spec::has_image_buffers) {
+ if (!info.image_buffer_descriptors.empty()) {
+ return false;
+ }
+ }
+ if constexpr (!Spec::has_images) {
+ if (!info.image_descriptors.empty()) {
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+using ConfigureFuncPtr = void (*)(GraphicsPipeline*, bool);
+
+template <typename Spec, typename... Specs>
+ConfigureFuncPtr FindSpec(const std::array<Shader::Info, 5>& stage_infos, u32 enabled_mask) {
+ if constexpr (sizeof...(Specs) > 0) {
+ if (!Passes<Spec>(stage_infos, enabled_mask)) {
+ return FindSpec<Specs...>(stage_infos, enabled_mask);
+ }
+ }
+ return GraphicsPipeline::MakeConfigureSpecFunc<Spec>();
+}
+
+struct SimpleVertexFragmentSpec {
+ static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, true};
+ static constexpr bool has_storage_buffers = false;
+ static constexpr bool has_texture_buffers = false;
+ static constexpr bool has_image_buffers = false;
+ static constexpr bool has_images = false;
+};
+
+struct SimpleVertexSpec {
+ static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, false};
+ static constexpr bool has_storage_buffers = false;
+ static constexpr bool has_texture_buffers = false;
+ static constexpr bool has_image_buffers = false;
+ static constexpr bool has_images = false;
+};
+
+struct DefaultSpec {
+ static constexpr std::array<bool, 5> enabled_stages{true, true, true, true, true};
+ static constexpr bool has_storage_buffers = true;
+ static constexpr bool has_texture_buffers = true;
+ static constexpr bool has_image_buffers = true;
+ static constexpr bool has_images = true;
+};
+
+ConfigureFuncPtr ConfigureFunc(const std::array<Shader::Info, 5>& infos, u32 enabled_mask) {
+ return FindSpec<SimpleVertexSpec, SimpleVertexFragmentSpec, DefaultSpec>(infos, enabled_mask);
+}
+} // Anonymous namespace
+
+GraphicsPipeline::GraphicsPipeline(
+ const Device& device, TextureCache& texture_cache_, BufferCache& buffer_cache_,
+ Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_,
+ ProgramManager& program_manager_, StateTracker& state_tracker_, ShaderWorker* thread_worker,
+ VideoCore::ShaderNotify* shader_notify, std::array<std::string, 5> sources,
+ std::array<std::vector<u32>, 5> sources_spirv, const std::array<const Shader::Info*, 5>& infos,
+ const GraphicsPipelineKey& key_)
+ : texture_cache{texture_cache_}, buffer_cache{buffer_cache_},
+ gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_},
+ state_tracker{state_tracker_}, key{key_} {
+ if (shader_notify) {
+ shader_notify->MarkShaderBuilding();
+ }
+ u32 num_textures{};
+ u32 num_images{};
+ u32 num_storage_buffers{};
+ for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) {
+ auto& info{stage_infos[stage]};
+ if (infos[stage]) {
+ info = *infos[stage];
+ enabled_stages_mask |= 1u << stage;
+ }
+ if (stage < 4) {
+ base_uniform_bindings[stage + 1] = base_uniform_bindings[stage];
+ base_storage_bindings[stage + 1] = base_storage_bindings[stage];
+
+ base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors);
+ base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors);
+ }
+ enabled_uniform_buffer_masks[stage] = info.constant_buffer_mask;
+ std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin());
+
+ const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)};
+ num_texture_buffers[stage] += num_tex_buffer_bindings;
+ num_textures += num_tex_buffer_bindings;
+
+ const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)};
+ num_image_buffers[stage] += num_img_buffers_bindings;
+ num_images += num_img_buffers_bindings;
+
+ num_textures += AccumulateCount(info.texture_descriptors);
+ num_images += AccumulateCount(info.image_descriptors);
+ num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors);
+
+ writes_global_memory |= std::ranges::any_of(
+ info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; });
+ }
+ ASSERT(num_textures <= MAX_TEXTURES);
+ ASSERT(num_images <= MAX_IMAGES);
+
+ const bool assembly_shaders{assembly_programs[0].handle != 0};
+ use_storage_buffers =
+ !assembly_shaders || num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks();
+ writes_global_memory &= !use_storage_buffers;
+ configure_func = ConfigureFunc(stage_infos, enabled_stages_mask);
+
+ if (key.xfb_enabled && device.UseAssemblyShaders()) {
+ GenerateTransformFeedbackState();
+ }
+ const bool in_parallel = thread_worker != nullptr;
+ const auto backend = device.GetShaderBackend();
+ auto func{[this, sources = std::move(sources), sources_spirv = std::move(sources_spirv),
+ shader_notify, backend, in_parallel](ShaderContext::Context*) mutable {
+ for (size_t stage = 0; stage < 5; ++stage) {
+ switch (backend) {
+ case Settings::ShaderBackend::GLSL:
+ if (!sources[stage].empty()) {
+ source_programs[stage] = CreateProgram(sources[stage], Stage(stage));
+ }
+ break;
+ case Settings::ShaderBackend::GLASM:
+ if (!sources[stage].empty()) {
+ assembly_programs[stage] = CompileProgram(sources[stage], AssemblyStage(stage));
+ if (in_parallel) {
+ // Make sure program is built before continuing when building in parallel
+ glGetString(GL_PROGRAM_ERROR_STRING_NV);
+ }
+ }
+ break;
+ case Settings::ShaderBackend::SPIRV:
+ if (!sources_spirv[stage].empty()) {
+ source_programs[stage] = CreateProgram(sources_spirv[stage], Stage(stage));
+ }
+ break;
+ }
+ }
+ if (in_parallel && backend != Settings::ShaderBackend::GLASM) {
+ // Make sure programs have been built if we are building shaders in parallel
+ for (OGLProgram& program : source_programs) {
+ if (program.handle != 0) {
+ GLint status{};
+ glGetProgramiv(program.handle, GL_LINK_STATUS, &status);
+ }
+ }
+ }
+ if (shader_notify) {
+ shader_notify->MarkShaderComplete();
+ }
+ is_built = true;
+ built_condvar.notify_one();
+ }};
+ if (thread_worker) {
+ thread_worker->QueueWork(std::move(func));
+ } else {
+ func(nullptr);
+ }
+}
+
+template <typename Spec>
+void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
+ std::array<ImageId, MAX_TEXTURES + MAX_IMAGES> image_view_ids;
+ std::array<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices;
+ std::array<GLuint, MAX_TEXTURES> samplers;
+ size_t image_view_index{};
+ GLsizei sampler_binding{};
+
+ texture_cache.SynchronizeGraphicsDescriptors();
+
+ buffer_cache.SetUniformBuffersState(enabled_uniform_buffer_masks, &uniform_buffer_sizes);
+ buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings);
+ buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings);
+ buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers);
+
+ const auto& regs{maxwell3d.regs};
+ const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex};
+ const auto config_stage{[&](size_t stage) LAMBDA_FORCEINLINE {
+ const Shader::Info& info{stage_infos[stage]};
+ buffer_cache.UnbindGraphicsStorageBuffers(stage);
+ if constexpr (Spec::has_storage_buffers) {
+ size_t ssbo_index{};
+ for (const auto& desc : info.storage_buffers_descriptors) {
+ ASSERT(desc.count == 1);
+ buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index,
+ desc.cbuf_offset, desc.is_written);
+ ++ssbo_index;
+ }
+ }
+ const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers};
+ const auto read_handle{[&](const auto& desc, u32 index) {
+ ASSERT(cbufs[desc.cbuf_index].enabled);
+ const u32 index_offset{index << desc.size_shift};
+ const u32 offset{desc.cbuf_offset + index_offset};
+ const GPUVAddr addr{cbufs[desc.cbuf_index].address + offset};
+ if constexpr (std::is_same_v<decltype(desc), const TextureDescriptor&> ||
+ std::is_same_v<decltype(desc), const TextureBufferDescriptor&>) {
+ if (desc.has_secondary) {
+ ASSERT(cbufs[desc.secondary_cbuf_index].enabled);
+ const u32 second_offset{desc.secondary_cbuf_offset + index_offset};
+ const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].address +
+ second_offset};
+ const u32 lhs_raw{gpu_memory.Read<u32>(addr)};
+ const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)};
+ const u32 raw{lhs_raw | rhs_raw};
+ return TexturePair(raw, via_header_index);
+ }
+ }
+ return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
+ }};
+ const auto add_image{[&](const auto& desc) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ const auto handle{read_handle(desc, index)};
+ image_view_indices[image_view_index++] = handle.first;
+ }
+ }};
+ if constexpr (Spec::has_texture_buffers) {
+ for (const auto& desc : info.texture_buffer_descriptors) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ const auto handle{read_handle(desc, index)};
+ image_view_indices[image_view_index++] = handle.first;
+ samplers[sampler_binding++] = 0;
+ }
+ }
+ }
+ if constexpr (Spec::has_image_buffers) {
+ for (const auto& desc : info.image_buffer_descriptors) {
+ add_image(desc);
+ }
+ }
+ for (const auto& desc : info.texture_descriptors) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ const auto handle{read_handle(desc, index)};
+ image_view_indices[image_view_index++] = handle.first;
+
+ Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)};
+ samplers[sampler_binding++] = sampler->Handle();
+ }
+ }
+ if constexpr (Spec::has_images) {
+ for (const auto& desc : info.image_descriptors) {
+ add_image(desc);
+ }
+ }
+ }};
+ if constexpr (Spec::enabled_stages[0]) {
+ config_stage(0);
+ }
+ if constexpr (Spec::enabled_stages[1]) {
+ config_stage(1);
+ }
+ if constexpr (Spec::enabled_stages[2]) {
+ config_stage(2);
+ }
+ if constexpr (Spec::enabled_stages[3]) {
+ config_stage(3);
+ }
+ if constexpr (Spec::enabled_stages[4]) {
+ config_stage(4);
+ }
+ const std::span indices_span(image_view_indices.data(), image_view_index);
+ texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
+
+ texture_cache.UpdateRenderTargets(false);
+ state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
+
+ ImageId* texture_buffer_index{image_view_ids.data()};
+ const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE {
+ size_t index{};
+ const auto add_buffer{[&](const auto& desc) {
+ constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
+ for (u32 i = 0; i < desc.count; ++i) {
+ bool is_written{false};
+ if constexpr (is_image) {
+ is_written = desc.is_written;
+ }
+ ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)};
+ buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(),
+ image_view.BufferSize(), image_view.format,
+ is_written, is_image);
+ ++index;
+ ++texture_buffer_index;
+ }
+ }};
+ const Shader::Info& info{stage_infos[stage]};
+ buffer_cache.UnbindGraphicsTextureBuffers(stage);
+
+ if constexpr (Spec::has_texture_buffers) {
+ for (const auto& desc : info.texture_buffer_descriptors) {
+ add_buffer(desc);
+ }
+ }
+ if constexpr (Spec::has_image_buffers) {
+ for (const auto& desc : info.image_buffer_descriptors) {
+ add_buffer(desc);
+ }
+ }
+ for (const auto& desc : info.texture_descriptors) {
+ texture_buffer_index += desc.count;
+ }
+ if constexpr (Spec::has_images) {
+ for (const auto& desc : info.image_descriptors) {
+ texture_buffer_index += desc.count;
+ }
+ }
+ }};
+ if constexpr (Spec::enabled_stages[0]) {
+ bind_stage_info(0);
+ }
+ if constexpr (Spec::enabled_stages[1]) {
+ bind_stage_info(1);
+ }
+ if constexpr (Spec::enabled_stages[2]) {
+ bind_stage_info(2);
+ }
+ if constexpr (Spec::enabled_stages[3]) {
+ bind_stage_info(3);
+ }
+ if constexpr (Spec::enabled_stages[4]) {
+ bind_stage_info(4);
+ }
+ buffer_cache.UpdateGraphicsBuffers(is_indexed);
+ buffer_cache.BindHostGeometryBuffers(is_indexed);
+
+ if (!is_built.load(std::memory_order::relaxed)) {
+ WaitForBuild();
+ }
+ if (assembly_programs[0].handle != 0) {
+ program_manager.BindAssemblyPrograms(assembly_programs, enabled_stages_mask);
+ } else {
+ program_manager.BindSourcePrograms(source_programs);
+ }
+ const ImageId* views_it{image_view_ids.data()};
+ GLsizei texture_binding = 0;
+ GLsizei image_binding = 0;
+ std::array<GLuint, MAX_TEXTURES> textures;
+ std::array<GLuint, MAX_IMAGES> images;
+ const auto prepare_stage{[&](size_t stage) {
+ buffer_cache.runtime.SetImagePointers(&textures[texture_binding], &images[image_binding]);
+ buffer_cache.BindHostStageBuffers(stage);
+
+ texture_binding += num_texture_buffers[stage];
+ image_binding += num_image_buffers[stage];
+
+ views_it += num_texture_buffers[stage];
+ views_it += num_image_buffers[stage];
+
+ const auto& info{stage_infos[stage]};
+ for (const auto& desc : info.texture_descriptors) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
+ textures[texture_binding++] = image_view.Handle(desc.type);
+ }
+ }
+ for (const auto& desc : info.image_descriptors) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
+ if (desc.is_written) {
+ texture_cache.MarkModification(image_view.image_id);
+ }
+ images[image_binding++] = image_view.StorageView(desc.type, desc.format);
+ }
+ }
+ }};
+ if constexpr (Spec::enabled_stages[0]) {
+ prepare_stage(0);
+ }
+ if constexpr (Spec::enabled_stages[1]) {
+ prepare_stage(1);
+ }
+ if constexpr (Spec::enabled_stages[2]) {
+ prepare_stage(2);
+ }
+ if constexpr (Spec::enabled_stages[3]) {
+ prepare_stage(3);
+ }
+ if constexpr (Spec::enabled_stages[4]) {
+ prepare_stage(4);
+ }
+ if (texture_binding != 0) {
+ ASSERT(texture_binding == sampler_binding);
+ glBindTextures(0, texture_binding, textures.data());
+ glBindSamplers(0, sampler_binding, samplers.data());
+ }
+ if (image_binding != 0) {
+ glBindImageTextures(0, image_binding, images.data());
+ }
+}
+
+void GraphicsPipeline::ConfigureTransformFeedbackImpl() const {
+ glTransformFeedbackStreamAttribsNV(num_xfb_attribs, xfb_attribs.data(), num_xfb_strides,
+ xfb_streams.data(), GL_INTERLEAVED_ATTRIBS);
+}
+
+void GraphicsPipeline::GenerateTransformFeedbackState() {
+ // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal
+ // when this is required.
+ GLint* cursor{xfb_attribs.data()};
+ GLint* current_stream{xfb_streams.data()};
+
+ for (size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) {
+ const auto& layout = key.xfb_state.layouts[feedback];
+ UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding");
+ if (layout.varying_count == 0) {
+ continue;
+ }
+ *current_stream = static_cast<GLint>(feedback);
+ if (current_stream != xfb_streams.data()) {
+ // When stepping one stream, push the expected token
+ cursor[0] = GL_NEXT_BUFFER_NV;
+ cursor[1] = 0;
+ cursor[2] = 0;
+ cursor += XFB_ENTRY_STRIDE;
+ }
+ ++current_stream;
+
+ const auto& locations = key.xfb_state.varyings[feedback];
+ std::optional<u8> current_index;
+ for (u32 offset = 0; offset < layout.varying_count; ++offset) {
+ const u8 location = locations[offset];
+ const u8 index = location / 4;
+
+ if (current_index == index) {
+ // Increase number of components of the previous attachment
+ ++cursor[-2];
+ continue;
+ }
+ current_index = index;
+
+ std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location);
+ cursor[1] = 1;
+ cursor += XFB_ENTRY_STRIDE;
+ }
+ }
+ num_xfb_attribs = static_cast<GLsizei>((cursor - xfb_attribs.data()) / XFB_ENTRY_STRIDE);
+ num_xfb_strides = static_cast<GLsizei>(current_stream - xfb_streams.data());
+}
+
+void GraphicsPipeline::WaitForBuild() {
+ std::unique_lock lock{built_mutex};
+ built_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); });
+}
+
+} // namespace OpenGL
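
Editor's note: the Passes/FindSpec/MakeConfigureSpecFunc machinery above selects, at pipeline-build time, a ConfigureImpl<Spec> instantiation whose unused binding paths compile away via if constexpr. Below is a reduced, self-contained sketch of the same dispatch technique, not part of the patch; the Pipeline and Spec types are simplified stand-ins, not the real classes.

    // Sketch of compile-time spec dispatch: pick the first Spec compatible with the
    // pipeline's state and return a plain function pointer bound to that instantiation.
    #include <array>
    #include <cstdio>

    struct Pipeline {
        std::array<bool, 5> stage_uses_images{};

        template <typename Spec>
        void ConfigureImpl() {
            if constexpr (Spec::has_images) {
                std::puts("binding images");
            }
            std::puts("binding common state");
        }
    };

    using ConfigureFn = void (*)(Pipeline*);

    struct NoImagesSpec {
        static constexpr bool has_images = false;
    };
    struct DefaultSpec {
        static constexpr bool has_images = true;
    };

    template <typename Spec>
    bool Passes([[maybe_unused]] const Pipeline& pipeline) {
        if constexpr (!Spec::has_images) {
            for (const bool uses_images : pipeline.stage_uses_images) {
                if (uses_images) {
                    return false;
                }
            }
        }
        return true;
    }

    template <typename Spec, typename... Specs>
    ConfigureFn FindSpec(const Pipeline& pipeline) {
        if constexpr (sizeof...(Specs) > 0) {
            if (!Passes<Spec>(pipeline)) {
                return FindSpec<Specs...>(pipeline);
            }
        }
        return [](Pipeline* p) { p->ConfigureImpl<Spec>(); };
    }

    // Usage sketch: the last spec acts as the catch-all, like DefaultSpec above.
    //   Pipeline pipeline{};
    //   const ConfigureFn fn = FindSpec<NoImagesSpec, DefaultSpec>(pipeline);
    //   fn(&pipeline);
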
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
new file mode 100644
index 000000000..4e28d9a42
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
@@ -0,0 +1,169 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <cstring>
+#include <type_traits>
+#include <utility>
+
+#include "common/bit_field.h"
+#include "common/cityhash.h"
+#include "common/common_types.h"
+#include "shader_recompiler/shader_info.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
+#include "video_core/renderer_opengl/gl_buffer_cache.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/gl_texture_cache.h"
+#include "video_core/transform_feedback.h"
+
+namespace OpenGL {
+
+namespace ShaderContext {
+struct Context;
+}
+
+class Device;
+class ProgramManager;
+
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+using ShaderWorker = Common::StatefulThreadWorker<ShaderContext::Context>;
+
+struct GraphicsPipelineKey {
+ std::array<u64, 6> unique_hashes;
+ union {
+ u32 raw;
+ BitField<0, 1, u32> xfb_enabled;
+ BitField<1, 1, u32> early_z;
+ BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology;
+ BitField<6, 2, Maxwell::TessellationPrimitive> tessellation_primitive;
+ BitField<8, 2, Maxwell::TessellationSpacing> tessellation_spacing;
+ BitField<10, 1, u32> tessellation_clockwise;
+ };
+ std::array<u32, 3> padding;
+ VideoCommon::TransformFeedbackState xfb_state;
+
+ size_t Hash() const noexcept {
+ return static_cast<size_t>(Common::CityHash64(reinterpret_cast<const char*>(this), Size()));
+ }
+
+ bool operator==(const GraphicsPipelineKey& rhs) const noexcept {
+ return std::memcmp(this, &rhs, Size()) == 0;
+ }
+
+ bool operator!=(const GraphicsPipelineKey& rhs) const noexcept {
+ return !operator==(rhs);
+ }
+
+ [[nodiscard]] size_t Size() const noexcept {
+ if (xfb_enabled != 0) {
+ return sizeof(GraphicsPipelineKey);
+ } else {
+ return offsetof(GraphicsPipelineKey, padding);
+ }
+ }
+};
+static_assert(std::has_unique_object_representations_v<GraphicsPipelineKey>);
+static_assert(std::is_trivially_copyable_v<GraphicsPipelineKey>);
+static_assert(std::is_trivially_constructible_v<GraphicsPipelineKey>);
+
+class GraphicsPipeline {
+public:
+ explicit GraphicsPipeline(const Device& device, TextureCache& texture_cache_,
+ BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_,
+ Tegra::Engines::Maxwell3D& maxwell3d_,
+ ProgramManager& program_manager_, StateTracker& state_tracker_,
+ ShaderWorker* thread_worker, VideoCore::ShaderNotify* shader_notify,
+ std::array<std::string, 5> sources,
+ std::array<std::vector<u32>, 5> sources_spirv,
+ const std::array<const Shader::Info*, 5>& infos,
+ const GraphicsPipelineKey& key_);
+
+ void Configure(bool is_indexed) {
+ configure_func(this, is_indexed);
+ }
+
+ void ConfigureTransformFeedback() const {
+ if (num_xfb_attribs != 0) {
+ ConfigureTransformFeedbackImpl();
+ }
+ }
+
+ [[nodiscard]] const GraphicsPipelineKey& Key() const noexcept {
+ return key;
+ }
+
+ [[nodiscard]] bool WritesGlobalMemory() const noexcept {
+ return writes_global_memory;
+ }
+
+ [[nodiscard]] bool IsBuilt() const noexcept {
+ return is_built.load(std::memory_order::relaxed);
+ }
+
+ template <typename Spec>
+ static auto MakeConfigureSpecFunc() {
+ return [](GraphicsPipeline* pipeline, bool is_indexed) {
+ pipeline->ConfigureImpl<Spec>(is_indexed);
+ };
+ }
+
+private:
+ template <typename Spec>
+ void ConfigureImpl(bool is_indexed);
+
+ void ConfigureTransformFeedbackImpl() const;
+
+ void GenerateTransformFeedbackState();
+
+ void WaitForBuild();
+
+ TextureCache& texture_cache;
+ BufferCache& buffer_cache;
+ Tegra::MemoryManager& gpu_memory;
+ Tegra::Engines::Maxwell3D& maxwell3d;
+ ProgramManager& program_manager;
+ StateTracker& state_tracker;
+ const GraphicsPipelineKey key;
+
+ void (*configure_func)(GraphicsPipeline*, bool){};
+
+ std::array<OGLProgram, 5> source_programs;
+ std::array<OGLAssemblyProgram, 5> assembly_programs;
+ u32 enabled_stages_mask{};
+
+ std::array<Shader::Info, 5> stage_infos{};
+ std::array<u32, 5> enabled_uniform_buffer_masks{};
+ VideoCommon::UniformBufferSizes uniform_buffer_sizes{};
+ std::array<u32, 5> base_uniform_bindings{};
+ std::array<u32, 5> base_storage_bindings{};
+ std::array<u32, 5> num_texture_buffers{};
+ std::array<u32, 5> num_image_buffers{};
+
+ bool use_storage_buffers{};
+ bool writes_global_memory{};
+
+ static constexpr std::size_t XFB_ENTRY_STRIDE = 3;
+ GLsizei num_xfb_attribs{};
+ GLsizei num_xfb_strides{};
+ std::array<GLint, 128 * XFB_ENTRY_STRIDE * Maxwell::NumTransformFeedbackBuffers> xfb_attribs{};
+ std::array<GLint, Maxwell::NumTransformFeedbackBuffers> xfb_streams{};
+
+ std::mutex built_mutex;
+ std::condition_variable built_condvar;
+ std::atomic_bool is_built{false};
+};
+
+} // namespace OpenGL
+
+namespace std {
+template <>
+struct hash<OpenGL::GraphicsPipelineKey> {
+ size_t operator()(const OpenGL::GraphicsPipelineKey& k) const noexcept {
+ return k.Hash();
+ }
+};
+} // namespace std
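
Editor's note: GraphicsPipelineKey is hashed and compared as raw bytes, and Size() stops at the padding member when transform feedback is disabled so the large xfb_state does not affect the hash or the comparison. Below is a small sketch of that pattern with a simplified two-field key, not part of the patch; std::hash over a string_view stands in for Common::CityHash64 purely for illustration.

    // Sketch: hash and compare a trivially-copyable key as raw bytes, with a variable
    // "active size" in the spirit of GraphicsPipelineKey::Size(). SimpleKey is a stand-in.
    #include <cstddef>
    #include <cstring>
    #include <functional>
    #include <string_view>
    #include <type_traits>

    struct SimpleKey {
        unsigned int flags;     // always significant
        unsigned int xfb_state; // only significant when the low bit of flags is set

        size_t Size() const noexcept {
            return (flags & 1u) != 0 ? sizeof(SimpleKey) : offsetof(SimpleKey, xfb_state);
        }
        size_t Hash() const noexcept {
            return std::hash<std::string_view>{}(
                std::string_view(reinterpret_cast<const char*>(this), Size()));
        }
        bool operator==(const SimpleKey& rhs) const noexcept {
            return Size() == rhs.Size() && std::memcmp(this, &rhs, Size()) == 0;
        }
    };
    static_assert(std::is_trivially_copyable_v<SimpleKey>);
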
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index f87bb269b..41d2b73f4 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -23,7 +23,6 @@
#include "core/memory.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
-#include "video_core/engines/shader_type.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_query_cache.h"
@@ -40,7 +39,6 @@ namespace OpenGL {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using GLvec4 = std::array<GLfloat, 4>;
-using Tegra::Engines::ShaderType;
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::SurfaceTarget;
using VideoCore::Surface::SurfaceType;
@@ -51,112 +49,11 @@ MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Management", MP_RGB(100, 255, 100));
namespace {
-
constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
-struct TextureHandle {
- constexpr TextureHandle(u32 data, bool via_header_index) {
- const Tegra::Texture::TextureHandle handle{data};
- image = handle.tic_id;
- sampler = via_header_index ? image : handle.tsc_id.Value();
- }
-
- u32 image;
- u32 sampler;
-};
-
-template <typename Engine, typename Entry>
-TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry,
- ShaderType shader_type, size_t index = 0) {
- if constexpr (std::is_same_v<Entry, SamplerEntry>) {
- if (entry.is_separated) {
- const u32 buffer_1 = entry.buffer;
- const u32 buffer_2 = entry.secondary_buffer;
- const u32 offset_1 = entry.offset;
- const u32 offset_2 = entry.secondary_offset;
- const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
- const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
- return TextureHandle(handle_1 | handle_2, via_header_index);
- }
- }
- if (entry.is_bindless) {
- const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
- return TextureHandle(raw, via_header_index);
- }
- const u32 buffer = engine.GetBoundBuffer();
- const u64 offset = (entry.offset + index) * sizeof(u32);
- return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
-}
-
-/// Translates hardware transform feedback indices
-/// @param location Hardware location
-/// @return Pair of ARB_transform_feedback3 token stream first and third arguments
-/// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt
-std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) {
- const u8 index = location / 4;
- if (index >= 8 && index <= 39) {
- return {GL_GENERIC_ATTRIB_NV, index - 8};
- }
- if (index >= 48 && index <= 55) {
- return {GL_TEXTURE_COORD_NV, index - 48};
- }
- switch (index) {
- case 7:
- return {GL_POSITION, 0};
- case 40:
- return {GL_PRIMARY_COLOR_NV, 0};
- case 41:
- return {GL_SECONDARY_COLOR_NV, 0};
- case 42:
- return {GL_BACK_PRIMARY_COLOR_NV, 0};
- case 43:
- return {GL_BACK_SECONDARY_COLOR_NV, 0};
- }
- UNIMPLEMENTED_MSG("index={}", index);
- return {GL_POSITION, 0};
-}
-
void oglEnable(GLenum cap, bool state) {
(state ? glEnable : glDisable)(cap);
}
-
-ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
- if (entry.is_buffer) {
- return ImageViewType::Buffer;
- }
- switch (entry.type) {
- case Tegra::Shader::TextureType::Texture1D:
- return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D;
- case Tegra::Shader::TextureType::Texture2D:
- return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D;
- case Tegra::Shader::TextureType::Texture3D:
- return ImageViewType::e3D;
- case Tegra::Shader::TextureType::TextureCube:
- return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube;
- }
- UNREACHABLE();
- return ImageViewType::e2D;
-}
-
-ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) {
- switch (entry.type) {
- case Tegra::Shader::ImageType::Texture1D:
- return ImageViewType::e1D;
- case Tegra::Shader::ImageType::Texture1DArray:
- return ImageViewType::e1DArray;
- case Tegra::Shader::ImageType::Texture2D:
- return ImageViewType::e2D;
- case Tegra::Shader::ImageType::Texture2DArray:
- return ImageViewType::e2DArray;
- case Tegra::Shader::ImageType::Texture3D:
- return ImageViewType::e3D;
- case Tegra::Shader::ImageType::TextureBuffer:
- return ImageViewType::Buffer;
- }
- UNREACHABLE();
- return ImageViewType::e2D;
-}
-
} // Anonymous namespace
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
@@ -170,14 +67,10 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
buffer_cache_runtime(device),
buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
- shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device),
- query_cache(*this, maxwell3d, gpu_memory),
- fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
- async_shaders(emu_window_) {
- if (device.UseAsynchronousShaders()) {
- async_shaders.AllocateWorkers();
- }
-}
+ shader_cache(*this, emu_window_, maxwell3d, kepler_compute, gpu_memory, device, texture_cache,
+ buffer_cache, program_manager, state_tracker, gpu.ShaderNotify()),
+ query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache),
+ fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {}
RasterizerOpenGL::~RasterizerOpenGL() = default;
@@ -204,7 +97,7 @@ void RasterizerOpenGL::SyncVertexFormats() {
const auto gl_index = static_cast<GLuint>(index);
// Disable constant attributes.
- if (attrib.IsConstant()) {
+ if (attrib.constant) {
glDisableVertexAttribArray(gl_index);
continue;
}
@@ -244,116 +137,9 @@ void RasterizerOpenGL::SyncVertexInstances() {
}
}
-void RasterizerOpenGL::SetupShaders(bool is_indexed) {
- u32 clip_distances = 0;
-
- std::array<Shader*, Maxwell::MaxShaderStage> shaders{};
- image_view_indices.clear();
- sampler_handles.clear();
-
- texture_cache.SynchronizeGraphicsDescriptors();
-
- for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
- const auto& shader_config = maxwell3d.regs.shader_config[index];
- const auto program{static_cast<Maxwell::ShaderProgram>(index)};
-
- // Skip stages that are not enabled
- if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
- switch (program) {
- case Maxwell::ShaderProgram::Geometry:
- program_manager.UseGeometryShader(0);
- break;
- case Maxwell::ShaderProgram::Fragment:
- program_manager.UseFragmentShader(0);
- break;
- default:
- break;
- }
- continue;
- }
- // Currently this stages are not supported in the OpenGL backend.
- // TODO(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL
- if (program == Maxwell::ShaderProgram::TesselationControl ||
- program == Maxwell::ShaderProgram::TesselationEval) {
- continue;
- }
-
- Shader* const shader = shader_cache.GetStageProgram(program, async_shaders);
- const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0;
- switch (program) {
- case Maxwell::ShaderProgram::VertexA:
- case Maxwell::ShaderProgram::VertexB:
- program_manager.UseVertexShader(program_handle);
- break;
- case Maxwell::ShaderProgram::Geometry:
- program_manager.UseGeometryShader(program_handle);
- break;
- case Maxwell::ShaderProgram::Fragment:
- program_manager.UseFragmentShader(program_handle);
- break;
- default:
- UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
- shader_config.enable.Value(), shader_config.offset);
- break;
- }
-
- // Stage indices are 0 - 5
- const size_t stage = index == 0 ? 0 : index - 1;
- shaders[stage] = shader;
-
- SetupDrawTextures(shader, stage);
- SetupDrawImages(shader, stage);
-
- buffer_cache.SetEnabledUniformBuffers(stage, shader->GetEntries().enabled_uniform_buffers);
-
- buffer_cache.UnbindGraphicsStorageBuffers(stage);
- u32 ssbo_index = 0;
- for (const auto& buffer : shader->GetEntries().global_memory_entries) {
- buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index,
- buffer.cbuf_offset, buffer.is_written);
- ++ssbo_index;
- }
-
- // Workaround for Intel drivers.
- // When a clip distance is enabled but not set in the shader it crops parts of the screen
- // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
- // clip distances only when it's written by a shader stage.
- clip_distances |= shader->GetEntries().clip_distances;
-
- // When VertexA is enabled, we have dual vertex shaders
- if (program == Maxwell::ShaderProgram::VertexA) {
- // VertexB was combined with VertexA, so we skip the VertexB iteration
- ++index;
- }
- }
- SyncClipEnabled(clip_distances);
- maxwell3d.dirty.flags[Dirty::Shaders] = false;
-
- buffer_cache.UpdateGraphicsBuffers(is_indexed);
-
- const std::span indices_span(image_view_indices.data(), image_view_indices.size());
- texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
-
- buffer_cache.BindHostGeometryBuffers(is_indexed);
-
- size_t image_view_index = 0;
- size_t texture_index = 0;
- size_t image_index = 0;
- for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
- const Shader* const shader = shaders[stage];
- if (!shader) {
- continue;
- }
- buffer_cache.BindHostStageBuffers(stage);
- const auto& base = device.GetBaseBindings(stage);
- BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index,
- texture_index, image_index);
- }
-}
-
-void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading,
+void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) {
- shader_cache.LoadDiskCache(title_id, stop_loading, callback);
+ shader_cache.LoadDiskResources(title_id, stop_loading, callback);
}
void RasterizerOpenGL::Clear() {
@@ -432,16 +218,15 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
SyncState();
- // Setup shaders and their used resources.
+ GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()};
+ if (!pipeline) {
+ return;
+ }
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
- SetupShaders(is_indexed);
-
- texture_cache.UpdateRenderTargets(false);
- state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
- program_manager.BindGraphicsPipeline();
+ pipeline->Configure(is_indexed);
const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology);
- BeginTransformFeedback(primitive_mode);
+ BeginTransformFeedback(pipeline, primitive_mode);
const GLuint base_instance = static_cast<GLuint>(maxwell3d.regs.vb_base_instance);
const GLsizei num_instances =
@@ -480,35 +265,24 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
num_instances, base_instance);
}
}
-
EndTransformFeedback();
++num_queued_commands;
+ has_written_global_memory |= pipeline->WritesGlobalMemory();
gpu.TickWork();
}
-void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
- Shader* const kernel = shader_cache.GetComputeKernel(code_addr);
-
- std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
- BindComputeTextures(kernel);
-
- const auto& entries = kernel->GetEntries();
- buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers);
- buffer_cache.UnbindComputeStorageBuffers();
- u32 ssbo_index = 0;
- for (const auto& buffer : entries.global_memory_entries) {
- buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset,
- buffer.is_written);
- ++ssbo_index;
- }
- buffer_cache.UpdateComputeBuffers();
- buffer_cache.BindHostComputeBuffers();
-
- const auto& launch_desc = kepler_compute.launch_description;
- glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
+void RasterizerOpenGL::DispatchCompute() {
+ ComputePipeline* const pipeline{shader_cache.CurrentComputePipeline()};
+ if (!pipeline) {
+ return;
+ }
+ pipeline->Configure();
+ const auto& qmd{kepler_compute.launch_description};
+ glDispatchCompute(qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z);
++num_queued_commands;
+ has_written_global_memory |= pipeline->WritesGlobalMemory();
}
void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) {
@@ -611,6 +385,13 @@ void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
shader_cache.OnCPUWrite(addr, size);
}
+void RasterizerOpenGL::ModifyGPUMemory(GPUVAddr addr, u64 size) {
+ {
+ std::scoped_lock lock{texture_cache.mutex};
+ texture_cache.UnmapGPUMemory(addr, size);
+ }
+}
+
void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
if (!gpu.IsAsync()) {
gpu_memory.Write<u32>(addr, value);
@@ -627,6 +408,13 @@ void RasterizerOpenGL::SignalSyncPoint(u32 value) {
fence_manager.SignalSyncPoint(value);
}
+void RasterizerOpenGL::SignalReference() {
+ if (!gpu.IsAsync()) {
+ return;
+ }
+ fence_manager.SignalOrdering();
+}
+
void RasterizerOpenGL::ReleaseFences() {
if (!gpu.IsAsync()) {
return;
@@ -643,10 +431,11 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
void RasterizerOpenGL::WaitForIdle() {
glMemoryBarrier(GL_ALL_BARRIER_BITS);
+ SignalReference();
}
void RasterizerOpenGL::FragmentBarrier() {
- glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT);
+ glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT);
}
void RasterizerOpenGL::TiledCacheBarrier() {
@@ -659,6 +448,13 @@ void RasterizerOpenGL::FlushCommands() {
return;
}
num_queued_commands = 0;
+
+ // Make sure memory stored from the previous GL command stream is visible
+ // This is only needed with assembly shaders, where we write to GPU memory through raw pointers
+ if (has_written_global_memory) {
+ has_written_global_memory = false;
+ glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
+ }
glFlush();
}
@@ -686,6 +482,10 @@ bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surf
return true;
}
+Tegra::Engines::AccelerateDMAInterface& RasterizerOpenGL::AccessAccelerateDMA() {
+ return accelerate_dma;
+}
+
bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
VAddr framebuffer_addr, u32 pixel_stride) {
if (framebuffer_addr == 0) {
@@ -702,111 +502,11 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
// ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different");
// ASSERT_MSG(image_view->size.height == config.height, "Framebuffer height is different");
- screen_info.display_texture = image_view->Handle(ImageViewType::e2D);
+ screen_info.display_texture = image_view->Handle(Shader::TextureType::Color2D);
screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format);
return true;
}
-void RasterizerOpenGL::BindComputeTextures(Shader* kernel) {
- image_view_indices.clear();
- sampler_handles.clear();
-
- texture_cache.SynchronizeComputeDescriptors();
-
- SetupComputeTextures(kernel);
- SetupComputeImages(kernel);
-
- const std::span indices_span(image_view_indices.data(), image_view_indices.size());
- texture_cache.FillComputeImageViews(indices_span, image_view_ids);
-
- program_manager.BindCompute(kernel->GetHandle());
- size_t image_view_index = 0;
- size_t texture_index = 0;
- size_t image_index = 0;
- BindTextures(kernel->GetEntries(), 0, 0, image_view_index, texture_index, image_index);
-}
-
-void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_texture,
- GLuint base_image, size_t& image_view_index,
- size_t& texture_index, size_t& image_index) {
- const GLuint* const samplers = sampler_handles.data() + texture_index;
- const GLuint* const textures = texture_handles.data() + texture_index;
- const GLuint* const images = image_handles.data() + image_index;
-
- const size_t num_samplers = entries.samplers.size();
- for (const auto& sampler : entries.samplers) {
- for (size_t i = 0; i < sampler.size; ++i) {
- const ImageViewId image_view_id = image_view_ids[image_view_index++];
- const ImageView& image_view = texture_cache.GetImageView(image_view_id);
- const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(sampler));
- texture_handles[texture_index++] = handle;
- }
- }
- const size_t num_images = entries.images.size();
- for (size_t unit = 0; unit < num_images; ++unit) {
- // TODO: Mark as modified
- const ImageViewId image_view_id = image_view_ids[image_view_index++];
- const ImageView& image_view = texture_cache.GetImageView(image_view_id);
- const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(entries.images[unit]));
- image_handles[image_index] = handle;
- ++image_index;
- }
- if (num_samplers > 0) {
- glBindSamplers(base_texture, static_cast<GLsizei>(num_samplers), samplers);
- glBindTextures(base_texture, static_cast<GLsizei>(num_samplers), textures);
- }
- if (num_images > 0) {
- glBindImageTextures(base_image, static_cast<GLsizei>(num_images), images);
- }
-}
-
-void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) {
- const bool via_header_index =
- maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
- for (const auto& entry : shader->GetEntries().samplers) {
- const auto shader_type = static_cast<ShaderType>(stage_index);
- for (size_t index = 0; index < entry.size; ++index) {
- const auto handle =
- GetTextureInfo(maxwell3d, via_header_index, entry, shader_type, index);
- const Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler);
- sampler_handles.push_back(sampler->Handle());
- image_view_indices.push_back(handle.image);
- }
- }
-}
-
-void RasterizerOpenGL::SetupComputeTextures(const Shader* kernel) {
- const bool via_header_index = kepler_compute.launch_description.linked_tsc;
- for (const auto& entry : kernel->GetEntries().samplers) {
- for (size_t i = 0; i < entry.size; ++i) {
- const auto handle =
- GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute, i);
- const Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler);
- sampler_handles.push_back(sampler->Handle());
- image_view_indices.push_back(handle.image);
- }
- }
-}
-
-void RasterizerOpenGL::SetupDrawImages(const Shader* shader, size_t stage_index) {
- const bool via_header_index =
- maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
- for (const auto& entry : shader->GetEntries().images) {
- const auto shader_type = static_cast<ShaderType>(stage_index);
- const auto handle = GetTextureInfo(maxwell3d, via_header_index, entry, shader_type);
- image_view_indices.push_back(handle.image);
- }
-}
-
-void RasterizerOpenGL::SetupComputeImages(const Shader* shader) {
- const bool via_header_index = kepler_compute.launch_description.linked_tsc;
- for (const auto& entry : shader->GetEntries().images) {
- const auto handle =
- GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute);
- image_view_indices.push_back(handle.image);
- }
-}
-
void RasterizerOpenGL::SyncState() {
SyncViewport();
SyncRasterizeEnable();
@@ -922,7 +622,7 @@ void RasterizerOpenGL::SyncDepthClamp() {
void RasterizerOpenGL::SyncClipEnabled(u32 clip_mask) {
auto& flags = maxwell3d.dirty.flags;
- if (!flags[Dirty::ClipDistances] && !flags[Dirty::Shaders]) {
+ if (!flags[Dirty::ClipDistances] && !flags[VideoCommon::Dirty::Shaders]) {
return;
}
flags[Dirty::ClipDistances] = false;
@@ -1299,68 +999,13 @@ void RasterizerOpenGL::SyncFramebufferSRGB() {
oglEnable(GL_FRAMEBUFFER_SRGB, maxwell3d.regs.framebuffer_srgb);
}
-void RasterizerOpenGL::SyncTransformFeedback() {
- // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal
- // when this is required.
- const auto& regs = maxwell3d.regs;
-
- static constexpr std::size_t STRIDE = 3;
- std::array<GLint, 128 * STRIDE * Maxwell::NumTransformFeedbackBuffers> attribs;
- std::array<GLint, Maxwell::NumTransformFeedbackBuffers> streams;
-
- GLint* cursor = attribs.data();
- GLint* current_stream = streams.data();
-
- for (std::size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) {
- const auto& layout = regs.tfb_layouts[feedback];
- UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding");
- if (layout.varying_count == 0) {
- continue;
- }
-
- *current_stream = static_cast<GLint>(feedback);
- if (current_stream != streams.data()) {
- // When stepping one stream, push the expected token
- cursor[0] = GL_NEXT_BUFFER_NV;
- cursor[1] = 0;
- cursor[2] = 0;
- cursor += STRIDE;
- }
- ++current_stream;
-
- const auto& locations = regs.tfb_varying_locs[feedback];
- std::optional<u8> current_index;
- for (u32 offset = 0; offset < layout.varying_count; ++offset) {
- const u8 location = locations[offset];
- const u8 index = location / 4;
-
- if (current_index == index) {
- // Increase number of components of the previous attachment
- ++cursor[-2];
- continue;
- }
- current_index = index;
-
- std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location);
- cursor[1] = 1;
- cursor += STRIDE;
- }
- }
-
- const GLsizei num_attribs = static_cast<GLsizei>((cursor - attribs.data()) / STRIDE);
- const GLsizei num_strides = static_cast<GLsizei>(current_stream - streams.data());
- glTransformFeedbackStreamAttribsNV(num_attribs, attribs.data(), num_strides, streams.data(),
- GL_INTERLEAVED_ATTRIBS);
-}
-
-void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
+void RasterizerOpenGL::BeginTransformFeedback(GraphicsPipeline* program, GLenum primitive_mode) {
const auto& regs = maxwell3d.regs;
if (regs.tfb_enabled == 0) {
return;
}
- if (device.UseAssemblyShaders()) {
- SyncTransformFeedback();
- }
+ program->ConfigureTransformFeedback();
+
UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
@@ -1374,11 +1019,21 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
}
void RasterizerOpenGL::EndTransformFeedback() {
- const auto& regs = maxwell3d.regs;
- if (regs.tfb_enabled == 0) {
- return;
+ if (maxwell3d.regs.tfb_enabled != 0) {
+ glEndTransformFeedback();
}
- glEndTransformFeedback();
+}
+
+AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_) : buffer_cache{buffer_cache_} {}
+
+bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) {
+ std::scoped_lock lock{buffer_cache.mutex};
+ return buffer_cache.DMACopy(src_address, dest_address, amount);
+}
+
+bool AccelerateDMA::BufferClear(GPUVAddr src_address, u64 amount, u32 value) {
+ std::scoped_lock lock{buffer_cache.mutex};
+ return buffer_cache.DMAClear(src_address, amount, value);
}
} // namespace OpenGL
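Illustrative sketch (not part of the patch): the new AccelerateDMA methods above only take the buffer cache's mutex and forward the request. A minimal, self-contained version of that forwarding pattern is shown below; FakeBufferCache and its DMACopy/DMAClear methods are hypothetical stand-ins for the real BufferCache API.

#include <cstdint>
#include <mutex>

using GPUVAddr = std::uint64_t;
using u32 = std::uint32_t;
using u64 = std::uint64_t;

// Hypothetical stand-in for the real BufferCache: callers must hold its mutex
// while issuing DMA operations against cached buffers.
class FakeBufferCache {
public:
    std::mutex mutex;

    bool DMACopy(GPUVAddr src, GPUVAddr dst, u64 amount) {
        // A real implementation would copy 'amount' bytes between cached buffers.
        return src != 0 && dst != 0 && amount != 0;
    }

    bool DMAClear(GPUVAddr dst, u64 amount, u32 value) {
        static_cast<void>(value); // 'value' would be splatted across the range.
        return dst != 0 && amount != 0;
    }
};

// Mirrors AccelerateDMA::BufferCopy/BufferClear above: lock the cache, then delegate.
class AccelerateDMASketch {
public:
    explicit AccelerateDMASketch(FakeBufferCache& cache) : buffer_cache{cache} {}

    bool BufferCopy(GPUVAddr src, GPUVAddr dst, u64 amount) {
        std::scoped_lock lock{buffer_cache.mutex};
        return buffer_cache.DMACopy(src, dst, amount);
    }

    bool BufferClear(GPUVAddr dst, u64 amount, u32 value) {
        std::scoped_lock lock{buffer_cache.mutex};
        return buffer_cache.DMAClear(dst, amount, value);
    }

private:
    FakeBufferCache& buffer_cache;
};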
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 76298517f..d0397b745 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -19,6 +19,7 @@
#include "common/common_types.h"
#include "video_core/engines/const_buffer_info.h"
#include "video_core/engines/maxwell_3d.h"
+#include "video_core/engines/maxwell_dma.h"
#include "video_core/rasterizer_accelerated.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
@@ -27,11 +28,9 @@
#include "video_core/renderer_opengl/gl_query_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
-#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
-#include "video_core/shader/async_shaders.h"
#include "video_core/textures/texture.h"
namespace Core::Memory {
@@ -58,6 +57,18 @@ struct BindlessSSBO {
};
static_assert(sizeof(BindlessSSBO) * CHAR_BIT == 128);
+class AccelerateDMA : public Tegra::Engines::AccelerateDMAInterface {
+public:
+ explicit AccelerateDMA(BufferCache& buffer_cache);
+
+ bool BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) override;
+
+ bool BufferClear(GPUVAddr src_address, u64 amount, u32 value) override;
+
+private:
+ BufferCache& buffer_cache;
+};
+
class RasterizerOpenGL : public VideoCore::RasterizerAccelerated {
public:
explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
@@ -68,7 +79,7 @@ public:
void Draw(bool is_indexed, bool is_instanced) override;
void Clear() override;
- void DispatchCompute(GPUVAddr code_addr) override;
+ void DispatchCompute() override;
void ResetCounter(VideoCore::QueryType type) override;
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
@@ -80,8 +91,10 @@ public:
void OnCPUWrite(VAddr addr, u64 size) override;
void SyncGuestHost() override;
void UnmapMemory(VAddr addr, u64 size) override;
+ void ModifyGPUMemory(GPUVAddr addr, u64 size) override;
void SignalSemaphore(GPUVAddr addr, u32 value) override;
void SignalSyncPoint(u32 value) override;
+ void SignalReference() override;
void ReleaseFences() override;
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void WaitForIdle() override;
@@ -92,9 +105,10 @@ public:
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) override;
+ Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override;
- void LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading,
+ void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) override;
/// Returns true when there are commands queued to the OpenGL server.
@@ -102,36 +116,11 @@ public:
return num_queued_commands > 0;
}
- VideoCommon::Shader::AsyncShaders& GetAsyncShaders() {
- return async_shaders;
- }
-
- const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const {
- return async_shaders;
- }
-
private:
static constexpr size_t MAX_TEXTURES = 192;
static constexpr size_t MAX_IMAGES = 48;
static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES;
- void BindComputeTextures(Shader* kernel);
-
- void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image,
- size_t& image_view_index, size_t& texture_index, size_t& image_index);
-
- /// Configures the current textures to use for the draw command.
- void SetupDrawTextures(const Shader* shader, size_t stage_index);
-
- /// Configures the textures used in a compute shader.
- void SetupComputeTextures(const Shader* kernel);
-
- /// Configures images in a graphics shader.
- void SetupDrawImages(const Shader* shader, size_t stage_index);
-
- /// Configures images in a compute shader.
- void SetupComputeImages(const Shader* shader);
-
/// Syncs state to match guest's
void SyncState();
@@ -204,18 +193,12 @@ private:
/// Syncs vertex instances to match the guest state
void SyncVertexInstances();
- /// Syncs transform feedback state to match guest state
- /// @note Only valid on assembly shaders
- void SyncTransformFeedback();
-
/// Begin a transform feedback
- void BeginTransformFeedback(GLenum primitive_mode);
+ void BeginTransformFeedback(GraphicsPipeline* pipeline, GLenum primitive_mode);
/// End a transform feedback
void EndTransformFeedback();
- void SetupShaders(bool is_indexed);
-
Tegra::GPU& gpu;
Tegra::Engines::Maxwell3D& maxwell3d;
Tegra::Engines::KeplerCompute& kepler_compute;
@@ -230,12 +213,11 @@ private:
TextureCache texture_cache;
BufferCacheRuntime buffer_cache_runtime;
BufferCache buffer_cache;
- ShaderCacheOpenGL shader_cache;
+ ShaderCache shader_cache;
QueryCache query_cache;
+ AccelerateDMA accelerate_dma;
FenceManagerOpenGL fence_manager;
- VideoCommon::Shader::AsyncShaders async_shaders;
-
boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles;
@@ -243,7 +225,8 @@ private:
std::array<GLuint, MAX_IMAGES> image_handles{};
/// Number of commands queued to the OpenGL driver. Reset on flush.
- std::size_t num_queued_commands = 0;
+ size_t num_queued_commands = 0;
+ bool has_written_global_memory = false;
u32 last_clip_distance_mask = 0;
};
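Illustrative sketch (not part of the patch): LoadDiskResources now takes std::stop_token instead of a const std::atomic_bool&, which allows cooperative cancellation via std::jthread. The loader body and item count below are hypothetical; only the stop_token polling pattern corresponds to the signature change above.

#include <chrono>
#include <stop_token>
#include <thread>

// Hypothetical loader loop: it polls the stop_token between work items instead
// of checking a shared atomic flag.
void LoadResourcesSketch(std::stop_token stop_loading, int total_items) {
    for (int i = 0; i < total_items; ++i) {
        if (stop_loading.stop_requested()) {
            return; // Abort cleanly when the owner requests cancellation.
        }
        std::this_thread::sleep_for(std::chrono::milliseconds(1)); // pretend work
    }
}

int main() {
    // std::jthread passes its stop_token as the first argument and requests a
    // stop automatically when it is destroyed.
    std::jthread loader{LoadResourcesSketch, 10'000};
    std::this_thread::sleep_for(std::chrono::milliseconds(5));
    loader.request_stop();
    return 0;
}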
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
index 3428e5e21..8695c29e3 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -83,18 +83,6 @@ void OGLSampler::Release() {
handle = 0;
}
-void OGLShader::Create(std::string_view source, GLenum type) {
- if (handle != 0) {
- return;
- }
- if (source.empty()) {
- return;
- }
-
- MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
- handle = GLShader::LoadShader(source, type);
-}
-
void OGLShader::Release() {
if (handle == 0)
return;
@@ -104,21 +92,6 @@ void OGLShader::Release() {
handle = 0;
}
-void OGLProgram::CreateFromSource(const char* vert_shader, const char* geo_shader,
- const char* frag_shader, bool separable_program,
- bool hint_retrievable) {
- OGLShader vert, geo, frag;
- if (vert_shader)
- vert.Create(vert_shader, GL_VERTEX_SHADER);
- if (geo_shader)
- geo.Create(geo_shader, GL_GEOMETRY_SHADER);
- if (frag_shader)
- frag.Create(frag_shader, GL_FRAGMENT_SHADER);
-
- MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
- Create(separable_program, hint_retrievable, vert.handle, geo.handle, frag.handle);
-}
-
void OGLProgram::Release() {
if (handle == 0)
return;
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index 552d79db4..b2d5bfd3b 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -8,7 +8,6 @@
#include <utility>
#include <glad/glad.h>
#include "common/common_types.h"
-#include "video_core/renderer_opengl/gl_shader_util.h"
namespace OpenGL {
@@ -128,8 +127,6 @@ public:
return *this;
}
- void Create(std::string_view source, GLenum type);
-
void Release();
GLuint handle = 0;
@@ -151,17 +148,6 @@ public:
return *this;
}
- template <typename... T>
- void Create(bool separable_program, bool hint_retrievable, T... shaders) {
- if (handle != 0)
- return;
- handle = GLShader::LoadProgram(separable_program, hint_retrievable, shaders...);
- }
-
- /// Creates a new internal OpenGL resource and stores the handle
- void CreateFromSource(const char* vert_shader, const char* geo_shader, const char* frag_shader,
- bool separable_program = false, bool hint_retrievable = false);
-
/// Deletes the internal OpenGL resource
void Release();
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 5cf7cd151..1f4dda17e 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -3,606 +3,543 @@
// Refer to the license.txt file included.
#include <atomic>
+#include <fstream>
#include <functional>
#include <mutex>
-#include <optional>
#include <string>
#include <thread>
-#include <unordered_set>
#include "common/alignment.h"
#include "common/assert.h"
+#include "common/fs/fs.h"
+#include "common/fs/path_util.h"
#include "common/logging/log.h"
#include "common/scope_exit.h"
+#include "common/settings.h"
+#include "common/thread_worker.h"
#include "core/core.h"
-#include "core/frontend/emu_window.h"
+#include "shader_recompiler/backend/glasm/emit_glasm.h"
+#include "shader_recompiler/backend/glsl/emit_glsl.h"
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/frontend/maxwell/control_flow.h"
+#include "shader_recompiler/frontend/maxwell/translate_program.h"
+#include "shader_recompiler/profile.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
-#include "video_core/engines/shader_type.h"
#include "video_core/memory_manager.h"
-#include "video_core/renderer_opengl/gl_arb_decompiler.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
-#include "video_core/renderer_opengl/gl_shader_decompiler.h"
-#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
+#include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
-#include "video_core/shader/memory_util.h"
-#include "video_core/shader/registry.h"
-#include "video_core/shader/shader_ir.h"
#include "video_core/shader_cache.h"
+#include "video_core/shader_environment.h"
#include "video_core/shader_notify.h"
namespace OpenGL {
-
-using Tegra::Engines::ShaderType;
-using VideoCommon::Shader::GetShaderAddress;
-using VideoCommon::Shader::GetShaderCode;
-using VideoCommon::Shader::GetUniqueIdentifier;
-using VideoCommon::Shader::KERNEL_MAIN_OFFSET;
-using VideoCommon::Shader::ProgramCode;
-using VideoCommon::Shader::Registry;
-using VideoCommon::Shader::ShaderIR;
-using VideoCommon::Shader::STAGE_MAIN_OFFSET;
-
namespace {
-
-constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{};
-
-/// Gets the shader type from a Maxwell program type
-constexpr GLenum GetGLShaderType(ShaderType shader_type) {
- switch (shader_type) {
- case ShaderType::Vertex:
- return GL_VERTEX_SHADER;
- case ShaderType::Geometry:
- return GL_GEOMETRY_SHADER;
- case ShaderType::Fragment:
- return GL_FRAGMENT_SHADER;
- case ShaderType::Compute:
- return GL_COMPUTE_SHADER;
- default:
- return GL_NONE;
- }
+using Shader::Backend::GLASM::EmitGLASM;
+using Shader::Backend::GLSL::EmitGLSL;
+using Shader::Backend::SPIRV::EmitSPIRV;
+using Shader::Maxwell::MergeDualVertexPrograms;
+using Shader::Maxwell::TranslateProgram;
+using VideoCommon::ComputeEnvironment;
+using VideoCommon::FileEnvironment;
+using VideoCommon::GenericEnvironment;
+using VideoCommon::GraphicsEnvironment;
+using VideoCommon::LoadPipelines;
+using VideoCommon::SerializePipeline;
+using Context = ShaderContext::Context;
+
+constexpr u32 CACHE_VERSION = 5;
+
+template <typename Container>
+auto MakeSpan(Container& container) {
+ return std::span(container.data(), container.size());
}
-constexpr const char* GetShaderTypeName(ShaderType shader_type) {
- switch (shader_type) {
- case ShaderType::Vertex:
- return "VS";
- case ShaderType::TesselationControl:
- return "HS";
- case ShaderType::TesselationEval:
- return "DS";
- case ShaderType::Geometry:
- return "GS";
- case ShaderType::Fragment:
- return "FS";
- case ShaderType::Compute:
- return "CS";
- }
- return "UNK";
+Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key,
+ const Shader::IR::Program& program,
+ const Shader::IR::Program* previous_program,
+ bool glasm_use_storage_buffers, bool use_assembly_shaders) {
+ Shader::RuntimeInfo info;
+ if (previous_program) {
+ info.previous_stage_stores = previous_program->info.stores;
+ } else {
+ // Mark all stores as available for vertex shaders
+ info.previous_stage_stores.mask.set();
+ }
+ switch (program.stage) {
+ case Shader::Stage::VertexB:
+ case Shader::Stage::Geometry:
+ if (!use_assembly_shaders && key.xfb_enabled != 0) {
+ info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.xfb_state);
+ }
+ break;
+ case Shader::Stage::TessellationEval:
+ info.tess_clockwise = key.tessellation_clockwise != 0;
+ info.tess_primitive = [&key] {
+ switch (key.tessellation_primitive) {
+ case Maxwell::TessellationPrimitive::Isolines:
+ return Shader::TessPrimitive::Isolines;
+ case Maxwell::TessellationPrimitive::Triangles:
+ return Shader::TessPrimitive::Triangles;
+ case Maxwell::TessellationPrimitive::Quads:
+ return Shader::TessPrimitive::Quads;
+ }
+ UNREACHABLE();
+ return Shader::TessPrimitive::Triangles;
+ }();
+ info.tess_spacing = [&] {
+ switch (key.tessellation_spacing) {
+ case Maxwell::TessellationSpacing::Equal:
+ return Shader::TessSpacing::Equal;
+ case Maxwell::TessellationSpacing::FractionalOdd:
+ return Shader::TessSpacing::FractionalOdd;
+ case Maxwell::TessellationSpacing::FractionalEven:
+ return Shader::TessSpacing::FractionalEven;
+ }
+ UNREACHABLE();
+ return Shader::TessSpacing::Equal;
+ }();
+ break;
+ case Shader::Stage::Fragment:
+ info.force_early_z = key.early_z != 0;
+ break;
+ default:
+ break;
+ }
+ switch (key.gs_input_topology) {
+ case Maxwell::PrimitiveTopology::Points:
+ info.input_topology = Shader::InputTopology::Points;
+ break;
+ case Maxwell::PrimitiveTopology::Lines:
+ case Maxwell::PrimitiveTopology::LineLoop:
+ case Maxwell::PrimitiveTopology::LineStrip:
+ info.input_topology = Shader::InputTopology::Lines;
+ break;
+ case Maxwell::PrimitiveTopology::Triangles:
+ case Maxwell::PrimitiveTopology::TriangleStrip:
+ case Maxwell::PrimitiveTopology::TriangleFan:
+ case Maxwell::PrimitiveTopology::Quads:
+ case Maxwell::PrimitiveTopology::QuadStrip:
+ case Maxwell::PrimitiveTopology::Polygon:
+ case Maxwell::PrimitiveTopology::Patches:
+ info.input_topology = Shader::InputTopology::Triangles;
+ break;
+ case Maxwell::PrimitiveTopology::LinesAdjacency:
+ case Maxwell::PrimitiveTopology::LineStripAdjacency:
+ info.input_topology = Shader::InputTopology::LinesAdjacency;
+ break;
+ case Maxwell::PrimitiveTopology::TrianglesAdjacency:
+ case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
+ info.input_topology = Shader::InputTopology::TrianglesAdjacency;
+ break;
+ }
+ info.glasm_use_storage_buffers = glasm_use_storage_buffers;
+ return info;
}
-constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) {
- switch (program_type) {
- case Maxwell::ShaderProgram::VertexA:
- case Maxwell::ShaderProgram::VertexB:
- return ShaderType::Vertex;
- case Maxwell::ShaderProgram::TesselationControl:
- return ShaderType::TesselationControl;
- case Maxwell::ShaderProgram::TesselationEval:
- return ShaderType::TesselationEval;
- case Maxwell::ShaderProgram::Geometry:
- return ShaderType::Geometry;
- case Maxwell::ShaderProgram::Fragment:
- return ShaderType::Fragment;
- }
- return {};
+void SetXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs) {
+ std::ranges::transform(regs.tfb_layouts, state.layouts.begin(), [](const auto& layout) {
+ return VideoCommon::TransformFeedbackState::Layout{
+ .stream = layout.stream,
+ .varying_count = layout.varying_count,
+ .stride = layout.stride,
+ };
+ });
+ state.varyings = regs.tfb_varying_locs;
}
+} // Anonymous namespace
-constexpr GLenum AssemblyEnum(ShaderType shader_type) {
- switch (shader_type) {
- case ShaderType::Vertex:
- return GL_VERTEX_PROGRAM_NV;
- case ShaderType::TesselationControl:
- return GL_TESS_CONTROL_PROGRAM_NV;
- case ShaderType::TesselationEval:
- return GL_TESS_EVALUATION_PROGRAM_NV;
- case ShaderType::Geometry:
- return GL_GEOMETRY_PROGRAM_NV;
- case ShaderType::Fragment:
- return GL_FRAGMENT_PROGRAM_NV;
- case ShaderType::Compute:
- return GL_COMPUTE_PROGRAM_NV;
+ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
+ Tegra::Engines::Maxwell3D& maxwell3d_,
+ Tegra::Engines::KeplerCompute& kepler_compute_,
+ Tegra::MemoryManager& gpu_memory_, const Device& device_,
+ TextureCache& texture_cache_, BufferCache& buffer_cache_,
+ ProgramManager& program_manager_, StateTracker& state_tracker_,
+ VideoCore::ShaderNotify& shader_notify_)
+ : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_},
+ emu_window{emu_window_}, device{device_}, texture_cache{texture_cache_},
+ buffer_cache{buffer_cache_}, program_manager{program_manager_}, state_tracker{state_tracker_},
+ shader_notify{shader_notify_}, use_asynchronous_shaders{device.UseAsynchronousShaders()},
+ profile{
+ .supported_spirv = 0x00010000,
+
+ .unified_descriptor_binding = false,
+ .support_descriptor_aliasing = false,
+ .support_int8 = false,
+ .support_int16 = false,
+ .support_int64 = device.HasShaderInt64(),
+ .support_vertex_instance_id = true,
+ .support_float_controls = false,
+ .support_separate_denorm_behavior = false,
+ .support_separate_rounding_mode = false,
+ .support_fp16_denorm_preserve = false,
+ .support_fp32_denorm_preserve = false,
+ .support_fp16_denorm_flush = false,
+ .support_fp32_denorm_flush = false,
+ .support_fp16_signed_zero_nan_preserve = false,
+ .support_fp32_signed_zero_nan_preserve = false,
+ .support_fp64_signed_zero_nan_preserve = false,
+ .support_explicit_workgroup_layout = false,
+ .support_vote = true,
+ .support_viewport_index_layer_non_geometry =
+ device.HasNvViewportArray2() || device.HasVertexViewportLayer(),
+ .support_viewport_mask = device.HasNvViewportArray2(),
+ .support_typeless_image_loads = device.HasImageLoadFormatted(),
+ .support_demote_to_helper_invocation = false,
+ .support_int64_atomics = false,
+ .support_derivative_control = device.HasDerivativeControl(),
+ .support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(),
+ .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(),
+ .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(),
+ .support_gl_texture_shadow_lod = device.HasTextureShadowLod(),
+ .support_gl_warp_intrinsics = false,
+ .support_gl_variable_aoffi = device.HasVariableAoffi(),
+ .support_gl_sparse_textures = device.HasSparseTexture2(),
+ .support_gl_derivative_control = device.HasDerivativeControl(),
+
+ .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(),
+
+ .lower_left_origin_mode = true,
+ .need_declared_frag_colors = true,
+ .need_fastmath_off = device.NeedsFastmathOff(),
+
+ .has_broken_spirv_clamp = true,
+ .has_broken_unsigned_image_offsets = true,
+ .has_broken_signed_operations = true,
+ .has_broken_fp16_float_controls = false,
+ .has_gl_component_indexing_bug = device.HasComponentIndexingBug(),
+ .has_gl_precise_bug = device.HasPreciseBug(),
+ .ignore_nan_fp_comparisons = true,
+ .gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(),
+ },
+ host_info{
+ .support_float16 = false,
+ .support_int64 = device.HasShaderInt64(),
+ } {
+ if (use_asynchronous_shaders) {
+ workers = CreateWorkers();
}
- return {};
}
-std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) {
- return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier);
-}
+ShaderCache::~ShaderCache() = default;
-std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) {
- const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size};
- const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer,
- entry.graphics_info, entry.compute_info};
- auto registry = std::make_shared<Registry>(entry.type, info);
- for (const auto& [address, value] : entry.keys) {
- const auto [buffer, offset] = address;
- registry->InsertKey(buffer, offset, value);
- }
- for (const auto& [offset, sampler] : entry.bound_samplers) {
- registry->InsertBoundSampler(offset, sampler);
+void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
+ const VideoCore::DiskResourceLoadCallback& callback) {
+ if (title_id == 0) {
+ return;
}
- for (const auto& [key, sampler] : entry.bindless_samplers) {
- const auto [buffer, offset] = key;
- registry->InsertBindlessSampler(buffer, offset, sampler);
+ const auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)};
+ const auto base_dir{shader_dir / fmt::format("{:016x}", title_id)};
+ if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir)) {
+ LOG_ERROR(Common_Filesystem, "Failed to create shader cache directories");
+ return;
}
- return registry;
-}
-
-std::unordered_set<GLenum> GetSupportedFormats() {
- GLint num_formats;
- glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats);
+ shader_cache_filename = base_dir / "opengl.bin";
+
+ if (!workers) {
+ workers = CreateWorkers();
+ }
+ struct {
+ std::mutex mutex;
+ size_t total{};
+ size_t built{};
+ bool has_loaded{};
+ } state;
+
+ const auto load_compute{[&](std::ifstream& file, FileEnvironment env) {
+ ComputePipelineKey key;
+ file.read(reinterpret_cast<char*>(&key), sizeof(key));
+ workers->QueueWork(
+ [this, key, env = std::move(env), &state, &callback](Context* ctx) mutable {
+ ctx->pools.ReleaseContents();
+ auto pipeline{CreateComputePipeline(ctx->pools, key, env)};
+ std::lock_guard lock{state.mutex};
+ if (pipeline) {
+ compute_cache.emplace(key, std::move(pipeline));
+ }
+ ++state.built;
+ if (state.has_loaded) {
+ callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
+ }
+ });
+ ++state.total;
+ }};
+ const auto load_graphics{[&](std::ifstream& file, std::vector<FileEnvironment> envs) {
+ GraphicsPipelineKey key;
+ file.read(reinterpret_cast<char*>(&key), sizeof(key));
+ workers->QueueWork(
+ [this, key, envs = std::move(envs), &state, &callback](Context* ctx) mutable {
+ boost::container::static_vector<Shader::Environment*, 5> env_ptrs;
+ for (auto& env : envs) {
+ env_ptrs.push_back(&env);
+ }
+ ctx->pools.ReleaseContents();
+ auto pipeline{CreateGraphicsPipeline(ctx->pools, key, MakeSpan(env_ptrs), false)};
+ std::lock_guard lock{state.mutex};
+ if (pipeline) {
+ graphics_cache.emplace(key, std::move(pipeline));
+ }
+ ++state.built;
+ if (state.has_loaded) {
+ callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
+ }
+ });
+ ++state.total;
+ }};
+ LoadPipelines(stop_loading, shader_cache_filename, CACHE_VERSION, load_compute, load_graphics);
- std::vector<GLint> formats(num_formats);
- glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data());
+ std::unique_lock lock{state.mutex};
+ callback(VideoCore::LoadCallbackStage::Build, 0, state.total);
+ state.has_loaded = true;
+ lock.unlock();
- std::unordered_set<GLenum> supported_formats;
- for (const GLint format : formats) {
- supported_formats.insert(static_cast<GLenum>(format));
+ workers->WaitForRequests();
+ if (!use_asynchronous_shaders) {
+ workers.reset();
}
- return supported_formats;
}
-} // Anonymous namespace
-
-ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier,
- const ShaderIR& ir, const Registry& registry, bool hint_retrievable) {
- if (device.UseDriverCache()) {
- // Ignore hint retrievable if we are using the driver cache
- hint_retrievable = false;
- }
- const std::string shader_id = MakeShaderID(unique_identifier, shader_type);
- LOG_INFO(Render_OpenGL, "{}", shader_id);
-
- auto program = std::make_shared<ProgramHandle>();
-
- if (device.UseAssemblyShaders()) {
- const std::string arb =
- DecompileAssemblyShader(device, ir, registry, shader_type, shader_id);
-
- GLuint& arb_prog = program->assembly_program.handle;
-
-// Commented out functions signal OpenGL errors but are compatible with apitrace.
-// Use them only to capture and replay on apitrace.
-#if 0
- glGenProgramsNV(1, &arb_prog);
- glLoadProgramNV(AssemblyEnum(shader_type), arb_prog, static_cast<GLsizei>(arb.size()),
- reinterpret_cast<const GLubyte*>(arb.data()));
-#else
- glGenProgramsARB(1, &arb_prog);
- glNamedProgramStringEXT(arb_prog, AssemblyEnum(shader_type), GL_PROGRAM_FORMAT_ASCII_ARB,
- static_cast<GLsizei>(arb.size()), arb.data());
-#endif
- const auto err = reinterpret_cast<const char*>(glGetString(GL_PROGRAM_ERROR_STRING_NV));
- if (err && *err) {
- LOG_CRITICAL(Render_OpenGL, "{}", err);
- LOG_INFO(Render_OpenGL, "\n{}", arb);
- }
- } else {
- const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id);
- OGLShader shader;
- shader.Create(glsl.c_str(), GetGLShaderType(shader_type));
-
- program->source_program.Create(true, hint_retrievable, shader.handle);
- }
-
- return program;
+GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() {
+ if (!RefreshStages(graphics_key.unique_hashes)) {
+ current_pipeline = nullptr;
+ return nullptr;
+ }
+ const auto& regs{maxwell3d.regs};
+ graphics_key.raw = 0;
+ graphics_key.early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0);
+ graphics_key.gs_input_topology.Assign(graphics_key.unique_hashes[4] != 0
+ ? regs.draw.topology.Value()
+ : Maxwell::PrimitiveTopology{});
+ graphics_key.tessellation_primitive.Assign(regs.tess_mode.prim.Value());
+ graphics_key.tessellation_spacing.Assign(regs.tess_mode.spacing.Value());
+ graphics_key.tessellation_clockwise.Assign(regs.tess_mode.cw.Value());
+ graphics_key.xfb_enabled.Assign(regs.tfb_enabled != 0 ? 1 : 0);
+ if (graphics_key.xfb_enabled) {
+ SetXfbState(graphics_key.xfb_state, regs);
+ }
+ if (current_pipeline && graphics_key == current_pipeline->Key()) {
+ return BuiltPipeline(current_pipeline);
+ }
+ return CurrentGraphicsPipelineSlowPath();
}
-Shader::Shader(std::shared_ptr<Registry> registry_, ShaderEntries entries_,
- ProgramSharedPtr program_, bool is_built_)
- : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)},
- is_built{is_built_} {
- handle = program->assembly_program.handle;
- if (handle == 0) {
- handle = program->source_program.handle;
+GraphicsPipeline* ShaderCache::CurrentGraphicsPipelineSlowPath() {
+ const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)};
+ auto& pipeline{pair->second};
+ if (is_new) {
+ pipeline = CreateGraphicsPipeline();
}
- if (is_built) {
- ASSERT(handle != 0);
+ if (!pipeline) {
+ return nullptr;
}
+ current_pipeline = pipeline.get();
+ return BuiltPipeline(current_pipeline);
}
-Shader::~Shader() = default;
-
-GLuint Shader::GetHandle() const {
- DEBUG_ASSERT(registry->IsConsistent());
- return handle;
-}
-
-bool Shader::IsBuilt() const {
- return is_built;
-}
-
-void Shader::AsyncOpenGLBuilt(OGLProgram new_program) {
- program->source_program = std::move(new_program);
- handle = program->source_program.handle;
- is_built = true;
-}
-
-void Shader::AsyncGLASMBuilt(OGLAssemblyProgram new_program) {
- program->assembly_program = std::move(new_program);
- handle = program->assembly_program.handle;
- is_built = true;
-}
-
-std::unique_ptr<Shader> Shader::CreateStageFromMemory(
- const ShaderParameters& params, Maxwell::ShaderProgram program_type, ProgramCode code,
- ProgramCode code_b, VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr) {
- const auto shader_type = GetShaderType(program_type);
-
- auto& gpu = params.gpu;
- gpu.ShaderNotify().MarkSharderBuilding();
-
- auto registry = std::make_shared<Registry>(shader_type, gpu.Maxwell3D());
- if (!async_shaders.IsShaderAsync(gpu) || !params.device.UseAsynchronousShaders()) {
- const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
- // TODO(Rodrigo): Handle VertexA shaders
- // std::optional<ShaderIR> ir_b;
- // if (!code_b.empty()) {
- // ir_b.emplace(code_b, STAGE_MAIN_OFFSET);
- // }
- auto program =
- BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry);
- ShaderDiskCacheEntry entry;
- entry.type = shader_type;
- entry.code = std::move(code);
- entry.code_b = std::move(code_b);
- entry.unique_identifier = params.unique_identifier;
- entry.bound_buffer = registry->GetBoundBuffer();
- entry.graphics_info = registry->GetGraphicsInfo();
- entry.keys = registry->GetKeys();
- entry.bound_samplers = registry->GetBoundSamplers();
- entry.bindless_samplers = registry->GetBindlessSamplers();
- params.disk_cache.SaveEntry(std::move(entry));
-
- gpu.ShaderNotify().MarkShaderComplete();
-
- return std::unique_ptr<Shader>(new Shader(std::move(registry),
- MakeEntries(params.device, ir, shader_type),
- std::move(program), true));
- } else {
- // Required for entries
- const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
- auto entries = MakeEntries(params.device, ir, shader_type);
-
- async_shaders.QueueOpenGLShader(params.device, shader_type, params.unique_identifier,
- std::move(code), std::move(code_b), STAGE_MAIN_OFFSET,
- COMPILER_SETTINGS, *registry, cpu_addr);
-
- auto program = std::make_shared<ProgramHandle>();
- return std::unique_ptr<Shader>(
- new Shader(std::move(registry), std::move(entries), std::move(program), false));
+GraphicsPipeline* ShaderCache::BuiltPipeline(GraphicsPipeline* pipeline) const noexcept {
+ if (pipeline->IsBuilt()) {
+ return pipeline;
}
-}
-
-std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params,
- ProgramCode code) {
- auto& gpu = params.gpu;
- gpu.ShaderNotify().MarkSharderBuilding();
-
- auto registry = std::make_shared<Registry>(ShaderType::Compute, params.engine);
- const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
- const u64 uid = params.unique_identifier;
- auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry);
-
- ShaderDiskCacheEntry entry;
- entry.type = ShaderType::Compute;
- entry.code = std::move(code);
- entry.unique_identifier = uid;
- entry.bound_buffer = registry->GetBoundBuffer();
- entry.compute_info = registry->GetComputeInfo();
- entry.keys = registry->GetKeys();
- entry.bound_samplers = registry->GetBoundSamplers();
- entry.bindless_samplers = registry->GetBindlessSamplers();
- params.disk_cache.SaveEntry(std::move(entry));
-
- gpu.ShaderNotify().MarkShaderComplete();
-
- return std::unique_ptr<Shader>(new Shader(std::move(registry),
- MakeEntries(params.device, ir, ShaderType::Compute),
- std::move(program)));
-}
-
-std::unique_ptr<Shader> Shader::CreateFromCache(const ShaderParameters& params,
- const PrecompiledShader& precompiled_shader) {
- return std::unique_ptr<Shader>(new Shader(
- precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program));
-}
-
-ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_,
- Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
- Tegra::Engines::Maxwell3D& maxwell3d_,
- Tegra::Engines::KeplerCompute& kepler_compute_,
- Tegra::MemoryManager& gpu_memory_, const Device& device_)
- : ShaderCache{rasterizer_}, emu_window{emu_window_}, gpu{gpu_}, gpu_memory{gpu_memory_},
- maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, device{device_} {}
-
-ShaderCacheOpenGL::~ShaderCacheOpenGL() = default;
-
-void ShaderCacheOpenGL::LoadDiskCache(u64 title_id, const std::atomic_bool& stop_loading,
- const VideoCore::DiskResourceLoadCallback& callback) {
- disk_cache.BindTitleID(title_id);
- const std::optional transferable = disk_cache.LoadTransferable();
-
- LOG_INFO(Render_OpenGL, "Total Shader Count: {}",
- transferable.has_value() ? transferable->size() : 0);
-
- if (!transferable) {
- return;
+ if (!use_asynchronous_shaders) {
+ return pipeline;
}
-
- std::vector<ShaderDiskCachePrecompiled> gl_cache;
- if (!device.UseAssemblyShaders() && !device.UseDriverCache()) {
- // Only load precompiled cache when we are not using assembly shaders
- gl_cache = disk_cache.LoadPrecompiled();
+ // If something is using depth, we can assume that the game is not rendering anything
+ // that will only be used once.
+ if (maxwell3d.regs.zeta_enable) {
+ return nullptr;
}
- const auto supported_formats = GetSupportedFormats();
-
- // Track if precompiled cache was altered during loading to know if we have to
- // serialize the virtual precompiled cache file back to the hard drive
- bool precompiled_cache_altered = false;
-
- // Inform the frontend about shader build initialization
- if (callback) {
- callback(VideoCore::LoadCallbackStage::Build, 0, transferable->size());
+ // If games are using a small index count, we can assume these are full-screen quads.
+ // Usually these shaders are only used once for building textures, so we can assume
+ // they cannot be built asynchronously.
+ if (maxwell3d.regs.index_array.count <= 6 || maxwell3d.regs.vertex_buffer.count <= 6) {
+ return pipeline;
}
+ return nullptr;
+}
- std::mutex mutex;
- std::size_t built_shaders = 0; // It doesn't have to be atomic since it's used behind a mutex
- std::atomic_bool gl_cache_failed = false;
-
- const auto find_precompiled = [&gl_cache](u64 id) {
- return std::ranges::find(gl_cache, id, &ShaderDiskCachePrecompiled::unique_identifier);
- };
-
- const auto worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin,
- std::size_t end) {
- const auto scope = context->Acquire();
-
- for (std::size_t i = begin; i < end; ++i) {
- if (stop_loading) {
- return;
- }
- const auto& entry = (*transferable)[i];
- const u64 uid = entry.unique_identifier;
- const auto it = find_precompiled(uid);
- const auto precompiled_entry = it != gl_cache.end() ? &*it : nullptr;
-
- const bool is_compute = entry.type == ShaderType::Compute;
- const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
- auto registry = MakeRegistry(entry);
- const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry);
-
- ProgramSharedPtr program;
- if (precompiled_entry) {
- // If the shader is precompiled, attempt to load it with
- program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats);
- if (!program) {
- gl_cache_failed = true;
- }
- }
- if (!program) {
- // Otherwise compile it from GLSL
- program = BuildShader(device, entry.type, uid, ir, *registry, true);
- }
-
- PrecompiledShader shader;
- shader.program = std::move(program);
- shader.registry = std::move(registry);
- shader.entries = MakeEntries(device, ir, entry.type);
-
- std::scoped_lock lock{mutex};
- if (callback) {
- callback(VideoCore::LoadCallbackStage::Build, ++built_shaders,
- transferable->size());
- }
- runtime_cache.emplace(entry.unique_identifier, std::move(shader));
- }
- };
-
- const std::size_t num_workers{std::max(1U, std::thread::hardware_concurrency())};
- const std::size_t bucket_size{transferable->size() / num_workers};
- std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers);
- std::vector<std::thread> threads(num_workers);
- for (std::size_t i = 0; i < num_workers; ++i) {
- const bool is_last_worker = i + 1 == num_workers;
- const std::size_t start{bucket_size * i};
- const std::size_t end{is_last_worker ? transferable->size() : start + bucket_size};
-
- // On some platforms the shared context has to be created from the GUI thread
- contexts[i] = emu_window.CreateSharedContext();
- threads[i] = std::thread(worker, contexts[i].get(), start, end);
+ComputePipeline* ShaderCache::CurrentComputePipeline() {
+ const VideoCommon::ShaderInfo* const shader{ComputeShader()};
+ if (!shader) {
+ return nullptr;
}
- for (auto& thread : threads) {
- thread.join();
+ const auto& qmd{kepler_compute.launch_description};
+ const ComputePipelineKey key{
+ .unique_hash = shader->unique_hash,
+ .shared_memory_size = qmd.shared_alloc,
+ .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z},
+ };
+ const auto [pair, is_new]{compute_cache.try_emplace(key)};
+ auto& pipeline{pair->second};
+ if (!is_new) {
+ return pipeline.get();
}
+ pipeline = CreateComputePipeline(key, shader);
+ return pipeline.get();
+}
- if (gl_cache_failed) {
- // Invalidate the precompiled cache if a shader dumped shader was rejected
- disk_cache.InvalidatePrecompiled();
- precompiled_cache_altered = true;
- return;
- }
- if (stop_loading) {
- return;
- }
+std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline() {
+ GraphicsEnvironments environments;
+ GetGraphicsEnvironments(environments, graphics_key.unique_hashes);
- if (device.UseAssemblyShaders() || device.UseDriverCache()) {
- // Don't store precompiled binaries for assembly shaders or when using the driver cache
- return;
+ main_pools.ReleaseContents();
+ auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(),
+ use_asynchronous_shaders)};
+ if (!pipeline || shader_cache_filename.empty()) {
+ return pipeline;
}
-
- // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw
- // before precompiling them
-
- for (std::size_t i = 0; i < transferable->size(); ++i) {
- const u64 id = (*transferable)[i].unique_identifier;
- const auto it = find_precompiled(id);
- if (it == gl_cache.end()) {
- const GLuint program = runtime_cache.at(id).program->source_program.handle;
- disk_cache.SavePrecompiled(id, program);
- precompiled_cache_altered = true;
+ boost::container::static_vector<const GenericEnvironment*, Maxwell::MaxShaderProgram> env_ptrs;
+ for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
+ if (graphics_key.unique_hashes[index] != 0) {
+ env_ptrs.push_back(&environments.envs[index]);
}
}
-
- if (precompiled_cache_altered) {
- disk_cache.SaveVirtualPrecompiledFile();
- }
+ SerializePipeline(graphics_key, env_ptrs, shader_cache_filename, CACHE_VERSION);
+ return pipeline;
}
-ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram(
- const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
- const std::unordered_set<GLenum>& supported_formats) {
- if (!supported_formats.contains(precompiled_entry.binary_format)) {
- LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format, removing");
- return {};
- }
-
- auto program = std::make_shared<ProgramHandle>();
- GLuint& handle = program->source_program.handle;
- handle = glCreateProgram();
- glProgramParameteri(handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
- glProgramBinary(handle, precompiled_entry.binary_format, precompiled_entry.binary.data(),
- static_cast<GLsizei>(precompiled_entry.binary.size()));
-
- GLint link_status;
- glGetProgramiv(handle, GL_LINK_STATUS, &link_status);
- if (link_status == GL_FALSE) {
- LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing");
- return {};
- }
-
- return program;
-}
-
-Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program,
- VideoCommon::Shader::AsyncShaders& async_shaders) {
- if (!maxwell3d.dirty.flags[Dirty::Shaders]) {
- auto* last_shader = last_shaders[static_cast<std::size_t>(program)];
- if (last_shader->IsBuilt()) {
- return last_shader;
+std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
+ ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key,
+ std::span<Shader::Environment* const> envs, bool build_in_parallel) try {
+ LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash());
+ size_t env_index{};
+ u32 total_storage_buffers{};
+ std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs;
+ const bool uses_vertex_a{key.unique_hashes[0] != 0};
+ const bool uses_vertex_b{key.unique_hashes[1] != 0};
+ for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
+ if (key.unique_hashes[index] == 0) {
+ continue;
}
- }
+ Shader::Environment& env{*envs[env_index]};
+ ++env_index;
- const GPUVAddr address{GetShaderAddress(maxwell3d, program)};
+ const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))};
+ Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0);
+ if (!uses_vertex_a || index != 1) {
+ // Normal path
+ programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info);
- if (device.UseAsynchronousShaders() && async_shaders.HasCompletedWork()) {
- auto completed_work = async_shaders.GetCompletedWork();
- for (auto& work : completed_work) {
- Shader* shader = TryGet(work.cpu_address);
- gpu.ShaderNotify().MarkShaderComplete();
- if (shader == nullptr) {
- continue;
+ for (const auto& desc : programs[index].info.storage_buffers_descriptors) {
+ total_storage_buffers += desc.count;
}
- using namespace VideoCommon::Shader;
- if (work.backend == AsyncShaders::Backend::OpenGL) {
- shader->AsyncOpenGLBuilt(std::move(work.program.opengl));
- } else if (work.backend == AsyncShaders::Backend::GLASM) {
- shader->AsyncGLASMBuilt(std::move(work.program.glasm));
+ } else {
+ // VertexB path when VertexA is present.
+ auto& program_va{programs[0]};
+ auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
+ for (const auto& desc : program_vb.info.storage_buffers_descriptors) {
+ total_storage_buffers += desc.count;
}
-
- auto& registry = shader->GetRegistry();
-
- ShaderDiskCacheEntry entry;
- entry.type = work.shader_type;
- entry.code = std::move(work.code);
- entry.code_b = std::move(work.code_b);
- entry.unique_identifier = work.uid;
- entry.bound_buffer = registry.GetBoundBuffer();
- entry.graphics_info = registry.GetGraphicsInfo();
- entry.keys = registry.GetKeys();
- entry.bound_samplers = registry.GetBoundSamplers();
- entry.bindless_samplers = registry.GetBindlessSamplers();
- disk_cache.SaveEntry(std::move(entry));
+ programs[index] = MergeDualVertexPrograms(program_va, program_vb, env);
}
}
+ const u32 glasm_storage_buffer_limit{device.GetMaxGLASMStorageBufferBlocks()};
+ const bool glasm_use_storage_buffers{total_storage_buffers <= glasm_storage_buffer_limit};
- // Look up shader in the cache based on address
- const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(address)};
- if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) {
- return last_shaders[static_cast<std::size_t>(program)] = shader;
- }
-
- const u8* const host_ptr{gpu_memory.GetPointer(address)};
-
- // No shader found - create a new one
- ProgramCode code{GetShaderCode(gpu_memory, address, host_ptr, false)};
- ProgramCode code_b;
- if (program == Maxwell::ShaderProgram::VertexA) {
- const GPUVAddr address_b{GetShaderAddress(maxwell3d, Maxwell::ShaderProgram::VertexB)};
- const u8* host_ptr_b = gpu_memory.GetPointer(address_b);
- code_b = GetShaderCode(gpu_memory, address_b, host_ptr_b, false);
- }
- const std::size_t code_size = code.size() * sizeof(u64);
-
- const u64 unique_identifier = GetUniqueIdentifier(
- GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b);
-
- const ShaderParameters params{gpu, maxwell3d, disk_cache, device,
- *cpu_addr, host_ptr, unique_identifier};
-
- std::unique_ptr<Shader> shader;
- const auto found = runtime_cache.find(unique_identifier);
- if (found == runtime_cache.end()) {
- shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b),
- async_shaders, cpu_addr.value_or(0));
- } else {
- shader = Shader::CreateFromCache(params, found->second);
- }
+ std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{};
- Shader* const result = shader.get();
- if (cpu_addr) {
- Register(std::move(shader), *cpu_addr, code_size);
- } else {
- null_shader = std::move(shader);
+ std::array<std::string, 5> sources;
+ std::array<std::vector<u32>, 5> sources_spirv;
+ Shader::Backend::Bindings binding;
+ Shader::IR::Program* previous_program{};
+ const bool use_glasm{device.UseAssemblyShaders()};
+ const size_t first_index = uses_vertex_a && uses_vertex_b ? 1 : 0;
+ for (size_t index = first_index; index < Maxwell::MaxShaderProgram; ++index) {
+ if (key.unique_hashes[index] == 0) {
+ continue;
+ }
+ UNIMPLEMENTED_IF(index == 0);
+
+ Shader::IR::Program& program{programs[index]};
+ const size_t stage_index{index - 1};
+ infos[stage_index] = &program.info;
+
+ const auto runtime_info{
+ MakeRuntimeInfo(key, program, previous_program, glasm_use_storage_buffers, use_glasm)};
+ switch (device.GetShaderBackend()) {
+ case Settings::ShaderBackend::GLSL:
+ sources[stage_index] = EmitGLSL(profile, runtime_info, program, binding);
+ break;
+ case Settings::ShaderBackend::GLASM:
+ sources[stage_index] = EmitGLASM(profile, runtime_info, program, binding);
+ break;
+ case Settings::ShaderBackend::SPIRV:
+ sources_spirv[stage_index] = EmitSPIRV(profile, runtime_info, program, binding);
+ break;
+ }
+ previous_program = &program;
}
+ auto* const thread_worker{build_in_parallel ? workers.get() : nullptr};
+ return std::make_unique<GraphicsPipeline>(
+ device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker,
+ thread_worker, &shader_notify, sources, sources_spirv, infos, key);
- return last_shaders[static_cast<std::size_t>(program)] = result;
+} catch (Shader::Exception& exception) {
+ LOG_ERROR(Render_OpenGL, "{}", exception.what());
+ return nullptr;
}
-Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
- const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(code_addr)};
-
- if (Shader* const kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get()) {
- return kernel;
- }
-
- // No kernel found, create a new one
- const u8* host_ptr{gpu_memory.GetPointer(code_addr)};
- ProgramCode code{GetShaderCode(gpu_memory, code_addr, host_ptr, true)};
- const std::size_t code_size{code.size() * sizeof(u64)};
- const u64 unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)};
-
- const ShaderParameters params{gpu, kepler_compute, disk_cache, device,
- *cpu_addr, host_ptr, unique_identifier};
+std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
+ const ComputePipelineKey& key, const VideoCommon::ShaderInfo* shader) {
+ const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
+ const auto& qmd{kepler_compute.launch_description};
+ ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start};
+ env.SetCachedSize(shader->size_bytes);
+
+ main_pools.ReleaseContents();
+ auto pipeline{CreateComputePipeline(main_pools, key, env)};
+ if (!pipeline || shader_cache_filename.empty()) {
+ return pipeline;
+ }
+ SerializePipeline(key, std::array<const GenericEnvironment*, 1>{&env}, shader_cache_filename,
+ CACHE_VERSION);
+ return pipeline;
+}
- std::unique_ptr<Shader> kernel;
- const auto found = runtime_cache.find(unique_identifier);
- if (found == runtime_cache.end()) {
- kernel = Shader::CreateKernelFromMemory(params, std::move(code));
- } else {
- kernel = Shader::CreateFromCache(params, found->second);
- }
+std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
+ ShaderContext::ShaderPools& pools, const ComputePipelineKey& key,
+ Shader::Environment& env) try {
+ LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash());
+
+ Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
+ auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
+
+ u32 num_storage_buffers{};
+ for (const auto& desc : program.info.storage_buffers_descriptors) {
+ num_storage_buffers += desc.count;
+ }
+ Shader::RuntimeInfo info;
+ info.glasm_use_storage_buffers = num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks();
+
+ std::string code{};
+ std::vector<u32> code_spirv;
+ switch (device.GetShaderBackend()) {
+ case Settings::ShaderBackend::GLSL:
+ code = EmitGLSL(profile, program);
+ break;
+ case Settings::ShaderBackend::GLASM:
+ code = EmitGLASM(profile, info, program);
+ break;
+ case Settings::ShaderBackend::SPIRV:
+ code_spirv = EmitSPIRV(profile, program);
+ break;
+ }
+
+ return std::make_unique<ComputePipeline>(device, texture_cache, buffer_cache, gpu_memory,
+ kepler_compute, program_manager, program.info, code,
+ code_spirv);
+} catch (Shader::Exception& exception) {
+ LOG_ERROR(Render_OpenGL, "{}", exception.what());
+ return nullptr;
+}
- Shader* const result = kernel.get();
- if (cpu_addr) {
- Register(std::move(kernel), *cpu_addr, code_size);
- } else {
- null_kernel = std::move(kernel);
- }
- return result;
+std::unique_ptr<ShaderWorker> ShaderCache::CreateWorkers() const {
+ return std::make_unique<ShaderWorker>(std::max(std::thread::hardware_concurrency(), 2U) - 1,
+ "yuzu:ShaderBuilder",
+ [this] { return Context{emu_window}; });
}
} // namespace OpenGL
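Illustrative sketch (not part of the patch): CurrentGraphicsPipelineSlowPath and CurrentComputePipeline above both rely on unordered_map::try_emplace so a pipeline is built at most once per key, and a failed build stays cached as a null entry instead of being retried on every draw. FakeKey, FakeKeyHasher and FakePipeline below are hypothetical placeholders for the real key and pipeline types.

#include <cstddef>
#include <cstdint>
#include <memory>
#include <unordered_map>

struct FakeKey {
    std::uint64_t hash{};
    bool operator==(const FakeKey&) const = default;
};

struct FakeKeyHasher {
    std::size_t operator()(const FakeKey& key) const noexcept {
        return static_cast<std::size_t>(key.hash);
    }
};

struct FakePipeline {
    std::uint64_t hash{};
};

class PipelineCacheSketch {
public:
    // try_emplace inserts a default (null) entry when the key is new, so the
    // expensive build only runs on a cache miss; a failed build remains cached
    // as null and is returned cheaply on later lookups.
    FakePipeline* Get(const FakeKey& key) {
        const auto [it, is_new] = cache.try_emplace(key);
        auto& pipeline = it->second;
        if (is_new) {
            pipeline = Build(key);
        }
        return pipeline.get();
    }

private:
    std::unique_ptr<FakePipeline> Build(const FakeKey& key) {
        return std::make_unique<FakePipeline>(FakePipeline{key.hash});
    }

    std::unordered_map<FakeKey, std::unique_ptr<FakePipeline>, FakeKeyHasher> cache;
};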
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 2aed0697e..a34110b37 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -5,157 +5,93 @@
#pragma once
#include <array>
-#include <atomic>
-#include <bitset>
-#include <memory>
-#include <string>
-#include <tuple>
+#include <filesystem>
+#include <stop_token>
#include <unordered_map>
-#include <unordered_set>
-#include <vector>
#include <glad/glad.h>
#include "common/common_types.h"
-#include "video_core/engines/shader_type.h"
-#include "video_core/renderer_opengl/gl_resource_manager.h"
-#include "video_core/renderer_opengl/gl_shader_decompiler.h"
-#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
-#include "video_core/shader/registry.h"
-#include "video_core/shader/shader_ir.h"
+#include "common/thread_worker.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/host_translate_info.h"
+#include "shader_recompiler/object_pool.h"
+#include "shader_recompiler/profile.h"
+#include "video_core/renderer_opengl/gl_compute_pipeline.h"
+#include "video_core/renderer_opengl/gl_graphics_pipeline.h"
+#include "video_core/renderer_opengl/gl_shader_context.h"
#include "video_core/shader_cache.h"
namespace Tegra {
class MemoryManager;
}
-namespace Core::Frontend {
-class EmuWindow;
-}
-
-namespace VideoCommon::Shader {
-class AsyncShaders;
-}
-
namespace OpenGL {
class Device;
+class ProgramManager;
class RasterizerOpenGL;
+using ShaderWorker = Common::StatefulThreadWorker<ShaderContext::Context>;
-using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-
-struct ProgramHandle {
- OGLProgram source_program;
- OGLAssemblyProgram assembly_program;
-};
-using ProgramSharedPtr = std::shared_ptr<ProgramHandle>;
-
-struct PrecompiledShader {
- ProgramSharedPtr program;
- std::shared_ptr<VideoCommon::Shader::Registry> registry;
- ShaderEntries entries;
-};
-
-struct ShaderParameters {
- Tegra::GPU& gpu;
- Tegra::Engines::ConstBufferEngineInterface& engine;
- ShaderDiskCacheOpenGL& disk_cache;
- const Device& device;
- VAddr cpu_addr;
- const u8* host_ptr;
- u64 unique_identifier;
-};
-
-ProgramSharedPtr BuildShader(const Device& device, Tegra::Engines::ShaderType shader_type,
- u64 unique_identifier, const VideoCommon::Shader::ShaderIR& ir,
- const VideoCommon::Shader::Registry& registry,
- bool hint_retrievable = false);
-
-class Shader final {
+class ShaderCache : public VideoCommon::ShaderCache {
public:
- ~Shader();
-
- /// Gets the GL program handle for the shader
- GLuint GetHandle() const;
-
- bool IsBuilt() const;
-
- /// Gets the shader entries for the shader
- const ShaderEntries& GetEntries() const {
- return entries;
- }
-
- const VideoCommon::Shader::Registry& GetRegistry() const {
- return *registry;
- }
-
- /// Mark a OpenGL shader as built
- void AsyncOpenGLBuilt(OGLProgram new_program);
+ explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
+ Tegra::Engines::Maxwell3D& maxwell3d_,
+ Tegra::Engines::KeplerCompute& kepler_compute_,
+ Tegra::MemoryManager& gpu_memory_, const Device& device_,
+ TextureCache& texture_cache_, BufferCache& buffer_cache_,
+ ProgramManager& program_manager_, StateTracker& state_tracker_,
+ VideoCore::ShaderNotify& shader_notify_);
+ ~ShaderCache();
- /// Mark a GLASM shader as built
- void AsyncGLASMBuilt(OGLAssemblyProgram new_program);
+ void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
+ const VideoCore::DiskResourceLoadCallback& callback);
- static std::unique_ptr<Shader> CreateStageFromMemory(
- const ShaderParameters& params, Maxwell::ShaderProgram program_type,
- ProgramCode program_code, ProgramCode program_code_b,
- VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr);
+ [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline();
- static std::unique_ptr<Shader> CreateKernelFromMemory(const ShaderParameters& params,
- ProgramCode code);
-
- static std::unique_ptr<Shader> CreateFromCache(const ShaderParameters& params,
- const PrecompiledShader& precompiled_shader);
+ [[nodiscard]] ComputePipeline* CurrentComputePipeline();
private:
- explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries,
- ProgramSharedPtr program, bool is_built_ = true);
-
- std::shared_ptr<VideoCommon::Shader::Registry> registry;
- ShaderEntries entries;
- ProgramSharedPtr program;
- GLuint handle = 0;
- bool is_built{};
-};
+ GraphicsPipeline* CurrentGraphicsPipelineSlowPath();
-class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> {
-public:
- explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_,
- Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu,
- Tegra::Engines::Maxwell3D& maxwell3d_,
- Tegra::Engines::KeplerCompute& kepler_compute_,
- Tegra::MemoryManager& gpu_memory_, const Device& device_);
- ~ShaderCacheOpenGL() override;
+ [[nodiscard]] GraphicsPipeline* BuiltPipeline(GraphicsPipeline* pipeline) const noexcept;
- /// Loads disk cache for the current game
- void LoadDiskCache(u64 title_id, const std::atomic_bool& stop_loading,
- const VideoCore::DiskResourceLoadCallback& callback);
+ std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline();
- /// Gets the current specified shader stage program
- Shader* GetStageProgram(Maxwell::ShaderProgram program,
- VideoCommon::Shader::AsyncShaders& async_shaders);
+ std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline(
+ ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key,
+ std::span<Shader::Environment* const> envs, bool build_in_parallel);
- /// Gets a compute kernel in the passed address
- Shader* GetComputeKernel(GPUVAddr code_addr);
+ std::unique_ptr<ComputePipeline> CreateComputePipeline(const ComputePipelineKey& key,
+ const VideoCommon::ShaderInfo* shader);
-private:
- ProgramSharedPtr GeneratePrecompiledProgram(
- const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
- const std::unordered_set<GLenum>& supported_formats);
+ std::unique_ptr<ComputePipeline> CreateComputePipeline(ShaderContext::ShaderPools& pools,
+ const ComputePipelineKey& key,
+ Shader::Environment& env);
+
+ std::unique_ptr<ShaderWorker> CreateWorkers() const;
Core::Frontend::EmuWindow& emu_window;
- Tegra::GPU& gpu;
- Tegra::MemoryManager& gpu_memory;
- Tegra::Engines::Maxwell3D& maxwell3d;
- Tegra::Engines::KeplerCompute& kepler_compute;
const Device& device;
+ TextureCache& texture_cache;
+ BufferCache& buffer_cache;
+ ProgramManager& program_manager;
+ StateTracker& state_tracker;
+ VideoCore::ShaderNotify& shader_notify;
+ const bool use_asynchronous_shaders;
+
+ GraphicsPipelineKey graphics_key{};
+ GraphicsPipeline* current_pipeline{};
- ShaderDiskCacheOpenGL disk_cache;
- std::unordered_map<u64, PrecompiledShader> runtime_cache;
+ ShaderContext::ShaderPools main_pools;
+ std::unordered_map<GraphicsPipelineKey, std::unique_ptr<GraphicsPipeline>> graphics_cache;
+ std::unordered_map<ComputePipelineKey, std::unique_ptr<ComputePipeline>> compute_cache;
- std::unique_ptr<Shader> null_shader;
- std::unique_ptr<Shader> null_kernel;
+ Shader::Profile profile;
+ Shader::HostTranslateInfo host_info;
- std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{};
+ std::filesystem::path shader_cache_filename;
+ std::unique_ptr<ShaderWorker> workers;
};
} // namespace OpenGL
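
Aside: the header above replaces the old per-stage Shader objects with whole-pipeline caching, where graphics_cache and compute_cache map hashed pipeline keys to built pipeline objects. Below is a minimal, self-contained C++ sketch of that lookup-or-build pattern; Key, KeyHash, Pipeline and GetOrBuild are stand-in names invented for this note, and the real CurrentGraphicsPipeline()/CreateGraphicsPipeline() bodies live in gl_shader_cache.cpp, which this hunk does not show.

    #include <cstddef>
    #include <memory>
    #include <unordered_map>

    // Stand-in types so the sketch compiles on its own; the real cache keys are
    // GraphicsPipelineKey/ComputePipelineKey with their own hash support.
    struct Key {
        unsigned value;
        bool operator==(const Key&) const = default;
    };
    struct KeyHash {
        std::size_t operator()(const Key& k) const noexcept { return k.value; }
    };
    struct Pipeline {};

    // Look the key up first and build only on a miss, mirroring how a map of
    // unique_ptr pipelines is typically queried.
    Pipeline* GetOrBuild(std::unordered_map<Key, std::unique_ptr<Pipeline>, KeyHash>& cache,
                         const Key& key) {
        const auto [it, is_new] = cache.try_emplace(key);
        if (is_new) {
            it->second = std::make_unique<Pipeline>(); // real code translates and compiles here
        }
        return it->second.get();
    }

With use_asynchronous_shaders enabled, an entry may exist before its GL program has finished building, which is presumably what BuiltPipeline() above filters for.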
diff --git a/src/video_core/renderer_opengl/gl_shader_context.h b/src/video_core/renderer_opengl/gl_shader_context.h
new file mode 100644
index 000000000..6ff34e5d6
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_shader_context.h
@@ -0,0 +1,33 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "core/frontend/emu_window.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/maxwell/control_flow.h"
+
+namespace OpenGL::ShaderContext {
+struct ShaderPools {
+ void ReleaseContents() {
+ flow_block.ReleaseContents();
+ block.ReleaseContents();
+ inst.ReleaseContents();
+ }
+
+ Shader::ObjectPool<Shader::IR::Inst> inst;
+ Shader::ObjectPool<Shader::IR::Block> block;
+ Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block;
+};
+
+struct Context {
+ explicit Context(Core::Frontend::EmuWindow& emu_window)
+ : gl_context{emu_window.CreateSharedContext()}, scoped{*gl_context} {}
+
+ std::unique_ptr<Core::Frontend::GraphicsContext> gl_context;
+ Core::Frontend::GraphicsContext::Scoped scoped;
+ ShaderPools pools;
+};
+
+} // namespace OpenGL::ShaderContext
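
Aside: ShaderContext::Context bundles exactly what one background shader-builder thread needs, a shared OpenGL context made current through GraphicsContext::Scoped plus its own IR object pools, so GL and translation work can run off the main thread. A minimal usage sketch follows; RunShaderJobs and the job vector are hypothetical and do not reflect the real Common::StatefulThreadWorker interface, which this diff does not show.

    // Assumes: #include "video_core/renderer_opengl/gl_shader_context.h"
    #include <functional>
    #include <vector>

    // Hypothetical helper: run a batch of shader-build jobs on one worker thread.
    // One Context per thread keeps OpenGL calls legal off the main thread, and the
    // pools are recycled between jobs instead of being torn down and recreated.
    void RunShaderJobs(Core::Frontend::EmuWindow& emu_window,
                       const std::vector<std::function<void(OpenGL::ShaderContext::Context&)>>& jobs) {
        OpenGL::ShaderContext::Context ctx{emu_window}; // CreateSharedContext() + make it current
        for (const auto& job : jobs) {
            job(ctx);                    // e.g. translate IR and build one pipeline with ctx.pools
            ctx.pools.ReleaseContents(); // reset the per-pipeline IR pools before the next job
        }
    }

The ShaderWorker alias above (Common::StatefulThreadWorker<ShaderContext::Context>) suggests each pooled thread owns one such Context for its lifetime.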
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
deleted file mode 100644
index ac78d344c..000000000
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ /dev/null
@@ -1,2986 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <array>
-#include <string>
-#include <string_view>
-#include <utility>
-#include <variant>
-#include <vector>
-
-#include <fmt/format.h>
-
-#include "common/alignment.h"
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "common/div_ceil.h"
-#include "common/logging/log.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/engines/shader_type.h"
-#include "video_core/renderer_opengl/gl_device.h"
-#include "video_core/renderer_opengl/gl_rasterizer.h"
-#include "video_core/renderer_opengl/gl_shader_decompiler.h"
-#include "video_core/shader/ast.h"
-#include "video_core/shader/node.h"
-#include "video_core/shader/shader_ir.h"
-#include "video_core/shader/transform_feedback.h"
-
-namespace OpenGL {
-
-namespace {
-
-using Tegra::Engines::ShaderType;
-using Tegra::Shader::Attribute;
-using Tegra::Shader::Header;
-using Tegra::Shader::IpaInterpMode;
-using Tegra::Shader::IpaMode;
-using Tegra::Shader::IpaSampleMode;
-using Tegra::Shader::PixelImap;
-using Tegra::Shader::Register;
-using Tegra::Shader::TextureType;
-
-using namespace VideoCommon::Shader;
-using namespace std::string_literals;
-
-using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-using Operation = const OperationNode&;
-
-class ASTDecompiler;
-class ExprDecompiler;
-
-enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat };
-
-constexpr std::array FLOAT_TYPES{"float", "vec2", "vec3", "vec4"};
-
-constexpr std::string_view INPUT_ATTRIBUTE_NAME = "in_attr";
-constexpr std::string_view OUTPUT_ATTRIBUTE_NAME = "out_attr";
-
-struct TextureOffset {};
-struct TextureDerivates {};
-using TextureArgument = std::pair<Type, Node>;
-using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>;
-
-constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast<u32>(Maxwell::MaxConstBufferSize) / sizeof(u32);
-constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / sizeof(u32);
-
-constexpr std::string_view COMMON_DECLARATIONS = R"(#define ftoi floatBitsToInt
-#define ftou floatBitsToUint
-#define itof intBitsToFloat
-#define utof uintBitsToFloat
-
-bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{
- bvec2 is_nan1 = isnan(pair1);
- bvec2 is_nan2 = isnan(pair2);
- return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y);
-}}
-
-const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f );
-const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f );
-)";
-
-class ShaderWriter final {
-public:
- void AddExpression(std::string_view text) {
- DEBUG_ASSERT(scope >= 0);
- if (!text.empty()) {
- AppendIndentation();
- }
- shader_source += text;
- }
-
- // Forwards all arguments directly to libfmt.
- // Note that all formatting requirements for fmt must be
- // obeyed when using this function. (e.g. {{ must be used when
- // printing the character '{' is desirable. Ditto for }} and '}',
- // etc).
- template <typename... Args>
- void AddLine(std::string_view text, Args&&... args) {
- AddExpression(fmt::format(text, std::forward<Args>(args)...));
- AddNewLine();
- }
-
- void AddNewLine() {
- DEBUG_ASSERT(scope >= 0);
- shader_source += '\n';
- }
-
- std::string GenerateTemporary() {
- return fmt::format("tmp{}", temporary_index++);
- }
-
- std::string GetResult() {
- return std::move(shader_source);
- }
-
- s32 scope = 0;
-
-private:
- void AppendIndentation() {
- shader_source.append(static_cast<std::size_t>(scope) * 4, ' ');
- }
-
- std::string shader_source;
- u32 temporary_index = 1;
-};
-
-class Expression final {
-public:
- Expression(std::string code_, Type type_) : code{std::move(code_)}, type{type_} {
- ASSERT(type != Type::Void);
- }
- Expression() : type{Type::Void} {}
-
- Type GetType() const {
- return type;
- }
-
- std::string GetCode() const {
- return code;
- }
-
- void CheckVoid() const {
- ASSERT(type == Type::Void);
- }
-
- std::string As(Type type_) const {
- switch (type_) {
- case Type::Bool:
- return AsBool();
- case Type::Bool2:
- return AsBool2();
- case Type::Float:
- return AsFloat();
- case Type::Int:
- return AsInt();
- case Type::Uint:
- return AsUint();
- case Type::HalfFloat:
- return AsHalfFloat();
- default:
- UNREACHABLE_MSG("Invalid type");
- return code;
- }
- }
-
- std::string AsBool() const {
- switch (type) {
- case Type::Bool:
- return code;
- default:
- UNREACHABLE_MSG("Incompatible types");
- return code;
- }
- }
-
- std::string AsBool2() const {
- switch (type) {
- case Type::Bool2:
- return code;
- default:
- UNREACHABLE_MSG("Incompatible types");
- return code;
- }
- }
-
- std::string AsFloat() const {
- switch (type) {
- case Type::Float:
- return code;
- case Type::Uint:
- return fmt::format("utof({})", code);
- case Type::Int:
- return fmt::format("itof({})", code);
- case Type::HalfFloat:
- return fmt::format("utof(packHalf2x16({}))", code);
- default:
- UNREACHABLE_MSG("Incompatible types");
- return code;
- }
- }
-
- std::string AsInt() const {
- switch (type) {
- case Type::Float:
- return fmt::format("ftoi({})", code);
- case Type::Uint:
- return fmt::format("int({})", code);
- case Type::Int:
- return code;
- case Type::HalfFloat:
- return fmt::format("int(packHalf2x16({}))", code);
- default:
- UNREACHABLE_MSG("Incompatible types");
- return code;
- }
- }
-
- std::string AsUint() const {
- switch (type) {
- case Type::Float:
- return fmt::format("ftou({})", code);
- case Type::Uint:
- return code;
- case Type::Int:
- return fmt::format("uint({})", code);
- case Type::HalfFloat:
- return fmt::format("packHalf2x16({})", code);
- default:
- UNREACHABLE_MSG("Incompatible types");
- return code;
- }
- }
-
- std::string AsHalfFloat() const {
- switch (type) {
- case Type::Float:
- return fmt::format("unpackHalf2x16(ftou({}))", code);
- case Type::Uint:
- return fmt::format("unpackHalf2x16({})", code);
- case Type::Int:
- return fmt::format("unpackHalf2x16(int({}))", code);
- case Type::HalfFloat:
- return code;
- default:
- UNREACHABLE_MSG("Incompatible types");
- return code;
- }
- }
-
-private:
- std::string code;
- Type type{};
-};
-
-const char* GetTypeString(Type type) {
- switch (type) {
- case Type::Bool:
- return "bool";
- case Type::Bool2:
- return "bvec2";
- case Type::Float:
- return "float";
- case Type::Int:
- return "int";
- case Type::Uint:
- return "uint";
- case Type::HalfFloat:
- return "vec2";
- default:
- UNREACHABLE_MSG("Invalid type");
- return "<invalid type>";
- }
-}
-
-const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) {
- switch (image_type) {
- case Tegra::Shader::ImageType::Texture1D:
- return "1D";
- case Tegra::Shader::ImageType::TextureBuffer:
- return "Buffer";
- case Tegra::Shader::ImageType::Texture1DArray:
- return "1DArray";
- case Tegra::Shader::ImageType::Texture2D:
- return "2D";
- case Tegra::Shader::ImageType::Texture2DArray:
- return "2DArray";
- case Tegra::Shader::ImageType::Texture3D:
- return "3D";
- default:
- UNREACHABLE();
- return "1D";
- }
-}
-
-/// Describes primitive behavior on geometry shaders
-std::pair<const char*, u32> GetPrimitiveDescription(Maxwell::PrimitiveTopology topology) {
- switch (topology) {
- case Maxwell::PrimitiveTopology::Points:
- return {"points", 1};
- case Maxwell::PrimitiveTopology::Lines:
- case Maxwell::PrimitiveTopology::LineStrip:
- return {"lines", 2};
- case Maxwell::PrimitiveTopology::LinesAdjacency:
- case Maxwell::PrimitiveTopology::LineStripAdjacency:
- return {"lines_adjacency", 4};
- case Maxwell::PrimitiveTopology::Triangles:
- case Maxwell::PrimitiveTopology::TriangleStrip:
- case Maxwell::PrimitiveTopology::TriangleFan:
- return {"triangles", 3};
- case Maxwell::PrimitiveTopology::TrianglesAdjacency:
- case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
- return {"triangles_adjacency", 6};
- default:
- UNIMPLEMENTED_MSG("topology={}", topology);
- return {"points", 1};
- }
-}
-
-/// Generates code to use for a swizzle operation.
-constexpr const char* GetSwizzle(std::size_t element) {
- constexpr std::array swizzle = {".x", ".y", ".z", ".w"};
- return swizzle.at(element);
-}
-
-constexpr const char* GetColorSwizzle(std::size_t element) {
- constexpr std::array swizzle = {".r", ".g", ".b", ".a"};
- return swizzle.at(element);
-}
-
-/// Translate topology
-std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
- switch (topology) {
- case Tegra::Shader::OutputTopology::PointList:
- return "points";
- case Tegra::Shader::OutputTopology::LineStrip:
- return "line_strip";
- case Tegra::Shader::OutputTopology::TriangleStrip:
- return "triangle_strip";
- default:
- UNIMPLEMENTED_MSG("Unknown output topology: {}", topology);
- return "points";
- }
-}
-
-/// Returns true if an object has to be treated as precise
-bool IsPrecise(Operation operand) {
- const auto& meta{operand.GetMeta()};
- if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) {
- return arithmetic->precise;
- }
- return false;
-}
-
-bool IsPrecise(const Node& node) {
- if (const auto operation = std::get_if<OperationNode>(&*node)) {
- return IsPrecise(*operation);
- }
- return false;
-}
-
-constexpr bool IsGenericAttribute(Attribute::Index index) {
- return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31;
-}
-
-constexpr bool IsLegacyTexCoord(Attribute::Index index) {
- return static_cast<int>(index) >= static_cast<int>(Attribute::Index::TexCoord_0) &&
- static_cast<int>(index) <= static_cast<int>(Attribute::Index::TexCoord_7);
-}
-
-constexpr Attribute::Index ToGenericAttribute(u64 value) {
- return static_cast<Attribute::Index>(value + static_cast<u64>(Attribute::Index::Attribute_0));
-}
-
-constexpr int GetLegacyTexCoordIndex(Attribute::Index index) {
- return static_cast<int>(index) - static_cast<int>(Attribute::Index::TexCoord_0);
-}
-
-u32 GetGenericAttributeIndex(Attribute::Index index) {
- ASSERT(IsGenericAttribute(index));
- return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
-}
-
-constexpr const char* GetFlowStackPrefix(MetaStackClass stack) {
- switch (stack) {
- case MetaStackClass::Ssy:
- return "ssy";
- case MetaStackClass::Pbk:
- return "pbk";
- }
- return {};
-}
-
-std::string FlowStackName(MetaStackClass stack) {
- return fmt::format("{}_flow_stack", GetFlowStackPrefix(stack));
-}
-
-std::string FlowStackTopName(MetaStackClass stack) {
- return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack));
-}
-
-struct GenericVaryingDescription {
- std::string name;
- u8 first_element = 0;
- bool is_scalar = false;
-};
-
-class GLSLDecompiler final {
-public:
- explicit GLSLDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_,
- ShaderType stage_, std::string_view identifier_,
- std::string_view suffix_)
- : device{device_}, ir{ir_}, registry{registry_}, stage{stage_},
- identifier{identifier_}, suffix{suffix_}, header{ir.GetHeader()} {
- if (stage != ShaderType::Compute) {
- transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
- }
- }
-
- void Decompile() {
- DeclareHeader();
- DeclareVertex();
- DeclareGeometry();
- DeclareFragment();
- DeclareCompute();
- DeclareInputAttributes();
- DeclareOutputAttributes();
- DeclareImages();
- DeclareSamplers();
- DeclareGlobalMemory();
- DeclareConstantBuffers();
- DeclareLocalMemory();
- DeclareRegisters();
- DeclarePredicates();
- DeclareInternalFlags();
- DeclareCustomVariables();
- DeclarePhysicalAttributeReader();
-
- code.AddLine("void main() {{");
- ++code.scope;
-
- if (stage == ShaderType::Vertex) {
- code.AddLine("gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);");
- }
-
- if (ir.IsDecompiled()) {
- DecompileAST();
- } else {
- DecompileBranchMode();
- }
-
- --code.scope;
- code.AddLine("}}");
- }
-
- std::string GetResult() {
- return code.GetResult();
- }
-
-private:
- friend class ASTDecompiler;
- friend class ExprDecompiler;
-
- void DecompileBranchMode() {
- // VM's program counter
- const auto first_address = ir.GetBasicBlocks().begin()->first;
- code.AddLine("uint jmp_to = {}U;", first_address);
-
- // TODO(Subv): Figure out the actual depth of the flow stack; for now it seems
- // unlikely that shaders will use 20 nested SSYs and PBKs.
- constexpr u32 FLOW_STACK_SIZE = 20;
- if (!ir.IsFlowStackDisabled()) {
- for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) {
- code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE);
- code.AddLine("uint {} = 0U;", FlowStackTopName(stack));
- }
- }
-
- code.AddLine("while (true) {{");
- ++code.scope;
-
- code.AddLine("switch (jmp_to) {{");
-
- for (const auto& pair : ir.GetBasicBlocks()) {
- const auto& [address, bb] = pair;
- code.AddLine("case 0x{:X}U: {{", address);
- ++code.scope;
-
- VisitBlock(bb);
-
- --code.scope;
- code.AddLine("}}");
- }
-
- code.AddLine("default: return;");
- code.AddLine("}}");
-
- --code.scope;
- code.AddLine("}}");
- }
-
- void DecompileAST();
-
- void DeclareHeader() {
- if (!identifier.empty()) {
- code.AddLine("// {}", identifier);
- }
- const bool use_compatibility = ir.UsesLegacyVaryings() || ir.UsesYNegate();
- code.AddLine("#version 440 {}", use_compatibility ? "compatibility" : "core");
- code.AddLine("#extension GL_ARB_separate_shader_objects : enable");
- if (device.HasShaderBallot()) {
- code.AddLine("#extension GL_ARB_shader_ballot : require");
- }
- if (device.HasVertexViewportLayer()) {
- code.AddLine("#extension GL_ARB_shader_viewport_layer_array : require");
- }
- if (device.HasImageLoadFormatted()) {
- code.AddLine("#extension GL_EXT_shader_image_load_formatted : require");
- }
- if (device.HasTextureShadowLod()) {
- code.AddLine("#extension GL_EXT_texture_shadow_lod : require");
- }
- if (device.HasWarpIntrinsics()) {
- code.AddLine("#extension GL_NV_gpu_shader5 : require");
- code.AddLine("#extension GL_NV_shader_thread_group : require");
- code.AddLine("#extension GL_NV_shader_thread_shuffle : require");
- }
- // This pragma stops Nvidia's driver from over-optimizing math (probably using fp16
- // operations) in places where we don't want it to.
- // Thanks to Ryujinx for finding this workaround.
- code.AddLine("#pragma optionNV(fastmath off)");
-
- code.AddNewLine();
-
- code.AddLine(COMMON_DECLARATIONS);
- }
-
- void DeclareVertex() {
- if (stage != ShaderType::Vertex) {
- return;
- }
-
- DeclareVertexRedeclarations();
- }
-
- void DeclareGeometry() {
- if (stage != ShaderType::Geometry) {
- return;
- }
-
- const auto& info = registry.GetGraphicsInfo();
- const auto input_topology = info.primitive_topology;
- const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(input_topology);
- max_input_vertices = max_vertices;
- code.AddLine("layout ({}) in;", glsl_topology);
-
- const auto topology = GetTopologyName(header.common3.output_topology);
- const auto max_output_vertices = header.common4.max_output_vertices.Value();
- code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_output_vertices);
- code.AddNewLine();
-
- code.AddLine("in gl_PerVertex {{");
- ++code.scope;
- code.AddLine("vec4 gl_Position;");
- --code.scope;
- code.AddLine("}} gl_in[];");
-
- DeclareVertexRedeclarations();
- }
-
- void DeclareFragment() {
- if (stage != ShaderType::Fragment) {
- return;
- }
- if (ir.UsesLegacyVaryings()) {
- code.AddLine("in gl_PerFragment {{");
- ++code.scope;
- code.AddLine("vec4 gl_TexCoord[8];");
- code.AddLine("vec4 gl_Color;");
- code.AddLine("vec4 gl_SecondaryColor;");
- --code.scope;
- code.AddLine("}};");
- }
-
- for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
- code.AddLine("layout (location = {}) out vec4 frag_color{};", rt, rt);
- }
- }
-
- void DeclareCompute() {
- if (stage != ShaderType::Compute) {
- return;
- }
- const auto& info = registry.GetComputeInfo();
- if (u32 size = info.shared_memory_size_in_words * 4; size > 0) {
- const u32 limit = device.GetMaxComputeSharedMemorySize();
- if (size > limit) {
- LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}",
- size, limit);
- size = limit;
- }
-
- code.AddLine("shared uint smem[{}];", size / 4);
- code.AddNewLine();
- }
- code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;",
- info.workgroup_size[0], info.workgroup_size[1], info.workgroup_size[2]);
- code.AddNewLine();
- }
-
- void DeclareVertexRedeclarations() {
- code.AddLine("out gl_PerVertex {{");
- ++code.scope;
-
- auto pos_xfb = GetTransformFeedbackDecoration(Attribute::Index::Position);
- if (!pos_xfb.empty()) {
- pos_xfb = fmt::format("layout ({}) ", pos_xfb);
- }
- const char* pos_type =
- FLOAT_TYPES.at(GetNumComponents(Attribute::Index::Position).value_or(4) - 1);
- code.AddLine("{}{} gl_Position;", pos_xfb, pos_type);
-
- for (const auto attribute : ir.GetOutputAttributes()) {
- if (attribute == Attribute::Index::ClipDistances0123 ||
- attribute == Attribute::Index::ClipDistances4567) {
- code.AddLine("float gl_ClipDistance[];");
- break;
- }
- }
-
- if (stage != ShaderType::Geometry &&
- (stage != ShaderType::Vertex || device.HasVertexViewportLayer())) {
- if (ir.UsesLayer()) {
- code.AddLine("int gl_Layer;");
- }
- if (ir.UsesViewportIndex()) {
- code.AddLine("int gl_ViewportIndex;");
- }
- } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && stage == ShaderType::Vertex &&
- !device.HasVertexViewportLayer()) {
- LOG_ERROR(
- Render_OpenGL,
- "GL_ARB_shader_viewport_layer_array is not available and its required by a shader");
- }
-
- if (ir.UsesPointSize()) {
- code.AddLine("float gl_PointSize;");
- }
-
- if (ir.UsesLegacyVaryings()) {
- code.AddLine("vec4 gl_TexCoord[8];");
- code.AddLine("vec4 gl_FrontColor;");
- code.AddLine("vec4 gl_FrontSecondaryColor;");
- code.AddLine("vec4 gl_BackColor;");
- code.AddLine("vec4 gl_BackSecondaryColor;");
- }
-
- --code.scope;
- code.AddLine("}};");
- code.AddNewLine();
-
- if (stage == ShaderType::Geometry) {
- if (ir.UsesLayer()) {
- code.AddLine("out int gl_Layer;");
- }
- if (ir.UsesViewportIndex()) {
- code.AddLine("out int gl_ViewportIndex;");
- }
- }
- code.AddNewLine();
- }
-
- void DeclareRegisters() {
- const auto& registers = ir.GetRegisters();
- for (const u32 gpr : registers) {
- code.AddLine("float {} = 0.0f;", GetRegister(gpr));
- }
- if (!registers.empty()) {
- code.AddNewLine();
- }
- }
-
- void DeclareCustomVariables() {
- const u32 num_custom_variables = ir.GetNumCustomVariables();
- for (u32 i = 0; i < num_custom_variables; ++i) {
- code.AddLine("float {} = 0.0f;", GetCustomVariable(i));
- }
- if (num_custom_variables > 0) {
- code.AddNewLine();
- }
- }
-
- void DeclarePredicates() {
- const auto& predicates = ir.GetPredicates();
- for (const auto pred : predicates) {
- code.AddLine("bool {} = false;", GetPredicate(pred));
- }
- if (!predicates.empty()) {
- code.AddNewLine();
- }
- }
-
- void DeclareLocalMemory() {
- u64 local_memory_size = 0;
- if (stage == ShaderType::Compute) {
- local_memory_size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL;
- } else {
- local_memory_size = header.GetLocalMemorySize();
- }
- if (local_memory_size == 0) {
- return;
- }
- const u64 element_count = Common::AlignUp(local_memory_size, 4) / 4;
- code.AddLine("uint {}[{}];", GetLocalMemory(), element_count);
- code.AddNewLine();
- }
-
- void DeclareInternalFlags() {
- for (u32 flag = 0; flag < static_cast<u32>(InternalFlag::Amount); flag++) {
- const auto flag_code = static_cast<InternalFlag>(flag);
- code.AddLine("bool {} = false;", GetInternalFlag(flag_code));
- }
- code.AddNewLine();
- }
-
- const char* GetInputFlags(PixelImap attribute) {
- switch (attribute) {
- case PixelImap::Perspective:
- return "smooth";
- case PixelImap::Constant:
- return "flat";
- case PixelImap::ScreenLinear:
- return "noperspective";
- case PixelImap::Unused:
- break;
- }
- UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute);
- return {};
- }
-
- void DeclareInputAttributes() {
- if (ir.HasPhysicalAttributes()) {
- const u32 num_inputs{GetNumPhysicalInputAttributes()};
- for (u32 i = 0; i < num_inputs; ++i) {
- DeclareInputAttribute(ToGenericAttribute(i), true);
- }
- code.AddNewLine();
- return;
- }
-
- const auto& attributes = ir.GetInputAttributes();
- for (const auto index : attributes) {
- if (IsGenericAttribute(index)) {
- DeclareInputAttribute(index, false);
- }
- }
- if (!attributes.empty()) {
- code.AddNewLine();
- }
- }
-
- void DeclareInputAttribute(Attribute::Index index, bool skip_unused) {
- const u32 location{GetGenericAttributeIndex(index)};
-
- std::string name{GetGenericInputAttribute(index)};
- if (stage == ShaderType::Geometry) {
- name = "gs_" + name + "[]";
- }
-
- std::string suffix_;
- if (stage == ShaderType::Fragment) {
- const auto input_mode{header.ps.GetPixelImap(location)};
- if (input_mode == PixelImap::Unused) {
- return;
- }
- suffix_ = GetInputFlags(input_mode);
- }
-
- code.AddLine("layout (location = {}) {} in vec4 {};", location, suffix_, name);
- }
-
- void DeclareOutputAttributes() {
- if (ir.HasPhysicalAttributes() && stage != ShaderType::Fragment) {
- for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) {
- DeclareOutputAttribute(ToGenericAttribute(i));
- }
- code.AddNewLine();
- return;
- }
-
- const auto& attributes = ir.GetOutputAttributes();
- for (const auto index : attributes) {
- if (IsGenericAttribute(index)) {
- DeclareOutputAttribute(index);
- }
- }
- if (!attributes.empty()) {
- code.AddNewLine();
- }
- }
-
- std::optional<std::size_t> GetNumComponents(Attribute::Index index, u8 element = 0) const {
- const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
- const auto it = transform_feedback.find(location);
- if (it == transform_feedback.end()) {
- return std::nullopt;
- }
- return it->second.components;
- }
-
- std::string GetTransformFeedbackDecoration(Attribute::Index index, u8 element = 0) const {
- const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
- const auto it = transform_feedback.find(location);
- if (it == transform_feedback.end()) {
- return {};
- }
-
- const VaryingTFB& tfb = it->second;
- return fmt::format("xfb_buffer = {}, xfb_offset = {}, xfb_stride = {}", tfb.buffer,
- tfb.offset, tfb.stride);
- }
-
- void DeclareOutputAttribute(Attribute::Index index) {
- static constexpr std::string_view swizzle = "xyzw";
- u8 element = 0;
- while (element < 4) {
- auto xfb = GetTransformFeedbackDecoration(index, element);
- if (!xfb.empty()) {
- xfb = fmt::format(", {}", xfb);
- }
- const std::size_t remainder = 4 - element;
- const std::size_t num_components = GetNumComponents(index, element).value_or(remainder);
- const char* const type = FLOAT_TYPES.at(num_components - 1);
-
- const u32 location = GetGenericAttributeIndex(index);
-
- GenericVaryingDescription description;
- description.first_element = static_cast<u8>(element);
- description.is_scalar = num_components == 1;
- description.name = AppendSuffix(location, OUTPUT_ATTRIBUTE_NAME);
- if (element != 0 || num_components != 4) {
- const std::string_view name_swizzle = swizzle.substr(element, num_components);
- description.name = fmt::format("{}_{}", description.name, name_swizzle);
- }
- for (std::size_t i = 0; i < num_components; ++i) {
- const u8 offset = static_cast<u8>(location * 4 + element + i);
- varying_description.insert({offset, description});
- }
-
- code.AddLine("layout (location = {}, component = {}{}) out {} {};", location, element,
- xfb, type, description.name);
-
- element = static_cast<u8>(static_cast<std::size_t>(element) + num_components);
- }
- }
-
- void DeclareConstantBuffers() {
- u32 binding = device.GetBaseBindings(stage).uniform_buffer;
- for (const auto& [index, info] : ir.GetConstantBuffers()) {
- const u32 num_elements = Common::DivCeil(info.GetSize(), 4 * sizeof(u32));
- const u32 size = info.IsIndirect() ? MAX_CONSTBUFFER_ELEMENTS : num_elements;
- code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++,
- GetConstBufferBlock(index));
- code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), size);
- code.AddLine("}};");
- code.AddNewLine();
- }
- }
-
- void DeclareGlobalMemory() {
- u32 binding = device.GetBaseBindings(stage).shader_storage_buffer;
- for (const auto& [base, usage] : ir.GetGlobalMemory()) {
- // Since we don't know how the shader will use this memory, hint the driver to disable as
- // many optimizations as possible
- std::string qualifier = "coherent volatile";
- if (usage.is_read && !usage.is_written) {
- qualifier += " readonly";
- } else if (usage.is_written && !usage.is_read) {
- qualifier += " writeonly";
- }
-
- code.AddLine("layout (std430, binding = {}) {} buffer {} {{", binding++, qualifier,
- GetGlobalMemoryBlock(base));
- code.AddLine(" uint {}[];", GetGlobalMemory(base));
- code.AddLine("}};");
- code.AddNewLine();
- }
- }
-
- void DeclareSamplers() {
- u32 binding = device.GetBaseBindings(stage).sampler;
- for (const auto& sampler : ir.GetSamplers()) {
- const std::string name = GetSampler(sampler);
- const std::string description = fmt::format("layout (binding = {}) uniform", binding);
- binding += sampler.is_indexed ? sampler.size : 1;
-
- std::string sampler_type = [&]() {
- if (sampler.is_buffer) {
- return "samplerBuffer";
- }
- switch (sampler.type) {
- case TextureType::Texture1D:
- return "sampler1D";
- case TextureType::Texture2D:
- return "sampler2D";
- case TextureType::Texture3D:
- return "sampler3D";
- case TextureType::TextureCube:
- return "samplerCube";
- default:
- UNREACHABLE();
- return "sampler2D";
- }
- }();
- if (sampler.is_array) {
- sampler_type += "Array";
- }
- if (sampler.is_shadow) {
- sampler_type += "Shadow";
- }
-
- if (!sampler.is_indexed) {
- code.AddLine("{} {} {};", description, sampler_type, name);
- } else {
- code.AddLine("{} {} {}[{}];", description, sampler_type, name, sampler.size);
- }
- }
- if (!ir.GetSamplers().empty()) {
- code.AddNewLine();
- }
- }
-
- void DeclarePhysicalAttributeReader() {
- if (!ir.HasPhysicalAttributes()) {
- return;
- }
- code.AddLine("float ReadPhysicalAttribute(uint physical_address) {{");
- ++code.scope;
- code.AddLine("switch (physical_address) {{");
-
- // Just declare generic attributes for now.
- const auto num_attributes{static_cast<u32>(GetNumPhysicalInputAttributes())};
- for (u32 index = 0; index < num_attributes; ++index) {
- const auto attribute{ToGenericAttribute(index)};
- for (u32 element = 0; element < 4; ++element) {
- constexpr u32 generic_base = 0x80;
- constexpr u32 generic_stride = 16;
- constexpr u32 element_stride = 4;
- const u32 address{generic_base + index * generic_stride + element * element_stride};
-
- const bool declared = stage != ShaderType::Fragment ||
- header.ps.GetPixelImap(index) != PixelImap::Unused;
- const std::string value =
- declared ? ReadAttribute(attribute, element).AsFloat() : "0.0f";
- code.AddLine("case 0x{:X}U: return {};", address, value);
- }
- }
-
- code.AddLine("default: return 0;");
-
- code.AddLine("}}");
- --code.scope;
- code.AddLine("}}");
- code.AddNewLine();
- }
-
- void DeclareImages() {
- u32 binding = device.GetBaseBindings(stage).image;
- for (const auto& image : ir.GetImages()) {
- std::string qualifier = "coherent volatile";
- if (image.is_read && !image.is_written) {
- qualifier += " readonly";
- } else if (image.is_written && !image.is_read) {
- qualifier += " writeonly";
- }
-
- const char* format = image.is_atomic ? "r32ui, " : "";
- const char* type_declaration = GetImageTypeDeclaration(image.type);
- code.AddLine("layout ({}binding = {}) {} uniform uimage{} {};", format, binding++,
- qualifier, type_declaration, GetImage(image));
- }
- if (!ir.GetImages().empty()) {
- code.AddNewLine();
- }
- }
-
- void VisitBlock(const NodeBlock& bb) {
- for (const auto& node : bb) {
- Visit(node).CheckVoid();
- }
- }
-
- Expression Visit(const Node& node) {
- if (const auto operation = std::get_if<OperationNode>(&*node)) {
- if (const auto amend_index = operation->GetAmendIndex()) {
- Visit(ir.GetAmendNode(*amend_index)).CheckVoid();
- }
- const auto operation_index = static_cast<std::size_t>(operation->GetCode());
- if (operation_index >= operation_decompilers.size()) {
- UNREACHABLE_MSG("Out of bounds operation: {}", operation_index);
- return {};
- }
- const auto decompiler = operation_decompilers[operation_index];
- if (decompiler == nullptr) {
- UNREACHABLE_MSG("Undefined operation: {}", operation_index);
- return {};
- }
- return (this->*decompiler)(*operation);
- }
-
- if (const auto gpr = std::get_if<GprNode>(&*node)) {
- const u32 index = gpr->GetIndex();
- if (index == Register::ZeroIndex) {
- return {"0U", Type::Uint};
- }
- return {GetRegister(index), Type::Float};
- }
-
- if (const auto cv = std::get_if<CustomVarNode>(&*node)) {
- const u32 index = cv->GetIndex();
- return {GetCustomVariable(index), Type::Float};
- }
-
- if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
- const u32 value = immediate->GetValue();
- if (value < 10) {
- // For eye candy, avoid using hex numbers on single digits
- return {fmt::format("{}U", immediate->GetValue()), Type::Uint};
- }
- return {fmt::format("0x{:X}U", immediate->GetValue()), Type::Uint};
- }
-
- if (const auto predicate = std::get_if<PredicateNode>(&*node)) {
- const auto value = [&]() -> std::string {
- switch (const auto index = predicate->GetIndex(); index) {
- case Tegra::Shader::Pred::UnusedIndex:
- return "true";
- case Tegra::Shader::Pred::NeverExecute:
- return "false";
- default:
- return GetPredicate(index);
- }
- }();
- if (predicate->IsNegated()) {
- return {fmt::format("!({})", value), Type::Bool};
- }
- return {value, Type::Bool};
- }
-
- if (const auto abuf = std::get_if<AbufNode>(&*node)) {
- UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderType::Geometry,
- "Physical attributes in geometry shaders are not implemented");
- if (abuf->IsPhysicalBuffer()) {
- return {fmt::format("ReadPhysicalAttribute({})",
- Visit(abuf->GetPhysicalAddress()).AsUint()),
- Type::Float};
- }
- return ReadAttribute(abuf->GetIndex(), abuf->GetElement(), abuf->GetBuffer());
- }
-
- if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
- const Node offset = cbuf->GetOffset();
-
- if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
- // Direct access
- const u32 offset_imm = immediate->GetValue();
- ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access");
- return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
- offset_imm / (4 * 4), (offset_imm / 4) % 4),
- Type::Uint};
- }
-
- // Indirect access
- const std::string final_offset = code.GenerateTemporary();
- code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint());
-
- if (!device.HasComponentIndexingBug()) {
- return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()),
- final_offset, final_offset),
- Type::Uint};
- }
-
- // AMD's proprietary GLSL compiler emits broken code for variable component access.
- // To bypass this driver bug, generate 4 ifs, one per component.
- const std::string pack = code.GenerateTemporary();
- code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()),
- final_offset);
-
- const std::string result = code.GenerateTemporary();
- code.AddLine("uint {};", result);
- for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
- code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result, pack,
- GetSwizzle(swizzle));
- }
- return {result, Type::Uint};
- }
-
- if (const auto gmem = std::get_if<GmemNode>(&*node)) {
- const std::string real = Visit(gmem->GetRealAddress()).AsUint();
- const std::string base = Visit(gmem->GetBaseAddress()).AsUint();
- const std::string final_offset = fmt::format("({} - {}) >> 2", real, base);
- return {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset),
- Type::Uint};
- }
-
- if (const auto lmem = std::get_if<LmemNode>(&*node)) {
- return {
- fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()),
- Type::Uint};
- }
-
- if (const auto smem = std::get_if<SmemNode>(&*node)) {
- return {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint};
- }
-
- if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
- return {GetInternalFlag(internal_flag->GetFlag()), Type::Bool};
- }
-
- if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
- if (const auto amend_index = conditional->GetAmendIndex()) {
- Visit(ir.GetAmendNode(*amend_index)).CheckVoid();
- }
- // It's invalid to call conditional on nested nodes; use an operation instead
- code.AddLine("if ({}) {{", Visit(conditional->GetCondition()).AsBool());
- ++code.scope;
-
- VisitBlock(conditional->GetCode());
-
- --code.scope;
- code.AddLine("}}");
- return {};
- }
-
- if (const auto comment = std::get_if<CommentNode>(&*node)) {
- code.AddLine("// " + comment->GetText());
- return {};
- }
-
- UNREACHABLE();
- return {};
- }
-
- Expression ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) {
- const auto GeometryPass = [&](std::string_view name) {
- if (stage == ShaderType::Geometry && buffer) {
- // TODO(Rodrigo): Guard geometry inputs against out-of-bounds reads. Some games
- // set a 0x80000000 index for those and the shader fails to build. Find out why
- // this happens and what its intent is.
- return fmt::format("gs_{}[{} % {}]", name, Visit(buffer).AsUint(),
- max_input_vertices.value());
- }
- return std::string(name);
- };
-
- switch (attribute) {
- case Attribute::Index::Position:
- switch (stage) {
- case ShaderType::Geometry:
- return {fmt::format("gl_in[{}].gl_Position{}", Visit(buffer).AsUint(),
- GetSwizzle(element)),
- Type::Float};
- case ShaderType::Fragment:
- return {"gl_FragCoord"s + GetSwizzle(element), Type::Float};
- default:
- UNREACHABLE();
- return {"0", Type::Int};
- }
- case Attribute::Index::FrontColor:
- return {"gl_Color"s + GetSwizzle(element), Type::Float};
- case Attribute::Index::FrontSecondaryColor:
- return {"gl_SecondaryColor"s + GetSwizzle(element), Type::Float};
- case Attribute::Index::PointCoord:
- switch (element) {
- case 0:
- return {"gl_PointCoord.x", Type::Float};
- case 1:
- return {"gl_PointCoord.y", Type::Float};
- case 2:
- case 3:
- return {"0.0f", Type::Float};
- }
- UNREACHABLE();
- return {"0", Type::Int};
- case Attribute::Index::TessCoordInstanceIDVertexID:
- // TODO(Subv): Find out what the values are for the first two elements when inside a
- // vertex shader, and what the value of the fourth element is when inside a Tess Eval
- // shader.
- ASSERT(stage == ShaderType::Vertex);
- switch (element) {
- case 2:
- // Config pack's first value is instance_id.
- return {"gl_InstanceID", Type::Int};
- case 3:
- return {"gl_VertexID", Type::Int};
- }
- UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element);
- return {"0", Type::Int};
- case Attribute::Index::FrontFacing:
- // TODO(Subv): Find out what the values are for the other elements.
- ASSERT(stage == ShaderType::Fragment);
- switch (element) {
- case 3:
- return {"(gl_FrontFacing ? -1 : 0)", Type::Int};
- }
- UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element);
- return {"0", Type::Int};
- default:
- if (IsGenericAttribute(attribute)) {
- return {GeometryPass(GetGenericInputAttribute(attribute)) + GetSwizzle(element),
- Type::Float};
- }
- if (IsLegacyTexCoord(attribute)) {
- UNIMPLEMENTED_IF(stage == ShaderType::Geometry);
- return {fmt::format("gl_TexCoord[{}]{}", GetLegacyTexCoordIndex(attribute),
- GetSwizzle(element)),
- Type::Float};
- }
- break;
- }
- UNIMPLEMENTED_MSG("Unhandled input attribute: {}", attribute);
- return {"0", Type::Int};
- }
-
- Expression ApplyPrecise(Operation operation, std::string value, Type type) {
- if (!IsPrecise(operation)) {
- return {std::move(value), type};
- }
- // Old Nvidia drivers have a bug with precise and texture sampling. These are more likely to
- // be found in fragment shaders, so we disable precise there. There are vertex shaders that
- // also fail to build but nobody seems to care about those.
- // Note: Only bugged drivers will skip precise.
- const bool disable_precise = device.HasPreciseBug() && stage == ShaderType::Fragment;
-
- std::string temporary = code.GenerateTemporary();
- code.AddLine("{}{} {} = {};", disable_precise ? "" : "precise ", GetTypeString(type),
- temporary, value);
- return {std::move(temporary), type};
- }
-
- Expression VisitOperand(Operation operation, std::size_t operand_index) {
- const auto& operand = operation[operand_index];
- const bool parent_precise = IsPrecise(operation);
- const bool child_precise = IsPrecise(operand);
- const bool child_trivial = !std::holds_alternative<OperationNode>(*operand);
- if (!parent_precise || child_precise || child_trivial) {
- return Visit(operand);
- }
-
- Expression value = Visit(operand);
- std::string temporary = code.GenerateTemporary();
- code.AddLine("{} {} = {};", GetTypeString(value.GetType()), temporary, value.GetCode());
- return {std::move(temporary), value.GetType()};
- }
-
- std::optional<Expression> GetOutputAttribute(const AbufNode* abuf) {
- const u32 element = abuf->GetElement();
- switch (const auto attribute = abuf->GetIndex()) {
- case Attribute::Index::Position:
- return {{"gl_Position"s + GetSwizzle(element), Type::Float}};
- case Attribute::Index::LayerViewportPointSize:
- switch (element) {
- case 0:
- UNIMPLEMENTED();
- return std::nullopt;
- case 1:
- if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) {
- return std::nullopt;
- }
- return {{"gl_Layer", Type::Int}};
- case 2:
- if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) {
- return std::nullopt;
- }
- return {{"gl_ViewportIndex", Type::Int}};
- case 3:
- return {{"gl_PointSize", Type::Float}};
- }
- return std::nullopt;
- case Attribute::Index::FrontColor:
- return {{"gl_FrontColor"s + GetSwizzle(element), Type::Float}};
- case Attribute::Index::FrontSecondaryColor:
- return {{"gl_FrontSecondaryColor"s + GetSwizzle(element), Type::Float}};
- case Attribute::Index::BackColor:
- return {{"gl_BackColor"s + GetSwizzle(element), Type::Float}};
- case Attribute::Index::BackSecondaryColor:
- return {{"gl_BackSecondaryColor"s + GetSwizzle(element), Type::Float}};
- case Attribute::Index::ClipDistances0123:
- return {{fmt::format("gl_ClipDistance[{}]", element), Type::Float}};
- case Attribute::Index::ClipDistances4567:
- return {{fmt::format("gl_ClipDistance[{}]", element + 4), Type::Float}};
- default:
- if (IsGenericAttribute(attribute)) {
- return {{GetGenericOutputAttribute(attribute, element), Type::Float}};
- }
- if (IsLegacyTexCoord(attribute)) {
- return {{fmt::format("gl_TexCoord[{}]{}", GetLegacyTexCoordIndex(attribute),
- GetSwizzle(element)),
- Type::Float}};
- }
- UNIMPLEMENTED_MSG("Unhandled output attribute: {}", attribute);
- return std::nullopt;
- }
- }
-
- Expression GenerateUnary(Operation operation, std::string_view func, Type result_type,
- Type type_a) {
- std::string op_str = fmt::format("{}({})", func, VisitOperand(operation, 0).As(type_a));
- return ApplyPrecise(operation, std::move(op_str), result_type);
- }
-
- Expression GenerateBinaryInfix(Operation operation, std::string_view func, Type result_type,
- Type type_a, Type type_b) {
- const std::string op_a = VisitOperand(operation, 0).As(type_a);
- const std::string op_b = VisitOperand(operation, 1).As(type_b);
- std::string op_str = fmt::format("({} {} {})", op_a, func, op_b);
-
- return ApplyPrecise(operation, std::move(op_str), result_type);
- }
-
- Expression GenerateBinaryCall(Operation operation, std::string_view func, Type result_type,
- Type type_a, Type type_b) {
- const std::string op_a = VisitOperand(operation, 0).As(type_a);
- const std::string op_b = VisitOperand(operation, 1).As(type_b);
- std::string op_str = fmt::format("{}({}, {})", func, op_a, op_b);
-
- return ApplyPrecise(operation, std::move(op_str), result_type);
- }
-
- Expression GenerateTernary(Operation operation, std::string_view func, Type result_type,
- Type type_a, Type type_b, Type type_c) {
- const std::string op_a = VisitOperand(operation, 0).As(type_a);
- const std::string op_b = VisitOperand(operation, 1).As(type_b);
- const std::string op_c = VisitOperand(operation, 2).As(type_c);
- std::string op_str = fmt::format("{}({}, {}, {})", func, op_a, op_b, op_c);
-
- return ApplyPrecise(operation, std::move(op_str), result_type);
- }
-
- Expression GenerateQuaternary(Operation operation, const std::string& func, Type result_type,
- Type type_a, Type type_b, Type type_c, Type type_d) {
- const std::string op_a = VisitOperand(operation, 0).As(type_a);
- const std::string op_b = VisitOperand(operation, 1).As(type_b);
- const std::string op_c = VisitOperand(operation, 2).As(type_c);
- const std::string op_d = VisitOperand(operation, 3).As(type_d);
- std::string op_str = fmt::format("{}({}, {}, {}, {})", func, op_a, op_b, op_c, op_d);
-
- return ApplyPrecise(operation, std::move(op_str), result_type);
- }
-
- std::string GenerateTexture(Operation operation, const std::string& function_suffix,
- const std::vector<TextureIR>& extras, bool separate_dc = false) {
- constexpr std::array coord_constructors = {"float", "vec2", "vec3", "vec4"};
-
- const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
- ASSERT(meta);
-
- const std::size_t count = operation.GetOperandsCount();
- const bool has_array = meta->sampler.is_array;
- const bool has_shadow = meta->sampler.is_shadow;
- const bool workaround_lod_array_shadow_as_grad =
- !device.HasTextureShadowLod() && function_suffix == "Lod" && meta->sampler.is_shadow &&
- ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) ||
- meta->sampler.type == TextureType::TextureCube);
-
- std::string expr = "texture";
-
- if (workaround_lod_array_shadow_as_grad) {
- expr += "Grad";
- } else {
- expr += function_suffix;
- }
-
- if (!meta->aoffi.empty()) {
- expr += "Offset";
- } else if (!meta->ptp.empty()) {
- expr += "Offsets";
- }
- if (!meta->sampler.is_indexed) {
- expr += '(' + GetSampler(meta->sampler) + ", ";
- } else {
- expr += '(' + GetSampler(meta->sampler) + '[' + Visit(meta->index).AsUint() + "], ";
- }
- expr += coord_constructors.at(count + (has_array ? 1 : 0) +
- (has_shadow && !separate_dc ? 1 : 0) - 1);
- expr += '(';
- for (std::size_t i = 0; i < count; ++i) {
- expr += Visit(operation[i]).AsFloat();
-
- const std::size_t next = i + 1;
- if (next < count)
- expr += ", ";
- }
- if (has_array) {
- expr += ", float(" + Visit(meta->array).AsInt() + ')';
- }
- if (has_shadow) {
- if (separate_dc) {
- expr += "), " + Visit(meta->depth_compare).AsFloat();
- } else {
- expr += ", " + Visit(meta->depth_compare).AsFloat() + ')';
- }
- } else {
- expr += ')';
- }
-
- if (workaround_lod_array_shadow_as_grad) {
- switch (meta->sampler.type) {
- case TextureType::Texture2D:
- return expr + ", vec2(0.0), vec2(0.0))";
- case TextureType::TextureCube:
- return expr + ", vec3(0.0), vec3(0.0))";
- default:
- UNREACHABLE();
- break;
- }
- }
-
- for (const auto& variant : extras) {
- if (const auto argument = std::get_if<TextureArgument>(&variant)) {
- expr += GenerateTextureArgument(*argument);
- } else if (std::holds_alternative<TextureOffset>(variant)) {
- if (!meta->aoffi.empty()) {
- expr += GenerateTextureAoffi(meta->aoffi);
- } else if (!meta->ptp.empty()) {
- expr += GenerateTexturePtp(meta->ptp);
- }
- } else if (std::holds_alternative<TextureDerivates>(variant)) {
- expr += GenerateTextureDerivates(meta->derivates);
- } else {
- UNREACHABLE();
- }
- }
-
- return expr + ')';
- }
-
- std::string GenerateTextureArgument(const TextureArgument& argument) {
- const auto& [type, operand] = argument;
- if (operand == nullptr) {
- return {};
- }
-
- std::string expr = ", ";
- switch (type) {
- case Type::Int:
- if (const auto immediate = std::get_if<ImmediateNode>(&*operand)) {
- // Inline the value as an immediate integer in GLSL (some extra arguments are
- // required to be constant)
- expr += std::to_string(static_cast<s32>(immediate->GetValue()));
- } else {
- expr += Visit(operand).AsInt();
- }
- break;
- case Type::Float:
- expr += Visit(operand).AsFloat();
- break;
- default: {
- const auto type_int = static_cast<u32>(type);
- UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int);
- expr += '0';
- break;
- }
- }
- return expr;
- }
-
- std::string ReadTextureOffset(const Node& value) {
- if (const auto immediate = std::get_if<ImmediateNode>(&*value)) {
- // Inline the value as an immediate integer in GLSL (AOFFI arguments are required
- // to be constant by the standard).
- return std::to_string(static_cast<s32>(immediate->GetValue()));
- } else if (device.HasVariableAoffi()) {
- // Avoid using variable AOFFI on unsupported devices.
- return Visit(value).AsInt();
- } else {
- // Insert 0 on devices not supporting variable AOFFI.
- return "0";
- }
- }
-
- std::string GenerateTextureAoffi(const std::vector<Node>& aoffi) {
- if (aoffi.empty()) {
- return {};
- }
- constexpr std::array coord_constructors = {"int", "ivec2", "ivec3"};
- std::string expr = ", ";
- expr += coord_constructors.at(aoffi.size() - 1);
- expr += '(';
-
- for (std::size_t index = 0; index < aoffi.size(); ++index) {
- expr += ReadTextureOffset(aoffi.at(index));
- if (index + 1 < aoffi.size()) {
- expr += ", ";
- }
- }
- expr += ')';
-
- return expr;
- }
-
- std::string GenerateTexturePtp(const std::vector<Node>& ptp) {
- static constexpr std::size_t num_vectors = 4;
- ASSERT(ptp.size() == num_vectors * 2);
-
- std::string expr = ", ivec2[](";
- for (std::size_t vector = 0; vector < num_vectors; ++vector) {
- const bool has_next = vector + 1 < num_vectors;
- expr += fmt::format("ivec2({}, {}){}", ReadTextureOffset(ptp.at(vector * 2)),
- ReadTextureOffset(ptp.at(vector * 2 + 1)), has_next ? ", " : "");
- }
- expr += ')';
- return expr;
- }
-
- std::string GenerateTextureDerivates(const std::vector<Node>& derivates) {
- if (derivates.empty()) {
- return {};
- }
- constexpr std::array coord_constructors = {"float", "vec2", "vec3"};
- std::string expr = ", ";
- const std::size_t components = derivates.size() / 2;
- std::string dx = coord_constructors.at(components - 1);
- std::string dy = coord_constructors.at(components - 1);
- dx += '(';
- dy += '(';
-
- for (std::size_t index = 0; index < components; ++index) {
- const auto& operand_x{derivates.at(index * 2)};
- const auto& operand_y{derivates.at(index * 2 + 1)};
- dx += Visit(operand_x).AsFloat();
- dy += Visit(operand_y).AsFloat();
-
- if (index + 1 < components) {
- dx += ", ";
- dy += ", ";
- }
- }
- dx += ')';
- dy += ')';
- expr += dx + ", " + dy;
-
- return expr;
- }
-
- std::string BuildIntegerCoordinates(Operation operation) {
- constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("};
- const std::size_t coords_count{operation.GetOperandsCount()};
- std::string expr = constructors.at(coords_count - 1);
- for (std::size_t i = 0; i < coords_count; ++i) {
- expr += VisitOperand(operation, i).AsInt();
- if (i + 1 < coords_count) {
- expr += ", ";
- }
- }
- expr += ')';
- return expr;
- }
-
- std::string BuildImageValues(Operation operation) {
- constexpr std::array constructors{"uint", "uvec2", "uvec3", "uvec4"};
- const auto& meta{std::get<MetaImage>(operation.GetMeta())};
-
- const std::size_t values_count{meta.values.size()};
- std::string expr = fmt::format("{}(", constructors.at(values_count - 1));
- for (std::size_t i = 0; i < values_count; ++i) {
- expr += Visit(meta.values.at(i)).AsUint();
- if (i + 1 < values_count) {
- expr += ", ";
- }
- }
- expr += ')';
- return expr;
- }
-
- Expression Assign(Operation operation) {
- const Node& dest = operation[0];
- const Node& src = operation[1];
-
- Expression target;
- if (const auto gpr = std::get_if<GprNode>(&*dest)) {
- if (gpr->GetIndex() == Register::ZeroIndex) {
- // Writing to Register::ZeroIndex is a no op but we still have to visit the source
- // as it might have side effects.
- code.AddLine("{};", Visit(src).GetCode());
- return {};
- }
- target = {GetRegister(gpr->GetIndex()), Type::Float};
- } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) {
- UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer());
- auto output = GetOutputAttribute(abuf);
- if (!output) {
- return {};
- }
- target = std::move(*output);
- } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
- target = {
- fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()),
- Type::Uint};
- } else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
- ASSERT(stage == ShaderType::Compute);
- target = {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint};
- } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
- const std::string real = Visit(gmem->GetRealAddress()).AsUint();
- const std::string base = Visit(gmem->GetBaseAddress()).AsUint();
- const std::string final_offset = fmt::format("({} - {}) >> 2", real, base);
- target = {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset),
- Type::Uint};
- } else if (const auto cv = std::get_if<CustomVarNode>(&*dest)) {
- target = {GetCustomVariable(cv->GetIndex()), Type::Float};
- } else {
- UNREACHABLE_MSG("Assign called without a proper target");
- }
-
- code.AddLine("{} = {};", target.GetCode(), Visit(src).As(target.GetType()));
- return {};
- }
-
- template <Type type>
- Expression Add(Operation operation) {
- return GenerateBinaryInfix(operation, "+", type, type, type);
- }
-
- template <Type type>
- Expression Mul(Operation operation) {
- return GenerateBinaryInfix(operation, "*", type, type, type);
- }
-
- template <Type type>
- Expression Div(Operation operation) {
- return GenerateBinaryInfix(operation, "/", type, type, type);
- }
-
- template <Type type>
- Expression Fma(Operation operation) {
- return GenerateTernary(operation, "fma", type, type, type, type);
- }
-
- template <Type type>
- Expression Negate(Operation operation) {
- return GenerateUnary(operation, "-", type, type);
- }
-
- template <Type type>
- Expression Absolute(Operation operation) {
- return GenerateUnary(operation, "abs", type, type);
- }
-
- Expression FClamp(Operation operation) {
- return GenerateTernary(operation, "clamp", Type::Float, Type::Float, Type::Float,
- Type::Float);
- }
-
- Expression FCastHalf0(Operation operation) {
- return {fmt::format("({})[0]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float};
- }
-
- Expression FCastHalf1(Operation operation) {
- return {fmt::format("({})[1]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float};
- }
-
- template <Type type>
- Expression Min(Operation operation) {
- return GenerateBinaryCall(operation, "min", type, type, type);
- }
-
- template <Type type>
- Expression Max(Operation operation) {
- return GenerateBinaryCall(operation, "max", type, type, type);
- }
-
- Expression Select(Operation operation) {
- const std::string condition = Visit(operation[0]).AsBool();
- const std::string true_case = Visit(operation[1]).AsUint();
- const std::string false_case = Visit(operation[2]).AsUint();
- std::string op_str = fmt::format("({} ? {} : {})", condition, true_case, false_case);
-
- return ApplyPrecise(operation, std::move(op_str), Type::Uint);
- }
-
- Expression FCos(Operation operation) {
- return GenerateUnary(operation, "cos", Type::Float, Type::Float);
- }
-
- Expression FSin(Operation operation) {
- return GenerateUnary(operation, "sin", Type::Float, Type::Float);
- }
-
- Expression FExp2(Operation operation) {
- return GenerateUnary(operation, "exp2", Type::Float, Type::Float);
- }
-
- Expression FLog2(Operation operation) {
- return GenerateUnary(operation, "log2", Type::Float, Type::Float);
- }
-
- Expression FInverseSqrt(Operation operation) {
- return GenerateUnary(operation, "inversesqrt", Type::Float, Type::Float);
- }
-
- Expression FSqrt(Operation operation) {
- return GenerateUnary(operation, "sqrt", Type::Float, Type::Float);
- }
-
- Expression FRoundEven(Operation operation) {
- return GenerateUnary(operation, "roundEven", Type::Float, Type::Float);
- }
-
- Expression FFloor(Operation operation) {
- return GenerateUnary(operation, "floor", Type::Float, Type::Float);
- }
-
- Expression FCeil(Operation operation) {
- return GenerateUnary(operation, "ceil", Type::Float, Type::Float);
- }
-
- Expression FTrunc(Operation operation) {
- return GenerateUnary(operation, "trunc", Type::Float, Type::Float);
- }
-
- template <Type type>
- Expression FCastInteger(Operation operation) {
- return GenerateUnary(operation, "float", Type::Float, type);
- }
-
- Expression FSwizzleAdd(Operation operation) {
- const std::string op_a = VisitOperand(operation, 0).AsFloat();
- const std::string op_b = VisitOperand(operation, 1).AsFloat();
-
- if (!device.HasShaderBallot()) {
- LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader");
- return {fmt::format("{} + {}", op_a, op_b), Type::Float};
- }
-
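-        // Each invocation selects a 2-bit modifier index from the swizzle mask using its
-        // position within the quad (subgroup invocation ID & 3).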
- const std::string instr_mask = VisitOperand(operation, 2).AsUint();
- const std::string mask = code.GenerateTemporary();
- code.AddLine("uint {} = ({} >> ((gl_SubGroupInvocationARB & 3) << 1)) & 3;", mask,
- instr_mask);
-
- const std::string modifier_a = fmt::format("fswzadd_modifiers_a[{}]", mask);
- const std::string modifier_b = fmt::format("fswzadd_modifiers_b[{}]", mask);
- return {fmt::format("(({} * {}) + ({} * {}))", op_a, modifier_a, op_b, modifier_b),
- Type::Float};
- }
-
- Expression ICastFloat(Operation operation) {
- return GenerateUnary(operation, "int", Type::Int, Type::Float);
- }
-
- Expression ICastUnsigned(Operation operation) {
- return GenerateUnary(operation, "int", Type::Int, Type::Uint);
- }
-
- template <Type type>
- Expression LogicalShiftLeft(Operation operation) {
- return GenerateBinaryInfix(operation, "<<", type, type, Type::Uint);
- }
-
- Expression ILogicalShiftRight(Operation operation) {
- const std::string op_a = VisitOperand(operation, 0).AsUint();
- const std::string op_b = VisitOperand(operation, 1).AsUint();
- std::string op_str = fmt::format("int({} >> {})", op_a, op_b);
-
- return ApplyPrecise(operation, std::move(op_str), Type::Int);
- }
-
- Expression IArithmeticShiftRight(Operation operation) {
- return GenerateBinaryInfix(operation, ">>", Type::Int, Type::Int, Type::Uint);
- }
-
- template <Type type>
- Expression BitwiseAnd(Operation operation) {
- return GenerateBinaryInfix(operation, "&", type, type, type);
- }
-
- template <Type type>
- Expression BitwiseOr(Operation operation) {
- return GenerateBinaryInfix(operation, "|", type, type, type);
- }
-
- template <Type type>
- Expression BitwiseXor(Operation operation) {
- return GenerateBinaryInfix(operation, "^", type, type, type);
- }
-
- template <Type type>
- Expression BitwiseNot(Operation operation) {
- return GenerateUnary(operation, "~", type, type);
- }
-
- Expression UCastFloat(Operation operation) {
- return GenerateUnary(operation, "uint", Type::Uint, Type::Float);
- }
-
- Expression UCastSigned(Operation operation) {
- return GenerateUnary(operation, "uint", Type::Uint, Type::Int);
- }
-
- Expression UShiftRight(Operation operation) {
- return GenerateBinaryInfix(operation, ">>", Type::Uint, Type::Uint, Type::Uint);
- }
-
- template <Type type>
- Expression BitfieldInsert(Operation operation) {
- return GenerateQuaternary(operation, "bitfieldInsert", type, type, type, Type::Int,
- Type::Int);
- }
-
- template <Type type>
- Expression BitfieldExtract(Operation operation) {
- return GenerateTernary(operation, "bitfieldExtract", type, type, Type::Int, Type::Int);
- }
-
- template <Type type>
- Expression BitCount(Operation operation) {
- return GenerateUnary(operation, "bitCount", type, type);
- }
-
- template <Type type>
- Expression BitMSB(Operation operation) {
- return GenerateUnary(operation, "findMSB", type, type);
- }
-
- Expression HNegate(Operation operation) {
- const auto GetNegate = [&](std::size_t index) {
- return VisitOperand(operation, index).AsBool() + " ? -1 : 1";
- };
- return {fmt::format("({} * vec2({}, {}))", VisitOperand(operation, 0).AsHalfFloat(),
- GetNegate(1), GetNegate(2)),
- Type::HalfFloat};
- }
-
- Expression HClamp(Operation operation) {
- const std::string value = VisitOperand(operation, 0).AsHalfFloat();
- const std::string min = VisitOperand(operation, 1).AsFloat();
- const std::string max = VisitOperand(operation, 2).AsFloat();
- std::string clamped = fmt::format("clamp({}, vec2({}), vec2({}))", value, min, max);
-
- return ApplyPrecise(operation, std::move(clamped), Type::HalfFloat);
- }
-
- Expression HCastFloat(Operation operation) {
- return {fmt::format("vec2({}, 0.0f)", VisitOperand(operation, 0).AsFloat()),
- Type::HalfFloat};
- }
-
- Expression HUnpack(Operation operation) {
- Expression operand = VisitOperand(operation, 0);
- switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) {
- case Tegra::Shader::HalfType::H0_H1:
- return operand;
- case Tegra::Shader::HalfType::F32:
- return {fmt::format("vec2({})", operand.AsFloat()), Type::HalfFloat};
- case Tegra::Shader::HalfType::H0_H0:
- return {fmt::format("vec2({}[0])", operand.AsHalfFloat()), Type::HalfFloat};
- case Tegra::Shader::HalfType::H1_H1:
- return {fmt::format("vec2({}[1])", operand.AsHalfFloat()), Type::HalfFloat};
- }
- UNREACHABLE();
- return {"0", Type::Int};
- }
-
- Expression HMergeF32(Operation operation) {
- return {fmt::format("float({}[0])", VisitOperand(operation, 0).AsHalfFloat()), Type::Float};
- }
-
- Expression HMergeH0(Operation operation) {
- const std::string dest = VisitOperand(operation, 0).AsUint();
- const std::string src = VisitOperand(operation, 1).AsUint();
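-        // Take the low half from src and keep the high half from dest.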
- return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", src, dest),
- Type::HalfFloat};
- }
-
- Expression HMergeH1(Operation operation) {
- const std::string dest = VisitOperand(operation, 0).AsUint();
- const std::string src = VisitOperand(operation, 1).AsUint();
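-        // Keep the low half from dest and take the high half from src.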
- return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", dest, src),
- Type::HalfFloat};
- }
-
- Expression HPack2(Operation operation) {
- return {fmt::format("vec2({}, {})", VisitOperand(operation, 0).AsFloat(),
- VisitOperand(operation, 1).AsFloat()),
- Type::HalfFloat};
- }
-
- template <const std::string_view& op, Type type, bool unordered = false>
- Expression Comparison(Operation operation) {
- static_assert(!unordered || type == Type::Float);
-
- Expression expr = GenerateBinaryInfix(operation, op, Type::Bool, type, type);
-
- if constexpr (op.compare("!=") == 0 && type == Type::Float && !unordered) {
-            // GLSL's operator!=(float, float) doesn't seem to be ordered. This happens on both
-            // AMD's and Nvidia's proprietary stacks. Manually force an ordered comparison.
- return {fmt::format("({} && !isnan({}) && !isnan({}))", expr.AsBool(),
- VisitOperand(operation, 0).AsFloat(),
- VisitOperand(operation, 1).AsFloat()),
- Type::Bool};
- }
- if constexpr (!unordered) {
- return expr;
- }
- // Unordered comparisons are always true for NaN operands.
- return {fmt::format("({} || isnan({}) || isnan({}))", expr.AsBool(),
- VisitOperand(operation, 0).AsFloat(),
- VisitOperand(operation, 1).AsFloat()),
- Type::Bool};
- }
-
- Expression FOrdered(Operation operation) {
- return {fmt::format("(!isnan({}) && !isnan({}))", VisitOperand(operation, 0).AsFloat(),
- VisitOperand(operation, 1).AsFloat()),
- Type::Bool};
- }
-
- Expression FUnordered(Operation operation) {
- return {fmt::format("(isnan({}) || isnan({}))", VisitOperand(operation, 0).AsFloat(),
- VisitOperand(operation, 1).AsFloat()),
- Type::Bool};
- }
-
- Expression LogicalAddCarry(Operation operation) {
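-        // uaddCarry writes the carry-out into a temporary; expose it as a boolean expression.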
- const std::string carry = code.GenerateTemporary();
- code.AddLine("uint {};", carry);
- code.AddLine("uaddCarry({}, {}, {});", VisitOperand(operation, 0).AsUint(),
- VisitOperand(operation, 1).AsUint(), carry);
- return {fmt::format("({} != 0)", carry), Type::Bool};
- }
-
- Expression LogicalAssign(Operation operation) {
- const Node& dest = operation[0];
- const Node& src = operation[1];
-
- std::string target;
-
- if (const auto pred = std::get_if<PredicateNode>(&*dest)) {
- ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment");
-
- const auto index = pred->GetIndex();
- switch (index) {
- case Tegra::Shader::Pred::NeverExecute:
- case Tegra::Shader::Pred::UnusedIndex:
- // Writing to these predicates is a no-op
- return {};
- }
- target = GetPredicate(index);
- } else if (const auto flag = std::get_if<InternalFlagNode>(&*dest)) {
- target = GetInternalFlag(flag->GetFlag());
- }
-
- code.AddLine("{} = {};", target, Visit(src).AsBool());
- return {};
- }
-
- Expression LogicalAnd(Operation operation) {
- return GenerateBinaryInfix(operation, "&&", Type::Bool, Type::Bool, Type::Bool);
- }
-
- Expression LogicalOr(Operation operation) {
- return GenerateBinaryInfix(operation, "||", Type::Bool, Type::Bool, Type::Bool);
- }
-
- Expression LogicalXor(Operation operation) {
- return GenerateBinaryInfix(operation, "^^", Type::Bool, Type::Bool, Type::Bool);
- }
-
- Expression LogicalNegate(Operation operation) {
- return GenerateUnary(operation, "!", Type::Bool, Type::Bool);
- }
-
- Expression LogicalPick2(Operation operation) {
- return {fmt::format("{}[{}]", VisitOperand(operation, 0).AsBool2(),
- VisitOperand(operation, 1).AsUint()),
- Type::Bool};
- }
-
- Expression LogicalAnd2(Operation operation) {
- return GenerateUnary(operation, "all", Type::Bool, Type::Bool2);
- }
-
- template <bool with_nan>
- Expression GenerateHalfComparison(Operation operation, std::string_view compare_op) {
- Expression comparison = GenerateBinaryCall(operation, compare_op, Type::Bool2,
- Type::HalfFloat, Type::HalfFloat);
- if constexpr (!with_nan) {
- return comparison;
- }
- return {fmt::format("HalfFloatNanComparison({}, {}, {})", comparison.AsBool2(),
- VisitOperand(operation, 0).AsHalfFloat(),
- VisitOperand(operation, 1).AsHalfFloat()),
- Type::Bool2};
- }
-
- template <bool with_nan>
- Expression Logical2HLessThan(Operation operation) {
- return GenerateHalfComparison<with_nan>(operation, "lessThan");
- }
-
- template <bool with_nan>
- Expression Logical2HEqual(Operation operation) {
- return GenerateHalfComparison<with_nan>(operation, "equal");
- }
-
- template <bool with_nan>
- Expression Logical2HLessEqual(Operation operation) {
- return GenerateHalfComparison<with_nan>(operation, "lessThanEqual");
- }
-
- template <bool with_nan>
- Expression Logical2HGreaterThan(Operation operation) {
- return GenerateHalfComparison<with_nan>(operation, "greaterThan");
- }
-
- template <bool with_nan>
- Expression Logical2HNotEqual(Operation operation) {
- return GenerateHalfComparison<with_nan>(operation, "notEqual");
- }
-
- template <bool with_nan>
- Expression Logical2HGreaterEqual(Operation operation) {
- return GenerateHalfComparison<with_nan>(operation, "greaterThanEqual");
- }
-
- Expression Texture(Operation operation) {
- const auto meta = std::get<MetaTexture>(operation.GetMeta());
- const bool separate_dc = meta.sampler.type == TextureType::TextureCube &&
- meta.sampler.is_array && meta.sampler.is_shadow;
- // TODO: Replace this with an array and make GenerateTexture use C++20 std::span
- const std::vector<TextureIR> extras{
- TextureOffset{},
- TextureArgument{Type::Float, meta.bias},
- };
- std::string expr = GenerateTexture(operation, "", extras, separate_dc);
- if (meta.sampler.is_shadow) {
- expr = fmt::format("vec4({})", expr);
- }
- return {expr + GetSwizzle(meta.element), Type::Float};
- }
-
- Expression TextureLod(Operation operation) {
- const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
- ASSERT(meta);
-
- std::string expr{};
-
- if (!device.HasTextureShadowLod() && meta->sampler.is_shadow &&
- ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) ||
- meta->sampler.type == TextureType::TextureCube)) {
- LOG_ERROR(Render_OpenGL,
- "Device lacks GL_EXT_texture_shadow_lod, using textureGrad as a workaround");
- expr = GenerateTexture(operation, "Lod", {});
- } else {
- expr = GenerateTexture(operation, "Lod",
- {TextureArgument{Type::Float, meta->lod}, TextureOffset{}});
- }
-
- if (meta->sampler.is_shadow) {
- expr = "vec4(" + expr + ')';
- }
- return {expr + GetSwizzle(meta->element), Type::Float};
- }
-
- Expression TextureGather(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
-
- const auto type = meta.sampler.is_shadow ? Type::Float : Type::Int;
- const bool separate_dc = meta.sampler.is_shadow;
-
- std::vector<TextureIR> ir_;
- if (meta.sampler.is_shadow) {
- ir_ = {TextureOffset{}};
- } else {
- ir_ = {TextureOffset{}, TextureArgument{type, meta.component}};
- }
- return {GenerateTexture(operation, "Gather", ir_, separate_dc) + GetSwizzle(meta.element),
- Type::Float};
- }
-
- Expression TextureQueryDimensions(Operation operation) {
- const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
- ASSERT(meta);
-
- const std::string sampler = GetSampler(meta->sampler);
- const std::string lod = VisitOperand(operation, 0).AsInt();
-
- switch (meta->element) {
- case 0:
- case 1:
- return {fmt::format("textureSize({}, {}){}", sampler, lod, GetSwizzle(meta->element)),
- Type::Int};
- case 3:
- return {fmt::format("textureQueryLevels({})", sampler), Type::Int};
- }
- UNREACHABLE();
- return {"0", Type::Int};
- }
-
- Expression TextureQueryLod(Operation operation) {
- const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
- ASSERT(meta);
-
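-        // Convert the queried LOD to fixed point (8 fractional bits) by scaling by 256 before
-        // the int cast.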
- if (meta->element < 2) {
- return {fmt::format("int(({} * vec2(256)){})",
- GenerateTexture(operation, "QueryLod", {}),
- GetSwizzle(meta->element)),
- Type::Int};
- }
- return {"0", Type::Int};
- }
-
- Expression TexelFetch(Operation operation) {
- constexpr std::array constructors = {"int", "ivec2", "ivec3", "ivec4"};
- const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
- ASSERT(meta);
- UNIMPLEMENTED_IF(meta->sampler.is_array);
- const std::size_t count = operation.GetOperandsCount();
-
- std::string expr = "texelFetch(";
- expr += GetSampler(meta->sampler);
- expr += ", ";
-
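-        // Pick the ivec constructor matching the coordinate count, plus one for the array layer.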
- expr += constructors.at(operation.GetOperandsCount() + (meta->array ? 1 : 0) - 1);
- expr += '(';
- for (std::size_t i = 0; i < count; ++i) {
- if (i > 0) {
- expr += ", ";
- }
- expr += VisitOperand(operation, i).AsInt();
- }
- if (meta->array) {
- expr += ", ";
- expr += Visit(meta->array).AsInt();
- }
- expr += ')';
-
- if (meta->lod && !meta->sampler.is_buffer) {
- expr += ", ";
- expr += Visit(meta->lod).AsInt();
- }
- expr += ')';
- expr += GetSwizzle(meta->element);
-
- return {std::move(expr), Type::Float};
- }
-
- Expression TextureGradient(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- std::string expr =
- GenerateTexture(operation, "Grad", {TextureDerivates{}, TextureOffset{}});
- return {std::move(expr) + GetSwizzle(meta.element), Type::Float};
- }
-
- Expression ImageLoad(Operation operation) {
- if (!device.HasImageLoadFormatted()) {
- LOG_ERROR(Render_OpenGL,
- "Device lacks GL_EXT_shader_image_load_formatted, stubbing image load");
- return {"0", Type::Int};
- }
-
- const auto& meta{std::get<MetaImage>(operation.GetMeta())};
- return {fmt::format("imageLoad({}, {}){}", GetImage(meta.image),
- BuildIntegerCoordinates(operation), GetSwizzle(meta.element)),
- Type::Uint};
- }
-
- Expression ImageStore(Operation operation) {
- const auto& meta{std::get<MetaImage>(operation.GetMeta())};
- code.AddLine("imageStore({}, {}, {});", GetImage(meta.image),
- BuildIntegerCoordinates(operation), BuildImageValues(operation));
- return {};
- }
-
- template <const std::string_view& opname>
- Expression AtomicImage(Operation operation) {
- const auto& meta{std::get<MetaImage>(operation.GetMeta())};
- ASSERT(meta.values.size() == 1);
-
- return {fmt::format("imageAtomic{}({}, {}, {})", opname, GetImage(meta.image),
- BuildIntegerCoordinates(operation), Visit(meta.values[0]).AsUint()),
- Type::Uint};
- }
-
- template <const std::string_view& opname, Type type>
- Expression Atomic(Operation operation) {
- if ((opname == Func::Min || opname == Func::Max) && type == Type::Int) {
- UNIMPLEMENTED_MSG("Unimplemented Min & Max for atomic operations");
- return {};
- }
- return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(),
- Visit(operation[1]).AsUint()),
- Type::Uint};
- }
-
- template <const std::string_view& opname, Type type>
- Expression Reduce(Operation operation) {
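-        // A reduction is an atomic operation whose result is discarded.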
- code.AddLine("{};", Atomic<opname, type>(operation).GetCode());
- return {};
- }
-
- Expression Branch(Operation operation) {
- const auto target = std::get_if<ImmediateNode>(&*operation[0]);
- UNIMPLEMENTED_IF(!target);
-
- code.AddLine("jmp_to = 0x{:X}U;", target->GetValue());
- code.AddLine("break;");
- return {};
- }
-
- Expression BranchIndirect(Operation operation) {
- const std::string op_a = VisitOperand(operation, 0).AsUint();
-
- code.AddLine("jmp_to = {};", op_a);
- code.AddLine("break;");
- return {};
- }
-
- Expression PushFlowStack(Operation operation) {
- const auto stack = std::get<MetaStackClass>(operation.GetMeta());
- const auto target = std::get_if<ImmediateNode>(&*operation[0]);
- UNIMPLEMENTED_IF(!target);
-
- code.AddLine("{}[{}++] = 0x{:X}U;", FlowStackName(stack), FlowStackTopName(stack),
- target->GetValue());
- return {};
- }
-
- Expression PopFlowStack(Operation operation) {
- const auto stack = std::get<MetaStackClass>(operation.GetMeta());
- code.AddLine("jmp_to = {}[--{}];", FlowStackName(stack), FlowStackTopName(stack));
- code.AddLine("break;");
- return {};
- }
-
- void PreExit() {
- if (stage != ShaderType::Fragment) {
- return;
- }
- const auto& used_registers = ir.GetRegisters();
- const auto SafeGetRegister = [&](u32 reg) -> Expression {
- // TODO(Rodrigo): Replace with contains once C++20 releases
- if (used_registers.find(reg) != used_registers.end()) {
- return {GetRegister(reg), Type::Float};
- }
- return {"0.0f", Type::Float};
- };
-
- UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Sample mask write is unimplemented");
-
-        // Write the color outputs using the data in the shader registers; disabled
-        // render targets/components are skipped in the register assignment.
- u32 current_reg = 0;
- for (u32 render_target = 0; render_target < Maxwell::NumRenderTargets; ++render_target) {
- // TODO(Subv): Figure out how dual-source blending is configured in the Switch.
- for (u32 component = 0; component < 4; ++component) {
- if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
- code.AddLine("frag_color{}{} = {};", render_target, GetColorSwizzle(component),
- SafeGetRegister(current_reg).AsFloat());
- ++current_reg;
- }
- }
- }
- if (header.ps.omap.depth) {
- // The depth output is always 2 registers after the last color output, and current_reg
- // already contains one past the last color register.
- code.AddLine("gl_FragDepth = {};", SafeGetRegister(current_reg + 1).AsFloat());
- }
- }
-
- Expression Exit(Operation operation) {
- PreExit();
- code.AddLine("return;");
- return {};
- }
-
- Expression Discard(Operation operation) {
- // Enclose "discard" in a conditional, so that GLSL compilation does not complain
- // about unexecuted instructions that may follow this.
- code.AddLine("if (true) {{");
- ++code.scope;
- code.AddLine("discard;");
- --code.scope;
- code.AddLine("}}");
- return {};
- }
-
- Expression EmitVertex(Operation operation) {
- ASSERT_MSG(stage == ShaderType::Geometry,
- "EmitVertex is expected to be used in a geometry shader.");
- code.AddLine("EmitVertex();");
- return {};
- }
-
- Expression EndPrimitive(Operation operation) {
- ASSERT_MSG(stage == ShaderType::Geometry,
- "EndPrimitive is expected to be used in a geometry shader.");
- code.AddLine("EndPrimitive();");
- return {};
- }
-
- Expression InvocationId(Operation operation) {
- return {"gl_InvocationID", Type::Int};
- }
-
- Expression YNegate(Operation operation) {
- // Y_NEGATE is mapped to this uniform value
- return {"gl_FrontMaterial.ambient.a", Type::Float};
- }
-
- template <u32 element>
- Expression LocalInvocationId(Operation) {
- return {"gl_LocalInvocationID"s + GetSwizzle(element), Type::Uint};
- }
-
- template <u32 element>
- Expression WorkGroupId(Operation) {
- return {"gl_WorkGroupID"s + GetSwizzle(element), Type::Uint};
- }
-
- Expression BallotThread(Operation operation) {
- const std::string value = VisitOperand(operation, 0).AsBool();
- if (!device.HasWarpIntrinsics()) {
- LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader");
- // Stub on non-Nvidia devices by simulating all threads voting the same as the active
- // one.
- return {fmt::format("({} ? 0xFFFFFFFFU : 0U)", value), Type::Uint};
- }
- return {fmt::format("ballotThreadNV({})", value), Type::Uint};
- }
-
- Expression Vote(Operation operation, const char* func) {
- const std::string value = VisitOperand(operation, 0).AsBool();
- if (!device.HasWarpIntrinsics()) {
- LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader");
- // Stub with a warp size of one.
- return {value, Type::Bool};
- }
- return {fmt::format("{}({})", func, value), Type::Bool};
- }
-
- Expression VoteAll(Operation operation) {
- return Vote(operation, "allThreadsNV");
- }
-
- Expression VoteAny(Operation operation) {
- return Vote(operation, "anyThreadNV");
- }
-
- Expression VoteEqual(Operation operation) {
- if (!device.HasWarpIntrinsics()) {
- LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader");
-            // We must return true here as a stub for a theoretical warp size of 1,
-            // which always produces an equal result across all votes.
- return {"true", Type::Bool};
- }
- return Vote(operation, "allThreadsEqualNV");
- }
-
- Expression ThreadId(Operation operation) {
- if (!device.HasShaderBallot()) {
- LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader");
- return {"0U", Type::Uint};
- }
- return {"gl_SubGroupInvocationARB", Type::Uint};
- }
-
- template <const std::string_view& comparison>
- Expression ThreadMask(Operation) {
- if (device.HasWarpIntrinsics()) {
- return {fmt::format("gl_Thread{}MaskNV", comparison), Type::Uint};
- }
- if (device.HasShaderBallot()) {
- return {fmt::format("uint(gl_SubGroup{}MaskARB)", comparison), Type::Uint};
- }
- LOG_ERROR(Render_OpenGL, "Thread mask intrinsics are required by the shader");
- return {"0U", Type::Uint};
- }
-
- Expression ShuffleIndexed(Operation operation) {
- std::string value = VisitOperand(operation, 0).AsFloat();
-
- if (!device.HasShaderBallot()) {
- LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader");
- return {std::move(value), Type::Float};
- }
-
- const std::string index = VisitOperand(operation, 1).AsUint();
- return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float};
- }
-
- Expression Barrier(Operation) {
- if (!ir.IsDecompiled()) {
- LOG_ERROR(Render_OpenGL, "barrier() used but shader is not decompiled");
- return {};
- }
- code.AddLine("barrier();");
- return {};
- }
-
- Expression MemoryBarrierGroup(Operation) {
- code.AddLine("groupMemoryBarrier();");
- return {};
- }
-
- Expression MemoryBarrierGlobal(Operation) {
- code.AddLine("memoryBarrier();");
- return {};
- }
-
- struct Func final {
- Func() = delete;
- ~Func() = delete;
-
- static constexpr std::string_view LessThan = "<";
- static constexpr std::string_view Equal = "==";
- static constexpr std::string_view LessEqual = "<=";
- static constexpr std::string_view GreaterThan = ">";
- static constexpr std::string_view NotEqual = "!=";
- static constexpr std::string_view GreaterEqual = ">=";
-
- static constexpr std::string_view Eq = "Eq";
- static constexpr std::string_view Ge = "Ge";
- static constexpr std::string_view Gt = "Gt";
- static constexpr std::string_view Le = "Le";
- static constexpr std::string_view Lt = "Lt";
-
- static constexpr std::string_view Add = "Add";
- static constexpr std::string_view Min = "Min";
- static constexpr std::string_view Max = "Max";
- static constexpr std::string_view And = "And";
- static constexpr std::string_view Or = "Or";
- static constexpr std::string_view Xor = "Xor";
- static constexpr std::string_view Exchange = "Exchange";
- };
-
- static constexpr std::array operation_decompilers = {
- &GLSLDecompiler::Assign,
-
- &GLSLDecompiler::Select,
-
- &GLSLDecompiler::Add<Type::Float>,
- &GLSLDecompiler::Mul<Type::Float>,
- &GLSLDecompiler::Div<Type::Float>,
- &GLSLDecompiler::Fma<Type::Float>,
- &GLSLDecompiler::Negate<Type::Float>,
- &GLSLDecompiler::Absolute<Type::Float>,
- &GLSLDecompiler::FClamp,
- &GLSLDecompiler::FCastHalf0,
- &GLSLDecompiler::FCastHalf1,
- &GLSLDecompiler::Min<Type::Float>,
- &GLSLDecompiler::Max<Type::Float>,
- &GLSLDecompiler::FCos,
- &GLSLDecompiler::FSin,
- &GLSLDecompiler::FExp2,
- &GLSLDecompiler::FLog2,
- &GLSLDecompiler::FInverseSqrt,
- &GLSLDecompiler::FSqrt,
- &GLSLDecompiler::FRoundEven,
- &GLSLDecompiler::FFloor,
- &GLSLDecompiler::FCeil,
- &GLSLDecompiler::FTrunc,
- &GLSLDecompiler::FCastInteger<Type::Int>,
- &GLSLDecompiler::FCastInteger<Type::Uint>,
- &GLSLDecompiler::FSwizzleAdd,
-
- &GLSLDecompiler::Add<Type::Int>,
- &GLSLDecompiler::Mul<Type::Int>,
- &GLSLDecompiler::Div<Type::Int>,
- &GLSLDecompiler::Negate<Type::Int>,
- &GLSLDecompiler::Absolute<Type::Int>,
- &GLSLDecompiler::Min<Type::Int>,
- &GLSLDecompiler::Max<Type::Int>,
-
- &GLSLDecompiler::ICastFloat,
- &GLSLDecompiler::ICastUnsigned,
- &GLSLDecompiler::LogicalShiftLeft<Type::Int>,
- &GLSLDecompiler::ILogicalShiftRight,
- &GLSLDecompiler::IArithmeticShiftRight,
- &GLSLDecompiler::BitwiseAnd<Type::Int>,
- &GLSLDecompiler::BitwiseOr<Type::Int>,
- &GLSLDecompiler::BitwiseXor<Type::Int>,
- &GLSLDecompiler::BitwiseNot<Type::Int>,
- &GLSLDecompiler::BitfieldInsert<Type::Int>,
- &GLSLDecompiler::BitfieldExtract<Type::Int>,
- &GLSLDecompiler::BitCount<Type::Int>,
- &GLSLDecompiler::BitMSB<Type::Int>,
-
- &GLSLDecompiler::Add<Type::Uint>,
- &GLSLDecompiler::Mul<Type::Uint>,
- &GLSLDecompiler::Div<Type::Uint>,
- &GLSLDecompiler::Min<Type::Uint>,
- &GLSLDecompiler::Max<Type::Uint>,
- &GLSLDecompiler::UCastFloat,
- &GLSLDecompiler::UCastSigned,
- &GLSLDecompiler::LogicalShiftLeft<Type::Uint>,
- &GLSLDecompiler::UShiftRight,
- &GLSLDecompiler::UShiftRight,
- &GLSLDecompiler::BitwiseAnd<Type::Uint>,
- &GLSLDecompiler::BitwiseOr<Type::Uint>,
- &GLSLDecompiler::BitwiseXor<Type::Uint>,
- &GLSLDecompiler::BitwiseNot<Type::Uint>,
- &GLSLDecompiler::BitfieldInsert<Type::Uint>,
- &GLSLDecompiler::BitfieldExtract<Type::Uint>,
- &GLSLDecompiler::BitCount<Type::Uint>,
- &GLSLDecompiler::BitMSB<Type::Uint>,
-
- &GLSLDecompiler::Add<Type::HalfFloat>,
- &GLSLDecompiler::Mul<Type::HalfFloat>,
- &GLSLDecompiler::Fma<Type::HalfFloat>,
- &GLSLDecompiler::Absolute<Type::HalfFloat>,
- &GLSLDecompiler::HNegate,
- &GLSLDecompiler::HClamp,
- &GLSLDecompiler::HCastFloat,
- &GLSLDecompiler::HUnpack,
- &GLSLDecompiler::HMergeF32,
- &GLSLDecompiler::HMergeH0,
- &GLSLDecompiler::HMergeH1,
- &GLSLDecompiler::HPack2,
-
- &GLSLDecompiler::LogicalAssign,
- &GLSLDecompiler::LogicalAnd,
- &GLSLDecompiler::LogicalOr,
- &GLSLDecompiler::LogicalXor,
- &GLSLDecompiler::LogicalNegate,
- &GLSLDecompiler::LogicalPick2,
- &GLSLDecompiler::LogicalAnd2,
-
- &GLSLDecompiler::Comparison<Func::LessThan, Type::Float, false>,
- &GLSLDecompiler::Comparison<Func::Equal, Type::Float, false>,
- &GLSLDecompiler::Comparison<Func::LessEqual, Type::Float, false>,
- &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Float, false>,
- &GLSLDecompiler::Comparison<Func::NotEqual, Type::Float, false>,
- &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Float, false>,
- &GLSLDecompiler::FOrdered,
- &GLSLDecompiler::FUnordered,
- &GLSLDecompiler::Comparison<Func::LessThan, Type::Float, true>,
- &GLSLDecompiler::Comparison<Func::Equal, Type::Float, true>,
- &GLSLDecompiler::Comparison<Func::LessEqual, Type::Float, true>,
- &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Float, true>,
- &GLSLDecompiler::Comparison<Func::NotEqual, Type::Float, true>,
- &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Float, true>,
-
- &GLSLDecompiler::Comparison<Func::LessThan, Type::Int>,
- &GLSLDecompiler::Comparison<Func::Equal, Type::Int>,
- &GLSLDecompiler::Comparison<Func::LessEqual, Type::Int>,
- &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Int>,
- &GLSLDecompiler::Comparison<Func::NotEqual, Type::Int>,
- &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Int>,
-
- &GLSLDecompiler::Comparison<Func::LessThan, Type::Uint>,
- &GLSLDecompiler::Comparison<Func::Equal, Type::Uint>,
- &GLSLDecompiler::Comparison<Func::LessEqual, Type::Uint>,
- &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Uint>,
- &GLSLDecompiler::Comparison<Func::NotEqual, Type::Uint>,
- &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Uint>,
-
- &GLSLDecompiler::LogicalAddCarry,
-
- &GLSLDecompiler::Logical2HLessThan<false>,
- &GLSLDecompiler::Logical2HEqual<false>,
- &GLSLDecompiler::Logical2HLessEqual<false>,
- &GLSLDecompiler::Logical2HGreaterThan<false>,
- &GLSLDecompiler::Logical2HNotEqual<false>,
- &GLSLDecompiler::Logical2HGreaterEqual<false>,
- &GLSLDecompiler::Logical2HLessThan<true>,
- &GLSLDecompiler::Logical2HEqual<true>,
- &GLSLDecompiler::Logical2HLessEqual<true>,
- &GLSLDecompiler::Logical2HGreaterThan<true>,
- &GLSLDecompiler::Logical2HNotEqual<true>,
- &GLSLDecompiler::Logical2HGreaterEqual<true>,
-
- &GLSLDecompiler::Texture,
- &GLSLDecompiler::TextureLod,
- &GLSLDecompiler::TextureGather,
- &GLSLDecompiler::TextureQueryDimensions,
- &GLSLDecompiler::TextureQueryLod,
- &GLSLDecompiler::TexelFetch,
- &GLSLDecompiler::TextureGradient,
-
- &GLSLDecompiler::ImageLoad,
- &GLSLDecompiler::ImageStore,
-
- &GLSLDecompiler::AtomicImage<Func::Add>,
- &GLSLDecompiler::AtomicImage<Func::And>,
- &GLSLDecompiler::AtomicImage<Func::Or>,
- &GLSLDecompiler::AtomicImage<Func::Xor>,
- &GLSLDecompiler::AtomicImage<Func::Exchange>,
-
- &GLSLDecompiler::Atomic<Func::Exchange, Type::Uint>,
- &GLSLDecompiler::Atomic<Func::Add, Type::Uint>,
- &GLSLDecompiler::Atomic<Func::Min, Type::Uint>,
- &GLSLDecompiler::Atomic<Func::Max, Type::Uint>,
- &GLSLDecompiler::Atomic<Func::And, Type::Uint>,
- &GLSLDecompiler::Atomic<Func::Or, Type::Uint>,
- &GLSLDecompiler::Atomic<Func::Xor, Type::Uint>,
-
- &GLSLDecompiler::Atomic<Func::Exchange, Type::Int>,
- &GLSLDecompiler::Atomic<Func::Add, Type::Int>,
- &GLSLDecompiler::Atomic<Func::Min, Type::Int>,
- &GLSLDecompiler::Atomic<Func::Max, Type::Int>,
- &GLSLDecompiler::Atomic<Func::And, Type::Int>,
- &GLSLDecompiler::Atomic<Func::Or, Type::Int>,
- &GLSLDecompiler::Atomic<Func::Xor, Type::Int>,
-
- &GLSLDecompiler::Reduce<Func::Add, Type::Uint>,
- &GLSLDecompiler::Reduce<Func::Min, Type::Uint>,
- &GLSLDecompiler::Reduce<Func::Max, Type::Uint>,
- &GLSLDecompiler::Reduce<Func::And, Type::Uint>,
- &GLSLDecompiler::Reduce<Func::Or, Type::Uint>,
- &GLSLDecompiler::Reduce<Func::Xor, Type::Uint>,
-
- &GLSLDecompiler::Reduce<Func::Add, Type::Int>,
- &GLSLDecompiler::Reduce<Func::Min, Type::Int>,
- &GLSLDecompiler::Reduce<Func::Max, Type::Int>,
- &GLSLDecompiler::Reduce<Func::And, Type::Int>,
- &GLSLDecompiler::Reduce<Func::Or, Type::Int>,
- &GLSLDecompiler::Reduce<Func::Xor, Type::Int>,
-
- &GLSLDecompiler::Branch,
- &GLSLDecompiler::BranchIndirect,
- &GLSLDecompiler::PushFlowStack,
- &GLSLDecompiler::PopFlowStack,
- &GLSLDecompiler::Exit,
- &GLSLDecompiler::Discard,
-
- &GLSLDecompiler::EmitVertex,
- &GLSLDecompiler::EndPrimitive,
-
- &GLSLDecompiler::InvocationId,
- &GLSLDecompiler::YNegate,
- &GLSLDecompiler::LocalInvocationId<0>,
- &GLSLDecompiler::LocalInvocationId<1>,
- &GLSLDecompiler::LocalInvocationId<2>,
- &GLSLDecompiler::WorkGroupId<0>,
- &GLSLDecompiler::WorkGroupId<1>,
- &GLSLDecompiler::WorkGroupId<2>,
-
- &GLSLDecompiler::BallotThread,
- &GLSLDecompiler::VoteAll,
- &GLSLDecompiler::VoteAny,
- &GLSLDecompiler::VoteEqual,
-
- &GLSLDecompiler::ThreadId,
- &GLSLDecompiler::ThreadMask<Func::Eq>,
- &GLSLDecompiler::ThreadMask<Func::Ge>,
- &GLSLDecompiler::ThreadMask<Func::Gt>,
- &GLSLDecompiler::ThreadMask<Func::Le>,
- &GLSLDecompiler::ThreadMask<Func::Lt>,
- &GLSLDecompiler::ShuffleIndexed,
-
- &GLSLDecompiler::Barrier,
- &GLSLDecompiler::MemoryBarrierGroup,
- &GLSLDecompiler::MemoryBarrierGlobal,
- };
- static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
-
- std::string GetRegister(u32 index) const {
- return AppendSuffix(index, "gpr");
- }
-
- std::string GetCustomVariable(u32 index) const {
- return AppendSuffix(index, "custom_var");
- }
-
- std::string GetPredicate(Tegra::Shader::Pred pred) const {
- return AppendSuffix(static_cast<u32>(pred), "pred");
- }
-
- std::string GetGenericInputAttribute(Attribute::Index attribute) const {
- return AppendSuffix(GetGenericAttributeIndex(attribute), INPUT_ATTRIBUTE_NAME);
- }
-
- std::unordered_map<u8, GenericVaryingDescription> varying_description;
-
- std::string GetGenericOutputAttribute(Attribute::Index attribute, std::size_t element) const {
- const u8 offset = static_cast<u8>(GetGenericAttributeIndex(attribute) * 4 + element);
- const auto& description = varying_description.at(offset);
- if (description.is_scalar) {
- return description.name;
- }
- return fmt::format("{}[{}]", description.name, element - description.first_element);
- }
-
- std::string GetConstBuffer(u32 index) const {
- return AppendSuffix(index, "cbuf");
- }
-
- std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const {
- return fmt::format("gmem_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset, suffix);
- }
-
- std::string GetGlobalMemoryBlock(const GlobalMemoryBase& descriptor) const {
- return fmt::format("gmem_block_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset,
- suffix);
- }
-
- std::string GetConstBufferBlock(u32 index) const {
- return AppendSuffix(index, "cbuf_block");
- }
-
- std::string GetLocalMemory() const {
- if (suffix.empty()) {
- return "lmem";
- } else {
- return "lmem_" + std::string{suffix};
- }
- }
-
- std::string GetInternalFlag(InternalFlag flag) const {
- constexpr std::array InternalFlagNames = {"zero_flag", "sign_flag", "carry_flag",
- "overflow_flag"};
- const auto index = static_cast<u32>(flag);
- ASSERT(index < static_cast<u32>(InternalFlag::Amount));
-
- if (suffix.empty()) {
- return InternalFlagNames[index];
- } else {
- return fmt::format("{}_{}", InternalFlagNames[index], suffix);
- }
- }
-
- std::string GetSampler(const SamplerEntry& sampler) const {
- return AppendSuffix(sampler.index, "sampler");
- }
-
- std::string GetImage(const ImageEntry& image) const {
- return AppendSuffix(image.index, "image");
- }
-
- std::string AppendSuffix(u32 index, std::string_view name) const {
- if (suffix.empty()) {
- return fmt::format("{}{}", name, index);
- } else {
- return fmt::format("{}{}_{}", name, index, suffix);
- }
- }
-
- u32 GetNumPhysicalInputAttributes() const {
- return stage == ShaderType::Vertex ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings();
- }
-
- u32 GetNumPhysicalAttributes() const {
- return std::min<u32>(device.GetMaxVertexAttributes(), Maxwell::NumVertexAttributes);
- }
-
- u32 GetNumPhysicalVaryings() const {
- return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings);
- }
-
- const Device& device;
- const ShaderIR& ir;
- const Registry& registry;
- const ShaderType stage;
- const std::string_view identifier;
- const std::string_view suffix;
- const Header header;
- std::unordered_map<u8, VaryingTFB> transform_feedback;
-
- ShaderWriter code;
-
- std::optional<u32> max_input_vertices;
-};
-
-std::string GetFlowVariable(u32 index) {
- return fmt::format("flow_var{}", index);
-}
-
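-// Converts control-flow expressions from the AST into GLSL boolean expressions.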
-class ExprDecompiler {
-public:
- explicit ExprDecompiler(GLSLDecompiler& decomp_) : decomp{decomp_} {}
-
- void operator()(const ExprAnd& expr) {
- inner += '(';
- std::visit(*this, *expr.operand1);
- inner += " && ";
- std::visit(*this, *expr.operand2);
- inner += ')';
- }
-
- void operator()(const ExprOr& expr) {
- inner += '(';
- std::visit(*this, *expr.operand1);
- inner += " || ";
- std::visit(*this, *expr.operand2);
- inner += ')';
- }
-
- void operator()(const ExprNot& expr) {
- inner += '!';
- std::visit(*this, *expr.operand1);
- }
-
- void operator()(const ExprPredicate& expr) {
- const auto pred = static_cast<Tegra::Shader::Pred>(expr.predicate);
- inner += decomp.GetPredicate(pred);
- }
-
- void operator()(const ExprCondCode& expr) {
- inner += decomp.Visit(decomp.ir.GetConditionCode(expr.cc)).AsBool();
- }
-
- void operator()(const ExprVar& expr) {
- inner += GetFlowVariable(expr.var_index);
- }
-
- void operator()(const ExprBoolean& expr) {
- inner += expr.value ? "true" : "false";
- }
-
- void operator()(VideoCommon::Shader::ExprGprEqual& expr) {
- inner += fmt::format("(ftou({}) == {})", decomp.GetRegister(expr.gpr), expr.value);
- }
-
- const std::string& GetResult() const {
- return inner;
- }
-
-private:
- GLSLDecompiler& decomp;
- std::string inner;
-};
-
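-// Emits GLSL statements for the structured control-flow AST, delegating expressions to
-// ExprDecompiler and decoded basic blocks to GLSLDecompiler::VisitBlock.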
-class ASTDecompiler {
-public:
- explicit ASTDecompiler(GLSLDecompiler& decomp_) : decomp{decomp_} {}
-
- void operator()(const ASTProgram& ast) {
- ASTNode current = ast.nodes.GetFirst();
- while (current) {
- Visit(current);
- current = current->GetNext();
- }
- }
-
- void operator()(const ASTIfThen& ast) {
- ExprDecompiler expr_parser{decomp};
- std::visit(expr_parser, *ast.condition);
- decomp.code.AddLine("if ({}) {{", expr_parser.GetResult());
- decomp.code.scope++;
- ASTNode current = ast.nodes.GetFirst();
- while (current) {
- Visit(current);
- current = current->GetNext();
- }
- decomp.code.scope--;
- decomp.code.AddLine("}}");
- }
-
- void operator()(const ASTIfElse& ast) {
- decomp.code.AddLine("else {{");
- decomp.code.scope++;
- ASTNode current = ast.nodes.GetFirst();
- while (current) {
- Visit(current);
- current = current->GetNext();
- }
- decomp.code.scope--;
- decomp.code.AddLine("}}");
- }
-
- void operator()([[maybe_unused]] const ASTBlockEncoded& ast) {
- UNREACHABLE();
- }
-
- void operator()(const ASTBlockDecoded& ast) {
- decomp.VisitBlock(ast.nodes);
- }
-
- void operator()(const ASTVarSet& ast) {
- ExprDecompiler expr_parser{decomp};
- std::visit(expr_parser, *ast.condition);
- decomp.code.AddLine("{} = {};", GetFlowVariable(ast.index), expr_parser.GetResult());
- }
-
- void operator()(const ASTLabel& ast) {
- decomp.code.AddLine("// Label_{}:", ast.index);
- }
-
- void operator()([[maybe_unused]] const ASTGoto& ast) {
- UNREACHABLE();
- }
-
- void operator()(const ASTDoWhile& ast) {
- ExprDecompiler expr_parser{decomp};
- std::visit(expr_parser, *ast.condition);
- decomp.code.AddLine("do {{");
- decomp.code.scope++;
- ASTNode current = ast.nodes.GetFirst();
- while (current) {
- Visit(current);
- current = current->GetNext();
- }
- decomp.code.scope--;
- decomp.code.AddLine("}} while({});", expr_parser.GetResult());
- }
-
- void operator()(const ASTReturn& ast) {
- const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition);
- if (!is_true) {
- ExprDecompiler expr_parser{decomp};
- std::visit(expr_parser, *ast.condition);
- decomp.code.AddLine("if ({}) {{", expr_parser.GetResult());
- decomp.code.scope++;
- }
- if (ast.kills) {
- decomp.code.AddLine("discard;");
- } else {
- decomp.PreExit();
- decomp.code.AddLine("return;");
- }
- if (!is_true) {
- decomp.code.scope--;
- decomp.code.AddLine("}}");
- }
- }
-
- void operator()(const ASTBreak& ast) {
- const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition);
- if (!is_true) {
- ExprDecompiler expr_parser{decomp};
- std::visit(expr_parser, *ast.condition);
- decomp.code.AddLine("if ({}) {{", expr_parser.GetResult());
- decomp.code.scope++;
- }
- decomp.code.AddLine("break;");
- if (!is_true) {
- decomp.code.scope--;
- decomp.code.AddLine("}}");
- }
- }
-
- void Visit(const ASTNode& node) {
- std::visit(*this, *node->GetInnerData());
- }
-
-private:
- GLSLDecompiler& decomp;
-};
-
-void GLSLDecompiler::DecompileAST() {
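-    // Declare one boolean per flow variable used by the reconstructed control flow.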
- const u32 num_flow_variables = ir.GetASTNumVariables();
- for (u32 i = 0; i < num_flow_variables; i++) {
- code.AddLine("bool {} = false;", GetFlowVariable(i));
- }
-
- ASTDecompiler decompiler{*this};
- decompiler.Visit(ir.GetASTProgram());
-}
-
-} // Anonymous namespace
-
-ShaderEntries MakeEntries(const Device& device, const ShaderIR& ir, ShaderType stage) {
- ShaderEntries entries;
- for (const auto& cbuf : ir.GetConstantBuffers()) {
- entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(),
- cbuf.first);
- }
- for (const auto& [base, usage] : ir.GetGlobalMemory()) {
- entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, usage.is_read,
- usage.is_written);
- }
- for (const auto& sampler : ir.GetSamplers()) {
- entries.samplers.emplace_back(sampler);
- }
- for (const auto& image : ir.GetImages()) {
- entries.images.emplace_back(image);
- }
- const auto clip_distances = ir.GetClipDistances();
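-    // Pack the enabled clip distances into a bitmask, one bit per distance.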
- for (std::size_t i = 0; i < std::size(clip_distances); ++i) {
-        entries.clip_distances |= (clip_distances[i] ? 1U : 0U) << i;
- }
- for (const auto& buffer : entries.const_buffers) {
- entries.enabled_uniform_buffers |= 1U << buffer.GetIndex();
- }
- entries.shader_length = ir.GetLength();
- return entries;
-}
-
-std::string DecompileShader(const Device& device, const ShaderIR& ir, const Registry& registry,
- ShaderType stage, std::string_view identifier,
- std::string_view suffix) {
- GLSLDecompiler decompiler(device, ir, registry, stage, identifier, suffix);
- decompiler.Decompile();
- return decompiler.GetResult();
-}
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
deleted file mode 100644
index 0397a000c..000000000
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ /dev/null
@@ -1,69 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <array>
-#include <string>
-#include <string_view>
-#include <utility>
-#include <vector>
-#include "common/common_types.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/engines/shader_type.h"
-#include "video_core/shader/registry.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace OpenGL {
-
-class Device;
-
-using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-using SamplerEntry = VideoCommon::Shader::SamplerEntry;
-using ImageEntry = VideoCommon::Shader::ImageEntry;
-
-class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
-public:
- explicit ConstBufferEntry(u32 max_offset_, bool is_indirect_, u32 index_)
- : ConstBuffer{max_offset_, is_indirect_}, index{index_} {}
-
- u32 GetIndex() const {
- return index;
- }
-
-private:
- u32 index = 0;
-};
-
-struct GlobalMemoryEntry {
- constexpr explicit GlobalMemoryEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_read_,
- bool is_written_)
- : cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_read{is_read_}, is_written{
- is_written_} {}
-
- u32 cbuf_index = 0;
- u32 cbuf_offset = 0;
- bool is_read = false;
- bool is_written = false;
-};
-
-struct ShaderEntries {
- std::vector<ConstBufferEntry> const_buffers;
- std::vector<GlobalMemoryEntry> global_memory_entries;
- std::vector<SamplerEntry> samplers;
- std::vector<ImageEntry> images;
- std::size_t shader_length{};
- u32 clip_distances{};
- u32 enabled_uniform_buffers{};
-};
-
-ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
- Tegra::Engines::ShaderType stage);
-
-std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
- const VideoCommon::Shader::Registry& registry,
- Tegra::Engines::ShaderType stage, std::string_view identifier,
- std::string_view suffix = {});
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
deleted file mode 100644
index 0deb86517..000000000
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ /dev/null
@@ -1,482 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <cstring>
-
-#include <fmt/format.h>
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "common/fs/file.h"
-#include "common/fs/fs.h"
-#include "common/fs/path_util.h"
-#include "common/logging/log.h"
-#include "common/scm_rev.h"
-#include "common/settings.h"
-#include "common/zstd_compression.h"
-#include "core/core.h"
-#include "core/hle/kernel/k_process.h"
-#include "video_core/engines/shader_type.h"
-#include "video_core/renderer_opengl/gl_shader_cache.h"
-#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
-
-namespace OpenGL {
-
-using Tegra::Engines::ShaderType;
-using VideoCommon::Shader::BindlessSamplerMap;
-using VideoCommon::Shader::BoundSamplerMap;
-using VideoCommon::Shader::KeyMap;
-using VideoCommon::Shader::SeparateSamplerKey;
-using ShaderCacheVersionHash = std::array<u8, 64>;
-
-struct ConstBufferKey {
- u32 cbuf = 0;
- u32 offset = 0;
- u32 value = 0;
-};
-
-struct BoundSamplerEntry {
- u32 offset = 0;
- Tegra::Engines::SamplerDescriptor sampler;
-};
-
-struct SeparateSamplerEntry {
- u32 cbuf1 = 0;
- u32 cbuf2 = 0;
- u32 offset1 = 0;
- u32 offset2 = 0;
- Tegra::Engines::SamplerDescriptor sampler;
-};
-
-struct BindlessSamplerEntry {
- u32 cbuf = 0;
- u32 offset = 0;
- Tegra::Engines::SamplerDescriptor sampler;
-};
-
-namespace {
-
-constexpr u32 NativeVersion = 21;
-
-ShaderCacheVersionHash GetShaderCacheVersionHash() {
- ShaderCacheVersionHash hash{};
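-    // Copy the build's shader cache version string into a fixed-size, zero-padded buffer.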
- const std::size_t length = std::min(std::strlen(Common::g_shader_cache_version), hash.size());
- std::memcpy(hash.data(), Common::g_shader_cache_version, length);
- return hash;
-}
-
-} // Anonymous namespace
-
-ShaderDiskCacheEntry::ShaderDiskCacheEntry() = default;
-
-ShaderDiskCacheEntry::~ShaderDiskCacheEntry() = default;
-
-bool ShaderDiskCacheEntry::Load(Common::FS::IOFile& file) {
- if (!file.ReadObject(type)) {
- return false;
- }
- u32 code_size;
- u32 code_size_b;
- if (!file.ReadObject(code_size) || !file.ReadObject(code_size_b)) {
- return false;
- }
- code.resize(code_size);
- code_b.resize(code_size_b);
- if (file.Read(code) != code_size) {
- return false;
- }
- if (HasProgramA() && file.Read(code_b) != code_size_b) {
- return false;
- }
-
- u8 is_texture_handler_size_known;
- u32 texture_handler_size_value;
- u32 num_keys;
- u32 num_bound_samplers;
- u32 num_separate_samplers;
- u32 num_bindless_samplers;
- if (!file.ReadObject(unique_identifier) || !file.ReadObject(bound_buffer) ||
- !file.ReadObject(is_texture_handler_size_known) ||
- !file.ReadObject(texture_handler_size_value) || !file.ReadObject(graphics_info) ||
- !file.ReadObject(compute_info) || !file.ReadObject(num_keys) ||
- !file.ReadObject(num_bound_samplers) || !file.ReadObject(num_separate_samplers) ||
- !file.ReadObject(num_bindless_samplers)) {
- return false;
- }
- if (is_texture_handler_size_known) {
- texture_handler_size = texture_handler_size_value;
- }
-
- std::vector<ConstBufferKey> flat_keys(num_keys);
- std::vector<BoundSamplerEntry> flat_bound_samplers(num_bound_samplers);
- std::vector<SeparateSamplerEntry> flat_separate_samplers(num_separate_samplers);
- std::vector<BindlessSamplerEntry> flat_bindless_samplers(num_bindless_samplers);
- if (file.Read(flat_keys) != flat_keys.size() ||
- file.Read(flat_bound_samplers) != flat_bound_samplers.size() ||
- file.Read(flat_separate_samplers) != flat_separate_samplers.size() ||
- file.Read(flat_bindless_samplers) != flat_bindless_samplers.size()) {
- return false;
- }
- for (const auto& entry : flat_keys) {
- keys.insert({{entry.cbuf, entry.offset}, entry.value});
- }
- for (const auto& entry : flat_bound_samplers) {
- bound_samplers.emplace(entry.offset, entry.sampler);
- }
- for (const auto& entry : flat_separate_samplers) {
- SeparateSamplerKey key;
- key.buffers = {entry.cbuf1, entry.cbuf2};
- key.offsets = {entry.offset1, entry.offset2};
- separate_samplers.emplace(key, entry.sampler);
- }
- for (const auto& entry : flat_bindless_samplers) {
- bindless_samplers.insert({{entry.cbuf, entry.offset}, entry.sampler});
- }
-
- return true;
-}
-
-bool ShaderDiskCacheEntry::Save(Common::FS::IOFile& file) const {
- if (!file.WriteObject(static_cast<u32>(type)) ||
- !file.WriteObject(static_cast<u32>(code.size())) ||
- !file.WriteObject(static_cast<u32>(code_b.size()))) {
- return false;
- }
- if (file.Write(code) != code.size()) {
- return false;
- }
- if (HasProgramA() && file.Write(code_b) != code_b.size()) {
- return false;
- }
-
- if (!file.WriteObject(unique_identifier) || !file.WriteObject(bound_buffer) ||
- !file.WriteObject(static_cast<u8>(texture_handler_size.has_value())) ||
- !file.WriteObject(texture_handler_size.value_or(0)) || !file.WriteObject(graphics_info) ||
- !file.WriteObject(compute_info) || !file.WriteObject(static_cast<u32>(keys.size())) ||
- !file.WriteObject(static_cast<u32>(bound_samplers.size())) ||
- !file.WriteObject(static_cast<u32>(separate_samplers.size())) ||
- !file.WriteObject(static_cast<u32>(bindless_samplers.size()))) {
- return false;
- }
-
- std::vector<ConstBufferKey> flat_keys;
- flat_keys.reserve(keys.size());
- for (const auto& [address, value] : keys) {
- flat_keys.push_back(ConstBufferKey{address.first, address.second, value});
- }
-
- std::vector<BoundSamplerEntry> flat_bound_samplers;
- flat_bound_samplers.reserve(bound_samplers.size());
- for (const auto& [address, sampler] : bound_samplers) {
- flat_bound_samplers.push_back(BoundSamplerEntry{address, sampler});
- }
-
- std::vector<SeparateSamplerEntry> flat_separate_samplers;
- flat_separate_samplers.reserve(separate_samplers.size());
- for (const auto& [key, sampler] : separate_samplers) {
- SeparateSamplerEntry entry;
- std::tie(entry.cbuf1, entry.cbuf2) = key.buffers;
- std::tie(entry.offset1, entry.offset2) = key.offsets;
- entry.sampler = sampler;
- flat_separate_samplers.push_back(entry);
- }
-
- std::vector<BindlessSamplerEntry> flat_bindless_samplers;
- flat_bindless_samplers.reserve(bindless_samplers.size());
- for (const auto& [address, sampler] : bindless_samplers) {
- flat_bindless_samplers.push_back(
- BindlessSamplerEntry{address.first, address.second, sampler});
- }
-
- return file.Write(flat_keys) == flat_keys.size() &&
- file.Write(flat_bound_samplers) == flat_bound_samplers.size() &&
- file.Write(flat_separate_samplers) == flat_separate_samplers.size() &&
- file.Write(flat_bindless_samplers) == flat_bindless_samplers.size();
-}
-
-ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL() = default;
-
-ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default;
-
-void ShaderDiskCacheOpenGL::BindTitleID(u64 title_id_) {
- title_id = title_id_;
-}
-
-std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTransferable() {
- // Skip games without title id
- const bool has_title_id = title_id != 0;
- if (!Settings::values.use_disk_shader_cache.GetValue() || !has_title_id) {
- return std::nullopt;
- }
-
- Common::FS::IOFile file{GetTransferablePath(), Common::FS::FileAccessMode::Read,
- Common::FS::FileType::BinaryFile};
- if (!file.IsOpen()) {
- LOG_INFO(Render_OpenGL, "No transferable shader cache found");
- is_usable = true;
- return std::nullopt;
- }
-
- u32 version{};
- if (!file.ReadObject(version)) {
- LOG_ERROR(Render_OpenGL, "Failed to get transferable cache version, skipping it");
- return std::nullopt;
- }
-
- if (version < NativeVersion) {
- LOG_INFO(Render_OpenGL, "Transferable shader cache is old, removing");
- file.Close();
- InvalidateTransferable();
- is_usable = true;
- return std::nullopt;
- }
- if (version > NativeVersion) {
- LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version "
- "of the emulator, skipping");
- return std::nullopt;
- }
-
- // Version is valid, load the shaders
- std::vector<ShaderDiskCacheEntry> entries;
- while (static_cast<u64>(file.Tell()) < file.GetSize()) {
- ShaderDiskCacheEntry& entry = entries.emplace_back();
- if (!entry.Load(file)) {
- LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry, skipping");
- return std::nullopt;
- }
- }
-
- is_usable = true;
- return {std::move(entries)};
-}
-
-std::vector<ShaderDiskCachePrecompiled> ShaderDiskCacheOpenGL::LoadPrecompiled() {
- if (!is_usable) {
- return {};
- }
-
- Common::FS::IOFile file{GetPrecompiledPath(), Common::FS::FileAccessMode::Read,
- Common::FS::FileType::BinaryFile};
- if (!file.IsOpen()) {
- LOG_INFO(Render_OpenGL, "No precompiled shader cache found");
- return {};
- }
-
- if (const auto result = LoadPrecompiledFile(file)) {
- return *result;
- }
-
- LOG_INFO(Render_OpenGL, "Failed to load precompiled cache");
- file.Close();
- InvalidatePrecompiled();
- return {};
-}
-
-std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::LoadPrecompiledFile(
- Common::FS::IOFile& file) {
- // Read compressed file from disk and decompress to virtual precompiled cache file
- std::vector<u8> compressed(file.GetSize());
- if (file.Read(compressed) != file.GetSize()) {
- return std::nullopt;
- }
- const std::vector<u8> decompressed = Common::Compression::DecompressDataZSTD(compressed);
- SaveArrayToPrecompiled(decompressed.data(), decompressed.size());
- precompiled_cache_virtual_file_offset = 0;
-
- ShaderCacheVersionHash file_hash{};
- if (!LoadArrayFromPrecompiled(file_hash.data(), file_hash.size())) {
- precompiled_cache_virtual_file_offset = 0;
- return std::nullopt;
- }
- if (GetShaderCacheVersionHash() != file_hash) {
- LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator");
- precompiled_cache_virtual_file_offset = 0;
- return std::nullopt;
- }
-
- std::vector<ShaderDiskCachePrecompiled> entries;
- while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) {
- u32 binary_size;
- auto& entry = entries.emplace_back();
- if (!LoadObjectFromPrecompiled(entry.unique_identifier) ||
- !LoadObjectFromPrecompiled(entry.binary_format) ||
- !LoadObjectFromPrecompiled(binary_size)) {
- return std::nullopt;
- }
-
- entry.binary.resize(binary_size);
- if (!LoadArrayFromPrecompiled(entry.binary.data(), entry.binary.size())) {
- return std::nullopt;
- }
- }
- return entries;
-}
-
-void ShaderDiskCacheOpenGL::InvalidateTransferable() {
- if (!Common::FS::RemoveFile(GetTransferablePath())) {
- LOG_ERROR(Render_OpenGL, "Failed to invalidate transferable file={}",
- Common::FS::PathToUTF8String(GetTransferablePath()));
- }
- InvalidatePrecompiled();
-}
-
-void ShaderDiskCacheOpenGL::InvalidatePrecompiled() {
-    // Clear the virtual precompiled cache file
- precompiled_cache_virtual_file.Resize(0);
-
- if (!Common::FS::RemoveFile(GetPrecompiledPath())) {
- LOG_ERROR(Render_OpenGL, "Failed to invalidate precompiled file={}",
- Common::FS::PathToUTF8String(GetPrecompiledPath()));
- }
-}
-
-void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) {
- if (!is_usable) {
- return;
- }
-
- const u64 id = entry.unique_identifier;
- if (stored_transferable.contains(id)) {
- // The shader already exists
- return;
- }
-
- Common::FS::IOFile file = AppendTransferableFile();
- if (!file.IsOpen()) {
- return;
- }
- if (!entry.Save(file)) {
- LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing");
- file.Close();
- InvalidateTransferable();
- return;
- }
-
- stored_transferable.insert(id);
-}
-
-void ShaderDiskCacheOpenGL::SavePrecompiled(u64 unique_identifier, GLuint program) {
- if (!is_usable) {
- return;
- }
-
- // TODO(Rodrigo): This is a design smell. I shouldn't be having to manually write the header
- // when writing the dump. This should be done the moment I get access to write to the virtual
- // file.
- if (precompiled_cache_virtual_file.GetSize() == 0) {
- SavePrecompiledHeaderToVirtualPrecompiledCache();
- }
-
- GLint binary_length;
- glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length);
-
- GLenum binary_format;
- std::vector<u8> binary(binary_length);
- glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());
-
- if (!SaveObjectToPrecompiled(unique_identifier) || !SaveObjectToPrecompiled(binary_format) ||
- !SaveObjectToPrecompiled(static_cast<u32>(binary.size())) ||
- !SaveArrayToPrecompiled(binary.data(), binary.size())) {
- LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing",
- unique_identifier);
- InvalidatePrecompiled();
- }
-}
-
-Common::FS::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const {
- if (!EnsureDirectories()) {
- return {};
- }
-
- const auto transferable_path{GetTransferablePath()};
- const bool existed = Common::FS::Exists(transferable_path);
-
- Common::FS::IOFile file{transferable_path, Common::FS::FileAccessMode::Append,
- Common::FS::FileType::BinaryFile};
- if (!file.IsOpen()) {
- LOG_ERROR(Render_OpenGL, "Failed to open transferable cache in path={}",
- Common::FS::PathToUTF8String(transferable_path));
- return {};
- }
- if (!existed || file.GetSize() == 0) {
- // If the file didn't exist, write its version
- if (!file.WriteObject(NativeVersion)) {
- LOG_ERROR(Render_OpenGL, "Failed to write transferable cache version in path={}",
- Common::FS::PathToUTF8String(transferable_path));
- return {};
- }
- }
- return file;
-}
-
-void ShaderDiskCacheOpenGL::SavePrecompiledHeaderToVirtualPrecompiledCache() {
- const auto hash{GetShaderCacheVersionHash()};
- if (!SaveArrayToPrecompiled(hash.data(), hash.size())) {
- LOG_ERROR(
- Render_OpenGL,
- "Failed to write precompiled cache version hash to virtual precompiled cache file");
- }
-}
-
-void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() {
- precompiled_cache_virtual_file_offset = 0;
- const std::vector<u8> uncompressed = precompiled_cache_virtual_file.ReadAllBytes();
- const std::vector<u8> compressed =
- Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size());
-
- const auto precompiled_path = GetPrecompiledPath();
- Common::FS::IOFile file{precompiled_path, Common::FS::FileAccessMode::Write,
- Common::FS::FileType::BinaryFile};
-
- if (!file.IsOpen()) {
- LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}",
- Common::FS::PathToUTF8String(precompiled_path));
- return;
- }
- if (file.Write(compressed) != compressed.size()) {
- LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}",
- Common::FS::PathToUTF8String(precompiled_path));
- }
-}
-
-bool ShaderDiskCacheOpenGL::EnsureDirectories() const {
- const auto CreateDir = [](const std::filesystem::path& dir) {
- if (!Common::FS::CreateDir(dir)) {
- LOG_ERROR(Render_OpenGL, "Failed to create directory={}",
- Common::FS::PathToUTF8String(dir));
- return false;
- }
- return true;
- };
-
- return CreateDir(Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)) &&
- CreateDir(GetBaseDir()) && CreateDir(GetTransferableDir()) &&
- CreateDir(GetPrecompiledDir());
-}
-
-std::filesystem::path ShaderDiskCacheOpenGL::GetTransferablePath() const {
- return GetTransferableDir() / fmt::format("{}.bin", GetTitleID());
-}
-
-std::filesystem::path ShaderDiskCacheOpenGL::GetPrecompiledPath() const {
- return GetPrecompiledDir() / fmt::format("{}.bin", GetTitleID());
-}
-
-std::filesystem::path ShaderDiskCacheOpenGL::GetTransferableDir() const {
- return GetBaseDir() / "transferable";
-}
-
-std::filesystem::path ShaderDiskCacheOpenGL::GetPrecompiledDir() const {
- return GetBaseDir() / "precompiled";
-}
-
-std::filesystem::path ShaderDiskCacheOpenGL::GetBaseDir() const {
- return Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir) / "opengl";
-}
-
-std::string ShaderDiskCacheOpenGL::GetTitleID() const {
- return fmt::format("{:016X}", title_id);
-}
-
-} // namespace OpenGL
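
The removed loader and SavePrecompiled define a flat per-entry layout for the precompiled file (after ZSTD decompression and the version hash): a u64 unique identifier, the GLenum binary format, a u32 byte count, then the raw program binary. A minimal standalone decoder along those lines, purely illustrative and not part of this patch:

    #include <cstddef>
    #include <cstdint>
    #include <cstring>
    #include <vector>

    // Illustrative mirror of the removed ShaderDiskCachePrecompiled entry.
    struct DecodedPrecompiledEntry {
        std::uint64_t unique_identifier = 0;  // shader hash used as the lookup key
        std::uint32_t binary_format = 0;      // GLenum reported by glGetProgramBinary
        std::vector<std::uint8_t> binary;     // raw driver program binary
    };

    // Decodes one entry at 'offset' from an already-decompressed precompiled blob.
    // Layout (as written by the removed SavePrecompiled): u64 id, u32 format,
    // u32 size, then 'size' raw bytes. Returns false if the blob is truncated.
    bool DecodePrecompiledEntry(const std::vector<std::uint8_t>& blob, std::size_t& offset,
                                DecodedPrecompiledEntry& out) {
        const auto read = [&](void* dest, std::size_t size) {
            if (offset + size > blob.size()) {
                return false;
            }
            std::memcpy(dest, blob.data() + offset, size);
            offset += size;
            return true;
        };
        std::uint32_t binary_size = 0;
        if (!read(&out.unique_identifier, sizeof(out.unique_identifier)) ||
            !read(&out.binary_format, sizeof(out.binary_format)) ||
            !read(&binary_size, sizeof(binary_size))) {
            return false;
        }
        out.binary.resize(binary_size);
        return binary_size == 0 || read(out.binary.data(), binary_size);
    }
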
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
deleted file mode 100644
index f8bc23868..000000000
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ /dev/null
@@ -1,176 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <filesystem>
-#include <optional>
-#include <string>
-#include <tuple>
-#include <type_traits>
-#include <unordered_map>
-#include <unordered_set>
-#include <utility>
-#include <vector>
-
-#include <glad/glad.h>
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "core/file_sys/vfs_vector.h"
-#include "video_core/engines/shader_type.h"
-#include "video_core/shader/registry.h"
-
-namespace Common::FS {
-class IOFile;
-}
-
-namespace OpenGL {
-
-using ProgramCode = std::vector<u64>;
-
-/// Describes a shader and how it's used by the guest GPU
-struct ShaderDiskCacheEntry {
- ShaderDiskCacheEntry();
- ~ShaderDiskCacheEntry();
-
- bool Load(Common::FS::IOFile& file);
-
- bool Save(Common::FS::IOFile& file) const;
-
- bool HasProgramA() const {
- return !code.empty() && !code_b.empty();
- }
-
- Tegra::Engines::ShaderType type{};
- ProgramCode code;
- ProgramCode code_b;
-
- u64 unique_identifier = 0;
- std::optional<u32> texture_handler_size;
- u32 bound_buffer = 0;
- VideoCommon::Shader::GraphicsInfo graphics_info;
- VideoCommon::Shader::ComputeInfo compute_info;
- VideoCommon::Shader::KeyMap keys;
- VideoCommon::Shader::BoundSamplerMap bound_samplers;
- VideoCommon::Shader::SeparateSamplerMap separate_samplers;
- VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
-};
-
-/// Contains an OpenGL dumped binary program
-struct ShaderDiskCachePrecompiled {
- u64 unique_identifier = 0;
- GLenum binary_format = 0;
- std::vector<u8> binary;
-};
-
-class ShaderDiskCacheOpenGL {
-public:
- explicit ShaderDiskCacheOpenGL();
- ~ShaderDiskCacheOpenGL();
-
- /// Binds a title ID for all future operations.
- void BindTitleID(u64 title_id);
-
- /// Loads the transferable cache. If the file has an old version or loading fails, it deletes the file.
- std::optional<std::vector<ShaderDiskCacheEntry>> LoadTransferable();
-
- /// Loads current game's precompiled cache. Invalidates on failure.
- std::vector<ShaderDiskCachePrecompiled> LoadPrecompiled();
-
- /// Removes the transferable (and precompiled) cache file.
- void InvalidateTransferable();
-
- /// Removes the precompiled cache file and clears virtual precompiled cache file.
- void InvalidatePrecompiled();
-
- /// Saves a raw dump to the transferable file. Checks for collisions.
- void SaveEntry(const ShaderDiskCacheEntry& entry);
-
- /// Saves a dump entry to the precompiled file. Does not check for collisions.
- void SavePrecompiled(u64 unique_identifier, GLuint program);
-
- /// Serializes virtual precompiled shader cache file to real file
- void SaveVirtualPrecompiledFile();
-
-private:
- /// Loads the precompiled cache file. Returns std::nullopt on failure.
- std::optional<std::vector<ShaderDiskCachePrecompiled>> LoadPrecompiledFile(
- Common::FS::IOFile& file);
-
- /// Opens the current game's transferable file and writes its header if it doesn't exist
- Common::FS::IOFile AppendTransferableFile() const;
-
- /// Saves the precompiled header to the virtual precompiled cache file
- void SavePrecompiledHeaderToVirtualPrecompiledCache();
-
- /// Create shader disk cache directories. Returns true on success.
- bool EnsureDirectories() const;
-
- /// Gets current game's transferable file path
- std::filesystem::path GetTransferablePath() const;
-
- /// Gets current game's precompiled file path
- std::filesystem::path GetPrecompiledPath() const;
-
- /// Get user's transferable directory path
- std::filesystem::path GetTransferableDir() const;
-
- /// Get user's precompiled directory path
- std::filesystem::path GetPrecompiledDir() const;
-
- /// Get user's shader directory path
- std::filesystem::path GetBaseDir() const;
-
- /// Get current game's title id
- std::string GetTitleID() const;
-
- template <typename T>
- bool SaveArrayToPrecompiled(const T* data, std::size_t length) {
- const std::size_t write_length = precompiled_cache_virtual_file.WriteArray(
- data, length, precompiled_cache_virtual_file_offset);
- precompiled_cache_virtual_file_offset += write_length;
- return write_length == sizeof(T) * length;
- }
-
- template <typename T>
- bool LoadArrayFromPrecompiled(T* data, std::size_t length) {
- const std::size_t read_length = precompiled_cache_virtual_file.ReadArray(
- data, length, precompiled_cache_virtual_file_offset);
- precompiled_cache_virtual_file_offset += read_length;
- return read_length == sizeof(T) * length;
- }
-
- template <typename T>
- bool SaveObjectToPrecompiled(const T& object) {
- return SaveArrayToPrecompiled(&object, 1);
- }
-
- bool SaveObjectToPrecompiled(bool object) {
- const auto value = static_cast<u8>(object);
- return SaveArrayToPrecompiled(&value, 1);
- }
-
- template <typename T>
- bool LoadObjectFromPrecompiled(T& object) {
- return LoadArrayFromPrecompiled(&object, 1);
- }
-
- // Stores the whole precompiled cache which will be read from or saved to the precompiled cache
- // file
- FileSys::VectorVfsFile precompiled_cache_virtual_file;
- // Stores the current offset of the precompiled cache file for IO purposes
- std::size_t precompiled_cache_virtual_file_offset = 0;
-
- // Stored transferable shaders
- std::unordered_set<u64> stored_transferable;
-
- /// Title ID to operate on
- u64 title_id = 0;
-
- // The cache has been loaded at boot
- bool is_usable = false;
-};
-
-} // namespace OpenGL
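
Taken together, the deleted interface amounted to a bind/load/save round trip driven by the shader cache at boot. A hedged usage sketch of the old API (WarmUpShaderCache and BuildShader are placeholder names, not symbols from this patch):

    // Sketch only: how ShaderDiskCacheOpenGL was typically driven before its removal.
    void WarmUpShaderCache(ShaderDiskCacheOpenGL& disk_cache, u64 title_id) {
        disk_cache.BindTitleID(title_id);

        // Transferable entries are host-independent dumps of guest shaders.
        if (const auto entries = disk_cache.LoadTransferable()) {
            for (const ShaderDiskCacheEntry& entry : *entries) {
                (void)entry; // BuildShader(entry) would rebuild a host program here (hypothetical).
            }
        }
        // Precompiled entries are driver binaries, only valid for the exact
        // emulator version recorded in the version hash.
        const auto precompiled = disk_cache.LoadPrecompiled();
        (void)precompiled;

        // Shaders first seen at runtime are appended to the transferable file:
        // disk_cache.SaveEntry(new_entry);
    }
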
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 553e6e8d6..399959afb 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -1,149 +1,3 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-
-#include <glad/glad.h>
-
-#include "common/common_types.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/renderer_opengl/gl_device.h"
-#include "video_core/renderer_opengl/gl_shader_manager.h"
-
-namespace OpenGL {
-
-namespace {
-
-void BindProgram(GLenum stage, GLuint current, GLuint old, bool& enabled) {
- if (current == old) {
- return;
- }
- if (current == 0) {
- if (enabled) {
- enabled = false;
- glDisable(stage);
- }
- return;
- }
- if (!enabled) {
- enabled = true;
- glEnable(stage);
- }
- glBindProgramARB(stage, current);
-}
-
-} // Anonymous namespace
-
-ProgramManager::ProgramManager(const Device& device)
- : use_assembly_programs{device.UseAssemblyShaders()} {
- if (use_assembly_programs) {
- glEnable(GL_COMPUTE_PROGRAM_NV);
- } else {
- graphics_pipeline.Create();
- glBindProgramPipeline(graphics_pipeline.handle);
- }
-}
-
-ProgramManager::~ProgramManager() = default;
-
-void ProgramManager::BindCompute(GLuint program) {
- if (use_assembly_programs) {
- glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program);
- } else {
- is_graphics_bound = false;
- glUseProgram(program);
- }
-}
-
-void ProgramManager::BindGraphicsPipeline() {
- if (!use_assembly_programs) {
- UpdateSourcePrograms();
- }
-}
-
-void ProgramManager::BindHostPipeline(GLuint pipeline) {
- if (use_assembly_programs) {
- if (geometry_enabled) {
- geometry_enabled = false;
- old_state.geometry = 0;
- glDisable(GL_GEOMETRY_PROGRAM_NV);
- }
- } else {
- if (!is_graphics_bound) {
- glUseProgram(0);
- }
- }
- glBindProgramPipeline(pipeline);
-}
-
-void ProgramManager::RestoreGuestPipeline() {
- if (use_assembly_programs) {
- glBindProgramPipeline(0);
- } else {
- glBindProgramPipeline(graphics_pipeline.handle);
- }
-}
-
-void ProgramManager::BindHostCompute(GLuint program) {
- if (use_assembly_programs) {
- glDisable(GL_COMPUTE_PROGRAM_NV);
- }
- glUseProgram(program);
- is_graphics_bound = false;
-}
-
-void ProgramManager::RestoreGuestCompute() {
- if (use_assembly_programs) {
- glEnable(GL_COMPUTE_PROGRAM_NV);
- glUseProgram(0);
- }
-}
-
-void ProgramManager::UseVertexShader(GLuint program) {
- if (use_assembly_programs) {
- BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled);
- }
- current_state.vertex = program;
-}
-
-void ProgramManager::UseGeometryShader(GLuint program) {
- if (use_assembly_programs) {
- BindProgram(GL_GEOMETRY_PROGRAM_NV, program, current_state.geometry, geometry_enabled);
- }
- current_state.geometry = program;
-}
-
-void ProgramManager::UseFragmentShader(GLuint program) {
- if (use_assembly_programs) {
- BindProgram(GL_FRAGMENT_PROGRAM_NV, program, current_state.fragment, fragment_enabled);
- }
- current_state.fragment = program;
-}
-
-void ProgramManager::UpdateSourcePrograms() {
- if (!is_graphics_bound) {
- is_graphics_bound = true;
- glUseProgram(0);
- }
-
- const GLuint handle = graphics_pipeline.handle;
- const auto update_state = [handle](GLenum stage, GLuint current, GLuint old) {
- if (current == old) {
- return;
- }
- glUseProgramStages(handle, stage, current);
- };
- update_state(GL_VERTEX_SHADER_BIT, current_state.vertex, old_state.vertex);
- update_state(GL_GEOMETRY_SHADER_BIT, current_state.geometry, old_state.geometry);
- update_state(GL_FRAGMENT_SHADER_BIT, current_state.fragment, old_state.fragment);
-
- old_state = current_state;
-}
-
-void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) {
- const auto& regs = maxwell.regs;
-
- // Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value.
- y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f;
-}
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index ad42cce74..d7ef0775d 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -4,79 +4,142 @@
#pragma once
-#include <cstddef>
+#include <array>
+#include <span>
#include <glad/glad.h>
+#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
-#include "video_core/renderer_opengl/maxwell_to_gl.h"
namespace OpenGL {
-class Device;
-
-/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
-/// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
-/// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
-/// Not following that rule will cause problems on some AMD drivers.
-struct alignas(16) MaxwellUniformData {
- void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell);
-
- GLfloat y_direction;
-};
-static_assert(sizeof(MaxwellUniformData) == 16, "MaxwellUniformData structure size is incorrect");
-static_assert(sizeof(MaxwellUniformData) < 16384,
- "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec");
-
class ProgramManager {
-public:
- explicit ProgramManager(const Device& device);
- ~ProgramManager();
-
- /// Binds a compute program
- void BindCompute(GLuint program);
-
- /// Updates bound programs.
- void BindGraphicsPipeline();
-
- /// Binds an OpenGL pipeline object unsynchronized with the guest state.
- void BindHostPipeline(GLuint pipeline);
-
- /// Rewinds BindHostPipeline state changes.
- void RestoreGuestPipeline();
-
- /// Binds an OpenGL GLSL program object unsynchronized with the guest state.
- void BindHostCompute(GLuint program);
+ static constexpr size_t NUM_STAGES = 5;
- /// Rewinds BindHostCompute state changes.
- void RestoreGuestCompute();
-
- void UseVertexShader(GLuint program);
- void UseGeometryShader(GLuint program);
- void UseFragmentShader(GLuint program);
-
-private:
- struct PipelineState {
- GLuint vertex = 0;
- GLuint geometry = 0;
- GLuint fragment = 0;
+ static constexpr std::array ASSEMBLY_PROGRAM_ENUMS{
+ GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
+ GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
};
- /// Update GLSL programs.
- void UpdateSourcePrograms();
-
- OGLPipeline graphics_pipeline;
-
- PipelineState current_state;
- PipelineState old_state;
-
- bool use_assembly_programs = false;
-
- bool is_graphics_bound = true;
+public:
+ explicit ProgramManager(const Device& device) {
+ glCreateProgramPipelines(1, &pipeline.handle);
+ if (device.UseAssemblyShaders()) {
+ glEnable(GL_COMPUTE_PROGRAM_NV);
+ }
+ }
+
+ void BindComputeProgram(GLuint program) {
+ glUseProgram(program);
+ is_compute_bound = true;
+ }
+
+ void BindComputeAssemblyProgram(GLuint program) {
+ if (current_assembly_compute_program != program) {
+ current_assembly_compute_program = program;
+ glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program);
+ }
+ UnbindPipeline();
+ }
+
+ void BindSourcePrograms(std::span<const OGLProgram, NUM_STAGES> programs) {
+ static constexpr std::array<GLenum, 5> stage_enums{
+ GL_VERTEX_SHADER_BIT, GL_TESS_CONTROL_SHADER_BIT, GL_TESS_EVALUATION_SHADER_BIT,
+ GL_GEOMETRY_SHADER_BIT, GL_FRAGMENT_SHADER_BIT,
+ };
+ for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
+ if (current_programs[stage] != programs[stage].handle) {
+ current_programs[stage] = programs[stage].handle;
+ glUseProgramStages(pipeline.handle, stage_enums[stage], programs[stage].handle);
+ }
+ }
+ BindPipeline();
+ }
+
+ void BindPresentPrograms(GLuint vertex, GLuint fragment) {
+ if (current_programs[0] != vertex) {
+ current_programs[0] = vertex;
+ glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex);
+ }
+ if (current_programs[4] != fragment) {
+ current_programs[4] = fragment;
+ glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment);
+ }
+ glUseProgramStages(
+ pipeline.handle,
+ GL_TESS_CONTROL_SHADER_BIT | GL_TESS_EVALUATION_SHADER_BIT | GL_GEOMETRY_SHADER_BIT, 0);
+ current_programs[1] = 0;
+ current_programs[2] = 0;
+ current_programs[3] = 0;
+
+ if (current_stage_mask != 0) {
+ current_stage_mask = 0;
+ for (const GLenum program_type : ASSEMBLY_PROGRAM_ENUMS) {
+ glDisable(program_type);
+ }
+ }
+ BindPipeline();
+ }
+
+ void BindAssemblyPrograms(std::span<const OGLAssemblyProgram, NUM_STAGES> programs,
+ u32 stage_mask) {
+ const u32 changed_mask = current_stage_mask ^ stage_mask;
+ current_stage_mask = stage_mask;
+
+ if (changed_mask != 0) {
+ for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
+ if (((changed_mask >> stage) & 1) != 0) {
+ if (((stage_mask >> stage) & 1) != 0) {
+ glEnable(ASSEMBLY_PROGRAM_ENUMS[stage]);
+ } else {
+ glDisable(ASSEMBLY_PROGRAM_ENUMS[stage]);
+ }
+ }
+ }
+ }
+ for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
+ if (current_programs[stage] != programs[stage].handle) {
+ current_programs[stage] = programs[stage].handle;
+ glBindProgramARB(ASSEMBLY_PROGRAM_ENUMS[stage], programs[stage].handle);
+ }
+ }
+ UnbindPipeline();
+ }
+
+ void RestoreGuestCompute() {}
- bool vertex_enabled = false;
- bool geometry_enabled = false;
- bool fragment_enabled = false;
+private:
+ void BindPipeline() {
+ if (!is_pipeline_bound) {
+ is_pipeline_bound = true;
+ glBindProgramPipeline(pipeline.handle);
+ }
+ UnbindCompute();
+ }
+
+ void UnbindPipeline() {
+ if (is_pipeline_bound) {
+ is_pipeline_bound = false;
+ glBindProgramPipeline(0);
+ }
+ UnbindCompute();
+ }
+
+ void UnbindCompute() {
+ if (is_compute_bound) {
+ is_compute_bound = false;
+ glUseProgram(0);
+ }
+ }
+
+ OGLPipeline pipeline;
+ bool is_pipeline_bound{};
+ bool is_compute_bound{};
+
+ u32 current_stage_mask = 0;
+ std::array<GLuint, NUM_STAGES> current_programs{};
+ GLuint current_assembly_compute_program = 0;
};
} // namespace OpenGL
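
The replacement ProgramManager is header-only and does its own redundant-bind tracking; callers hand it a complete set of per-stage programs instead of issuing per-stage Use*Shader calls. A brief usage sketch (the wrapper functions are illustrative; the five slots follow ASSEMBLY_PROGRAM_ENUMS order: vertex, tess control, tess eval, geometry, fragment):

    // Sketch: binding a full GLSL graphics pipeline. A zero handle in a slot
    // detaches that stage through glUseProgramStages(pipeline, bit, 0).
    void BindGraphics(ProgramManager& manager, const std::array<OGLProgram, 5>& sources) {
        manager.BindSourcePrograms(sources); // 5 matches ProgramManager::NUM_STAGES
    }

    // Sketch: the presentation blit only needs vertex + fragment programs and
    // clears the intermediate stages in a single call.
    void BindPresent(ProgramManager& manager, GLuint vertex, GLuint fragment) {
        manager.BindPresentPrograms(vertex, fragment);
    }
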
diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp
index 4bf0d6090..d432072ad 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_util.cpp
@@ -5,57 +5,108 @@
#include <string_view>
#include <vector>
#include <glad/glad.h>
+
#include "common/assert.h"
#include "common/logging/log.h"
+#include "common/settings.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
-namespace OpenGL::GLShader {
+namespace OpenGL {
-namespace {
+static OGLProgram LinkSeparableProgram(GLuint shader) {
+ OGLProgram program;
+ program.handle = glCreateProgram();
+ glProgramParameteri(program.handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
+ glAttachShader(program.handle, shader);
+ glLinkProgram(program.handle);
+ if (!Settings::values.renderer_debug) {
+ return program;
+ }
+ GLint link_status{};
+ glGetProgramiv(program.handle, GL_LINK_STATUS, &link_status);
-std::string_view StageDebugName(GLenum type) {
- switch (type) {
- case GL_VERTEX_SHADER:
- return "vertex";
- case GL_GEOMETRY_SHADER:
- return "geometry";
- case GL_FRAGMENT_SHADER:
- return "fragment";
- case GL_COMPUTE_SHADER:
- return "compute";
+ GLint log_length{};
+ glGetProgramiv(program.handle, GL_INFO_LOG_LENGTH, &log_length);
+ if (log_length == 0) {
+ return program;
+ }
+ std::string log(log_length, 0);
+ glGetProgramInfoLog(program.handle, log_length, nullptr, log.data());
+ if (link_status == GL_FALSE) {
+ LOG_ERROR(Render_OpenGL, "{}", log);
+ } else {
+ LOG_WARNING(Render_OpenGL, "{}", log);
}
- UNIMPLEMENTED();
- return "unknown";
+ return program;
}
-} // Anonymous namespace
+static void LogShader(GLuint shader, std::string_view code = {}) {
+ GLint shader_status{};
+ glGetShaderiv(shader, GL_COMPILE_STATUS, &shader_status);
+ if (shader_status == GL_FALSE) {
+ LOG_ERROR(Render_OpenGL, "Failed to build shader");
+ }
+ GLint log_length{};
+ glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length);
+ if (log_length == 0) {
+ return;
+ }
+ std::string log(log_length, 0);
+ glGetShaderInfoLog(shader, log_length, nullptr, log.data());
+ if (shader_status == GL_FALSE) {
+ LOG_ERROR(Render_OpenGL, "{}", log);
+ if (!code.empty()) {
+ LOG_INFO(Render_OpenGL, "\n{}", code);
+ }
+ } else {
+ LOG_WARNING(Render_OpenGL, "{}", log);
+ }
+}
-GLuint LoadShader(std::string_view source, GLenum type) {
- const std::string_view debug_type = StageDebugName(type);
- const GLuint shader_id = glCreateShader(type);
+OGLProgram CreateProgram(std::string_view code, GLenum stage) {
+ OGLShader shader;
+ shader.handle = glCreateShader(stage);
- const GLchar* source_string = source.data();
- const GLint source_length = static_cast<GLint>(source.size());
+ const GLint length = static_cast<GLint>(code.size());
+ const GLchar* const code_ptr = code.data();
+ glShaderSource(shader.handle, 1, &code_ptr, &length);
+ glCompileShader(shader.handle);
+ if (Settings::values.renderer_debug) {
+ LogShader(shader.handle, code);
+ }
+ return LinkSeparableProgram(shader.handle);
+}
- glShaderSource(shader_id, 1, &source_string, &source_length);
- LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type);
- glCompileShader(shader_id);
+OGLProgram CreateProgram(std::span<const u32> code, GLenum stage) {
+ OGLShader shader;
+ shader.handle = glCreateShader(stage);
- GLint result = GL_FALSE;
- GLint info_log_length;
- glGetShaderiv(shader_id, GL_COMPILE_STATUS, &result);
- glGetShaderiv(shader_id, GL_INFO_LOG_LENGTH, &info_log_length);
+ glShaderBinary(1, &shader.handle, GL_SHADER_BINARY_FORMAT_SPIR_V_ARB, code.data(),
+ static_cast<GLsizei>(code.size_bytes()));
+ glSpecializeShader(shader.handle, "main", 0, nullptr, nullptr);
+ if (Settings::values.renderer_debug) {
+ LogShader(shader.handle);
+ }
+ return LinkSeparableProgram(shader.handle);
+}
- if (info_log_length > 1) {
- std::string shader_error(info_log_length, ' ');
- glGetShaderInfoLog(shader_id, info_log_length, nullptr, &shader_error[0]);
- if (result == GL_TRUE) {
- LOG_DEBUG(Render_OpenGL, "{}", shader_error);
- } else {
- LOG_ERROR(Render_OpenGL, "Error compiling {} shader:\n{}", debug_type, shader_error);
+OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target) {
+ OGLAssemblyProgram program;
+ glGenProgramsARB(1, &program.handle);
+ glNamedProgramStringEXT(program.handle, target, GL_PROGRAM_FORMAT_ASCII_ARB,
+ static_cast<GLsizei>(code.size()), code.data());
+ if (Settings::values.renderer_debug) {
+ const auto err = reinterpret_cast<const char*>(glGetString(GL_PROGRAM_ERROR_STRING_NV));
+ if (err && *err) {
+ if (std::strstr(err, "error")) {
+ LOG_CRITICAL(Render_OpenGL, "\n{}", err);
+ LOG_INFO(Render_OpenGL, "\n{}", code);
+ } else {
+ LOG_WARNING(Render_OpenGL, "\n{}", err);
+ }
}
}
- return shader_id;
+ return program;
}
-} // namespace OpenGL::GLShader
+} // namespace OpenGL
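
The new helpers return RAII OGLProgram / OGLAssemblyProgram wrappers and only pull compile or link logs when Settings::values.renderer_debug is enabled. A short usage sketch (the GLSL source and the surrounding function are placeholders):

    // Sketch: building a separable GLSL fragment program, with the SPIR-V and
    // NV assembly alternatives offered by the new helpers noted in comments.
    void BuildExamplePrograms() {
        static constexpr const char* frag_glsl =
            "#version 450\n"
            "layout(location = 0) out vec4 color;\n"
            "void main() { color = vec4(1.0); }\n";
        // GLSL path: compile, then link into a GL_PROGRAM_SEPARABLE program object.
        OGLProgram frag = CreateProgram(frag_glsl, GL_FRAGMENT_SHADER);
        (void)frag;

        // SPIR-V path: the std::span<const u32> overload goes through
        // glShaderBinary/glSpecializeShader instead (requires ARB_gl_spirv):
        //   OGLProgram spirv = CreateProgram(spirv_words, GL_FRAGMENT_SHADER);

        // NV assembly path:
        //   OGLAssemblyProgram prog = CompileProgram(asm_source, GL_FRAGMENT_PROGRAM_NV);
    }
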
diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h
index 1b770532e..4e1a2a8e1 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.h
+++ b/src/video_core/renderer_opengl/gl_shader_util.h
@@ -4,92 +4,23 @@
#pragma once
+#include <span>
#include <string>
+#include <string_view>
#include <vector>
+
#include <glad/glad.h>
+
#include "common/assert.h"
#include "common/logging/log.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
-namespace OpenGL::GLShader {
-
-/**
- * Utility function to log the source code of a list of shaders.
- * @param shaders The OpenGL shaders whose source we will print.
- */
-template <typename... T>
-void LogShaderSource(T... shaders) {
- auto shader_list = {shaders...};
-
- for (const auto& shader : shader_list) {
- if (shader == 0)
- continue;
-
- GLint source_length;
- glGetShaderiv(shader, GL_SHADER_SOURCE_LENGTH, &source_length);
-
- std::string source(source_length, ' ');
- glGetShaderSource(shader, source_length, nullptr, &source[0]);
- LOG_INFO(Render_OpenGL, "Shader source {}", source);
- }
-}
-
-/**
- * Utility function to create and compile an OpenGL GLSL shader
- * @param source String of the GLSL shader program
- * @param type Type of the shader (GL_VERTEX_SHADER, GL_GEOMETRY_SHADER or GL_FRAGMENT_SHADER)
- */
-GLuint LoadShader(std::string_view source, GLenum type);
-
-/**
- * Utility function to create and compile an OpenGL GLSL shader program (vertex + fragment shader)
- * @param separable_program whether to create a separable program
- * @param shaders ID of shaders to attach to the program
- * @returns Handle of the newly created OpenGL program object
- */
-template <typename... T>
-GLuint LoadProgram(bool separable_program, bool hint_retrievable, T... shaders) {
- // Link the program
- LOG_DEBUG(Render_OpenGL, "Linking program...");
-
- GLuint program_id = glCreateProgram();
-
- ((shaders == 0 ? (void)0 : glAttachShader(program_id, shaders)), ...);
-
- if (separable_program) {
- glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE);
- }
- if (hint_retrievable) {
- glProgramParameteri(program_id, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE);
- }
-
- glLinkProgram(program_id);
-
- // Check the program
- GLint result = GL_FALSE;
- GLint info_log_length;
- glGetProgramiv(program_id, GL_LINK_STATUS, &result);
- glGetProgramiv(program_id, GL_INFO_LOG_LENGTH, &info_log_length);
-
- if (info_log_length > 1) {
- std::string program_error(info_log_length, ' ');
- glGetProgramInfoLog(program_id, info_log_length, nullptr, &program_error[0]);
- if (result == GL_TRUE) {
- LOG_DEBUG(Render_OpenGL, "{}", program_error);
- } else {
- LOG_ERROR(Render_OpenGL, "Error linking shader:\n{}", program_error);
- }
- }
-
- if (result == GL_FALSE) {
- // There was a problem linking the shader, print the source for debugging purposes.
- LogShaderSource(shaders...);
- }
+namespace OpenGL {
- ASSERT_MSG(result == GL_TRUE, "Shader not linked");
+OGLProgram CreateProgram(std::string_view code, GLenum stage);
- ((shaders == 0 ? (void)0 : glDetachShader(program_id, shaders)), ...);
+OGLProgram CreateProgram(std::span<const u32> code, GLenum stage);
- return program_id;
-}
+OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target);
-} // namespace OpenGL::GLShader
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp
index dbdf5230f..586da84e3 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.cpp
+++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp
@@ -83,11 +83,6 @@ void SetupDirtyScissors(Tables& tables) {
FillBlock(tables[1], OFF(scissor_test), NUM(scissor_test), Scissors);
}
-void SetupDirtyShaders(Tables& tables) {
- FillBlock(tables[0], OFF(shader_config[0]), NUM(shader_config[0]) * Regs::MaxShaderProgram,
- Shaders);
-}
-
void SetupDirtyPolygonModes(Tables& tables) {
tables[0][OFF(polygon_mode_front)] = PolygonModeFront;
tables[0][OFF(polygon_mode_back)] = PolygonModeBack;
@@ -217,7 +212,6 @@ StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags}
SetupDirtyScissors(tables);
SetupDirtyVertexInstances(tables);
SetupDirtyVertexFormat(tables);
- SetupDirtyShaders(tables);
SetupDirtyPolygonModes(tables);
SetupDirtyDepthTest(tables);
SetupDirtyStencilTest(tables);
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h
index 94c905116..5864c7c07 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.h
+++ b/src/video_core/renderer_opengl/gl_state_tracker.h
@@ -52,7 +52,6 @@ enum : u8 {
BlendState0,
BlendState7 = BlendState0 + 7,
- Shaders,
ClipDistances,
PolygonModes,
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h
index 6dbb6bfba..2e67922a6 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.h
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.h
@@ -12,12 +12,15 @@
#include <glad/glad.h>
#include "common/common_types.h"
+#include "common/literals.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
+using namespace Common::Literals;
+
class StreamBuffer {
- static constexpr size_t STREAM_BUFFER_SIZE = 64 * 1024 * 1024;
+ static constexpr size_t STREAM_BUFFER_SIZE = 64_MiB;
static constexpr size_t NUM_SYNCS = 16;
static constexpr size_t REGION_SIZE = STREAM_BUFFER_SIZE / NUM_SYNCS;
static constexpr size_t MAX_ALIGNMENT = 256;
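
64_MiB comes from the newly added common/literals.h (listed in this change's file summary). Presumably these are plain constexpr user-defined literals, roughly along these lines (sketch only, not the actual header contents):

    #include <cstddef>

    namespace Common::Literals {
    // Hypothetical reconstruction of the binary-size literals used above.
    constexpr std::size_t operator""_KiB(unsigned long long value) {
        return static_cast<std::size_t>(value) * 1024;
    }
    constexpr std::size_t operator""_MiB(unsigned long long value) {
        return static_cast<std::size_t>(value) * 1024 * 1024;
    }
    } // namespace Common::Literals

    using namespace Common::Literals;
    static_assert(64_MiB == 64 * 1024 * 1024, "64_MiB must match the old constant");
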
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 9b4038615..c373c9cb4 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -24,9 +24,7 @@
#include "video_core/textures/decoders.h"
namespace OpenGL {
-
namespace {
-
using Tegra::Texture::SwizzleSource;
using Tegra::Texture::TextureMipmapFilter;
using Tegra::Texture::TextureType;
@@ -59,107 +57,6 @@ struct CopyRegion {
GLsizei depth;
};
-struct FormatTuple {
- GLenum internal_format;
- GLenum format = GL_NONE;
- GLenum type = GL_NONE;
-};
-
-constexpr std::array<FormatTuple, MaxPixelFormat> FORMAT_TABLE = {{
- {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM
- {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM
- {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT
- {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT
- {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM
- {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM
- {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM
- {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM
- {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT
- {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM
- {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM
- {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM
- {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT
- {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT
- {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT
- {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM
- {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM
- {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT
- {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT
- {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT
- {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT
- {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM
- {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM
- {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM
- {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM
- {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM
- {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM
- {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM
- {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM
- {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT
- {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT
- {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM
- {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM
- {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT
- {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT
- {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT
- {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT
- {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT
- {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT
- {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM
- {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM
- {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT
- {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT
- {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM
- {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT
- {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT
- {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT
- {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM
- {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT
- {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB
- {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM
- {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM
- {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT
- {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT
- {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT
- {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT
- {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT
- {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT
- {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM
- {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM
- {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM
- {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB
- {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB
- {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB
- {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB
- {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB
- {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB
- {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB
- {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB
- {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB
- {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB
- {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB
- {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB
- {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB
- {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT
- {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT
- {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM
- {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT
- {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM
- {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL,
- GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT
-}};
-
constexpr std::array ACCELERATED_FORMATS{
GL_RGBA32F, GL_RGBA16F, GL_RG32F, GL_RG16F, GL_R11F_G11F_B10F, GL_R32F,
GL_R16F, GL_RGBA32UI, GL_RGBA16UI, GL_RGB10_A2UI, GL_RGBA8UI, GL_RG32UI,
@@ -170,11 +67,6 @@ constexpr std::array ACCELERATED_FORMATS{
GL_RG8_SNORM, GL_R16_SNORM, GL_R8_SNORM,
};
-const FormatTuple& GetFormatTuple(PixelFormat pixel_format) {
- ASSERT(static_cast<size_t>(pixel_format) < FORMAT_TABLE.size());
- return FORMAT_TABLE[static_cast<size_t>(pixel_format)];
-}
-
GLenum ImageTarget(const VideoCommon::ImageInfo& info) {
switch (info.type) {
case ImageType::e1D:
@@ -195,26 +87,24 @@ GLenum ImageTarget(const VideoCommon::ImageInfo& info) {
return GL_NONE;
}
-GLenum ImageTarget(ImageViewType type, int num_samples = 1) {
+GLenum ImageTarget(Shader::TextureType type, int num_samples = 1) {
const bool is_multisampled = num_samples > 1;
switch (type) {
- case ImageViewType::e1D:
+ case Shader::TextureType::Color1D:
return GL_TEXTURE_1D;
- case ImageViewType::e2D:
+ case Shader::TextureType::Color2D:
return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D;
- case ImageViewType::Cube:
+ case Shader::TextureType::ColorCube:
return GL_TEXTURE_CUBE_MAP;
- case ImageViewType::e3D:
+ case Shader::TextureType::Color3D:
return GL_TEXTURE_3D;
- case ImageViewType::e1DArray:
+ case Shader::TextureType::ColorArray1D:
return GL_TEXTURE_1D_ARRAY;
- case ImageViewType::e2DArray:
+ case Shader::TextureType::ColorArray2D:
return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY;
- case ImageViewType::CubeArray:
+ case Shader::TextureType::ColorArrayCube:
return GL_TEXTURE_CUBE_MAP_ARRAY;
- case ImageViewType::Rect:
- return GL_TEXTURE_RECTANGLE;
- case ImageViewType::Buffer:
+ case Shader::TextureType::Buffer:
return GL_TEXTURE_BUFFER;
}
UNREACHABLE_MSG("Invalid image view type={}", type);
@@ -322,12 +212,13 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
default:
return false;
}
- const GLenum internal_format = GetFormatTuple(info.format).internal_format;
+ const GLenum internal_format = MaxwellToGL::GetFormatTuple(info.format).internal_format;
const auto& format_info = runtime.FormatInfo(info.type, internal_format);
if (format_info.is_compressed) {
return false;
}
- if (std::ranges::find(ACCELERATED_FORMATS, internal_format) == ACCELERATED_FORMATS.end()) {
+ if (std::ranges::find(ACCELERATED_FORMATS, static_cast<int>(internal_format)) ==
+ ACCELERATED_FORMATS.end()) {
return false;
}
if (format_info.compatibility_by_size) {
@@ -341,6 +232,20 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
[[nodiscard]] CopyOrigin MakeCopyOrigin(VideoCommon::Offset3D offset,
VideoCommon::SubresourceLayers subresource, GLenum target) {
switch (target) {
+ case GL_TEXTURE_1D:
+ return CopyOrigin{
+ .level = static_cast<GLint>(subresource.base_level),
+ .x = static_cast<GLint>(offset.x),
+ .y = static_cast<GLint>(0),
+ .z = static_cast<GLint>(0),
+ };
+ case GL_TEXTURE_1D_ARRAY:
+ return CopyOrigin{
+ .level = static_cast<GLint>(subresource.base_level),
+ .x = static_cast<GLint>(offset.x),
+ .y = static_cast<GLint>(0),
+ .z = static_cast<GLint>(subresource.base_layer),
+ };
case GL_TEXTURE_2D_ARRAY:
case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
return CopyOrigin{
@@ -366,6 +271,18 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
VideoCommon::SubresourceLayers dst_subresource,
GLenum target) {
switch (target) {
+ case GL_TEXTURE_1D:
+ return CopyRegion{
+ .width = static_cast<GLsizei>(extent.width),
+ .height = static_cast<GLsizei>(1),
+ .depth = static_cast<GLsizei>(1),
+ };
+ case GL_TEXTURE_1D_ARRAY:
+ return CopyRegion{
+ .width = static_cast<GLsizei>(extent.width),
+ .height = static_cast<GLsizei>(1),
+ .depth = static_cast<GLsizei>(dst_subresource.num_layers),
+ };
case GL_TEXTURE_2D_ARRAY:
case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
return CopyRegion{
@@ -387,11 +304,10 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) {
if (False(image_view->flags & VideoCommon::ImageViewFlagBits::Slice)) {
- const GLuint texture = image_view->DefaultHandle();
- glNamedFramebufferTexture(fbo, attachment, texture, 0);
+ glNamedFramebufferTexture(fbo, attachment, image_view->DefaultHandle(), 0);
return;
}
- const GLuint texture = image_view->Handle(ImageViewType::e3D);
+ const GLuint texture = image_view->Handle(Shader::TextureType::Color3D);
if (image_view->range.extent.layers > 1) {
// TODO: OpenGL doesn't support rendering to a fixed number of slices
glNamedFramebufferTexture(fbo, attachment, texture, 0);
@@ -412,6 +328,28 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) {
}
}
+[[nodiscard]] GLenum ShaderFormat(Shader::ImageFormat format) {
+ switch (format) {
+ case Shader::ImageFormat::Typeless:
+ break;
+ case Shader::ImageFormat::R8_SINT:
+ return GL_R8I;
+ case Shader::ImageFormat::R8_UINT:
+ return GL_R8UI;
+ case Shader::ImageFormat::R16_UINT:
+ return GL_R16UI;
+ case Shader::ImageFormat::R16_SINT:
+ return GL_R16I;
+ case Shader::ImageFormat::R32_UINT:
+ return GL_R32UI;
+ case Shader::ImageFormat::R32G32_UINT:
+ return GL_RG32UI;
+ case Shader::ImageFormat::R32G32B32A32_UINT:
+ return GL_RGBA32UI;
+ }
+ UNREACHABLE_MSG("Invalid image format={}", format);
+ return GL_R32UI;
+}
} // Anonymous namespace
ImageBufferMap::~ImageBufferMap() {
@@ -426,7 +364,7 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager&
static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D};
for (size_t i = 0; i < TARGETS.size(); ++i) {
const GLenum target = TARGETS[i];
- for (const FormatTuple& tuple : FORMAT_TABLE) {
+ for (const MaxwellToGL::FormatTuple& tuple : MaxwellToGL::FORMAT_TABLE) {
const GLenum format = tuple.internal_format;
GLint compat_class;
GLint compat_type;
@@ -448,11 +386,9 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager&
null_image_1d_array.Create(GL_TEXTURE_1D_ARRAY);
null_image_cube_array.Create(GL_TEXTURE_CUBE_MAP_ARRAY);
null_image_3d.Create(GL_TEXTURE_3D);
- null_image_rect.Create(GL_TEXTURE_RECTANGLE);
glTextureStorage2D(null_image_1d_array.handle, 1, GL_R8, 1, 1);
glTextureStorage3D(null_image_cube_array.handle, 1, GL_R8, 1, 1, 6);
glTextureStorage3D(null_image_3d.handle, 1, GL_R8, 1, 1, 1);
- glTextureStorage2D(null_image_rect.handle, 1, GL_R8, 1, 1);
std::array<GLuint, 4> new_handles;
glGenTextures(static_cast<GLsizei>(new_handles.size()), new_handles.data());
@@ -469,29 +405,28 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager&
glTextureView(null_image_view_cube.handle, GL_TEXTURE_CUBE_MAP, null_image_cube_array.handle,
GL_R8, 0, 1, 0, 6);
const std::array texture_handles{
- null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle,
- null_image_rect.handle, null_image_view_1d.handle, null_image_view_2d.handle,
- null_image_view_2d_array.handle, null_image_view_cube.handle,
+ null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle,
+ null_image_view_1d.handle, null_image_view_2d.handle, null_image_view_2d_array.handle,
+ null_image_view_cube.handle,
};
for (const GLuint handle : texture_handles) {
static constexpr std::array NULL_SWIZZLE{GL_ZERO, GL_ZERO, GL_ZERO, GL_ZERO};
glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, NULL_SWIZZLE.data());
}
- const auto set_view = [this](ImageViewType type, GLuint handle) {
+ const auto set_view = [this](Shader::TextureType type, GLuint handle) {
if (device.HasDebuggingToolAttached()) {
const std::string name = fmt::format("NullImage {}", type);
glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data());
}
null_image_views[static_cast<size_t>(type)] = handle;
};
- set_view(ImageViewType::e1D, null_image_view_1d.handle);
- set_view(ImageViewType::e2D, null_image_view_2d.handle);
- set_view(ImageViewType::Cube, null_image_view_cube.handle);
- set_view(ImageViewType::e3D, null_image_3d.handle);
- set_view(ImageViewType::e1DArray, null_image_1d_array.handle);
- set_view(ImageViewType::e2DArray, null_image_view_2d_array.handle);
- set_view(ImageViewType::CubeArray, null_image_cube_array.handle);
- set_view(ImageViewType::Rect, null_image_rect.handle);
+ set_view(Shader::TextureType::Color1D, null_image_view_1d.handle);
+ set_view(Shader::TextureType::Color2D, null_image_view_2d.handle);
+ set_view(Shader::TextureType::ColorCube, null_image_view_cube.handle);
+ set_view(Shader::TextureType::Color3D, null_image_3d.handle);
+ set_view(Shader::TextureType::ColorArray1D, null_image_1d_array.handle);
+ set_view(Shader::TextureType::ColorArray2D, null_image_view_2d_array.handle);
+ set_view(Shader::TextureType::ColorArrayCube, null_image_cube_array.handle);
}
TextureCacheRuntime::~TextureCacheRuntime() = default;
@@ -683,7 +618,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_,
gl_format = GL_RGBA;
gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
} else {
- const auto& tuple = GetFormatTuple(info.format);
+ const auto& tuple = MaxwellToGL::GetFormatTuple(info.format);
gl_internal_format = tuple.internal_format;
gl_format = tuple.format;
gl_type = tuple.type;
@@ -723,8 +658,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_,
glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth);
break;
case GL_TEXTURE_BUFFER:
- buffer.Create();
- glNamedBufferStorage(buffer.handle, guest_size_bytes, nullptr, 0);
+ UNREACHABLE();
break;
default:
UNREACHABLE_MSG("Invalid target=0x{:x}", target);
@@ -737,6 +671,8 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_,
}
}
+Image::~Image() = default;
+
void Image::UploadMemory(const ImageBufferMap& map,
std::span<const VideoCommon::BufferImageCopy> copies) {
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.buffer);
@@ -760,14 +696,6 @@ void Image::UploadMemory(const ImageBufferMap& map,
}
}
-void Image::UploadMemory(const ImageBufferMap& map,
- std::span<const VideoCommon::BufferCopy> copies) {
- for (const VideoCommon::BufferCopy& copy : copies) {
- glCopyNamedBufferSubData(map.buffer, buffer.handle, copy.src_offset + map.offset,
- copy.dst_offset, copy.size);
- }
-}
-
void Image::DownloadMemory(ImageBufferMap& map,
std::span<const VideoCommon::BufferImageCopy> copies) {
glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API
@@ -929,23 +857,30 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
if (True(image.flags & ImageFlagBits::Converted)) {
internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8;
} else {
- internal_format = GetFormatTuple(format).internal_format;
+ internal_format = MaxwellToGL::GetFormatTuple(format).internal_format;
+ }
+ full_range = info.range;
+ flat_range = info.range;
+ set_object_label = device.HasDebuggingToolAttached();
+ is_render_target = info.IsRenderTarget();
+ original_texture = image.texture.handle;
+ num_samples = image.info.num_samples;
+ if (!is_render_target) {
+ swizzle[0] = info.x_source;
+ swizzle[1] = info.y_source;
+ swizzle[2] = info.z_source;
+ swizzle[3] = info.w_source;
}
- VideoCommon::SubresourceRange flatten_range = info.range;
- std::array<GLuint, 2> handles;
- stored_views.reserve(2);
-
switch (info.type) {
case ImageViewType::e1DArray:
- flatten_range.extent.layers = 1;
+ flat_range.extent.layers = 1;
[[fallthrough]];
case ImageViewType::e1D:
- glGenTextures(2, handles.data());
- SetupView(device, image, ImageViewType::e1D, handles[0], info, flatten_range);
- SetupView(device, image, ImageViewType::e1DArray, handles[1], info, info.range);
+ SetupView(Shader::TextureType::Color1D);
+ SetupView(Shader::TextureType::ColorArray1D);
break;
case ImageViewType::e2DArray:
- flatten_range.extent.layers = 1;
+ flat_range.extent.layers = 1;
[[fallthrough]];
case ImageViewType::e2D:
if (True(flags & VideoCommon::ImageViewFlagBits::Slice)) {
@@ -955,63 +890,126 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
.base = {.level = info.range.base.level, .layer = 0},
.extent = {.levels = 1, .layers = 1},
};
- glGenTextures(1, handles.data());
- SetupView(device, image, ImageViewType::e3D, handles[0], info, slice_range);
- break;
+ full_range = slice_range;
+
+ SetupView(Shader::TextureType::Color3D);
+ } else {
+ SetupView(Shader::TextureType::Color2D);
+ SetupView(Shader::TextureType::ColorArray2D);
}
- glGenTextures(2, handles.data());
- SetupView(device, image, ImageViewType::e2D, handles[0], info, flatten_range);
- SetupView(device, image, ImageViewType::e2DArray, handles[1], info, info.range);
break;
case ImageViewType::e3D:
- glGenTextures(1, handles.data());
- SetupView(device, image, ImageViewType::e3D, handles[0], info, info.range);
+ SetupView(Shader::TextureType::Color3D);
break;
case ImageViewType::CubeArray:
- flatten_range.extent.layers = 6;
+ flat_range.extent.layers = 6;
[[fallthrough]];
case ImageViewType::Cube:
- glGenTextures(2, handles.data());
- SetupView(device, image, ImageViewType::Cube, handles[0], info, flatten_range);
- SetupView(device, image, ImageViewType::CubeArray, handles[1], info, info.range);
+ SetupView(Shader::TextureType::ColorCube);
+ SetupView(Shader::TextureType::ColorArrayCube);
break;
case ImageViewType::Rect:
- glGenTextures(1, handles.data());
- SetupView(device, image, ImageViewType::Rect, handles[0], info, info.range);
+ UNIMPLEMENTED();
break;
case ImageViewType::Buffer:
- glCreateTextures(GL_TEXTURE_BUFFER, 1, handles.data());
- SetupView(device, image, ImageViewType::Buffer, handles[0], info, info.range);
+ UNREACHABLE();
+ break;
+ }
+ switch (info.type) {
+ case ImageViewType::e1D:
+ default_handle = Handle(Shader::TextureType::Color1D);
+ break;
+ case ImageViewType::e1DArray:
+ default_handle = Handle(Shader::TextureType::ColorArray1D);
+ break;
+ case ImageViewType::e2D:
+ default_handle = Handle(Shader::TextureType::Color2D);
+ break;
+ case ImageViewType::e2DArray:
+ default_handle = Handle(Shader::TextureType::ColorArray2D);
+ break;
+ case ImageViewType::e3D:
+ default_handle = Handle(Shader::TextureType::Color3D);
+ break;
+ case ImageViewType::Cube:
+ default_handle = Handle(Shader::TextureType::ColorCube);
+ break;
+ case ImageViewType::CubeArray:
+ default_handle = Handle(Shader::TextureType::ColorArrayCube);
+ break;
+ default:
break;
}
- default_handle = Handle(info.type);
}
+ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
+ const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_)
+ : VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_},
+ buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {}
+
+ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
+ const VideoCommon::ImageViewInfo& view_info)
+ : VideoCommon::ImageViewBase{info, view_info} {}
+
ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params)
: VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {}
-void ImageView::SetupView(const Device& device, Image& image, ImageViewType view_type,
- GLuint handle, const VideoCommon::ImageViewInfo& info,
- VideoCommon::SubresourceRange view_range) {
- if (info.type == ImageViewType::Buffer) {
- // TODO: Take offset from buffer cache
- glTextureBufferRange(handle, internal_format, image.buffer.handle, 0,
- image.guest_size_bytes);
- } else {
- const GLuint parent = image.texture.handle;
- const GLenum target = ImageTarget(view_type, image.info.num_samples);
- glTextureView(handle, target, parent, internal_format, view_range.base.level,
- view_range.extent.levels, view_range.base.layer, view_range.extent.layers);
- if (!info.IsRenderTarget()) {
- ApplySwizzle(handle, format, info.Swizzle());
- }
+GLuint ImageView::StorageView(Shader::TextureType texture_type, Shader::ImageFormat image_format) {
+ if (image_format == Shader::ImageFormat::Typeless) {
+ return Handle(texture_type);
+ }
+ const bool is_signed{image_format == Shader::ImageFormat::R8_SINT ||
+ image_format == Shader::ImageFormat::R16_SINT};
+ if (!storage_views) {
+ storage_views = std::make_unique<StorageViews>();
}
- if (device.HasDebuggingToolAttached()) {
- const std::string name = VideoCommon::Name(*this, view_type);
- glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data());
+ auto& type_views{is_signed ? storage_views->signeds : storage_views->unsigneds};
+ GLuint& view{type_views[static_cast<size_t>(texture_type)]};
+ if (view == 0) {
+ view = MakeView(texture_type, ShaderFormat(image_format));
+ }
+ return view;
+}
+
+void ImageView::SetupView(Shader::TextureType view_type) {
+ views[static_cast<size_t>(view_type)] = MakeView(view_type, internal_format);
+}
+
+GLuint ImageView::MakeView(Shader::TextureType view_type, GLenum view_format) {
+ VideoCommon::SubresourceRange view_range;
+ switch (view_type) {
+ case Shader::TextureType::Color1D:
+ case Shader::TextureType::Color2D:
+ case Shader::TextureType::ColorCube:
+ view_range = flat_range;
+ break;
+ case Shader::TextureType::ColorArray1D:
+ case Shader::TextureType::ColorArray2D:
+ case Shader::TextureType::Color3D:
+ case Shader::TextureType::ColorArrayCube:
+ view_range = full_range;
+ break;
+ default:
+ UNREACHABLE();
+ }
+ OGLTextureView& view = stored_views.emplace_back();
+ view.Create();
+
+ const GLenum target = ImageTarget(view_type, num_samples);
+ glTextureView(view.handle, target, original_texture, view_format, view_range.base.level,
+ view_range.extent.levels, view_range.base.layer, view_range.extent.layers);
+ if (!is_render_target) {
+ std::array<SwizzleSource, 4> casted_swizzle;
+ std::ranges::transform(swizzle, casted_swizzle.begin(), [](u8 component_swizzle) {
+ return static_cast<SwizzleSource>(component_swizzle);
+ });
+ ApplySwizzle(view.handle, format, casted_swizzle);
+ }
+ if (set_object_label) {
+ const std::string name = VideoCommon::Name(*this);
+ glObjectLabel(GL_TEXTURE, view.handle, static_cast<GLsizei>(name.size()), name.data());
}
- stored_views.emplace_back().handle = handle;
- views[static_cast<size_t>(view_type)] = handle;
+ return view.handle;
}
Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) {
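
ImageView now creates storage (image load/store) views lazily through StorageView and caches them per texture type, split into signed and unsigned variants because the GL view format has to encode integer signedness. A hedged caller-side sketch (the binding unit and access flags are illustrative):

    // Sketch: binding an image view as a storage image for a compute dispatch.
    // The cache inside StorageView means glTextureView runs at most once per
    // (signedness, texture type) pair for this ImageView.
    void BindStorageImage(ImageView& image_view, GLuint unit) {
        const GLuint handle = image_view.StorageView(Shader::TextureType::Color2D,
                                                     Shader::ImageFormat::R32_UINT);
        glBindImageTexture(unit, handle, 0, GL_TRUE, 0, GL_READ_WRITE, GL_R32UI);
    }
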
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index df8be12ff..921072ebe 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -9,6 +9,7 @@
#include <glad/glad.h>
+#include "shader_recompiler/shader_info.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/util_shaders.h"
#include "video_core/texture_cache/texture_cache.h"
@@ -122,18 +123,17 @@ private:
bool has_broken_texture_view_formats = false;
StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT};
- StagingBuffers download_buffers{GL_MAP_READ_BIT, GL_MAP_READ_BIT};
+ StagingBuffers download_buffers{GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT, GL_MAP_READ_BIT};
OGLTexture null_image_1d_array;
OGLTexture null_image_cube_array;
OGLTexture null_image_3d;
- OGLTexture null_image_rect;
OGLTextureView null_image_view_1d;
OGLTextureView null_image_view_2d;
OGLTextureView null_image_view_2d_array;
OGLTextureView null_image_view_cube;
- std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> null_image_views;
+ std::array<GLuint, Shader::NUM_TEXTURE_TYPES> null_image_views{};
};
class Image : public VideoCommon::ImageBase {
@@ -143,11 +143,17 @@ public:
explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
VAddr cpu_addr);
+ ~Image();
+
+ Image(const Image&) = delete;
+ Image& operator=(const Image&) = delete;
+
+ Image(Image&&) = default;
+ Image& operator=(Image&&) = default;
+
void UploadMemory(const ImageBufferMap& map,
std::span<const VideoCommon::BufferImageCopy> copies);
- void UploadMemory(const ImageBufferMap& map, std::span<const VideoCommon::BufferCopy> copies);
-
void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies);
GLuint StorageHandle() noexcept;
@@ -162,7 +168,6 @@ private:
void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
OGLTexture texture;
- OGLBuffer buffer;
OGLTextureView store_view;
GLenum gl_internal_format = GL_NONE;
GLenum gl_format = GL_NONE;
@@ -174,10 +179,17 @@ class ImageView : public VideoCommon::ImageViewBase {
public:
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
+ explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&,
+ const VideoCommon::ImageViewInfo&, GPUVAddr);
+ explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
+ const VideoCommon::ImageViewInfo& view_info);
explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&);
- [[nodiscard]] GLuint Handle(ImageViewType query_type) const noexcept {
- return views[static_cast<size_t>(query_type)];
+ [[nodiscard]] GLuint StorageView(Shader::TextureType texture_type,
+ Shader::ImageFormat image_format);
+
+ [[nodiscard]] GLuint Handle(Shader::TextureType handle_type) const noexcept {
+ return views[static_cast<size_t>(handle_type)];
}
[[nodiscard]] GLuint DefaultHandle() const noexcept {
@@ -188,15 +200,38 @@ public:
return internal_format;
}
+ [[nodiscard]] GPUVAddr GpuAddr() const noexcept {
+ return gpu_addr;
+ }
+
+ [[nodiscard]] u32 BufferSize() const noexcept {
+ return buffer_size;
+ }
+
private:
- void SetupView(const Device& device, Image& image, ImageViewType view_type, GLuint handle,
- const VideoCommon::ImageViewInfo& info,
- VideoCommon::SubresourceRange view_range);
+ struct StorageViews {
+ std::array<GLuint, Shader::NUM_TEXTURE_TYPES> signeds{};
+ std::array<GLuint, Shader::NUM_TEXTURE_TYPES> unsigneds{};
+ };
+
+ void SetupView(Shader::TextureType view_type);
- std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> views{};
+ GLuint MakeView(Shader::TextureType view_type, GLenum view_format);
+
+ std::array<GLuint, Shader::NUM_TEXTURE_TYPES> views{};
std::vector<OGLTextureView> stored_views;
- GLuint default_handle = 0;
+ std::unique_ptr<StorageViews> storage_views;
GLenum internal_format = GL_NONE;
+ GLuint default_handle = 0;
+ GPUVAddr gpu_addr = 0;
+ u32 buffer_size = 0;
+ GLuint original_texture = 0;
+ int num_samples = 0;
+ VideoCommon::SubresourceRange flat_range;
+ VideoCommon::SubresourceRange full_range;
+ std::array<u8, 4> swizzle{};
+ bool set_object_label = false;
+ bool is_render_target = false;
};
class ImageAlloc : public VideoCommon::ImageAllocBase {};
@@ -235,6 +270,7 @@ struct TextureCacheParams {
static constexpr bool ENABLE_VALIDATION = true;
static constexpr bool FRAMEBUFFER_BLITS = true;
static constexpr bool HAS_EMULATED_COPIES = true;
+ static constexpr bool HAS_DEVICE_MEMORY_INFO = false;
using Runtime = OpenGL::TextureCacheRuntime;
using Image = OpenGL::Image;
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index f7ad8f370..672f94bfc 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -5,12 +5,120 @@
#pragma once
#include <glad/glad.h>
+
#include "video_core/engines/maxwell_3d.h"
+#include "video_core/surface.h"
namespace OpenGL::MaxwellToGL {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+struct FormatTuple {
+ GLenum internal_format;
+ GLenum format = GL_NONE;
+ GLenum type = GL_NONE;
+};
+
+constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TABLE = {{
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM
+ {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM
+ {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT
+ {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT
+ {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM
+ {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM
+ {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM
+ {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM
+ {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT
+ {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM
+ {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM
+ {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM
+ {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT
+ {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT
+ {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT
+ {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM
+ {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM
+ {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT
+ {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT
+ {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT
+ {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT
+ {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM
+ {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM
+ {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM
+ {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM
+ {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM
+ {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM
+ {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM
+ {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM
+ {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT
+ {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT
+ {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM
+ {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT
+ {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT
+ {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT
+ {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT
+ {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT
+ {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT
+ {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM
+ {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM
+ {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT
+ {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT
+ {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM
+ {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT
+ {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT
+ {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT
+ {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM
+ {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB
+ {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM
+ {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM
+ {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT
+ {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT
+ {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT
+ {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT
+ {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT
+ {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT
+ {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM
+ {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM
+ {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB
+ {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB
+ {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB
+ {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB
+ {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB
+ {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB
+ {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT
+ {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT
+ {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM
+ {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT
+ {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM
+ {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL,
+ GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT
+}};
+
+inline const FormatTuple& GetFormatTuple(VideoCore::Surface::PixelFormat pixel_format) {
+ ASSERT(static_cast<size_t>(pixel_format) < FORMAT_TABLE.size());
+ return FORMAT_TABLE[static_cast<size_t>(pixel_format)];
+}
+
inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) {
switch (attrib.type) {
case Maxwell::VertexAttribute::Type::UnsignedNorm:
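A short usage sketch for the FORMAT_TABLE/GetFormatTuple helper added above (not part of the patch; texture, num_levels, width, height, pixel_data and pixel_format are placeholder variables). Compressed entries only fill internal_format and leave format/type as GL_NONE, so their uploads would go through glCompressedTextureSubImage* rather than the plain path shown here:

    const FormatTuple& tuple = GetFormatTuple(pixel_format);
    glTextureStorage2D(texture, num_levels, tuple.internal_format, width, height);
    if (tuple.format != GL_NONE) {
        // Uncompressed formats also carry a client format/type pair for direct uploads
        glTextureSubImage2D(texture, 0, 0, 0, width, height, tuple.format, tuple.type, pixel_data);
    }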
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index a718bff7a..f1b00c24c 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -25,6 +25,7 @@
#include "video_core/host_shaders/opengl_present_vert.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
+#include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
#include "video_core/textures/decoders.h"
@@ -139,6 +140,26 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
}
AddTelemetryFields();
InitOpenGLObjects();
+
+ // Initialize default attributes to match hardware's disabled attributes
+ GLint max_attribs{};
+ glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &max_attribs);
+ for (GLint attrib = 0; attrib < max_attribs; ++attrib) {
+ glVertexAttrib4f(attrib, 0.0f, 0.0f, 0.0f, 1.0f);
+ }
+ // Enable seamless cubemaps when per texture parameters are not available
+ if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) {
+ glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS);
+ }
+ // Enable unified vertex attributes and query vertex buffer address when the driver supports it
+ if (device.HasVertexBufferUnifiedMemory()) {
+ glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
+ glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);
+
+ glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY);
+ glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV,
+ &vertex_buffer_address);
+ }
}
RendererOpenGL::~RendererOpenGL() = default;
@@ -229,22 +250,9 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color
}
void RendererOpenGL::InitOpenGLObjects() {
- glClearColor(Settings::values.bg_red.GetValue(), Settings::values.bg_green.GetValue(),
- Settings::values.bg_blue.GetValue(), 0.0f);
-
// Create shader programs
- OGLShader vertex_shader;
- vertex_shader.Create(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER);
-
- OGLShader fragment_shader;
- fragment_shader.Create(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER);
-
- vertex_program.Create(true, false, vertex_shader.handle);
- fragment_program.Create(true, false, fragment_shader.handle);
-
- pipeline.Create();
- glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle);
- glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle);
+ present_vertex = CreateProgram(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER);
+ present_fragment = CreateProgram(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER);
// Generate presentation sampler
present_sampler.Create();
@@ -266,21 +274,6 @@ void RendererOpenGL::InitOpenGLObjects() {
// Clear screen to black
LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
-
- // Enable seamless cubemaps when per texture parameters are not available
- if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) {
- glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS);
- }
-
- // Enable unified vertex attributes and query vertex buffer address when the driver supports it
- if (device.HasVertexBufferUnifiedMemory()) {
- glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
- glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);
-
- glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY);
- glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV,
- &vertex_buffer_address);
- }
}
void RendererOpenGL::AddTelemetryFields() {
@@ -335,17 +328,17 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
}
void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
- if (renderer_settings.set_background_color) {
- // Update background color before drawing
- glClearColor(Settings::values.bg_red.GetValue(), Settings::values.bg_green.GetValue(),
- Settings::values.bg_blue.GetValue(), 0.0f);
- }
+ // Update background color before drawing
+ glClearColor(Settings::values.bg_red.GetValue() / 255.0f,
+ Settings::values.bg_green.GetValue() / 255.0f,
+ Settings::values.bg_blue.GetValue() / 255.0f, 1.0f);
// Set projection matrix
const std::array ortho_matrix =
MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height));
- glProgramUniformMatrix3x2fv(vertex_program.handle, ModelViewMatrixLocation, 1, GL_FALSE,
- std::data(ortho_matrix));
+ program_manager.BindPresentPrograms(present_vertex.handle, present_fragment.handle);
+ glProgramUniformMatrix3x2fv(present_vertex.handle, ModelViewMatrixLocation, 1, GL_FALSE,
+ ortho_matrix.data());
const auto& texcoords = screen_info.display_texcoords;
auto left = texcoords.left;
@@ -406,8 +399,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
state_tracker.NotifyClipControl();
state_tracker.NotifyAlphaTest();
- program_manager.BindHostPipeline(pipeline.handle);
-
state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
glEnable(GL_CULL_FACE);
if (screen_info.display_srgb) {
@@ -455,7 +446,8 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
glClear(GL_COLOR_BUFFER_BIT);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
- program_manager.RestoreGuestPipeline();
+ // TODO
+ // program_manager.RestoreGuestPipeline();
}
void RendererOpenGL::RenderScreenshot() {
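In DrawScreen above, the background color settings are now divided by 255.0f before the glClearColor call, i.e. they are treated as 0-255 components rather than pre-normalized floats, and alpha is forced to 1.0. A minimal sketch of that conversion (the helper name is illustrative only; the patch performs the division inline):

    // Hypothetical helper mirroring the inline normalization above
    constexpr float ByteToFloatColor(u8 component) {
        return static_cast<float>(component) / 255.0f;
    }
    // glClearColor(ByteToFloatColor(r), ByteToFloatColor(g), ByteToFloatColor(b), 1.0f);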
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index cc19a110f..d455f572f 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -12,7 +12,6 @@
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
-#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
namespace Core {
@@ -70,6 +69,10 @@ public:
return &rasterizer;
}
+ [[nodiscard]] std::string GetDeviceVendor() const override {
+ return device.GetVendorName();
+ }
+
private:
/// Initializes the OpenGL state and creates persistent objects.
void InitOpenGLObjects();
@@ -107,9 +110,8 @@ private:
// OpenGL object IDs
OGLSampler present_sampler;
OGLBuffer vertex_buffer;
- OGLProgram vertex_program;
- OGLProgram fragment_program;
- OGLPipeline pipeline;
+ OGLProgram present_vertex;
+ OGLProgram present_fragment;
OGLFramebuffer screenshot_framebuffer;
// GPU address of the vertex buffer
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp
index 47fddcb6e..37a4d1d9d 100644
--- a/src/video_core/renderer_opengl/util_shaders.cpp
+++ b/src/video_core/renderer_opengl/util_shaders.cpp
@@ -16,8 +16,8 @@
#include "video_core/host_shaders/opengl_copy_bc4_comp.h"
#include "video_core/host_shaders/opengl_copy_bgra_comp.h"
#include "video_core/host_shaders/pitch_unswizzle_comp.h"
-#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
+#include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/renderer_opengl/util_shaders.h"
#include "video_core/texture_cache/accelerated_swizzle.h"
@@ -41,21 +41,14 @@ using VideoCommon::Accelerated::MakeBlockLinearSwizzle3DParams;
using VideoCore::Surface::BytesPerBlock;
namespace {
-
OGLProgram MakeProgram(std::string_view source) {
- OGLShader shader;
- shader.Create(source, GL_COMPUTE_SHADER);
-
- OGLProgram program;
- program.Create(true, false, shader.handle);
- return program;
+ return CreateProgram(source, GL_COMPUTE_SHADER);
}
size_t NumPixelsInCopy(const VideoCommon::ImageCopy& copy) {
return static_cast<size_t>(copy.extent.width * copy.extent.height *
copy.src_subresource.num_layers);
}
-
} // Anonymous namespace
UtilShaders::UtilShaders(ProgramManager& program_manager_)
@@ -69,7 +62,8 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_)
swizzle_table_buffer.Create();
astc_buffer.Create();
glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0);
- glNamedBufferStorage(astc_buffer.handle, sizeof(ASTC_BUFFER_DATA), &ASTC_BUFFER_DATA, 0);
+ glNamedBufferStorage(astc_buffer.handle, sizeof(ASTC_ENCODINGS_VALUES), &ASTC_ENCODINGS_VALUES,
+ 0);
}
UtilShaders::~UtilShaders() = default;
@@ -79,35 +73,15 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
static constexpr GLuint BINDING_INPUT_BUFFER = 1;
static constexpr GLuint BINDING_ENC_BUFFER = 2;
-
- static constexpr GLuint BINDING_6_TO_8_BUFFER = 3;
- static constexpr GLuint BINDING_7_TO_8_BUFFER = 4;
- static constexpr GLuint BINDING_8_TO_8_BUFFER = 5;
- static constexpr GLuint BINDING_BYTE_TO_16_BUFFER = 6;
-
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
const Extent2D tile_size{
.width = VideoCore::Surface::DefaultBlockWidth(image.info.format),
.height = VideoCore::Surface::DefaultBlockHeight(image.info.format),
};
- program_manager.BindHostCompute(astc_decoder_program.handle);
+ program_manager.BindComputeProgram(astc_decoder_program.handle);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
- glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle,
- offsetof(AstcBufferData, encoding_values),
- sizeof(AstcBufferData::encoding_values));
- glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_6_TO_8_BUFFER, astc_buffer.handle,
- offsetof(AstcBufferData, replicate_6_to_8),
- sizeof(AstcBufferData::replicate_6_to_8));
- glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_7_TO_8_BUFFER, astc_buffer.handle,
- offsetof(AstcBufferData, replicate_7_to_8),
- sizeof(AstcBufferData::replicate_7_to_8));
- glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_8_TO_8_BUFFER, astc_buffer.handle,
- offsetof(AstcBufferData, replicate_8_to_8),
- sizeof(AstcBufferData::replicate_8_to_8));
- glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_BYTE_TO_16_BUFFER, astc_buffer.handle,
- offsetof(AstcBufferData, replicate_byte_to_16),
- sizeof(AstcBufferData::replicate_byte_to_16));
+ glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle);
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
glUniform2ui(1, tile_size.width, tile_size.height);
@@ -137,6 +111,12 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers);
}
+ // Precautionary barrier to ensure the compute shader is done decoding prior to texture access.
+ // GL_TEXTURE_FETCH_BARRIER_BIT and GL_SHADER_IMAGE_ACCESS_BARRIER_BIT are used in a separate
+ // glMemoryBarrier call by the texture cache runtime
+ glMemoryBarrier(GL_UNIFORM_BARRIER_BIT | GL_COMMAND_BARRIER_BIT | GL_PIXEL_BUFFER_BARRIER_BIT |
+ GL_TEXTURE_UPDATE_BARRIER_BIT | GL_BUFFER_UPDATE_BARRIER_BIT |
+ GL_SHADER_STORAGE_BARRIER_BIT | GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
program_manager.RestoreGuestCompute();
}
@@ -147,7 +127,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
static constexpr GLuint BINDING_INPUT_BUFFER = 1;
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
- program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle);
+ program_manager.BindComputeProgram(block_linear_unswizzle_2d_program.handle);
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
@@ -186,7 +166,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
- program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle);
+ program_manager.BindComputeProgram(block_linear_unswizzle_3d_program.handle);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
@@ -235,7 +215,7 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map,
UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block),
"Non-power of two images are not implemented");
- program_manager.BindHostCompute(pitch_unswizzle_program.handle);
+ program_manager.BindComputeProgram(pitch_unswizzle_program.handle);
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
glUniform2ui(LOC_ORIGIN, 0, 0);
glUniform2i(LOC_DESTINATION, 0, 0);
@@ -263,7 +243,7 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im
static constexpr GLuint LOC_SRC_OFFSET = 0;
static constexpr GLuint LOC_DST_OFFSET = 1;
- program_manager.BindHostCompute(copy_bc4_program.handle);
+ program_manager.BindComputeProgram(copy_bc4_program.handle);
for (const ImageCopy& copy : copies) {
ASSERT(copy.src_subresource.base_layer == 0);
@@ -274,9 +254,9 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im
glUniform3ui(LOC_SRC_OFFSET, copy.src_offset.x, copy.src_offset.y, copy.src_offset.z);
glUniform3ui(LOC_DST_OFFSET, copy.dst_offset.x, copy.dst_offset.y, copy.dst_offset.z);
glBindImageTexture(BINDING_INPUT_IMAGE, src_image.StorageHandle(),
- copy.src_subresource.base_level, GL_FALSE, 0, GL_READ_ONLY, GL_RG32UI);
+ copy.src_subresource.base_level, GL_TRUE, 0, GL_READ_ONLY, GL_RG32UI);
glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.StorageHandle(),
- copy.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8UI);
+ copy.dst_subresource.base_level, GL_TRUE, 0, GL_WRITE_ONLY, GL_RGBA8UI);
glDispatchCompute(copy.extent.width, copy.extent.height, copy.extent.depth);
}
program_manager.RestoreGuestCompute();
@@ -299,7 +279,7 @@ void UtilShaders::CopyBGR(Image& dst_image, Image& src_image,
break;
case 4: {
// BGRA8 copy
- program_manager.BindHostCompute(copy_bgra_program.handle);
+ program_manager.BindComputeProgram(copy_bgra_program.handle);
constexpr GLenum FORMAT = GL_RGBA8;
for (const ImageCopy& copy : copies) {
ASSERT(copy.src_offset == zero_offset);
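The ASTC path above now binds the whole ASTC_ENCODINGS_VALUES table as a single SSBO and closes the dispatch loop with a broad glMemoryBarrier, leaving texture-fetch and image-access visibility to the texture cache runtime as the in-code comment notes. A condensed sketch of that bind/dispatch/barrier shape (identifiers are placeholders, not the helper's members):

    glUseProgram(decode_program);  // stands in for program_manager.BindComputeProgram(...)
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, encodings_ssbo);  // binding 2 = encodings table
    glDispatchCompute(num_groups_x, num_groups_y, num_layers);
    // Make the compute writes visible to later buffer updates and storage accesses
    glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT | GL_BUFFER_UPDATE_BARRIER_BIT |
                    GL_SHADER_STORAGE_BARRIER_BIT);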
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp
index b7f5b8bc2..6c1b2f063 100644
--- a/src/video_core/renderer_vulkan/blit_image.cpp
+++ b/src/video_core/renderer_vulkan/blit_image.cpp
@@ -49,6 +49,16 @@ constexpr VkDescriptorSetLayoutCreateInfo ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREA
.bindingCount = 1,
.pBindings = &TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<0>,
};
+template <u32 num_textures>
+inline constexpr DescriptorBankInfo TEXTURE_DESCRIPTOR_BANK_INFO{
+ .uniform_buffers = 0,
+ .storage_buffers = 0,
+ .texture_buffers = 0,
+ .image_buffers = 0,
+ .textures = num_textures,
+ .images = 0,
+ .score = 2,
+};
constexpr VkDescriptorSetLayoutCreateInfo TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.pNext = nullptr,
@@ -323,18 +333,19 @@ void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Regi
cmdbuf.SetScissor(0, scissor);
cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants);
}
-
} // Anonymous namespace
BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_,
- StateTracker& state_tracker_, VKDescriptorPool& descriptor_pool)
+ StateTracker& state_tracker_, DescriptorPool& descriptor_pool)
: device{device_}, scheduler{scheduler_}, state_tracker{state_tracker_},
one_texture_set_layout(device.GetLogical().CreateDescriptorSetLayout(
ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)),
two_textures_set_layout(device.GetLogical().CreateDescriptorSetLayout(
TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)),
- one_texture_descriptor_allocator(descriptor_pool, *one_texture_set_layout),
- two_textures_descriptor_allocator(descriptor_pool, *two_textures_set_layout),
+ one_texture_descriptor_allocator{
+ descriptor_pool.Allocator(*one_texture_set_layout, TEXTURE_DESCRIPTOR_BANK_INFO<1>)},
+ two_textures_descriptor_allocator{
+ descriptor_pool.Allocator(*two_textures_set_layout, TEXTURE_DESCRIPTOR_BANK_INFO<2>)},
one_texture_pipeline_layout(device.GetLogical().CreatePipelineLayout(
PipelineLayoutCreateInfo(one_texture_set_layout.address()))),
two_textures_pipeline_layout(device.GetLogical().CreatePipelineLayout(
@@ -362,14 +373,14 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageV
.operation = operation,
};
const VkPipelineLayout layout = *one_texture_pipeline_layout;
- const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D);
+ const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D);
const VkSampler sampler = is_linear ? *linear_sampler : *nearest_sampler;
const VkPipeline pipeline = FindOrEmplacePipeline(key);
- const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
scheduler.RequestRenderpass(dst_framebuffer);
- scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_view, descriptor_set,
- &device = device](vk::CommandBuffer cmdbuf) {
+ scheduler.Record([this, dst_region, src_region, pipeline, layout, sampler,
+ src_view](vk::CommandBuffer cmdbuf) {
// TODO: Barriers
+ const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view);
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set,
@@ -391,12 +402,11 @@ void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer,
const VkPipelineLayout layout = *two_textures_pipeline_layout;
const VkSampler sampler = *nearest_sampler;
const VkPipeline pipeline = BlitDepthStencilPipeline(dst_framebuffer->RenderPass());
- const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit();
scheduler.RequestRenderpass(dst_framebuffer);
scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_depth_view,
- src_stencil_view, descriptor_set,
- &device = device](vk::CommandBuffer cmdbuf) {
+ src_stencil_view, this](vk::CommandBuffer cmdbuf) {
// TODO: Barriers
+ const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit();
UpdateTwoTexturesDescriptorSet(device, descriptor_set, sampler, src_depth_view,
src_stencil_view);
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
@@ -416,7 +426,6 @@ void BlitImageHelper::ConvertD32ToR32(const Framebuffer* dst_framebuffer,
void BlitImageHelper::ConvertR32ToD32(const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
-
ConvertColorToDepthPipeline(convert_r32_to_d32_pipeline, dst_framebuffer->RenderPass());
Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view);
}
@@ -436,16 +445,14 @@ void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer,
void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
const VkPipelineLayout layout = *one_texture_pipeline_layout;
- const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D);
+ const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D);
const VkSampler sampler = *nearest_sampler;
- const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
const VkExtent2D extent{
.width = src_image_view.size.width,
.height = src_image_view.size.height,
};
scheduler.RequestRenderpass(dst_framebuffer);
- scheduler.Record([pipeline, layout, sampler, src_view, descriptor_set, extent,
- &device = device](vk::CommandBuffer cmdbuf) {
+ scheduler.Record([pipeline, layout, sampler, src_view, extent, this](vk::CommandBuffer cmdbuf) {
const VkOffset2D offset{
.x = 0,
.y = 0,
@@ -466,6 +473,7 @@ void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_frameb
.tex_scale = {viewport.width, viewport.height},
.tex_offset = {0.0f, 0.0f},
};
+ const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view);
// TODO: Barriers
diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h
index 0d81a06ed..33ee095c1 100644
--- a/src/video_core/renderer_vulkan/blit_image.h
+++ b/src/video_core/renderer_vulkan/blit_image.h
@@ -31,7 +31,7 @@ struct BlitImagePipelineKey {
class BlitImageHelper {
public:
explicit BlitImageHelper(const Device& device, VKScheduler& scheduler,
- StateTracker& state_tracker, VKDescriptorPool& descriptor_pool);
+ StateTracker& state_tracker, DescriptorPool& descriptor_pool);
~BlitImageHelper();
void BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index 362278f01..d70153df3 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -15,9 +15,7 @@
#include "video_core/renderer_vulkan/vk_state_tracker.h"
namespace Vulkan {
-
namespace {
-
constexpr size_t POINT = 0;
constexpr size_t LINE = 1;
constexpr size_t POLYGON = 2;
@@ -39,10 +37,20 @@ constexpr std::array POLYGON_OFFSET_ENABLE_LUT = {
POLYGON, // Patches
};
+void RefreshXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs) {
+ std::ranges::transform(regs.tfb_layouts, state.layouts.begin(), [](const auto& layout) {
+ return VideoCommon::TransformFeedbackState::Layout{
+ .stream = layout.stream,
+ .varying_count = layout.varying_count,
+ .stride = layout.stride,
+ };
+ });
+ state.varyings = regs.tfb_varying_locs;
+}
} // Anonymous namespace
void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,
- bool has_extended_dynamic_state) {
+ bool has_extended_dynamic_state, bool has_dynamic_vertex_input) {
const Maxwell& regs = maxwell3d.regs;
const std::array enabled_lut{
regs.polygon_offset_point_enable,
@@ -52,6 +60,9 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,
const u32 topology_index = static_cast<u32>(regs.draw.topology.Value());
raw1 = 0;
+ extended_dynamic_state.Assign(has_extended_dynamic_state ? 1 : 0);
+ dynamic_vertex_input.Assign(has_dynamic_vertex_input ? 1 : 0);
+ xfb_enabled.Assign(regs.tfb_enabled != 0);
primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0);
depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 1 : 0);
depth_clamp_disabled.Assign(regs.view_volume_clip_control.depth_clamp_disabled.Value());
@@ -63,37 +74,66 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,
tessellation_clockwise.Assign(regs.tess_mode.cw.Value());
logic_op_enable.Assign(regs.logic_op.enable != 0 ? 1 : 0);
logic_op.Assign(PackLogicOp(regs.logic_op.operation));
- rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0);
topology.Assign(regs.draw.topology);
msaa_mode.Assign(regs.multisample_mode);
raw2 = 0;
+ rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0);
const auto test_func =
regs.alpha_test_enabled != 0 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always;
alpha_test_func.Assign(PackComparisonOp(test_func));
early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0);
-
+ depth_enabled.Assign(regs.zeta_enable != 0 ? 1 : 0);
+ depth_format.Assign(static_cast<u32>(regs.zeta.format));
+ y_negate.Assign(regs.screen_y_control.y_negate != 0 ? 1 : 0);
+ provoking_vertex_last.Assign(regs.provoking_vertex_last != 0 ? 1 : 0);
+ conservative_raster_enable.Assign(regs.conservative_raster_enable != 0 ? 1 : 0);
+ smooth_lines.Assign(regs.line_smooth_enable != 0 ? 1 : 0);
+
+ for (size_t i = 0; i < regs.rt.size(); ++i) {
+ color_formats[i] = static_cast<u8>(regs.rt[i].format);
+ }
alpha_test_ref = Common::BitCast<u32>(regs.alpha_test_ref);
point_size = Common::BitCast<u32>(regs.point_size);
- if (maxwell3d.dirty.flags[Dirty::InstanceDivisors]) {
- maxwell3d.dirty.flags[Dirty::InstanceDivisors] = false;
- for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
- const bool is_enabled = regs.instanced_arrays.IsInstancingEnabled(index);
- binding_divisors[index] = is_enabled ? regs.vertex_array[index].divisor : 0;
- }
- }
- if (maxwell3d.dirty.flags[Dirty::VertexAttributes]) {
- maxwell3d.dirty.flags[Dirty::VertexAttributes] = false;
- for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
- const auto& input = regs.vertex_attrib_format[index];
- auto& attribute = attributes[index];
- attribute.raw = 0;
- attribute.enabled.Assign(input.IsConstant() ? 0 : 1);
- attribute.buffer.Assign(input.buffer);
- attribute.offset.Assign(input.offset);
- attribute.type.Assign(static_cast<u32>(input.type.Value()));
- attribute.size.Assign(static_cast<u32>(input.size.Value()));
+ if (maxwell3d.dirty.flags[Dirty::VertexInput]) {
+ if (has_dynamic_vertex_input) {
+ // Dirty flag will be reset by the command buffer update
+ static constexpr std::array LUT{
+ 0u, // Invalid
+ 1u, // SignedNorm
+ 1u, // UnsignedNorm
+ 2u, // SignedInt
+ 3u, // UnsignedInt
+ 1u, // UnsignedScaled
+ 1u, // SignedScaled
+ 1u, // Float
+ };
+ const auto& attrs = regs.vertex_attrib_format;
+ attribute_types = 0;
+ for (size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) {
+ const u32 mask = attrs[i].constant != 0 ? 0 : 3;
+ const u32 type = LUT[static_cast<size_t>(attrs[i].type.Value())];
+ attribute_types |= static_cast<u64>(type & mask) << (i * 2);
+ }
+ } else {
+ maxwell3d.dirty.flags[Dirty::VertexInput] = false;
+ enabled_divisors = 0;
+ for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
+ const bool is_enabled = regs.instanced_arrays.IsInstancingEnabled(index);
+ binding_divisors[index] = is_enabled ? regs.vertex_array[index].divisor : 0;
+ enabled_divisors |= (is_enabled ? u64{1} : 0) << index;
+ }
+ for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
+ const auto& input = regs.vertex_attrib_format[index];
+ auto& attribute = attributes[index];
+ attribute.raw = 0;
+ attribute.enabled.Assign(input.constant ? 0 : 1);
+ attribute.buffer.Assign(input.buffer);
+ attribute.offset.Assign(input.offset);
+ attribute.type.Assign(static_cast<u32>(input.type.Value()));
+ attribute.size.Assign(static_cast<u32>(input.size.Value()));
+ }
}
}
if (maxwell3d.dirty.flags[Dirty::Blending]) {
@@ -109,10 +149,12 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,
return static_cast<u16>(viewport.swizzle.raw);
});
}
- if (!has_extended_dynamic_state) {
- no_extended_dynamic_state.Assign(1);
+ if (!extended_dynamic_state) {
dynamic_state.Refresh(regs);
}
+ if (xfb_enabled) {
+ RefreshXfbState(xfb_state, regs);
+ }
}
void FixedPipelineState::BlendingAttachment::Refresh(const Maxwell& regs, size_t index) {
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
index a0eb83a68..c9be37935 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
@@ -12,6 +12,7 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/surface.h"
+#include "video_core/transform_feedback.h"
namespace Vulkan {
@@ -60,7 +61,7 @@ struct FixedPipelineState {
void Refresh(const Maxwell& regs, size_t index);
- constexpr std::array<bool, 4> Mask() const noexcept {
+ std::array<bool, 4> Mask() const noexcept {
return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0};
}
@@ -97,11 +98,11 @@ struct FixedPipelineState {
BitField<20, 3, u32> type;
BitField<23, 6, u32> size;
- constexpr Maxwell::VertexAttribute::Type Type() const noexcept {
+ Maxwell::VertexAttribute::Type Type() const noexcept {
return static_cast<Maxwell::VertexAttribute::Type>(type.Value());
}
- constexpr Maxwell::VertexAttribute::Size Size() const noexcept {
+ Maxwell::VertexAttribute::Size Size() const noexcept {
return static_cast<Maxwell::VertexAttribute::Size>(size.Value());
}
};
@@ -167,37 +168,53 @@ struct FixedPipelineState {
union {
u32 raw1;
- BitField<0, 1, u32> no_extended_dynamic_state;
- BitField<2, 1, u32> primitive_restart_enable;
- BitField<3, 1, u32> depth_bias_enable;
- BitField<4, 1, u32> depth_clamp_disabled;
- BitField<5, 1, u32> ndc_minus_one_to_one;
- BitField<6, 2, u32> polygon_mode;
- BitField<8, 5, u32> patch_control_points_minus_one;
- BitField<13, 2, u32> tessellation_primitive;
- BitField<15, 2, u32> tessellation_spacing;
- BitField<17, 1, u32> tessellation_clockwise;
- BitField<18, 1, u32> logic_op_enable;
- BitField<19, 4, u32> logic_op;
- BitField<23, 1, u32> rasterize_enable;
+ BitField<0, 1, u32> extended_dynamic_state;
+ BitField<1, 1, u32> dynamic_vertex_input;
+ BitField<2, 1, u32> xfb_enabled;
+ BitField<3, 1, u32> primitive_restart_enable;
+ BitField<4, 1, u32> depth_bias_enable;
+ BitField<5, 1, u32> depth_clamp_disabled;
+ BitField<6, 1, u32> ndc_minus_one_to_one;
+ BitField<7, 2, u32> polygon_mode;
+ BitField<9, 5, u32> patch_control_points_minus_one;
+ BitField<14, 2, u32> tessellation_primitive;
+ BitField<16, 2, u32> tessellation_spacing;
+ BitField<18, 1, u32> tessellation_clockwise;
+ BitField<19, 1, u32> logic_op_enable;
+ BitField<20, 4, u32> logic_op;
BitField<24, 4, Maxwell::PrimitiveTopology> topology;
BitField<28, 4, Tegra::Texture::MsaaMode> msaa_mode;
};
union {
u32 raw2;
- BitField<0, 3, u32> alpha_test_func;
- BitField<3, 1, u32> early_z;
+ BitField<0, 1, u32> rasterize_enable;
+ BitField<1, 3, u32> alpha_test_func;
+ BitField<4, 1, u32> early_z;
+ BitField<5, 1, u32> depth_enabled;
+ BitField<6, 5, u32> depth_format;
+ BitField<11, 1, u32> y_negate;
+ BitField<12, 1, u32> provoking_vertex_last;
+ BitField<13, 1, u32> conservative_raster_enable;
+ BitField<14, 1, u32> smooth_lines;
};
+ std::array<u8, Maxwell::NumRenderTargets> color_formats;
u32 alpha_test_ref;
u32 point_size;
- std::array<u32, Maxwell::NumVertexArrays> binding_divisors;
- std::array<VertexAttribute, Maxwell::NumVertexAttributes> attributes;
std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments;
std::array<u16, Maxwell::NumViewports> viewport_swizzles;
+ union {
+ u64 attribute_types; // Used with VK_EXT_vertex_input_dynamic_state
+ u64 enabled_divisors;
+ };
+ std::array<VertexAttribute, Maxwell::NumVertexAttributes> attributes;
+ std::array<u32, Maxwell::NumVertexArrays> binding_divisors;
+
DynamicState dynamic_state;
+ VideoCommon::TransformFeedbackState xfb_state;
- void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state);
+ void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state,
+ bool has_dynamic_vertex_input);
size_t Hash() const noexcept;
@@ -208,8 +225,24 @@ struct FixedPipelineState {
}
size_t Size() const noexcept {
- const size_t total_size = sizeof *this;
- return total_size - (no_extended_dynamic_state != 0 ? 0 : sizeof(DynamicState));
+ if (xfb_enabled) {
+ // When transform feedback is enabled, use the whole struct
+ return sizeof(*this);
+ }
+ if (dynamic_vertex_input) {
+ // Exclude dynamic state and attributes
+ return offsetof(FixedPipelineState, attributes);
+ }
+ if (extended_dynamic_state) {
+ // Exclude dynamic state
+ return offsetof(FixedPipelineState, dynamic_state);
+ }
+ // Default
+ return offsetof(FixedPipelineState, xfb_state);
+ }
+
+ u32 DynamicAttributeType(size_t index) const noexcept {
+ return (attribute_types >> (index * 2)) & 0b11;
}
};
static_assert(std::has_unique_object_representations_v<FixedPipelineState>);
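Size() above trims the key to the prefix that is meaningful for the enabled feature set (transform feedback, then dynamic vertex input, then extended dynamic state, then the baseline), so only those bytes need hashing or comparison; DynamicAttributeType() likewise unpacks the 2-bit per-attribute codes stored in attribute_types when VK_EXT_vertex_input_dynamic_state is in use. A sketch of how a cache key might consume this, assuming common/cityhash.h and <cstring> are available (the real Hash() in fixed_pipeline_state.cpp is expected to follow the same pattern):

    u64 HashKey(const FixedPipelineState& state) {
        return Common::CityHash64(reinterpret_cast<const char*>(&state), state.Size());
    }

    bool SameKey(const FixedPipelineState& lhs, const FixedPipelineState& rhs) {
        // Comparing only Size() bytes keeps keys equal when trailing members are unused
        return lhs.Size() == rhs.Size() && std::memcmp(&lhs, &rhs, lhs.Size()) == 0;
    }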
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index f088447e9..68a23b602 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -157,7 +157,7 @@ struct FormatTuple {
{VK_FORMAT_R32_SFLOAT, Attachable | Storage}, // R32_FLOAT
{VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16_FLOAT
{VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16_UNORM
- {VK_FORMAT_UNDEFINED}, // R16_SNORM
+ {VK_FORMAT_R16_SNORM, Attachable | Storage}, // R16_SNORM
{VK_FORMAT_R16_UINT, Attachable | Storage}, // R16_UINT
{VK_FORMAT_UNDEFINED}, // R16_SINT
{VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // R16G16_UNORM
@@ -266,19 +266,20 @@ FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with
return {device.GetSupportedFormat(tuple.format, usage, format_type), attachable, storage};
}
-VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) {
+VkShaderStageFlagBits ShaderStage(Shader::Stage stage) {
switch (stage) {
- case Tegra::Engines::ShaderType::Vertex:
+ case Shader::Stage::VertexA:
+ case Shader::Stage::VertexB:
return VK_SHADER_STAGE_VERTEX_BIT;
- case Tegra::Engines::ShaderType::TesselationControl:
+ case Shader::Stage::TessellationControl:
return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
- case Tegra::Engines::ShaderType::TesselationEval:
+ case Shader::Stage::TessellationEval:
return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
- case Tegra::Engines::ShaderType::Geometry:
+ case Shader::Stage::Geometry:
return VK_SHADER_STAGE_GEOMETRY_BIT;
- case Tegra::Engines::ShaderType::Fragment:
+ case Shader::Stage::Fragment:
return VK_SHADER_STAGE_FRAGMENT_BIT;
- case Tegra::Engines::ShaderType::Compute:
+ case Shader::Stage::Compute:
return VK_SHADER_STAGE_COMPUTE_BIT;
}
UNIMPLEMENTED_MSG("Unimplemented shader stage={}", stage);
@@ -685,6 +686,19 @@ VkCullModeFlagBits CullFace(Maxwell::CullFace cull_face) {
return {};
}
+VkPolygonMode PolygonMode(Maxwell::PolygonMode polygon_mode) {
+ switch (polygon_mode) {
+ case Maxwell::PolygonMode::Point:
+ return VK_POLYGON_MODE_POINT;
+ case Maxwell::PolygonMode::Line:
+ return VK_POLYGON_MODE_LINE;
+ case Maxwell::PolygonMode::Fill:
+ return VK_POLYGON_MODE_FILL;
+ }
+ UNIMPLEMENTED_MSG("Unimplemented polygon mode={}", polygon_mode);
+ return {};
+}
+
VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) {
switch (swizzle) {
case Tegra::Texture::SwizzleSource::Zero:
@@ -741,4 +755,28 @@ VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reducti
return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT;
}
+VkSampleCountFlagBits MsaaMode(Tegra::Texture::MsaaMode msaa_mode) {
+ switch (msaa_mode) {
+ case Tegra::Texture::MsaaMode::Msaa1x1:
+ return VK_SAMPLE_COUNT_1_BIT;
+ case Tegra::Texture::MsaaMode::Msaa2x1:
+ case Tegra::Texture::MsaaMode::Msaa2x1_D3D:
+ return VK_SAMPLE_COUNT_2_BIT;
+ case Tegra::Texture::MsaaMode::Msaa2x2:
+ case Tegra::Texture::MsaaMode::Msaa2x2_VC4:
+ case Tegra::Texture::MsaaMode::Msaa2x2_VC12:
+ return VK_SAMPLE_COUNT_4_BIT;
+ case Tegra::Texture::MsaaMode::Msaa4x2:
+ case Tegra::Texture::MsaaMode::Msaa4x2_D3D:
+ case Tegra::Texture::MsaaMode::Msaa4x2_VC8:
+ case Tegra::Texture::MsaaMode::Msaa4x2_VC24:
+ return VK_SAMPLE_COUNT_8_BIT;
+ case Tegra::Texture::MsaaMode::Msaa4x4:
+ return VK_SAMPLE_COUNT_16_BIT;
+ default:
+ UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast<int>(msaa_mode));
+ return VK_SAMPLE_COUNT_1_BIT;
+ }
+}
+
} // namespace Vulkan::MaxwellToVK
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h
index e3e06ba38..8a9616039 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.h
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -5,6 +5,7 @@
#pragma once
#include "common/common_types.h"
+#include "shader_recompiler/stage.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/surface.h"
#include "video_core/textures/texture.h"
@@ -45,7 +46,7 @@ struct FormatInfo {
[[nodiscard]] FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with_srgb,
PixelFormat pixel_format);
-VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage);
+VkShaderStageFlagBits ShaderStage(Shader::Stage stage);
VkPrimitiveTopology PrimitiveTopology(const Device& device, Maxwell::PrimitiveTopology topology);
@@ -65,10 +66,14 @@ VkFrontFace FrontFace(Maxwell::FrontFace front_face);
VkCullModeFlagBits CullFace(Maxwell::CullFace cull_face);
+VkPolygonMode PolygonMode(Maxwell::PolygonMode polygon_mode);
+
VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle);
VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle);
VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction);
+VkSampleCountFlagBits MsaaMode(Tegra::Texture::MsaaMode msaa_mode);
+
} // namespace Vulkan::MaxwellToVK
diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h
new file mode 100644
index 000000000..4847db6b6
--- /dev/null
+++ b/src/video_core/renderer_vulkan/pipeline_helper.h
@@ -0,0 +1,154 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <cstddef>
+
+#include <boost/container/small_vector.hpp>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "shader_recompiler/shader_info.h"
+#include "video_core/renderer_vulkan/vk_texture_cache.h"
+#include "video_core/renderer_vulkan/vk_update_descriptor.h"
+#include "video_core/texture_cache/texture_cache.h"
+#include "video_core/texture_cache/types.h"
+#include "video_core/textures/texture.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+
+namespace Vulkan {
+
+class DescriptorLayoutBuilder {
+public:
+ DescriptorLayoutBuilder(const Device& device_) : device{&device_} {}
+
+ bool CanUsePushDescriptor() const noexcept {
+ return device->IsKhrPushDescriptorSupported() &&
+ num_descriptors <= device->MaxPushDescriptors();
+ }
+
+ vk::DescriptorSetLayout CreateDescriptorSetLayout(bool use_push_descriptor) const {
+ if (bindings.empty()) {
+ return nullptr;
+ }
+ const VkDescriptorSetLayoutCreateFlags flags =
+ use_push_descriptor ? VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR : 0;
+ return device->GetLogical().CreateDescriptorSetLayout({
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = flags,
+ .bindingCount = static_cast<u32>(bindings.size()),
+ .pBindings = bindings.data(),
+ });
+ }
+
+ vk::DescriptorUpdateTemplateKHR CreateTemplate(VkDescriptorSetLayout descriptor_set_layout,
+ VkPipelineLayout pipeline_layout,
+ bool use_push_descriptor) const {
+ if (entries.empty()) {
+ return nullptr;
+ }
+ const VkDescriptorUpdateTemplateType type =
+ use_push_descriptor ? VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR
+ : VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR;
+ return device->GetLogical().CreateDescriptorUpdateTemplateKHR({
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR,
+ .pNext = nullptr,
+ .flags = 0,
+ .descriptorUpdateEntryCount = static_cast<u32>(entries.size()),
+ .pDescriptorUpdateEntries = entries.data(),
+ .templateType = type,
+ .descriptorSetLayout = descriptor_set_layout,
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .pipelineLayout = pipeline_layout,
+ .set = 0,
+ });
+ }
+
+ vk::PipelineLayout CreatePipelineLayout(VkDescriptorSetLayout descriptor_set_layout) const {
+ return device->GetLogical().CreatePipelineLayout({
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .setLayoutCount = descriptor_set_layout ? 1U : 0U,
+ .pSetLayouts = bindings.empty() ? nullptr : &descriptor_set_layout,
+ .pushConstantRangeCount = 0,
+ .pPushConstantRanges = nullptr,
+ });
+ }
+
+ void Add(const Shader::Info& info, VkShaderStageFlags stage) {
+ Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage, info.constant_buffer_descriptors);
+ Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage, info.storage_buffers_descriptors);
+ Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage, info.texture_buffer_descriptors);
+ Add(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, stage, info.image_buffer_descriptors);
+ Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage, info.texture_descriptors);
+ Add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, stage, info.image_descriptors);
+ }
+
+private:
+ template <typename Descriptors>
+ void Add(VkDescriptorType type, VkShaderStageFlags stage, const Descriptors& descriptors) {
+ const size_t num{descriptors.size()};
+ for (size_t i = 0; i < num; ++i) {
+ bindings.push_back({
+ .binding = binding,
+ .descriptorType = type,
+ .descriptorCount = descriptors[i].count,
+ .stageFlags = stage,
+ .pImmutableSamplers = nullptr,
+ });
+ entries.push_back({
+ .dstBinding = binding,
+ .dstArrayElement = 0,
+ .descriptorCount = descriptors[i].count,
+ .descriptorType = type,
+ .offset = offset,
+ .stride = sizeof(DescriptorUpdateEntry),
+ });
+ ++binding;
+ num_descriptors += descriptors[i].count;
+ offset += sizeof(DescriptorUpdateEntry);
+ }
+ }
+
+ const Device* device{};
+ boost::container::small_vector<VkDescriptorSetLayoutBinding, 32> bindings;
+ boost::container::small_vector<VkDescriptorUpdateTemplateEntryKHR, 32> entries;
+ u32 binding{};
+ u32 num_descriptors{};
+ size_t offset{};
+};
+
+inline void PushImageDescriptors(const Shader::Info& info, const VkSampler*& samplers,
+ const ImageId*& image_view_ids, TextureCache& texture_cache,
+ VKUpdateDescriptorQueue& update_descriptor_queue) {
+ for (const auto& desc : info.texture_buffer_descriptors) {
+ image_view_ids += desc.count;
+ }
+ for (const auto& desc : info.image_buffer_descriptors) {
+ image_view_ids += desc.count;
+ }
+ for (const auto& desc : info.texture_descriptors) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ const VkSampler sampler{*(samplers++)};
+ ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))};
+ const VkImageView vk_image_view{image_view.Handle(desc.type)};
+ update_descriptor_queue.AddSampledImage(vk_image_view, sampler);
+ }
+ }
+ for (const auto& desc : info.image_descriptors) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))};
+ if (desc.is_written) {
+ texture_cache.MarkModification(image_view.image_id);
+ }
+ const VkImageView vk_image_view{image_view.StorageView(desc.type, desc.format)};
+ update_descriptor_queue.AddImage(vk_image_view);
+ }
+ }
+}
+
+} // namespace Vulkan
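DescriptorLayoutBuilder in the new pipeline_helper.h gathers one binding per descriptor declared in a stage's Shader::Info and then emits the set layout, pipeline layout and update template in one pass. A minimal usage sketch (stage_infos and the single ALL_GRAPHICS stage mask are simplifications; real pipelines would add each stage with its own stage bit, and device is the Vulkan::Device already used throughout the backend):

    DescriptorLayoutBuilder builder{device};
    for (const Shader::Info* info : stage_infos) {
        builder.Add(*info, VK_SHADER_STAGE_ALL_GRAPHICS);
    }
    const bool use_push = builder.CanUsePushDescriptor();
    vk::DescriptorSetLayout set_layout = builder.CreateDescriptorSetLayout(use_push);
    vk::PipelineLayout pipeline_layout = builder.CreatePipelineLayout(*set_layout);
    vk::DescriptorUpdateTemplateKHR update_template =
        builder.CreateTemplate(*set_layout, *pipeline_layout, use_push);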
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 3986eb172..a8d04dc61 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -103,7 +103,7 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
gpu(gpu_),
library(OpenLibrary()),
instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
- true, Settings::values.renderer_debug)),
+ true, Settings::values.renderer_debug.GetValue())),
debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr),
surface(CreateSurface(instance, render_window)),
device(CreateDevice(instance, dld, *surface)),
@@ -130,35 +130,45 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
if (!framebuffer) {
return;
}
- const auto& layout = render_window.GetFramebufferLayout();
- if (layout.width > 0 && layout.height > 0 && render_window.IsShown()) {
- const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset;
- const bool use_accelerated =
- rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
- const bool is_srgb = use_accelerated && screen_info.is_srgb;
- if (swapchain.HasFramebufferChanged(layout) || swapchain.GetSrgbState() != is_srgb) {
- swapchain.Create(layout.width, layout.height, is_srgb);
- blit_screen.Recreate();
- }
-
- scheduler.WaitWorker();
-
- while (!swapchain.AcquireNextImage()) {
- swapchain.Create(layout.width, layout.height, is_srgb);
- blit_screen.Recreate();
+ SCOPE_EXIT({ render_window.OnFrameDisplayed(); });
+ if (!render_window.IsShown()) {
+ return;
+ }
+ const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset;
+ const bool use_accelerated =
+ rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
+ const bool is_srgb = use_accelerated && screen_info.is_srgb;
+
+ bool has_been_recreated = false;
+ const auto recreate_swapchain = [&] {
+ if (!has_been_recreated) {
+ has_been_recreated = true;
+ scheduler.WaitWorker();
}
- const VkSemaphore render_semaphore = blit_screen.Draw(*framebuffer, use_accelerated);
-
- scheduler.Flush(render_semaphore);
-
- if (swapchain.Present(render_semaphore)) {
- blit_screen.Recreate();
+ const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout();
+ swapchain.Create(layout.width, layout.height, is_srgb);
+ };
+ if (swapchain.IsSubOptimal() || swapchain.HasColorSpaceChanged(is_srgb)) {
+ recreate_swapchain();
+ }
+ bool is_outdated;
+ do {
+ swapchain.AcquireNextImage();
+ is_outdated = swapchain.IsOutDated();
+ if (is_outdated) {
+ recreate_swapchain();
}
- gpu.RendererFrameEndNotify();
- rasterizer.TickFrame();
+ } while (is_outdated);
+ if (has_been_recreated) {
+ blit_screen.Recreate();
}
+ const VkSemaphore render_semaphore = blit_screen.Draw(*framebuffer, use_accelerated);
+ scheduler.Flush(render_semaphore);
+ scheduler.WaitWorker();
+ swapchain.Present(render_semaphore);
- render_window.OnFrameDisplayed();
+ gpu.RendererFrameEndNotify();
+ rasterizer.TickFrame();
}
void RendererVulkan::Report() const {
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index 72071316c..d7d17e110 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -47,6 +47,10 @@ public:
return &rasterizer;
}
+ [[nodiscard]] std::string GetDeviceVendor() const override {
+ return device.GetDriverName();
+ }
+
private:
void Report() const;
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index a1a32aabe..516f428e7 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -184,55 +184,54 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
.depth = 1,
},
};
- scheduler.Record(
- [buffer = *buffer, image = *raw_images[image_index], copy](vk::CommandBuffer cmdbuf) {
- const VkImageMemoryBarrier base_barrier{
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .pNext = nullptr,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
- .newLayout = VK_IMAGE_LAYOUT_GENERAL,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = image,
- .subresourceRange =
- {
- .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .baseMipLevel = 0,
- .levelCount = 1,
- .baseArrayLayer = 0,
- .layerCount = 1,
- },
- };
- VkImageMemoryBarrier read_barrier = base_barrier;
- read_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
- read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
- read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
-
- VkImageMemoryBarrier write_barrier = base_barrier;
- write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
- write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
-
- cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
- 0, read_barrier);
- cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy);
- cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
- VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier);
- });
+ scheduler.Record([this, copy, image_index](vk::CommandBuffer cmdbuf) {
+ const VkImage image = *raw_images[image_index];
+ const VkImageMemoryBarrier base_barrier{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = image,
+ .subresourceRange{
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = 0,
+ .levelCount = 1,
+ .baseArrayLayer = 0,
+ .layerCount = 1,
+ },
+ };
+ VkImageMemoryBarrier read_barrier = base_barrier;
+ read_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
+ read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+ read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+
+ VkImageMemoryBarrier write_barrier = base_barrier;
+ write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+ write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
+
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
+ read_barrier);
+ cmdbuf.CopyBufferToImage(*buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy);
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
+ VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier);
+ });
}
- scheduler.Record([renderpass = *renderpass, framebuffer = *framebuffers[image_index],
- descriptor_set = descriptor_sets[image_index], buffer = *buffer,
- size = swapchain.GetSize(), pipeline = *pipeline,
- layout = *pipeline_layout](vk::CommandBuffer cmdbuf) {
+ scheduler.Record([this, image_index, size = swapchain.GetSize()](vk::CommandBuffer cmdbuf) {
+ const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f;
+ const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f;
+ const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f;
const VkClearValue clear_color{
- .color = {.float32 = {0.0f, 0.0f, 0.0f, 0.0f}},
+ .color = {.float32 = {bg_red, bg_green, bg_blue, 1.0f}},
};
const VkRenderPassBeginInfo renderpass_bi{
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
.pNext = nullptr,
- .renderPass = renderpass,
- .framebuffer = framebuffer,
+ .renderPass = *renderpass,
+ .framebuffer = *framebuffers[image_index],
.renderArea =
{
.offset = {0, 0},
@@ -254,12 +253,13 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
.extent = size,
};
cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
- cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
+ cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
cmdbuf.SetViewport(0, viewport);
cmdbuf.SetScissor(0, scissor);
- cmdbuf.BindVertexBuffer(0, buffer, offsetof(BufferData, vertices));
- cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, {});
+ cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices));
+ cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0,
+ descriptor_sets[image_index], {});
cmdbuf.Draw(4, 1, 0, 0);
cmdbuf.EndRenderPass();
});
@@ -301,8 +301,7 @@ void VKBlitScreen::CreateShaders() {
void VKBlitScreen::CreateSemaphores() {
semaphores.resize(image_count);
- std::generate(semaphores.begin(), semaphores.end(),
- [this] { return device.GetLogical().CreateSemaphore(); });
+ std::ranges::generate(semaphores, [this] { return device.GetLogical().CreateSemaphore(); });
}
void VKBlitScreen::CreateDescriptorPool() {
@@ -630,8 +629,8 @@ void VKBlitScreen::CreateFramebuffers() {
}
void VKBlitScreen::ReleaseRawImages() {
- for (std::size_t i = 0; i < raw_images.size(); ++i) {
- scheduler.Wait(resource_ticks.at(i));
+ for (const u64 tick : resource_ticks) {
+ scheduler.Wait(tick);
}
raw_images.clear();
raw_buffer_commits.clear();
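
The vk_blit_screen.cpp hunk above also switches the clear color from constant black to the user's background color settings, scaling each 8-bit channel into the 0.0-1.0 range a VkClearValue expects and forcing alpha to opaque. A self-contained sketch of that conversion; MakeClearColor is an illustrative helper, not part of the codebase.

    #include <array>
    #include <cstdio>

    // Convert 8-bit background color settings (0-255) into normalized floats,
    // with alpha forced to opaque as in the hunk above.
    std::array<float, 4> MakeClearColor(int red, int green, int blue) {
        return {red / 255.0f, green / 255.0f, blue / 255.0f, 1.0f};
    }

    int main() {
        const auto color = MakeClearColor(0, 0, 0);
        std::printf("%.3f %.3f %.3f %.3f\n", color[0], color[1], color[2], color[3]);
    }
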
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 8cb65e588..8ac58bc2f 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -55,42 +55,79 @@ size_t BytesPerIndex(VkIndexType index_type) {
template <typename T>
std::array<T, 6> MakeQuadIndices(u32 quad, u32 first) {
std::array<T, 6> indices{0, 1, 2, 0, 2, 3};
- std::ranges::transform(indices, indices.begin(),
- [quad, first](u32 index) { return first + index + quad * 4; });
+ for (T& index : indices) {
+ index = static_cast<T>(first + index + quad * 4);
+ }
return indices;
}
-} // Anonymous namespace
-Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
- : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {}
-
-Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
- VAddr cpu_addr_, u64 size_bytes_)
- : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) {
- buffer = runtime.device.GetLogical().CreateBuffer(VkBufferCreateInfo{
+vk::Buffer CreateBuffer(const Device& device, u64 size) {
+ VkBufferUsageFlags flags =
+ VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
+ VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT |
+ VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+ VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
+ if (device.IsExtTransformFeedbackSupported()) {
+ flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
+ }
+ return device.GetLogical().CreateBuffer({
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
- .size = SizeBytes(),
- .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
- VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
- VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
- VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
- VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
+ .size = size,
+ .usage = flags,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
});
+}
+} // Anonymous namespace
+
+Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
+ : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {}
+
+Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
+ VAddr cpu_addr_, u64 size_bytes_)
+ : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_),
+ device{&runtime.device}, buffer{CreateBuffer(*device, SizeBytes())},
+ commit{runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal)} {
if (runtime.device.HasDebuggingToolAttached()) {
buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str());
}
- commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
+}
+
+VkBufferView Buffer::View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format) {
+ if (!device) {
+ // Null buffer, return a null descriptor
+ return VK_NULL_HANDLE;
+ }
+ const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) {
+ return offset == view.offset && size == view.size && format == view.format;
+ })};
+ if (it != views.end()) {
+ return *it->handle;
+ }
+ views.push_back({
+ .offset = offset,
+ .size = size,
+ .format = format,
+ .handle = device->GetLogical().CreateBufferView({
+ .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .buffer = *buffer,
+ .format = MaxwellToVK::SurfaceFormat(*device, FormatType::Buffer, false, format).format,
+ .offset = offset,
+ .range = size,
+ }),
+ });
+ return *views.back().handle;
}
BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_,
VKScheduler& scheduler_, StagingBufferPool& staging_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_,
- VKDescriptorPool& descriptor_pool)
+ DescriptorPool& descriptor_pool)
: device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_},
uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
@@ -135,6 +172,30 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
});
}
+void BufferCacheRuntime::ClearBuffer(VkBuffer dest_buffer, u32 offset, size_t size, u32 value) {
+ static constexpr VkMemoryBarrier READ_BARRIER{
+ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
+ };
+ static constexpr VkMemoryBarrier WRITE_BARRIER{
+ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
+ };
+
+ scheduler.RequestOutsideRenderPassOperationContext();
+ scheduler.Record([dest_buffer, offset, size, value](vk::CommandBuffer cmdbuf) {
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+ 0, READ_BARRIER);
+ cmdbuf.FillBuffer(dest_buffer, offset, size, value);
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+ 0, WRITE_BARRIER);
+ });
+}
+
void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format,
u32 base_vertex, u32 num_indices, VkBuffer buffer,
u32 offset, [[maybe_unused]] u32 size) {
@@ -151,8 +212,8 @@ void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat
}
if (vk_buffer == VK_NULL_HANDLE) {
// Vulkan doesn't support null index buffers. Replace it with our own null buffer.
- ReserveNullIndexBuffer();
- vk_buffer = *null_index_buffer;
+ ReserveNullBuffer();
+ vk_buffer = *null_buffer;
}
scheduler.Record([vk_buffer, vk_offset, vk_index_type](vk::CommandBuffer cmdbuf) {
cmdbuf.BindIndexBuffer(vk_buffer, vk_offset, vk_index_type);
@@ -160,6 +221,13 @@ void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat
}
void BufferCacheRuntime::BindQuadArrayIndexBuffer(u32 first, u32 count) {
+ if (count == 0) {
+ ReserveNullBuffer();
+ scheduler.Record([this](vk::CommandBuffer cmdbuf) {
+ cmdbuf.BindIndexBuffer(*null_buffer, 0, VK_INDEX_TYPE_UINT32);
+ });
+ return;
+ }
ReserveQuadArrayLUT(first + count, true);
// The LUT has the indices 0, 1, 2, and 3 copied as an array
@@ -194,6 +262,14 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer,
// Already logged in the rasterizer
return;
}
+ if (buffer == VK_NULL_HANDLE) {
+ // Vulkan doesn't support null transform feedback buffers.
+ // Replace it with our own null buffer.
+ ReserveNullBuffer();
+ buffer = *null_buffer;
+ offset = 0;
+ size = 0;
+ }
scheduler.Record([index, buffer, offset, size](vk::CommandBuffer cmdbuf) {
const VkDeviceSize vk_offset = offset;
const VkDeviceSize vk_size = size;
@@ -278,11 +354,11 @@ void BufferCacheRuntime::ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle
});
}
-void BufferCacheRuntime::ReserveNullIndexBuffer() {
- if (null_index_buffer) {
+void BufferCacheRuntime::ReserveNullBuffer() {
+ if (null_buffer) {
return;
}
- null_index_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
+ VkBufferCreateInfo create_info{
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@@ -291,14 +367,18 @@ void BufferCacheRuntime::ReserveNullIndexBuffer() {
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
- });
+ };
+ if (device.IsExtTransformFeedbackSupported()) {
+ create_info.usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
+ }
+ null_buffer = device.GetLogical().CreateBuffer(create_info);
if (device.HasDebuggingToolAttached()) {
- null_index_buffer.SetObjectNameEXT("Null index buffer");
+ null_buffer.SetObjectNameEXT("Null buffer");
}
- null_index_buffer_commit = memory_allocator.Commit(null_index_buffer, MemoryUsage::DeviceLocal);
+ null_buffer_commit = memory_allocator.Commit(null_buffer, MemoryUsage::DeviceLocal);
scheduler.RequestOutsideRenderPassOperationContext();
- scheduler.Record([buffer = *null_index_buffer](vk::CommandBuffer cmdbuf) {
+ scheduler.Record([buffer = *null_buffer](vk::CommandBuffer cmdbuf) {
cmdbuf.FillBuffer(buffer, 0, VK_WHOLE_SIZE, 0);
});
}
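
The new Buffer::View above creates VkBufferViews lazily and reuses them whenever the same (offset, size, format) triple is requested again. Below is a plain C++ sketch of that lookup-or-create cache, with an int handle standing in for vk::BufferView; every name here is a stand-in, not the buffer cache's actual API.

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    struct CachedView {
        unsigned offset;
        unsigned size;
        int format;
        int handle;  // stand-in for vk::BufferView
    };

    int GetView(std::vector<CachedView>& views, unsigned offset, unsigned size, int format) {
        const auto it = std::find_if(views.begin(), views.end(), [&](const CachedView& view) {
            return view.offset == offset && view.size == size && view.format == format;
        });
        if (it != views.end()) {
            return it->handle;  // reuse the previously created view
        }
        static int next_handle = 1;
        views.push_back({offset, size, format, next_handle++});  // create and cache a new one
        return views.back().handle;
    }

    int main() {
        std::vector<CachedView> views;
        std::printf("%d\n", GetView(views, 0, 256, 7));  // creates handle 1
        std::printf("%d\n", GetView(views, 0, 256, 7));  // cache hit, still 1
    }
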
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 982e92191..c27402ff0 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -9,13 +9,14 @@
#include "video_core/renderer_vulkan/vk_compute_pass.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
+#include "video_core/surface.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
class Device;
-class VKDescriptorPool;
+class DescriptorPool;
class VKScheduler;
class BufferCacheRuntime;
@@ -26,6 +27,8 @@ public:
explicit Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
VAddr cpu_addr_, u64 size_bytes_);
+ [[nodiscard]] VkBufferView View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format);
+
[[nodiscard]] VkBuffer Handle() const noexcept {
return *buffer;
}
@@ -35,8 +38,17 @@ public:
}
private:
+ struct BufferView {
+ u32 offset;
+ u32 size;
+ VideoCore::Surface::PixelFormat format;
+ vk::BufferView handle;
+ };
+
+ const Device* device{};
vk::Buffer buffer;
MemoryCommit commit;
+ std::vector<BufferView> views;
};
class BufferCacheRuntime {
@@ -49,7 +61,7 @@ public:
explicit BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_manager_,
VKScheduler& scheduler_, StagingBufferPool& staging_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_,
- VKDescriptorPool& descriptor_pool);
+ DescriptorPool& descriptor_pool);
void Finish();
@@ -60,6 +72,8 @@ public:
void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer,
std::span<const VideoCommon::BufferCopy> copies);
+ void ClearBuffer(VkBuffer dest_buffer, u32 offset, size_t size, u32 value);
+
void BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format, u32 num_indices,
u32 base_vertex, VkBuffer buffer, u32 offset, u32 size);
@@ -85,6 +99,11 @@ public:
BindBuffer(buffer, offset, size);
}
+ void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size,
+ VideoCore::Surface::PixelFormat format) {
+ update_descriptor_queue.AddTexelBuffer(buffer.View(offset, size, format));
+ }
+
private:
void BindBuffer(VkBuffer buffer, u32 offset, u32 size) {
update_descriptor_queue.AddBuffer(buffer, offset, size);
@@ -92,7 +111,7 @@ private:
void ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle);
- void ReserveNullIndexBuffer();
+ void ReserveNullBuffer();
const Device& device;
MemoryAllocator& memory_allocator;
@@ -105,8 +124,8 @@ private:
VkIndexType quad_array_lut_index_type{};
u32 current_num_indices = 0;
- vk::Buffer null_index_buffer;
- MemoryCommit null_index_buffer_commit;
+ vk::Buffer null_buffer;
+ MemoryCommit null_buffer_commit;
Uint8Pass uint8_pass;
QuadIndexedPass quad_index_pass;
@@ -122,6 +141,7 @@ struct BufferCacheParams {
static constexpr bool NEEDS_BIND_UNIFORM_INDEX = false;
static constexpr bool NEEDS_BIND_STORAGE_INDEX = false;
static constexpr bool USE_MEMORY_MAPS = true;
+ static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false;
};
using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
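
ReserveNullBuffer in the vk_buffer_cache.cpp hunk above generalizes the old null index buffer: one zero-filled buffer is created the first time it is needed and substituted for any index or transform-feedback binding that would otherwise be null. A minimal sketch of that reserve-on-first-use fallback, assuming toy integer handles rather than VkBuffer.

    #include <cstdio>

    struct NullBufferOwner {
        bool has_null_buffer = false;

        int Bind(int requested_buffer) {
            if (requested_buffer != 0) {
                return requested_buffer;  // normal path: bind what was asked for
            }
            if (!has_null_buffer) {
                has_null_buffer = true;   // create and zero-fill it exactly once
                std::puts("creating null buffer");
            }
            return -1;                    // stand-in handle for the shared null buffer
        }
    };

    int main() {
        NullBufferOwner owner;
        owner.Bind(0);   // first null bind creates the buffer
        owner.Bind(0);   // later null binds reuse it
        owner.Bind(42);  // real buffers pass through untouched
    }
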
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index e11406e58..561cf5e11 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -30,121 +30,103 @@
namespace Vulkan {
using Tegra::Texture::SWIZZLE_TABLE;
-using Tegra::Texture::ASTC::EncodingsValues;
+using Tegra::Texture::ASTC::ASTC_ENCODINGS_VALUES;
using namespace Tegra::Texture::ASTC;
namespace {
constexpr u32 ASTC_BINDING_INPUT_BUFFER = 0;
constexpr u32 ASTC_BINDING_ENC_BUFFER = 1;
-constexpr u32 ASTC_BINDING_6_TO_8_BUFFER = 2;
-constexpr u32 ASTC_BINDING_7_TO_8_BUFFER = 3;
-constexpr u32 ASTC_BINDING_8_TO_8_BUFFER = 4;
-constexpr u32 ASTC_BINDING_BYTE_TO_16_BUFFER = 5;
-constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 6;
-constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 7;
-
-VkPushConstantRange BuildComputePushConstantRange(std::size_t size) {
- return {
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .offset = 0,
- .size = static_cast<u32>(size),
- };
-}
-
-std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBindings() {
- return {{
- {
- .binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = nullptr,
- },
- {
- .binding = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = nullptr,
- },
- }};
-}
+constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 2;
+constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 3;
+constexpr size_t ASTC_NUM_BINDINGS = 4;
+
+template <size_t size>
+inline constexpr VkPushConstantRange COMPUTE_PUSH_CONSTANT_RANGE{
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .offset = 0,
+ .size = static_cast<u32>(size),
+};
-std::array<VkDescriptorSetLayoutBinding, 8> BuildASTCDescriptorSetBindings() {
- return {{
- {
- .binding = ASTC_BINDING_INPUT_BUFFER,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = nullptr,
- },
- {
- .binding = ASTC_BINDING_ENC_BUFFER,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = nullptr,
- },
- {
- .binding = ASTC_BINDING_6_TO_8_BUFFER,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = nullptr,
- },
- {
- .binding = ASTC_BINDING_7_TO_8_BUFFER,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = nullptr,
- },
- {
- .binding = ASTC_BINDING_8_TO_8_BUFFER,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = nullptr,
- },
- {
- .binding = ASTC_BINDING_BYTE_TO_16_BUFFER,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = nullptr,
- },
- {
- .binding = ASTC_BINDING_SWIZZLE_BUFFER,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = nullptr,
- },
- {
- .binding = ASTC_BINDING_OUTPUT_IMAGE,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = nullptr,
- },
- }};
-}
+constexpr std::array<VkDescriptorSetLayoutBinding, 2> INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS{{
+ {
+ .binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = nullptr,
+ },
+ {
+ .binding = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = nullptr,
+ },
+}};
+
+constexpr DescriptorBankInfo INPUT_OUTPUT_BANK_INFO{
+ .uniform_buffers = 0,
+ .storage_buffers = 2,
+ .texture_buffers = 0,
+ .image_buffers = 0,
+ .textures = 0,
+ .images = 0,
+ .score = 2,
+};
-VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() {
- return {
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 2,
+constexpr std::array<VkDescriptorSetLayoutBinding, 4> ASTC_DESCRIPTOR_SET_BINDINGS{{
+ {
+ .binding = ASTC_BINDING_INPUT_BUFFER,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .offset = 0,
- .stride = sizeof(DescriptorUpdateEntry),
- };
-}
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = nullptr,
+ },
+ {
+ .binding = ASTC_BINDING_ENC_BUFFER,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = nullptr,
+ },
+ {
+ .binding = ASTC_BINDING_SWIZZLE_BUFFER,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = nullptr,
+ },
+ {
+ .binding = ASTC_BINDING_OUTPUT_IMAGE,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = nullptr,
+ },
+}};
+
+constexpr DescriptorBankInfo ASTC_BANK_INFO{
+ .uniform_buffers = 0,
+ .storage_buffers = 3,
+ .texture_buffers = 0,
+ .image_buffers = 0,
+ .textures = 0,
+ .images = 1,
+ .score = 4,
+};
+
+constexpr VkDescriptorUpdateTemplateEntryKHR INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE{
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 2,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .offset = 0,
+ .stride = sizeof(DescriptorUpdateEntry),
+};
-std::array<VkDescriptorUpdateTemplateEntryKHR, 8> BuildASTCPassDescriptorUpdateTemplateEntry() {
- return {{
+constexpr std::array<VkDescriptorUpdateTemplateEntryKHR, ASTC_NUM_BINDINGS>
+ ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY{{
{
.dstBinding = ASTC_BINDING_INPUT_BUFFER,
.dstArrayElement = 0,
@@ -162,38 +144,6 @@ std::array<VkDescriptorUpdateTemplateEntryKHR, 8> BuildASTCPassDescriptorUpdateT
.stride = sizeof(DescriptorUpdateEntry),
},
{
- .dstBinding = ASTC_BINDING_6_TO_8_BUFFER,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .offset = ASTC_BINDING_6_TO_8_BUFFER * sizeof(DescriptorUpdateEntry),
- .stride = sizeof(DescriptorUpdateEntry),
- },
- {
- .dstBinding = ASTC_BINDING_7_TO_8_BUFFER,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .offset = ASTC_BINDING_7_TO_8_BUFFER * sizeof(DescriptorUpdateEntry),
- .stride = sizeof(DescriptorUpdateEntry),
- },
- {
- .dstBinding = ASTC_BINDING_8_TO_8_BUFFER,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .offset = ASTC_BINDING_8_TO_8_BUFFER * sizeof(DescriptorUpdateEntry),
- .stride = sizeof(DescriptorUpdateEntry),
- },
- {
- .dstBinding = ASTC_BINDING_BYTE_TO_16_BUFFER,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .offset = ASTC_BINDING_BYTE_TO_16_BUFFER * sizeof(DescriptorUpdateEntry),
- .stride = sizeof(DescriptorUpdateEntry),
- },
- {
.dstBinding = ASTC_BINDING_SWIZZLE_BUFFER,
.dstArrayElement = 0,
.descriptorCount = 1,
@@ -210,7 +160,6 @@ std::array<VkDescriptorUpdateTemplateEntryKHR, 8> BuildASTCPassDescriptorUpdateT
.stride = sizeof(DescriptorUpdateEntry),
},
}};
-}
struct AstcPushConstants {
std::array<u32, 2> blocks_dims;
@@ -221,23 +170,14 @@ struct AstcPushConstants {
u32 block_height;
u32 block_height_mask;
};
-
-struct AstcBufferData {
- decltype(SWIZZLE_TABLE) swizzle_table_buffer = SWIZZLE_TABLE;
- decltype(EncodingsValues) encoding_values = EncodingsValues;
- decltype(REPLICATE_6_BIT_TO_8_TABLE) replicate_6_to_8 = REPLICATE_6_BIT_TO_8_TABLE;
- decltype(REPLICATE_7_BIT_TO_8_TABLE) replicate_7_to_8 = REPLICATE_7_BIT_TO_8_TABLE;
- decltype(REPLICATE_8_BIT_TO_8_TABLE) replicate_8_to_8 = REPLICATE_8_BIT_TO_8_TABLE;
- decltype(REPLICATE_BYTE_TO_16_TABLE) replicate_byte_to_16 = REPLICATE_BYTE_TO_16_TABLE;
-} constexpr ASTC_BUFFER_DATA;
-
} // Anonymous namespace
-VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool,
- vk::Span<VkDescriptorSetLayoutBinding> bindings,
- vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
- vk::Span<VkPushConstantRange> push_constants,
- std::span<const u32> code) {
+ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool,
+ vk::Span<VkDescriptorSetLayoutBinding> bindings,
+ vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
+ const DescriptorBankInfo& bank_info,
+ vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code)
+ : device{device_} {
descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.pNext = nullptr,
@@ -267,8 +207,7 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_
.pipelineLayout = *layout,
.set = 0,
});
-
- descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout);
+ descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, bank_info);
}
module = device.GetLogical().CreateShaderModule({
.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
@@ -277,43 +216,34 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_
.codeSize = static_cast<u32>(code.size_bytes()),
.pCode = code.data(),
});
+ device.SaveShader(code);
pipeline = device.GetLogical().CreateComputePipeline({
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
- .stage =
- {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .pNext = nullptr,
- .flags = 0,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = *module,
- .pName = "main",
- .pSpecializationInfo = nullptr,
- },
+ .stage{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = *module,
+ .pName = "main",
+ .pSpecializationInfo = nullptr,
+ },
.layout = *layout,
.basePipelineHandle = nullptr,
.basePipelineIndex = 0,
});
}
-VKComputePass::~VKComputePass() = default;
+ComputePass::~ComputePass() = default;
-VkDescriptorSet VKComputePass::CommitDescriptorSet(
- VKUpdateDescriptorQueue& update_descriptor_queue) {
- if (!descriptor_template) {
- return nullptr;
- }
- const VkDescriptorSet set = descriptor_allocator->Commit();
- update_descriptor_queue.Send(*descriptor_template, set);
- return set;
-}
-
-Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_,
- VKDescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_,
+Uint8Pass::Uint8Pass(const Device& device_, VKScheduler& scheduler_,
+ DescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_)
- : VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(),
- BuildInputOutputDescriptorUpdateTemplate(), {}, VULKAN_UINT8_COMP_SPV),
+ : ComputePass(device_, descriptor_pool, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS,
+ INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, {},
+ VULKAN_UINT8_COMP_SPV),
scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
update_descriptor_queue{update_descriptor_queue_} {}
@@ -327,11 +257,10 @@ std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer
update_descriptor_queue.Acquire();
update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices);
update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
- const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
+ const void* const descriptor_data{update_descriptor_queue.UpdateData()};
scheduler.RequestOutsideRenderPassOperationContext();
- scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set,
- num_vertices](vk::CommandBuffer cmdbuf) {
+ scheduler.Record([this, descriptor_data, num_vertices](vk::CommandBuffer cmdbuf) {
static constexpr u32 DISPATCH_SIZE = 1024;
static constexpr VkMemoryBarrier WRITE_BARRIER{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
@@ -339,8 +268,10 @@ std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
};
- cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
- cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {});
+ const VkDescriptorSet set = descriptor_allocator.Commit();
+ device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
+ cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
+ cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
cmdbuf.Dispatch(Common::DivCeil(num_vertices, DISPATCH_SIZE), 1, 1);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER);
@@ -349,12 +280,12 @@ std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer
}
QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,
- VKDescriptorPool& descriptor_pool_,
+ DescriptorPool& descriptor_pool_,
StagingBufferPool& staging_buffer_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_)
- : VKComputePass(device_, descriptor_pool_, BuildInputOutputDescriptorSetBindings(),
- BuildInputOutputDescriptorUpdateTemplate(),
- BuildComputePushConstantRange(sizeof(u32) * 2), VULKAN_QUAD_INDEXED_COMP_SPV),
+ : ComputePass(device_, descriptor_pool_, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS,
+ INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO,
+ COMPUTE_PUSH_CONSTANT_RANGE<sizeof(u32) * 2>, VULKAN_QUAD_INDEXED_COMP_SPV),
scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
update_descriptor_queue{update_descriptor_queue_} {}
@@ -384,22 +315,24 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
update_descriptor_queue.Acquire();
update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size);
update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
- const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
+ const void* const descriptor_data{update_descriptor_queue.UpdateData()};
scheduler.RequestOutsideRenderPassOperationContext();
- scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set,
- num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) {
+ scheduler.Record([this, descriptor_data, num_tri_vertices, base_vertex,
+ index_shift](vk::CommandBuffer cmdbuf) {
static constexpr u32 DISPATCH_SIZE = 1024;
static constexpr VkMemoryBarrier WRITE_BARRIER{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
- .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
+ .dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
};
- const std::array push_constants = {base_vertex, index_shift};
- cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
- cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {});
- cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants),
+ const std::array push_constants{base_vertex, index_shift};
+ const VkDescriptorSet set = descriptor_allocator.Commit();
+ device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
+ cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
+ cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
+ cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants),
&push_constants);
cmdbuf.Dispatch(Common::DivCeil(num_tri_vertices, DISPATCH_SIZE), 1, 1);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
@@ -409,21 +342,20 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
}
ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_,
- VKDescriptorPool& descriptor_pool_,
+ DescriptorPool& descriptor_pool_,
StagingBufferPool& staging_buffer_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_,
MemoryAllocator& memory_allocator_)
- : VKComputePass(device_, descriptor_pool_, BuildASTCDescriptorSetBindings(),
- BuildASTCPassDescriptorUpdateTemplateEntry(),
- BuildComputePushConstantRange(sizeof(AstcPushConstants)),
- ASTC_DECODER_COMP_SPV),
- device{device_}, scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
+ : ComputePass(device_, descriptor_pool_, ASTC_DESCRIPTOR_SET_BINDINGS,
+ ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY, ASTC_BANK_INFO,
+ COMPUTE_PUSH_CONSTANT_RANGE<sizeof(AstcPushConstants)>, ASTC_DECODER_COMP_SPV),
+ scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
update_descriptor_queue{update_descriptor_queue_}, memory_allocator{memory_allocator_} {}
ASTCDecoderPass::~ASTCDecoderPass() = default;
void ASTCDecoderPass::MakeDataBuffer() {
- constexpr size_t TOTAL_BUFFER_SIZE = sizeof(ASTC_BUFFER_DATA) + sizeof(SWIZZLE_TABLE);
+ constexpr size_t TOTAL_BUFFER_SIZE = sizeof(ASTC_ENCODINGS_VALUES) + sizeof(SWIZZLE_TABLE);
data_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
@@ -437,27 +369,28 @@ void ASTCDecoderPass::MakeDataBuffer() {
data_buffer_commit = memory_allocator.Commit(data_buffer, MemoryUsage::Upload);
const auto staging_ref = staging_buffer_pool.Request(TOTAL_BUFFER_SIZE, MemoryUsage::Upload);
- std::memcpy(staging_ref.mapped_span.data(), &ASTC_BUFFER_DATA, sizeof(ASTC_BUFFER_DATA));
+ std::memcpy(staging_ref.mapped_span.data(), &ASTC_ENCODINGS_VALUES,
+ sizeof(ASTC_ENCODINGS_VALUES));
// Tack on the swizzle table at the end of the buffer
- std::memcpy(staging_ref.mapped_span.data() + sizeof(ASTC_BUFFER_DATA), &SWIZZLE_TABLE,
+ std::memcpy(staging_ref.mapped_span.data() + sizeof(ASTC_ENCODINGS_VALUES), &SWIZZLE_TABLE,
sizeof(SWIZZLE_TABLE));
scheduler.Record([src = staging_ref.buffer, offset = staging_ref.offset, dst = *data_buffer,
TOTAL_BUFFER_SIZE](vk::CommandBuffer cmdbuf) {
- cmdbuf.CopyBuffer(src, dst,
- VkBufferCopy{
- .srcOffset = offset,
- .dstOffset = 0,
- .size = TOTAL_BUFFER_SIZE,
- });
- cmdbuf.PipelineBarrier(
- VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0,
- VkMemoryBarrier{
- .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
- .pNext = nullptr,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
- });
+ static constexpr VkMemoryBarrier write_barrier{
+ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
+ };
+ const VkBufferCopy copy{
+ .srcOffset = offset,
+ .dstOffset = 0,
+ .size = TOTAL_BUFFER_SIZE,
+ };
+ cmdbuf.CopyBuffer(src, dst, copy);
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+ 0, write_barrier);
});
}
@@ -481,7 +414,7 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
const VkImageMemoryBarrier image_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
- .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
+ .srcAccessMask = is_initialized ? VK_ACCESS_SHADER_WRITE_BIT : VkAccessFlags{},
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
.oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
@@ -496,7 +429,8 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
- cmdbuf.PipelineBarrier(is_initialized ? VK_PIPELINE_STAGE_ALL_COMMANDS_BIT : 0,
+ cmdbuf.PipelineBarrier(is_initialized ? VK_PIPELINE_STAGE_ALL_COMMANDS_BIT
+ : VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, image_barrier);
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, vk_pipeline);
});
@@ -509,30 +443,18 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
update_descriptor_queue.Acquire();
update_descriptor_queue.AddBuffer(map.buffer, input_offset,
image.guest_size_bytes - swizzle.buffer_offset);
- update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, encoding_values),
- sizeof(AstcBufferData::encoding_values));
- update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, replicate_6_to_8),
- sizeof(AstcBufferData::replicate_6_to_8));
- update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, replicate_7_to_8),
- sizeof(AstcBufferData::replicate_7_to_8));
- update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, replicate_8_to_8),
- sizeof(AstcBufferData::replicate_8_to_8));
- update_descriptor_queue.AddBuffer(*data_buffer,
- offsetof(AstcBufferData, replicate_byte_to_16),
- sizeof(AstcBufferData::replicate_byte_to_16));
- update_descriptor_queue.AddBuffer(*data_buffer, sizeof(AstcBufferData),
+ update_descriptor_queue.AddBuffer(*data_buffer, 0, sizeof(ASTC_ENCODINGS_VALUES));
+ update_descriptor_queue.AddBuffer(*data_buffer, sizeof(ASTC_ENCODINGS_VALUES),
sizeof(SWIZZLE_TABLE));
update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level));
-
- const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
- const VkPipelineLayout vk_layout = *layout;
+ const void* const descriptor_data{update_descriptor_queue.UpdateData()};
// To unswizzle the ASTC data
const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info);
ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0}));
ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0}));
- scheduler.Record([vk_layout, num_dispatches_x, num_dispatches_y, num_dispatches_z,
- block_dims, params, set](vk::CommandBuffer cmdbuf) {
+ scheduler.Record([this, num_dispatches_x, num_dispatches_y, num_dispatches_z, block_dims,
+ params, descriptor_data](vk::CommandBuffer cmdbuf) {
const AstcPushConstants uniforms{
.blocks_dims = block_dims,
.bytes_per_block_log2 = params.bytes_per_block_log2,
@@ -542,8 +464,10 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
.block_height = params.block_height,
.block_height_mask = params.block_height_mask,
};
- cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, vk_layout, 0, set, {});
- cmdbuf.PushConstants(vk_layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms);
+ const VkDescriptorSet set = descriptor_allocator.Commit();
+ device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
+ cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
+ cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms);
cmdbuf.Dispatch(num_dispatches_x, num_dispatches_y, num_dispatches_z);
});
}
@@ -569,6 +493,7 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, image_barrier);
});
+ scheduler.Finish();
}
} // namespace Vulkan
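
The compute-pass hunks above stop committing descriptor sets eagerly: the descriptor payload is captured when the work is recorded, and the set is allocated and written only when the recorded command actually runs on the scheduler. A small sketch of that capture-now, commit-later pattern with a toy scheduler; Scheduler and its members are illustrative, not yuzu's VKScheduler API.

    #include <cstdio>
    #include <functional>
    #include <vector>

    struct Scheduler {
        std::vector<std::function<void()>> recorded;
        void Record(std::function<void()> f) { recorded.push_back(std::move(f)); }
        void Flush() {
            for (auto& f : recorded) {
                f();  // deferred work runs here, analogous to command buffer execution
            }
            recorded.clear();
        }
    };

    int main() {
        Scheduler scheduler;
        const int descriptor_data = 1234;  // stand-in for update_descriptor_queue.UpdateData()
        scheduler.Record([descriptor_data] {
            std::printf("commit set, write payload %d, bind, dispatch\n", descriptor_data);
        });
        scheduler.Flush();
    }
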
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h
index 5ea187c30..114aef2bd 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.h
@@ -4,7 +4,6 @@
#pragma once
-#include <optional>
#include <span>
#include <utility>
@@ -27,31 +26,31 @@ class VKUpdateDescriptorQueue;
class Image;
struct StagingBufferRef;
-class VKComputePass {
+class ComputePass {
public:
- explicit VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool,
- vk::Span<VkDescriptorSetLayoutBinding> bindings,
- vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
- vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code);
- ~VKComputePass();
+ explicit ComputePass(const Device& device, DescriptorPool& descriptor_pool,
+ vk::Span<VkDescriptorSetLayoutBinding> bindings,
+ vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
+ const DescriptorBankInfo& bank_info,
+ vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code);
+ ~ComputePass();
protected:
- VkDescriptorSet CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue);
-
+ const Device& device;
vk::DescriptorUpdateTemplateKHR descriptor_template;
vk::PipelineLayout layout;
vk::Pipeline pipeline;
+ vk::DescriptorSetLayout descriptor_set_layout;
+ DescriptorAllocator descriptor_allocator;
private:
- vk::DescriptorSetLayout descriptor_set_layout;
- std::optional<DescriptorAllocator> descriptor_allocator;
vk::ShaderModule module;
};
-class Uint8Pass final : public VKComputePass {
+class Uint8Pass final : public ComputePass {
public:
explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_,
- VKDescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_,
+ DescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_);
~Uint8Pass();
@@ -66,10 +65,10 @@ private:
VKUpdateDescriptorQueue& update_descriptor_queue;
};
-class QuadIndexedPass final : public VKComputePass {
+class QuadIndexedPass final : public ComputePass {
public:
explicit QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,
- VKDescriptorPool& descriptor_pool_,
+ DescriptorPool& descriptor_pool_,
StagingBufferPool& staging_buffer_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_);
~QuadIndexedPass();
@@ -84,10 +83,10 @@ private:
VKUpdateDescriptorQueue& update_descriptor_queue;
};
-class ASTCDecoderPass final : public VKComputePass {
+class ASTCDecoderPass final : public ComputePass {
public:
explicit ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_,
- VKDescriptorPool& descriptor_pool_,
+ DescriptorPool& descriptor_pool_,
StagingBufferPool& staging_buffer_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_,
MemoryAllocator& memory_allocator_);
@@ -99,7 +98,6 @@ public:
private:
void MakeDataBuffer();
- const Device& device;
VKScheduler& scheduler;
StagingBufferPool& staging_buffer_pool;
VKUpdateDescriptorQueue& update_descriptor_queue;
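
COMPUTE_PUSH_CONSTANT_RANGE in the vk_compute_pass.cpp hunk above replaces a runtime builder function with a constexpr variable template, so each push-constant size becomes a distinct compile-time constant. A sketch of the same idiom with a simplified struct in place of VkPushConstantRange.

    #include <cstddef>
    #include <cstdio>

    struct PushConstantRange {
        unsigned offset;
        unsigned size;
    };

    // One constant per size, stamped out at compile time instead of built at runtime.
    template <size_t size>
    inline constexpr PushConstantRange PUSH_CONSTANT_RANGE{
        .offset = 0,
        .size = static_cast<unsigned>(size),
    };

    int main() {
        constexpr auto range = PUSH_CONSTANT_RANGE<sizeof(unsigned) * 2>;
        std::printf("offset=%u size=%u\n", range.offset, range.size);
    }
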
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index 3a48219b7..70b84c7a6 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -2,152 +2,198 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <algorithm>
#include <vector>
+#include <boost/container/small_vector.hpp>
+
+#include "video_core/renderer_vulkan/pipeline_helper.h"
+#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
-#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
+#include "video_core/shader_notify.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
-VKComputePipeline::VKComputePipeline(const Device& device_, VKScheduler& scheduler_,
- VKDescriptorPool& descriptor_pool_,
- VKUpdateDescriptorQueue& update_descriptor_queue_,
- const SPIRVShader& shader_)
- : device{device_}, scheduler{scheduler_}, entries{shader_.entries},
- descriptor_set_layout{CreateDescriptorSetLayout()},
- descriptor_allocator{descriptor_pool_, *descriptor_set_layout},
- update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()},
- descriptor_template{CreateDescriptorUpdateTemplate()},
- shader_module{CreateShaderModule(shader_.code)}, pipeline{CreatePipeline()} {}
-
-VKComputePipeline::~VKComputePipeline() = default;
-
-VkDescriptorSet VKComputePipeline::CommitDescriptorSet() {
- if (!descriptor_template) {
- return {};
- }
- const VkDescriptorSet set = descriptor_allocator.Commit();
- update_descriptor_queue.Send(*descriptor_template, set);
- return set;
-}
-
-vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const {
- std::vector<VkDescriptorSetLayoutBinding> bindings;
- u32 binding = 0;
- const auto add_bindings = [&](VkDescriptorType descriptor_type, std::size_t num_entries) {
- // TODO(Rodrigo): Maybe make individual bindings here?
- for (u32 bindpoint = 0; bindpoint < static_cast<u32>(num_entries); ++bindpoint) {
- bindings.push_back({
- .binding = binding++,
- .descriptorType = descriptor_type,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = nullptr,
- });
- }
- };
- add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size());
- add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, entries.global_buffers.size());
- add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.uniform_texels.size());
- add_bindings(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, entries.samplers.size());
- add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, entries.storage_texels.size());
- add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size());
-
- return device.GetLogical().CreateDescriptorSetLayout({
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .pNext = nullptr,
- .flags = 0,
- .bindingCount = static_cast<u32>(bindings.size()),
- .pBindings = bindings.data(),
- });
-}
-
-vk::PipelineLayout VKComputePipeline::CreatePipelineLayout() const {
- return device.GetLogical().CreatePipelineLayout({
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .pNext = nullptr,
- .flags = 0,
- .setLayoutCount = 1,
- .pSetLayouts = descriptor_set_layout.address(),
- .pushConstantRangeCount = 0,
- .pPushConstantRanges = nullptr,
- });
-}
-
-vk::DescriptorUpdateTemplateKHR VKComputePipeline::CreateDescriptorUpdateTemplate() const {
- std::vector<VkDescriptorUpdateTemplateEntryKHR> template_entries;
- u32 binding = 0;
- u32 offset = 0;
- FillDescriptorUpdateTemplateEntries(entries, binding, offset, template_entries);
- if (template_entries.empty()) {
- // If the shader doesn't use descriptor sets, skip template creation.
- return {};
+using Shader::ImageBufferDescriptor;
+using Tegra::Texture::TexturePair;
+
+ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool,
+ VKUpdateDescriptorQueue& update_descriptor_queue_,
+ Common::ThreadWorker* thread_worker,
+ VideoCore::ShaderNotify* shader_notify, const Shader::Info& info_,
+ vk::ShaderModule spv_module_)
+ : device{device_}, update_descriptor_queue{update_descriptor_queue_}, info{info_},
+ spv_module(std::move(spv_module_)) {
+ if (shader_notify) {
+ shader_notify->MarkShaderBuilding();
}
-
- return device.GetLogical().CreateDescriptorUpdateTemplateKHR({
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR,
- .pNext = nullptr,
- .flags = 0,
- .descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()),
- .pDescriptorUpdateEntries = template_entries.data(),
- .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR,
- .descriptorSetLayout = *descriptor_set_layout,
- .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
- .pipelineLayout = *layout,
- .set = DESCRIPTOR_SET,
- });
-}
-
-vk::ShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const {
- device.SaveShader(code);
-
- return device.GetLogical().CreateShaderModule({
- .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
- .pNext = nullptr,
- .flags = 0,
- .codeSize = code.size() * sizeof(u32),
- .pCode = code.data(),
- });
-}
-
-vk::Pipeline VKComputePipeline::CreatePipeline() const {
-
- VkComputePipelineCreateInfo ci{
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .pNext = nullptr,
- .flags = 0,
- .stage =
- {
+ std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(),
+ uniform_buffer_sizes.begin());
+
+ auto func{[this, &descriptor_pool, shader_notify] {
+ DescriptorLayoutBuilder builder{device};
+ builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT);
+
+ descriptor_set_layout = builder.CreateDescriptorSetLayout(false);
+ pipeline_layout = builder.CreatePipelineLayout(*descriptor_set_layout);
+ descriptor_update_template =
+ builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout, false);
+ descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, info);
+ const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
+ .pNext = nullptr,
+ .requiredSubgroupSize = GuestWarpSize,
+ };
+ pipeline = device.GetLogical().CreateComputePipeline({
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .stage{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .pNext = nullptr,
+ .pNext = device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr,
.flags = 0,
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = *shader_module,
+ .module = *spv_module,
.pName = "main",
.pSpecializationInfo = nullptr,
},
- .layout = *layout,
- .basePipelineHandle = nullptr,
- .basePipelineIndex = 0,
- };
-
- const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
- .pNext = nullptr,
- .requiredSubgroupSize = GuestWarpSize,
- };
-
- if (entries.uses_warps && device.IsGuestWarpSizeSupported(VK_SHADER_STAGE_COMPUTE_BIT)) {
- ci.stage.pNext = &subgroup_size_ci;
+ .layout = *pipeline_layout,
+ .basePipelineHandle = 0,
+ .basePipelineIndex = 0,
+ });
+ std::lock_guard lock{build_mutex};
+ is_built = true;
+ build_condvar.notify_one();
+ if (shader_notify) {
+ shader_notify->MarkShaderComplete();
+ }
+ }};
+ if (thread_worker) {
+ thread_worker->QueueWork(std::move(func));
+ } else {
+ func();
+ }
+}
+
+void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
+ Tegra::MemoryManager& gpu_memory, VKScheduler& scheduler,
+ BufferCache& buffer_cache, TextureCache& texture_cache) {
+ update_descriptor_queue.Acquire();
+
+ buffer_cache.SetComputeUniformBufferState(info.constant_buffer_mask, &uniform_buffer_sizes);
+ buffer_cache.UnbindComputeStorageBuffers();
+ size_t ssbo_index{};
+ for (const auto& desc : info.storage_buffers_descriptors) {
+ ASSERT(desc.count == 1);
+ buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset,
+ desc.is_written);
+ ++ssbo_index;
}
- return device.GetLogical().CreateComputePipeline(ci);
+ texture_cache.SynchronizeComputeDescriptors();
+
+ static constexpr size_t max_elements = 64;
+ std::array<ImageId, max_elements> image_view_ids;
+ boost::container::static_vector<u32, max_elements> image_view_indices;
+ boost::container::static_vector<VkSampler, max_elements> samplers;
+
+ const auto& qmd{kepler_compute.launch_description};
+ const auto& cbufs{qmd.const_buffer_config};
+ const bool via_header_index{qmd.linked_tsc != 0};
+ const auto read_handle{[&](const auto& desc, u32 index) {
+ ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0);
+ const u32 index_offset{index << desc.size_shift};
+ const u32 offset{desc.cbuf_offset + index_offset};
+ const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + offset};
+ if constexpr (std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> ||
+ std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) {
+ if (desc.has_secondary) {
+ ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0);
+ const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset};
+ const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() +
+ secondary_offset};
+ const u32 lhs_raw{gpu_memory.Read<u32>(addr)};
+ const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)};
+ return TexturePair(lhs_raw | rhs_raw, via_header_index);
+ }
+ }
+ return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
+ }};
+ const auto add_image{[&](const auto& desc) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ const auto handle{read_handle(desc, index)};
+ image_view_indices.push_back(handle.first);
+ }
+ }};
+ std::ranges::for_each(info.texture_buffer_descriptors, add_image);
+ std::ranges::for_each(info.image_buffer_descriptors, add_image);
+ for (const auto& desc : info.texture_descriptors) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ const auto handle{read_handle(desc, index)};
+ image_view_indices.push_back(handle.first);
+
+ Sampler* const sampler = texture_cache.GetComputeSampler(handle.second);
+ samplers.push_back(sampler->Handle());
+ }
+ }
+ std::ranges::for_each(info.image_descriptors, add_image);
+
+ const std::span indices_span(image_view_indices.data(), image_view_indices.size());
+ texture_cache.FillComputeImageViews(indices_span, image_view_ids);
+
+ buffer_cache.UnbindComputeTextureBuffers();
+ ImageId* texture_buffer_ids{image_view_ids.data()};
+ size_t index{};
+ const auto add_buffer{[&](const auto& desc) {
+ constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
+ for (u32 i = 0; i < desc.count; ++i) {
+ bool is_written{false};
+ if constexpr (is_image) {
+ is_written = desc.is_written;
+ }
+ ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids);
+ buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(),
+ image_view.BufferSize(), image_view.format,
+ is_written, is_image);
+ ++texture_buffer_ids;
+ ++index;
+ }
+ }};
+ std::ranges::for_each(info.texture_buffer_descriptors, add_buffer);
+ std::ranges::for_each(info.image_buffer_descriptors, add_buffer);
+
+ buffer_cache.UpdateComputeBuffers();
+ buffer_cache.BindHostComputeBuffers();
+
+ const VkSampler* samplers_it{samplers.data()};
+ const ImageId* views_it{image_view_ids.data()};
+ PushImageDescriptors(info, samplers_it, views_it, texture_cache, update_descriptor_queue);
+
+ if (!is_built.load(std::memory_order::relaxed)) {
+ // Wait for the pipeline to be built
+ scheduler.Record([this](vk::CommandBuffer) {
+ std::unique_lock lock{build_mutex};
+ build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); });
+ });
+ }
+ const void* const descriptor_data{update_descriptor_queue.UpdateData()};
+ scheduler.Record([this, descriptor_data](vk::CommandBuffer cmdbuf) {
+ cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
+ if (!descriptor_set_layout) {
+ return;
+ }
+ const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()};
+ const vk::Device& dev{device.GetLogical()};
+ dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data);
+ cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0,
+ descriptor_set, nullptr);
+ });
}
} // namespace Vulkan
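
ComputePipeline above builds its Vulkan pipeline on a worker thread and publishes completion through the is_built flag plus a condition variable, so Configure only blocks if it arrives before the build finishes. A self-contained sketch of that handshake, with the actual pipeline compilation elided.

    #include <atomic>
    #include <condition_variable>
    #include <cstdio>
    #include <mutex>
    #include <thread>

    std::condition_variable build_condvar;
    std::mutex build_mutex;
    std::atomic_bool is_built{false};

    int main() {
        std::thread builder([] {
            // ... compile the pipeline here ...
            std::lock_guard lock{build_mutex};
            is_built = true;           // publish under the mutex, then wake any waiter
            build_condvar.notify_one();
        });
        if (!is_built.load(std::memory_order_relaxed)) {
            // Consumer arrived early: block until the worker signals completion.
            std::unique_lock lock{build_mutex};
            build_condvar.wait(lock, [] { return is_built.load(std::memory_order_relaxed); });
        }
        std::puts("pipeline ready, safe to bind");
        builder.join();
    }
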
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
index 7e16575ac..52fec04d3 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
@@ -4,61 +4,63 @@
#pragma once
+#include <atomic>
+#include <condition_variable>
+#include <mutex>
+
#include "common/common_types.h"
+#include "common/thread_worker.h"
+#include "shader_recompiler/shader_info.h"
+#include "video_core/memory_manager.h"
+#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
-#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
+#include "video_core/renderer_vulkan/vk_texture_cache.h"
+#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
+namespace VideoCore {
+class ShaderNotify;
+}
+
namespace Vulkan {
class Device;
class VKScheduler;
-class VKUpdateDescriptorQueue;
-class VKComputePipeline final {
+class ComputePipeline {
public:
- explicit VKComputePipeline(const Device& device_, VKScheduler& scheduler_,
- VKDescriptorPool& descriptor_pool_,
- VKUpdateDescriptorQueue& update_descriptor_queue_,
- const SPIRVShader& shader_);
- ~VKComputePipeline();
-
- VkDescriptorSet CommitDescriptorSet();
+ explicit ComputePipeline(const Device& device, DescriptorPool& descriptor_pool,
+ VKUpdateDescriptorQueue& update_descriptor_queue,
+ Common::ThreadWorker* thread_worker,
+ VideoCore::ShaderNotify* shader_notify, const Shader::Info& info,
+ vk::ShaderModule spv_module);
- VkPipeline GetHandle() const {
- return *pipeline;
- }
+ ComputePipeline& operator=(ComputePipeline&&) noexcept = delete;
+ ComputePipeline(ComputePipeline&&) noexcept = delete;
- VkPipelineLayout GetLayout() const {
- return *layout;
- }
+ ComputePipeline& operator=(const ComputePipeline&) = delete;
+ ComputePipeline(const ComputePipeline&) = delete;
- const ShaderEntries& GetEntries() const {
- return entries;
- }
+ void Configure(Tegra::Engines::KeplerCompute& kepler_compute, Tegra::MemoryManager& gpu_memory,
+ VKScheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache);
private:
- vk::DescriptorSetLayout CreateDescriptorSetLayout() const;
-
- vk::PipelineLayout CreatePipelineLayout() const;
-
- vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate() const;
-
- vk::ShaderModule CreateShaderModule(const std::vector<u32>& code) const;
-
- vk::Pipeline CreatePipeline() const;
-
const Device& device;
- VKScheduler& scheduler;
- ShaderEntries entries;
+ VKUpdateDescriptorQueue& update_descriptor_queue;
+ Shader::Info info;
+ VideoCommon::ComputeUniformBufferSizes uniform_buffer_sizes{};
+
+ vk::ShaderModule spv_module;
vk::DescriptorSetLayout descriptor_set_layout;
DescriptorAllocator descriptor_allocator;
- VKUpdateDescriptorQueue& update_descriptor_queue;
- vk::PipelineLayout layout;
- vk::DescriptorUpdateTemplateKHR descriptor_template;
- vk::ShaderModule shader_module;
+ vk::PipelineLayout pipeline_layout;
+ vk::DescriptorUpdateTemplateKHR descriptor_update_template;
vk::Pipeline pipeline;
+
+ std::condition_variable build_condvar;
+ std::mutex build_mutex;
+ std::atomic_bool is_built{false};
};
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
index ef9fb5910..8e77e4796 100644
--- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
@@ -2,6 +2,8 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <mutex>
+#include <span>
#include <vector>
#include "common/common_types.h"
@@ -13,79 +15,149 @@
namespace Vulkan {
-// Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines.
-constexpr std::size_t SETS_GROW_RATE = 0x20;
+// Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines
+constexpr size_t SETS_GROW_RATE = 16;
+constexpr s32 SCORE_THRESHOLD = 3;
+constexpr u32 SETS_PER_POOL = 64;
-DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool_,
- VkDescriptorSetLayout layout_)
- : ResourcePool(descriptor_pool_.master_semaphore, SETS_GROW_RATE),
- descriptor_pool{descriptor_pool_}, layout{layout_} {}
+struct DescriptorBank {
+ DescriptorBankInfo info;
+ std::vector<vk::DescriptorPool> pools;
+};
-DescriptorAllocator::~DescriptorAllocator() = default;
+bool DescriptorBankInfo::IsSuperset(const DescriptorBankInfo& subset) const noexcept {
+ return uniform_buffers >= subset.uniform_buffers && storage_buffers >= subset.storage_buffers &&
+ texture_buffers >= subset.texture_buffers && image_buffers >= subset.image_buffers &&
+           textures >= subset.textures && images >= subset.images;
+}
-VkDescriptorSet DescriptorAllocator::Commit() {
- const std::size_t index = CommitResource();
- return descriptors_allocations[index / SETS_GROW_RATE][index % SETS_GROW_RATE];
+template <typename Descriptors>
+static u32 Accumulate(const Descriptors& descriptors) {
+ u32 count = 0;
+ for (const auto& descriptor : descriptors) {
+ count += descriptor.count;
+ }
+ return count;
}
-void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) {
- descriptors_allocations.push_back(descriptor_pool.AllocateDescriptors(layout, end - begin));
+static DescriptorBankInfo MakeBankInfo(std::span<const Shader::Info> infos) {
+ DescriptorBankInfo bank;
+ for (const Shader::Info& info : infos) {
+ bank.uniform_buffers += Accumulate(info.constant_buffer_descriptors);
+ bank.storage_buffers += Accumulate(info.storage_buffers_descriptors);
+ bank.texture_buffers += Accumulate(info.texture_buffer_descriptors);
+ bank.image_buffers += Accumulate(info.image_buffer_descriptors);
+ bank.textures += Accumulate(info.texture_descriptors);
+ bank.images += Accumulate(info.image_descriptors);
+ }
+ bank.score = bank.uniform_buffers + bank.storage_buffers + bank.texture_buffers +
+ bank.image_buffers + bank.textures + bank.images;
+ return bank;
}
-VKDescriptorPool::VKDescriptorPool(const Device& device_, VKScheduler& scheduler)
- : device{device_}, master_semaphore{scheduler.GetMasterSemaphore()}, active_pool{
- AllocateNewPool()} {}
-
-VKDescriptorPool::~VKDescriptorPool() = default;
-
-vk::DescriptorPool* VKDescriptorPool::AllocateNewPool() {
- static constexpr u32 num_sets = 0x20000;
- static constexpr VkDescriptorPoolSize pool_sizes[] = {
- {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, num_sets * 90},
- {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, num_sets * 60},
- {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, num_sets * 64},
- {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, num_sets * 64},
- {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, num_sets * 64},
- {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40},
+static void AllocatePool(const Device& device, DescriptorBank& bank) {
+ std::array<VkDescriptorPoolSize, 6> pool_sizes;
+ size_t pool_cursor{};
+ const auto add = [&](VkDescriptorType type, u32 count) {
+ if (count > 0) {
+ pool_sizes[pool_cursor++] = {
+ .type = type,
+ .descriptorCount = count * SETS_PER_POOL,
+ };
+ }
};
-
- const VkDescriptorPoolCreateInfo ci{
+ const auto& info{bank.info};
+ add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, info.uniform_buffers);
+ add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, info.storage_buffers);
+ add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, info.texture_buffers);
+ add(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, info.image_buffers);
+ add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, info.textures);
+ add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, info.images);
+ bank.pools.push_back(device.GetLogical().CreateDescriptorPool({
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
.pNext = nullptr,
.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
- .maxSets = num_sets,
- .poolSizeCount = static_cast<u32>(std::size(pool_sizes)),
+ .maxSets = SETS_PER_POOL,
+ .poolSizeCount = static_cast<u32>(pool_cursor),
.pPoolSizes = std::data(pool_sizes),
- };
- return &pools.emplace_back(device.GetLogical().CreateDescriptorPool(ci));
+ }));
+}
+
+DescriptorAllocator::DescriptorAllocator(const Device& device_, MasterSemaphore& master_semaphore_,
+ DescriptorBank& bank_, VkDescriptorSetLayout layout_)
+ : ResourcePool(master_semaphore_, SETS_GROW_RATE), device{&device_}, bank{&bank_},
+ layout{layout_} {}
+
+VkDescriptorSet DescriptorAllocator::Commit() {
+ const size_t index = CommitResource();
+ return sets[index / SETS_GROW_RATE][index % SETS_GROW_RATE];
}
-vk::DescriptorSets VKDescriptorPool::AllocateDescriptors(VkDescriptorSetLayout layout,
- std::size_t count) {
- const std::vector layout_copies(count, layout);
- VkDescriptorSetAllocateInfo ai{
+void DescriptorAllocator::Allocate(size_t begin, size_t end) {
+ sets.push_back(AllocateDescriptors(end - begin));
+}
+
+vk::DescriptorSets DescriptorAllocator::AllocateDescriptors(size_t count) {
+ const std::vector<VkDescriptorSetLayout> layouts(count, layout);
+ VkDescriptorSetAllocateInfo allocate_info{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
.pNext = nullptr,
- .descriptorPool = **active_pool,
+ .descriptorPool = *bank->pools.back(),
.descriptorSetCount = static_cast<u32>(count),
- .pSetLayouts = layout_copies.data(),
+ .pSetLayouts = layouts.data(),
};
-
- vk::DescriptorSets sets = active_pool->Allocate(ai);
- if (!sets.IsOutOfPoolMemory()) {
- return sets;
+ vk::DescriptorSets new_sets = bank->pools.back().Allocate(allocate_info);
+ if (!new_sets.IsOutOfPoolMemory()) {
+ return new_sets;
}
-
// Our current pool is out of memory. Allocate a new one and retry
- active_pool = AllocateNewPool();
- ai.descriptorPool = **active_pool;
- sets = active_pool->Allocate(ai);
- if (!sets.IsOutOfPoolMemory()) {
- return sets;
+ AllocatePool(*device, *bank);
+ allocate_info.descriptorPool = *bank->pools.back();
+ new_sets = bank->pools.back().Allocate(allocate_info);
+ if (!new_sets.IsOutOfPoolMemory()) {
+ return new_sets;
}
-
// After allocating a new pool, we are out of memory again. We can't handle this from here.
throw vk::Exception(VK_ERROR_OUT_OF_POOL_MEMORY);
}
+DescriptorPool::DescriptorPool(const Device& device_, VKScheduler& scheduler)
+ : device{device_}, master_semaphore{scheduler.GetMasterSemaphore()} {}
+
+DescriptorPool::~DescriptorPool() = default;
+
+DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout,
+ std::span<const Shader::Info> infos) {
+ return Allocator(layout, MakeBankInfo(infos));
+}
+
+DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout,
+ const Shader::Info& info) {
+ return Allocator(layout, MakeBankInfo(std::array{info}));
+}
+
+DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout,
+ const DescriptorBankInfo& info) {
+ return DescriptorAllocator(device, master_semaphore, Bank(info), layout);
+}
+
+DescriptorBank& DescriptorPool::Bank(const DescriptorBankInfo& reqs) {
+ std::shared_lock read_lock{banks_mutex};
+ const auto it = std::ranges::find_if(bank_infos, [&reqs](const DescriptorBankInfo& bank) {
+ return std::abs(bank.score - reqs.score) < SCORE_THRESHOLD && bank.IsSuperset(reqs);
+ });
+ if (it != bank_infos.end()) {
+ return *banks[std::distance(bank_infos.begin(), it)].get();
+ }
+ read_lock.unlock();
+
+ std::unique_lock write_lock{banks_mutex};
+ bank_infos.push_back(reqs);
+
+ auto& bank = *banks.emplace_back(std::make_unique<DescriptorBank>());
+ bank.info = reqs;
+ AllocatePool(device, bank);
+ return bank;
+}
+
} // namespace Vulkan
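
DescriptorPool::Bank() reuses a cached bank whenever its descriptor totals are close to the request (score within SCORE_THRESHOLD) and it can hold at least as many descriptors of every type; otherwise a new bank with its own VkDescriptorPool is created. A reduced, self-contained sketch of that matching policy, trimmed to three descriptor types (struct and constant names mirror the code above but are not the real declarations):

#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <vector>

struct BankInfo {
    int uniform_buffers{};
    int storage_buffers{};
    int textures{};
    int score{}; // sum of all descriptor counts

    // A bank can serve a request when it has at least as many descriptors of
    // every type as the request asks for.
    bool IsSuperset(const BankInfo& subset) const {
        return uniform_buffers >= subset.uniform_buffers &&
               storage_buffers >= subset.storage_buffers && textures >= subset.textures;
    }
};

constexpr int SCORE_THRESHOLD = 3;

// Returns the index of a compatible cached bank, or -1 when a new bank
// (and a fresh descriptor pool sized for it) has to be created.
int FindBank(const std::vector<BankInfo>& banks, const BankInfo& reqs) {
    for (std::size_t i = 0; i < banks.size(); ++i) {
        if (std::abs(banks[i].score - reqs.score) < SCORE_THRESHOLD && banks[i].IsSuperset(reqs)) {
            return static_cast<int>(i);
        }
    }
    return -1;
}

int main() {
    const std::vector<BankInfo> banks{
        {.uniform_buffers = 4, .storage_buffers = 2, .textures = 6, .score = 12},
    };
    const BankInfo reqs{.uniform_buffers = 3, .storage_buffers = 2, .textures = 6, .score = 11};
    std::printf("reuse bank index: %d\n", FindBank(banks, reqs)); // prints 0
}

The score check keeps a tiny pipeline from being matched against a huge bank, which would waste pool space, while IsSuperset guarantees the chosen bank can actually satisfy every descriptor type.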
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.h b/src/video_core/renderer_vulkan/vk_descriptor_pool.h
index f892be7be..59466aac5 100644
--- a/src/video_core/renderer_vulkan/vk_descriptor_pool.h
+++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.h
@@ -4,57 +4,85 @@
#pragma once
+#include <shared_mutex>
+#include <span>
#include <vector>
+#include "shader_recompiler/shader_info.h"
#include "video_core/renderer_vulkan/vk_resource_pool.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
class Device;
-class VKDescriptorPool;
class VKScheduler;
+struct DescriptorBank;
+
+struct DescriptorBankInfo {
+ [[nodiscard]] bool IsSuperset(const DescriptorBankInfo& subset) const noexcept;
+
+ u32 uniform_buffers{}; ///< Number of uniform buffer descriptors
+ u32 storage_buffers{}; ///< Number of storage buffer descriptors
+ u32 texture_buffers{}; ///< Number of texture buffer descriptors
+ u32 image_buffers{}; ///< Number of image buffer descriptors
+ u32 textures{}; ///< Number of texture descriptors
+ u32 images{}; ///< Number of image descriptors
+ s32 score{}; ///< Number of descriptors in total
+};
+
class DescriptorAllocator final : public ResourcePool {
+ friend class DescriptorPool;
+
public:
- explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, VkDescriptorSetLayout layout);
- ~DescriptorAllocator() override;
+ explicit DescriptorAllocator() = default;
+ ~DescriptorAllocator() override = default;
+
+ DescriptorAllocator& operator=(DescriptorAllocator&&) noexcept = default;
+ DescriptorAllocator(DescriptorAllocator&&) noexcept = default;
DescriptorAllocator& operator=(const DescriptorAllocator&) = delete;
DescriptorAllocator(const DescriptorAllocator&) = delete;
VkDescriptorSet Commit();
-protected:
- void Allocate(std::size_t begin, std::size_t end) override;
-
private:
- VKDescriptorPool& descriptor_pool;
- const VkDescriptorSetLayout layout;
+ explicit DescriptorAllocator(const Device& device_, MasterSemaphore& master_semaphore_,
+ DescriptorBank& bank_, VkDescriptorSetLayout layout_);
- std::vector<vk::DescriptorSets> descriptors_allocations;
-};
+ void Allocate(size_t begin, size_t end) override;
+
+ vk::DescriptorSets AllocateDescriptors(size_t count);
+
+ const Device* device{};
+ DescriptorBank* bank{};
+ VkDescriptorSetLayout layout{};
-class VKDescriptorPool final {
- friend DescriptorAllocator;
+ std::vector<vk::DescriptorSets> sets;
+};
+class DescriptorPool {
public:
- explicit VKDescriptorPool(const Device& device, VKScheduler& scheduler);
- ~VKDescriptorPool();
+ explicit DescriptorPool(const Device& device, VKScheduler& scheduler);
+ ~DescriptorPool();
- VKDescriptorPool(const VKDescriptorPool&) = delete;
- VKDescriptorPool& operator=(const VKDescriptorPool&) = delete;
+ DescriptorPool& operator=(const DescriptorPool&) = delete;
+ DescriptorPool(const DescriptorPool&) = delete;
-private:
- vk::DescriptorPool* AllocateNewPool();
+ DescriptorAllocator Allocator(VkDescriptorSetLayout layout,
+ std::span<const Shader::Info> infos);
+ DescriptorAllocator Allocator(VkDescriptorSetLayout layout, const Shader::Info& info);
+ DescriptorAllocator Allocator(VkDescriptorSetLayout layout, const DescriptorBankInfo& info);
- vk::DescriptorSets AllocateDescriptors(VkDescriptorSetLayout layout, std::size_t count);
+private:
+ DescriptorBank& Bank(const DescriptorBankInfo& reqs);
const Device& device;
MasterSemaphore& master_semaphore;
- std::vector<vk::DescriptorPool> pools;
- vk::DescriptorPool* active_pool;
+ std::shared_mutex banks_mutex;
+ std::vector<DescriptorBankInfo> bank_infos;
+ std::vector<std::unique_ptr<DescriptorBank>> banks;
};
} // namespace Vulkan
\ No newline at end of file
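
The new interface hides bank storage behind a std::shared_mutex: lookups in Bank() take a shared (read) lock, and only a miss escalates to an exclusive lock to append a new entry, with banks held through unique_ptr so references handed out earlier stay valid when the vectors grow. A generic sketch of that find-or-create pattern, assuming only the standard library (the re-check under the write lock is an extra guard against two racing misses, stricter than the code above strictly needs):

#include <cstddef>
#include <cstdio>
#include <memory>
#include <mutex>
#include <shared_mutex>
#include <string>
#include <vector>

class Registry {
public:
    std::string& GetOrCreate(int key) {
        {
            // Concurrent readers share this lock, so hits never serialize.
            std::shared_lock read_lock{mutex};
            for (std::size_t i = 0; i < keys.size(); ++i) {
                if (keys[i] == key) {
                    return *values[i];
                }
            }
        }
        // Miss: take the exclusive lock, re-check, then insert.
        std::unique_lock write_lock{mutex};
        for (std::size_t i = 0; i < keys.size(); ++i) {
            if (keys[i] == key) {
                return *values[i];
            }
        }
        keys.push_back(key);
        values.push_back(std::make_unique<std::string>("entry " + std::to_string(key)));
        return *values.back();
    }

private:
    std::shared_mutex mutex;
    std::vector<int> keys;
    std::vector<std::unique_ptr<std::string>> values;
};

int main() {
    Registry registry;
    std::printf("%s\n", registry.GetOrCreate(7).c_str());
    std::printf("%s\n", registry.GetOrCreate(7).c_str()); // second call reuses the entry
}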
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index fc6dd83eb..18482e1d0 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -1,29 +1,58 @@
-// Copyright 2019 yuzu Emulator Project
+// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
-#include <array>
-#include <cstring>
-#include <vector>
+#include <span>
-#include "common/common_types.h"
-#include "common/microprofile.h"
-#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
+#include <boost/container/small_vector.hpp>
+#include <boost/container/static_vector.hpp>
+
+#include "common/bit_field.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
-#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
+#include "video_core/renderer_vulkan/pipeline_helper.h"
+#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
-#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
+#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
+#include "video_core/shader_notify.h"
#include "video_core/vulkan_common/vulkan_device.h"
-#include "video_core/vulkan_common/vulkan_wrapper.h"
-
-namespace Vulkan {
-MICROPROFILE_DECLARE(Vulkan_PipelineCache);
+#if defined(_MSC_VER) && defined(NDEBUG)
+#define LAMBDA_FORCEINLINE [[msvc::forceinline]]
+#else
+#define LAMBDA_FORCEINLINE
+#endif
+namespace Vulkan {
namespace {
+using boost::container::small_vector;
+using boost::container::static_vector;
+using Shader::ImageBufferDescriptor;
+using Tegra::Texture::TexturePair;
+using VideoCore::Surface::PixelFormat;
+using VideoCore::Surface::PixelFormatFromDepthFormat;
+using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
+
+constexpr size_t NUM_STAGES = Maxwell::MaxShaderStage;
+constexpr size_t MAX_IMAGE_ELEMENTS = 64;
+
+DescriptorLayoutBuilder MakeBuilder(const Device& device, std::span<const Shader::Info> infos) {
+ DescriptorLayoutBuilder builder{device};
+ for (size_t index = 0; index < infos.size(); ++index) {
+ static constexpr std::array stages{
+ VK_SHADER_STAGE_VERTEX_BIT,
+ VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
+ VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
+ VK_SHADER_STAGE_GEOMETRY_BIT,
+ VK_SHADER_STAGE_FRAGMENT_BIT,
+ };
+ builder.Add(infos[index], stages.at(index));
+ }
+ return builder;
+}
template <class StencilFace>
VkStencilOpState GetStencilFaceState(const StencilFace& face) {
@@ -39,15 +68,24 @@ VkStencilOpState GetStencilFaceState(const StencilFace& face) {
}
bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) {
- static constexpr std::array unsupported_topologies = {
+ static constexpr std::array unsupported_topologies{
VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
VK_PRIMITIVE_TOPOLOGY_LINE_LIST,
VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY,
VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY,
- VK_PRIMITIVE_TOPOLOGY_PATCH_LIST};
- return std::find(std::begin(unsupported_topologies), std::end(unsupported_topologies),
- topology) == std::end(unsupported_topologies);
+ VK_PRIMITIVE_TOPOLOGY_PATCH_LIST,
+ // VK_PRIMITIVE_TOPOLOGY_QUAD_LIST_EXT,
+ };
+ return std::ranges::find(unsupported_topologies, topology) == unsupported_topologies.end();
+}
+
+bool IsLine(VkPrimitiveTopology topology) {
+ static constexpr std::array line_topologies{
+ VK_PRIMITIVE_TOPOLOGY_LINE_LIST, VK_PRIMITIVE_TOPOLOGY_LINE_STRIP,
+ // VK_PRIMITIVE_TOPOLOGY_LINE_LOOP_EXT,
+ };
+    return std::ranges::find(line_topologies, topology) != line_topologies.end();
}
VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) {
@@ -59,8 +97,7 @@ VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) {
BitField<12, 3, Maxwell::ViewportSwizzle> w;
};
const Swizzle unpacked{swizzle};
-
- return {
+ return VkViewportSwizzleNV{
.x = MaxwellToVK::ViewportSwizzle(unpacked.x),
.y = MaxwellToVK::ViewportSwizzle(unpacked.y),
.z = MaxwellToVK::ViewportSwizzle(unpacked.z),
@@ -68,193 +105,446 @@ VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) {
};
}
-VkSampleCountFlagBits ConvertMsaaMode(Tegra::Texture::MsaaMode msaa_mode) {
- switch (msaa_mode) {
- case Tegra::Texture::MsaaMode::Msaa1x1:
- return VK_SAMPLE_COUNT_1_BIT;
- case Tegra::Texture::MsaaMode::Msaa2x1:
- case Tegra::Texture::MsaaMode::Msaa2x1_D3D:
- return VK_SAMPLE_COUNT_2_BIT;
- case Tegra::Texture::MsaaMode::Msaa2x2:
- case Tegra::Texture::MsaaMode::Msaa2x2_VC4:
- case Tegra::Texture::MsaaMode::Msaa2x2_VC12:
- return VK_SAMPLE_COUNT_4_BIT;
- case Tegra::Texture::MsaaMode::Msaa4x2:
- case Tegra::Texture::MsaaMode::Msaa4x2_D3D:
- case Tegra::Texture::MsaaMode::Msaa4x2_VC8:
- case Tegra::Texture::MsaaMode::Msaa4x2_VC24:
- return VK_SAMPLE_COUNT_8_BIT;
- case Tegra::Texture::MsaaMode::Msaa4x4:
- return VK_SAMPLE_COUNT_16_BIT;
- default:
- UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast<int>(msaa_mode));
- return VK_SAMPLE_COUNT_1_BIT;
+PixelFormat DecodeFormat(u8 encoded_format) {
+ const auto format{static_cast<Tegra::RenderTargetFormat>(encoded_format)};
+ if (format == Tegra::RenderTargetFormat::NONE) {
+ return PixelFormat::Invalid;
}
+ return PixelFormatFromRenderTargetFormat(format);
}
-} // Anonymous namespace
+RenderPassKey MakeRenderPassKey(const FixedPipelineState& state) {
+ RenderPassKey key;
+ std::ranges::transform(state.color_formats, key.color_formats.begin(), DecodeFormat);
+ if (state.depth_enabled != 0) {
+ const auto depth_format{static_cast<Tegra::DepthFormat>(state.depth_format.Value())};
+ key.depth_format = PixelFormatFromDepthFormat(depth_format);
+ } else {
+ key.depth_format = PixelFormat::Invalid;
+ }
+ key.samples = MaxwellToVK::MsaaMode(state.msaa_mode);
+ return key;
+}
-VKGraphicsPipeline::VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_,
- VKDescriptorPool& descriptor_pool_,
- VKUpdateDescriptorQueue& update_descriptor_queue_,
- const GraphicsPipelineCacheKey& key,
- vk::Span<VkDescriptorSetLayoutBinding> bindings,
- const SPIRVProgram& program, u32 num_color_buffers)
- : device{device_}, scheduler{scheduler_}, cache_key{key}, hash{cache_key.Hash()},
- descriptor_set_layout{CreateDescriptorSetLayout(bindings)},
- descriptor_allocator{descriptor_pool_, *descriptor_set_layout},
- update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()},
- descriptor_template{CreateDescriptorUpdateTemplate(program)},
- modules(CreateShaderModules(program)),
- pipeline(CreatePipeline(program, cache_key.renderpass, num_color_buffers)) {}
-
-VKGraphicsPipeline::~VKGraphicsPipeline() = default;
-
-VkDescriptorSet VKGraphicsPipeline::CommitDescriptorSet() {
- if (!descriptor_template) {
- return {};
- }
- const VkDescriptorSet set = descriptor_allocator.Commit();
- update_descriptor_queue.Send(*descriptor_template, set);
- return set;
+size_t NumAttachments(const FixedPipelineState& state) {
+ size_t num{};
+ for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
+ const auto format{static_cast<Tegra::RenderTargetFormat>(state.color_formats[index])};
+ if (format != Tegra::RenderTargetFormat::NONE) {
+ num = index + 1;
+ }
+ }
+ return num;
}
-vk::DescriptorSetLayout VKGraphicsPipeline::CreateDescriptorSetLayout(
- vk::Span<VkDescriptorSetLayoutBinding> bindings) const {
- const VkDescriptorSetLayoutCreateInfo ci{
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .pNext = nullptr,
- .flags = 0,
- .bindingCount = bindings.size(),
- .pBindings = bindings.data(),
- };
- return device.GetLogical().CreateDescriptorSetLayout(ci);
+template <typename Spec>
+bool Passes(const std::array<vk::ShaderModule, NUM_STAGES>& modules,
+ const std::array<Shader::Info, NUM_STAGES>& stage_infos) {
+ for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
+ if (!Spec::enabled_stages[stage] && modules[stage]) {
+ return false;
+ }
+ const auto& info{stage_infos[stage]};
+ if constexpr (!Spec::has_storage_buffers) {
+ if (!info.storage_buffers_descriptors.empty()) {
+ return false;
+ }
+ }
+ if constexpr (!Spec::has_texture_buffers) {
+ if (!info.texture_buffer_descriptors.empty()) {
+ return false;
+ }
+ }
+ if constexpr (!Spec::has_image_buffers) {
+ if (!info.image_buffer_descriptors.empty()) {
+ return false;
+ }
+ }
+ if constexpr (!Spec::has_images) {
+ if (!info.image_descriptors.empty()) {
+ return false;
+ }
+ }
+ }
+ return true;
}
-vk::PipelineLayout VKGraphicsPipeline::CreatePipelineLayout() const {
- const VkPipelineLayoutCreateInfo ci{
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .pNext = nullptr,
- .flags = 0,
- .setLayoutCount = 1,
- .pSetLayouts = descriptor_set_layout.address(),
- .pushConstantRangeCount = 0,
- .pPushConstantRanges = nullptr,
- };
- return device.GetLogical().CreatePipelineLayout(ci);
+using ConfigureFuncPtr = void (*)(GraphicsPipeline*, bool);
+
+template <typename Spec, typename... Specs>
+ConfigureFuncPtr FindSpec(const std::array<vk::ShaderModule, NUM_STAGES>& modules,
+ const std::array<Shader::Info, NUM_STAGES>& stage_infos) {
+ if constexpr (sizeof...(Specs) > 0) {
+ if (!Passes<Spec>(modules, stage_infos)) {
+ return FindSpec<Specs...>(modules, stage_infos);
+ }
+ }
+ return GraphicsPipeline::MakeConfigureSpecFunc<Spec>();
}
-vk::DescriptorUpdateTemplateKHR VKGraphicsPipeline::CreateDescriptorUpdateTemplate(
- const SPIRVProgram& program) const {
- std::vector<VkDescriptorUpdateTemplateEntry> template_entries;
- u32 binding = 0;
- u32 offset = 0;
- for (const auto& stage : program) {
- if (stage) {
- FillDescriptorUpdateTemplateEntries(stage->entries, binding, offset, template_entries);
+struct SimpleVertexFragmentSpec {
+ static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, true};
+ static constexpr bool has_storage_buffers = false;
+ static constexpr bool has_texture_buffers = false;
+ static constexpr bool has_image_buffers = false;
+ static constexpr bool has_images = false;
+};
+
+struct SimpleVertexSpec {
+ static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, false};
+ static constexpr bool has_storage_buffers = false;
+ static constexpr bool has_texture_buffers = false;
+ static constexpr bool has_image_buffers = false;
+ static constexpr bool has_images = false;
+};
+
+struct DefaultSpec {
+ static constexpr std::array<bool, 5> enabled_stages{true, true, true, true, true};
+ static constexpr bool has_storage_buffers = true;
+ static constexpr bool has_texture_buffers = true;
+ static constexpr bool has_image_buffers = true;
+ static constexpr bool has_images = true;
+};
+
+ConfigureFuncPtr ConfigureFunc(const std::array<vk::ShaderModule, NUM_STAGES>& modules,
+ const std::array<Shader::Info, NUM_STAGES>& infos) {
+ return FindSpec<SimpleVertexSpec, SimpleVertexFragmentSpec, DefaultSpec>(modules, infos);
+}
+} // Anonymous namespace
+
+GraphicsPipeline::GraphicsPipeline(
+ Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_,
+ VKScheduler& scheduler_, BufferCache& buffer_cache_, TextureCache& texture_cache_,
+ VideoCore::ShaderNotify* shader_notify, const Device& device_, DescriptorPool& descriptor_pool,
+ VKUpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* worker_thread,
+ RenderPassCache& render_pass_cache, const GraphicsPipelineCacheKey& key_,
+ std::array<vk::ShaderModule, NUM_STAGES> stages,
+ const std::array<const Shader::Info*, NUM_STAGES>& infos)
+ : key{key_}, maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, device{device_},
+ texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, scheduler{scheduler_},
+ update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} {
+ if (shader_notify) {
+ shader_notify->MarkShaderBuilding();
+ }
+ for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
+ const Shader::Info* const info{infos[stage]};
+ if (!info) {
+ continue;
}
+ stage_infos[stage] = *info;
+ enabled_uniform_buffer_masks[stage] = info->constant_buffer_mask;
+ std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin());
}
- if (template_entries.empty()) {
- // If the shader doesn't use descriptor sets, skip template creation.
- return {};
+ auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool] {
+ DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)};
+ uses_push_descriptor = builder.CanUsePushDescriptor();
+ descriptor_set_layout = builder.CreateDescriptorSetLayout(uses_push_descriptor);
+ if (!uses_push_descriptor) {
+ descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, stage_infos);
+ }
+ const VkDescriptorSetLayout set_layout{*descriptor_set_layout};
+ pipeline_layout = builder.CreatePipelineLayout(set_layout);
+ descriptor_update_template =
+ builder.CreateTemplate(set_layout, *pipeline_layout, uses_push_descriptor);
+
+ const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(key.state))};
+ Validate();
+ MakePipeline(render_pass);
+
+ std::lock_guard lock{build_mutex};
+ is_built = true;
+ build_condvar.notify_one();
+ if (shader_notify) {
+ shader_notify->MarkShaderComplete();
+ }
+ }};
+ if (worker_thread) {
+ worker_thread->QueueWork(std::move(func));
+ } else {
+ func();
}
+ configure_func = ConfigureFunc(spv_modules, stage_infos);
+}
- const VkDescriptorUpdateTemplateCreateInfoKHR ci{
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR,
- .pNext = nullptr,
- .flags = 0,
- .descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()),
- .pDescriptorUpdateEntries = template_entries.data(),
- .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR,
- .descriptorSetLayout = *descriptor_set_layout,
- .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
- .pipelineLayout = *layout,
- .set = DESCRIPTOR_SET,
- };
- return device.GetLogical().CreateDescriptorUpdateTemplateKHR(ci);
+void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) {
+ transition_keys.push_back(transition->key);
+ transitions.push_back(transition);
}
-std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules(
- const SPIRVProgram& program) const {
- VkShaderModuleCreateInfo ci{
- .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
- .pNext = nullptr,
- .flags = 0,
- .codeSize = 0,
- .pCode = nullptr,
- };
+template <typename Spec>
+void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
+ std::array<ImageId, MAX_IMAGE_ELEMENTS> image_view_ids;
+ std::array<u32, MAX_IMAGE_ELEMENTS> image_view_indices;
+ std::array<VkSampler, MAX_IMAGE_ELEMENTS> samplers;
+ size_t sampler_index{};
+ size_t image_index{};
+
+ texture_cache.SynchronizeGraphicsDescriptors();
+
+ buffer_cache.SetUniformBuffersState(enabled_uniform_buffer_masks, &uniform_buffer_sizes);
+
+ const auto& regs{maxwell3d.regs};
+ const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex};
+ const auto config_stage{[&](size_t stage) LAMBDA_FORCEINLINE {
+ const Shader::Info& info{stage_infos[stage]};
+ buffer_cache.UnbindGraphicsStorageBuffers(stage);
+ if constexpr (Spec::has_storage_buffers) {
+ size_t ssbo_index{};
+ for (const auto& desc : info.storage_buffers_descriptors) {
+ ASSERT(desc.count == 1);
+ buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index,
+ desc.cbuf_offset, desc.is_written);
+ ++ssbo_index;
+ }
+ }
+ const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers};
+ const auto read_handle{[&](const auto& desc, u32 index) {
+ ASSERT(cbufs[desc.cbuf_index].enabled);
+ const u32 index_offset{index << desc.size_shift};
+ const u32 offset{desc.cbuf_offset + index_offset};
+ const GPUVAddr addr{cbufs[desc.cbuf_index].address + offset};
+ if constexpr (std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> ||
+ std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) {
+ if (desc.has_secondary) {
+ ASSERT(cbufs[desc.secondary_cbuf_index].enabled);
+ const u32 second_offset{desc.secondary_cbuf_offset + index_offset};
+ const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].address +
+ second_offset};
+ const u32 lhs_raw{gpu_memory.Read<u32>(addr)};
+ const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)};
+ const u32 raw{lhs_raw | rhs_raw};
+ return TexturePair(raw, via_header_index);
+ }
+ }
+ return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
+ }};
+ const auto add_image{[&](const auto& desc) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ const auto handle{read_handle(desc, index)};
+ image_view_indices[image_index++] = handle.first;
+ }
+ }};
+ if constexpr (Spec::has_texture_buffers) {
+ for (const auto& desc : info.texture_buffer_descriptors) {
+ add_image(desc);
+ }
+ }
+ if constexpr (Spec::has_image_buffers) {
+ for (const auto& desc : info.image_buffer_descriptors) {
+ add_image(desc);
+ }
+ }
+ for (const auto& desc : info.texture_descriptors) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ const auto handle{read_handle(desc, index)};
+ image_view_indices[image_index++] = handle.first;
- std::vector<vk::ShaderModule> shader_modules;
- shader_modules.reserve(Maxwell::MaxShaderStage);
- for (std::size_t i = 0; i < Maxwell::MaxShaderStage; ++i) {
- const auto& stage = program[i];
- if (!stage) {
- continue;
+ Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)};
+ samplers[sampler_index++] = sampler->Handle();
+ }
+ }
+ if constexpr (Spec::has_images) {
+ for (const auto& desc : info.image_descriptors) {
+ add_image(desc);
+ }
}
+ }};
+ if constexpr (Spec::enabled_stages[0]) {
+ config_stage(0);
+ }
+ if constexpr (Spec::enabled_stages[1]) {
+ config_stage(1);
+ }
+ if constexpr (Spec::enabled_stages[2]) {
+ config_stage(2);
+ }
+ if constexpr (Spec::enabled_stages[3]) {
+ config_stage(3);
+ }
+ if constexpr (Spec::enabled_stages[4]) {
+ config_stage(4);
+ }
+ const std::span indices_span(image_view_indices.data(), image_index);
+ texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
+
+ ImageId* texture_buffer_index{image_view_ids.data()};
+ const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE {
+ size_t index{};
+ const auto add_buffer{[&](const auto& desc) {
+ constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
+ for (u32 i = 0; i < desc.count; ++i) {
+ bool is_written{false};
+ if constexpr (is_image) {
+ is_written = desc.is_written;
+ }
+ ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)};
+ buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(),
+ image_view.BufferSize(), image_view.format,
+ is_written, is_image);
+ ++index;
+ ++texture_buffer_index;
+ }
+ }};
+ buffer_cache.UnbindGraphicsTextureBuffers(stage);
- device.SaveShader(stage->code);
+ const Shader::Info& info{stage_infos[stage]};
+ if constexpr (Spec::has_texture_buffers) {
+ for (const auto& desc : info.texture_buffer_descriptors) {
+ add_buffer(desc);
+ }
+ }
+ if constexpr (Spec::has_image_buffers) {
+ for (const auto& desc : info.image_buffer_descriptors) {
+ add_buffer(desc);
+ }
+ }
+ for (const auto& desc : info.texture_descriptors) {
+ texture_buffer_index += desc.count;
+ }
+ if constexpr (Spec::has_images) {
+ for (const auto& desc : info.image_descriptors) {
+ texture_buffer_index += desc.count;
+ }
+ }
+ }};
+ if constexpr (Spec::enabled_stages[0]) {
+ bind_stage_info(0);
+ }
+ if constexpr (Spec::enabled_stages[1]) {
+ bind_stage_info(1);
+ }
+ if constexpr (Spec::enabled_stages[2]) {
+ bind_stage_info(2);
+ }
+ if constexpr (Spec::enabled_stages[3]) {
+ bind_stage_info(3);
+ }
+ if constexpr (Spec::enabled_stages[4]) {
+ bind_stage_info(4);
+ }
+
+ buffer_cache.UpdateGraphicsBuffers(is_indexed);
+ buffer_cache.BindHostGeometryBuffers(is_indexed);
- ci.codeSize = stage->code.size() * sizeof(u32);
- ci.pCode = stage->code.data();
- shader_modules.push_back(device.GetLogical().CreateShaderModule(ci));
+ update_descriptor_queue.Acquire();
+
+ const VkSampler* samplers_it{samplers.data()};
+ const ImageId* views_it{image_view_ids.data()};
+ const auto prepare_stage{[&](size_t stage) LAMBDA_FORCEINLINE {
+ buffer_cache.BindHostStageBuffers(stage);
+ PushImageDescriptors(stage_infos[stage], samplers_it, views_it, texture_cache,
+ update_descriptor_queue);
+ }};
+ if constexpr (Spec::enabled_stages[0]) {
+ prepare_stage(0);
+ }
+ if constexpr (Spec::enabled_stages[1]) {
+ prepare_stage(1);
}
- return shader_modules;
+ if constexpr (Spec::enabled_stages[2]) {
+ prepare_stage(2);
+ }
+ if constexpr (Spec::enabled_stages[3]) {
+ prepare_stage(3);
+ }
+ if constexpr (Spec::enabled_stages[4]) {
+ prepare_stage(4);
+ }
+ ConfigureDraw();
}
-vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
- VkRenderPass renderpass,
- u32 num_color_buffers) const {
- const auto& state = cache_key.fixed_state;
- const auto& viewport_swizzles = state.viewport_swizzles;
-
- FixedPipelineState::DynamicState dynamic;
- if (device.IsExtExtendedDynamicStateSupported()) {
- // Insert dummy values, as long as they are valid they don't matter as extended dynamic
- // state is ignored
- dynamic.raw1 = 0;
- dynamic.raw2 = 0;
- dynamic.vertex_strides.fill(0);
- } else {
- dynamic = state.dynamic_state;
- }
-
- std::vector<VkVertexInputBindingDescription> vertex_bindings;
- std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors;
- for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
- const bool instanced = state.binding_divisors[index] != 0;
- const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX;
- vertex_bindings.push_back({
- .binding = static_cast<u32>(index),
- .stride = dynamic.vertex_strides[index],
- .inputRate = rate,
+void GraphicsPipeline::ConfigureDraw() {
+ texture_cache.UpdateRenderTargets(false);
+ scheduler.RequestRenderpass(texture_cache.GetFramebuffer());
+
+ if (!is_built.load(std::memory_order::relaxed)) {
+ // Wait for the pipeline to be built
+ scheduler.Record([this](vk::CommandBuffer) {
+ std::unique_lock lock{build_mutex};
+ build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); });
});
- if (instanced) {
- vertex_binding_divisors.push_back({
- .binding = static_cast<u32>(index),
- .divisor = state.binding_divisors[index],
- });
- }
}
+ const bool bind_pipeline{scheduler.UpdateGraphicsPipeline(this)};
+ const void* const descriptor_data{update_descriptor_queue.UpdateData()};
+ scheduler.Record([this, descriptor_data, bind_pipeline](vk::CommandBuffer cmdbuf) {
+ if (bind_pipeline) {
+ cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
+ }
+ if (!descriptor_set_layout) {
+ return;
+ }
+ if (uses_push_descriptor) {
+ cmdbuf.PushDescriptorSetWithTemplateKHR(*descriptor_update_template, *pipeline_layout,
+ 0, descriptor_data);
+ } else {
+ const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()};
+ const vk::Device& dev{device.GetLogical()};
+ dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data);
+ cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0,
+ descriptor_set, nullptr);
+ }
+ });
+}
- std::vector<VkVertexInputAttributeDescription> vertex_attributes;
- const auto& input_attributes = program[0]->entries.attributes;
- for (std::size_t index = 0; index < state.attributes.size(); ++index) {
- const auto& attribute = state.attributes[index];
- if (!attribute.enabled) {
- continue;
+void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
+ FixedPipelineState::DynamicState dynamic{};
+ if (!key.state.extended_dynamic_state) {
+ dynamic = key.state.dynamic_state;
+ }
+ static_vector<VkVertexInputBindingDescription, 32> vertex_bindings;
+ static_vector<VkVertexInputBindingDivisorDescriptionEXT, 32> vertex_binding_divisors;
+ static_vector<VkVertexInputAttributeDescription, 32> vertex_attributes;
+ if (key.state.dynamic_vertex_input) {
+ for (size_t index = 0; index < key.state.attributes.size(); ++index) {
+ const u32 type = key.state.DynamicAttributeType(index);
+ if (!stage_infos[0].loads.Generic(index) || type == 0) {
+ continue;
+ }
+ vertex_attributes.push_back({
+ .location = static_cast<u32>(index),
+ .binding = 0,
+ .format = type == 1 ? VK_FORMAT_R32_SFLOAT
+ : type == 2 ? VK_FORMAT_R32_SINT : VK_FORMAT_R32_UINT,
+ .offset = 0,
+ });
}
- if (!input_attributes.contains(static_cast<u32>(index))) {
- // Skip attributes not used by the vertex shaders.
- continue;
+ if (!vertex_attributes.empty()) {
+ vertex_bindings.push_back({
+ .binding = 0,
+ .stride = 4,
+ .inputRate = VK_VERTEX_INPUT_RATE_VERTEX,
+ });
+ }
+ } else {
+ for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
+ const bool instanced = key.state.binding_divisors[index] != 0;
+ const auto rate =
+ instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX;
+ vertex_bindings.push_back({
+ .binding = static_cast<u32>(index),
+ .stride = dynamic.vertex_strides[index],
+ .inputRate = rate,
+ });
+ if (instanced) {
+ vertex_binding_divisors.push_back({
+ .binding = static_cast<u32>(index),
+ .divisor = key.state.binding_divisors[index],
+ });
+ }
+ }
+ for (size_t index = 0; index < key.state.attributes.size(); ++index) {
+ const auto& attribute = key.state.attributes[index];
+ if (!attribute.enabled || !stage_infos[0].loads.Generic(index)) {
+ continue;
+ }
+ vertex_attributes.push_back({
+ .location = static_cast<u32>(index),
+ .binding = attribute.buffer,
+ .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()),
+ .offset = attribute.offset,
+ });
}
- vertex_attributes.push_back({
- .location = static_cast<u32>(index),
- .binding = attribute.buffer,
- .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()),
- .offset = attribute.offset,
- });
}
-
VkPipelineVertexInputStateCreateInfo vertex_input_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
.pNext = nullptr,
@@ -264,7 +554,6 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
.vertexAttributeDescriptionCount = static_cast<u32>(vertex_attributes.size()),
.pVertexAttributeDescriptions = vertex_attributes.data(),
};
-
const VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT,
.pNext = nullptr,
@@ -274,78 +563,113 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
if (!vertex_binding_divisors.empty()) {
vertex_input_ci.pNext = &input_divisor_ci;
}
-
- const auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, state.topology);
+ auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, key.state.topology);
+ if (input_assembly_topology == VK_PRIMITIVE_TOPOLOGY_PATCH_LIST) {
+ if (!spv_modules[1] && !spv_modules[2]) {
+ LOG_WARNING(Render_Vulkan, "Patch topology used without tessellation, using points");
+ input_assembly_topology = VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
+ }
+ }
const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
- .topology = MaxwellToVK::PrimitiveTopology(device, state.topology),
- .primitiveRestartEnable = state.primitive_restart_enable != 0 &&
+ .topology = input_assembly_topology,
+ .primitiveRestartEnable = key.state.primitive_restart_enable != 0 &&
SupportsPrimitiveRestart(input_assembly_topology),
};
-
const VkPipelineTessellationStateCreateInfo tessellation_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
- .patchControlPoints = state.patch_control_points_minus_one.Value() + 1,
- };
-
- VkPipelineViewportStateCreateInfo viewport_ci{
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
- .pNext = nullptr,
- .flags = 0,
- .viewportCount = Maxwell::NumViewports,
- .pViewports = nullptr,
- .scissorCount = Maxwell::NumViewports,
- .pScissors = nullptr,
+ .patchControlPoints = key.state.patch_control_points_minus_one.Value() + 1,
};
std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles;
- std::ranges::transform(viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle);
- VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{
+ std::ranges::transform(key.state.viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle);
+ const VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV,
.pNext = nullptr,
.flags = 0,
.viewportCount = Maxwell::NumViewports,
.pViewportSwizzles = swizzles.data(),
};
- if (device.IsNvViewportSwizzleSupported()) {
- viewport_ci.pNext = &swizzle_ci;
- }
+ const VkPipelineViewportStateCreateInfo viewport_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .pNext = device.IsNvViewportSwizzleSupported() ? &swizzle_ci : nullptr,
+ .flags = 0,
+ .viewportCount = Maxwell::NumViewports,
+ .pViewports = nullptr,
+ .scissorCount = Maxwell::NumViewports,
+ .pScissors = nullptr,
+ };
- const VkPipelineRasterizationStateCreateInfo rasterization_ci{
+ VkPipelineRasterizationStateCreateInfo rasterization_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.depthClampEnable =
- static_cast<VkBool32>(state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE),
+ static_cast<VkBool32>(key.state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE),
.rasterizerDiscardEnable =
- static_cast<VkBool32>(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE),
- .polygonMode = VK_POLYGON_MODE_FILL,
+ static_cast<VkBool32>(key.state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE),
+ .polygonMode =
+ MaxwellToVK::PolygonMode(FixedPipelineState::UnpackPolygonMode(key.state.polygon_mode)),
.cullMode = static_cast<VkCullModeFlags>(
dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE),
.frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()),
- .depthBiasEnable = state.depth_bias_enable,
+ .depthBiasEnable = key.state.depth_bias_enable,
.depthBiasConstantFactor = 0.0f,
.depthBiasClamp = 0.0f,
.depthBiasSlopeFactor = 0.0f,
.lineWidth = 1.0f,
};
+ VkPipelineRasterizationLineStateCreateInfoEXT line_state{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT,
+ .pNext = nullptr,
+ .lineRasterizationMode = key.state.smooth_lines != 0
+ ? VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT
+ : VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT,
+ .stippledLineEnable = VK_FALSE, // TODO
+ .lineStippleFactor = 0,
+ .lineStipplePattern = 0,
+ };
+ VkPipelineRasterizationConservativeStateCreateInfoEXT conservative_raster{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_CONSERVATIVE_STATE_CREATE_INFO_EXT,
+ .pNext = nullptr,
+ .flags = 0,
+ .conservativeRasterizationMode = key.state.conservative_raster_enable != 0
+ ? VK_CONSERVATIVE_RASTERIZATION_MODE_OVERESTIMATE_EXT
+ : VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT,
+ .extraPrimitiveOverestimationSize = 0.0f,
+ };
+ VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provoking_vertex{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT,
+ .pNext = nullptr,
+ .provokingVertexMode = key.state.provoking_vertex_last != 0
+ ? VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT
+ : VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT,
+ };
+ if (IsLine(input_assembly_topology) && device.IsExtLineRasterizationSupported()) {
+ line_state.pNext = std::exchange(rasterization_ci.pNext, &line_state);
+ }
+ if (device.IsExtConservativeRasterizationSupported()) {
+ conservative_raster.pNext = std::exchange(rasterization_ci.pNext, &conservative_raster);
+ }
+ if (device.IsExtProvokingVertexSupported()) {
+ provoking_vertex.pNext = std::exchange(rasterization_ci.pNext, &provoking_vertex);
+ }
const VkPipelineMultisampleStateCreateInfo multisample_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
- .rasterizationSamples = ConvertMsaaMode(state.msaa_mode),
+ .rasterizationSamples = MaxwellToVK::MsaaMode(key.state.msaa_mode),
.sampleShadingEnable = VK_FALSE,
.minSampleShading = 0.0f,
.pSampleMask = nullptr,
.alphaToCoverageEnable = VK_FALSE,
.alphaToOneEnable = VK_FALSE,
};
-
const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
.pNext = nullptr,
@@ -355,32 +679,32 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
.depthCompareOp = dynamic.depth_test_enable
? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc())
: VK_COMPARE_OP_ALWAYS,
- .depthBoundsTestEnable = dynamic.depth_bounds_enable,
+ .depthBoundsTestEnable = dynamic.depth_bounds_enable && device.IsDepthBoundsSupported(),
.stencilTestEnable = dynamic.stencil_enable,
.front = GetStencilFaceState(dynamic.front),
.back = GetStencilFaceState(dynamic.back),
.minDepthBounds = 0.0f,
.maxDepthBounds = 0.0f,
};
-
- std::array<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments;
- for (std::size_t index = 0; index < num_color_buffers; ++index) {
- static constexpr std::array COMPONENT_TABLE{
+ if (dynamic.depth_bounds_enable && !device.IsDepthBoundsSupported()) {
+ LOG_WARNING(Render_Vulkan, "Depth bounds is enabled but not supported");
+ }
+ static_vector<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments;
+ const size_t num_attachments{NumAttachments(key.state)};
+ for (size_t index = 0; index < num_attachments; ++index) {
+ static constexpr std::array mask_table{
VK_COLOR_COMPONENT_R_BIT,
VK_COLOR_COMPONENT_G_BIT,
VK_COLOR_COMPONENT_B_BIT,
VK_COLOR_COMPONENT_A_BIT,
};
- const auto& blend = state.attachments[index];
-
- VkColorComponentFlags color_components = 0;
- for (std::size_t i = 0; i < COMPONENT_TABLE.size(); ++i) {
- if (blend.Mask()[i]) {
- color_components |= COMPONENT_TABLE[i];
- }
+ const auto& blend{key.state.attachments[index]};
+ const std::array mask{blend.Mask()};
+ VkColorComponentFlags write_mask{};
+ for (size_t i = 0; i < mask_table.size(); ++i) {
+ write_mask |= mask[i] ? mask_table[i] : 0;
}
-
- cb_attachments[index] = {
+ cb_attachments.push_back({
.blendEnable = blend.enable != 0,
.srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()),
.dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor()),
@@ -388,28 +712,27 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
.srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()),
.dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()),
.alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()),
- .colorWriteMask = color_components,
- };
+ .colorWriteMask = write_mask,
+ });
}
-
const VkPipelineColorBlendStateCreateInfo color_blend_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.logicOpEnable = VK_FALSE,
.logicOp = VK_LOGIC_OP_COPY,
- .attachmentCount = num_color_buffers,
+ .attachmentCount = static_cast<u32>(cb_attachments.size()),
.pAttachments = cb_attachments.data(),
.blendConstants = {},
};
-
- std::vector dynamic_states{
+ static_vector<VkDynamicState, 19> dynamic_states{
VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR,
VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS,
VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE,
+ VK_DYNAMIC_STATE_LINE_WIDTH,
};
- if (device.IsExtExtendedDynamicStateSupported()) {
+ if (key.state.extended_dynamic_state) {
static constexpr std::array extended{
VK_DYNAMIC_STATE_CULL_MODE_EXT,
VK_DYNAMIC_STATE_FRONT_FACE_EXT,
@@ -421,9 +744,11 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT,
VK_DYNAMIC_STATE_STENCIL_OP_EXT,
};
+ if (key.state.dynamic_vertex_input) {
+ dynamic_states.push_back(VK_DYNAMIC_STATE_VERTEX_INPUT_EXT);
+ }
dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end());
}
-
const VkPipelineDynamicStateCreateInfo dynamic_state_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
.pNext = nullptr,
@@ -431,34 +756,33 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
.dynamicStateCount = static_cast<u32>(dynamic_states.size()),
.pDynamicStates = dynamic_states.data(),
};
-
- const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
+ [[maybe_unused]] const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
.pNext = nullptr,
.requiredSubgroupSize = GuestWarpSize,
};
-
- std::vector<VkPipelineShaderStageCreateInfo> shader_stages;
- std::size_t module_index = 0;
- for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
- if (!program[stage]) {
+ static_vector<VkPipelineShaderStageCreateInfo, 5> shader_stages;
+ for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
+ if (!spv_modules[stage]) {
continue;
}
-
- VkPipelineShaderStageCreateInfo& stage_ci = shader_stages.emplace_back();
- stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
- stage_ci.pNext = nullptr;
- stage_ci.flags = 0;
- stage_ci.stage = MaxwellToVK::ShaderStage(static_cast<Tegra::Engines::ShaderType>(stage));
- stage_ci.module = *modules[module_index++];
- stage_ci.pName = "main";
- stage_ci.pSpecializationInfo = nullptr;
-
+ [[maybe_unused]] auto& stage_ci =
+ shader_stages.emplace_back(VkPipelineShaderStageCreateInfo{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .stage = MaxwellToVK::ShaderStage(Shader::StageFromIndex(stage)),
+ .module = *spv_modules[stage],
+ .pName = "main",
+ .pSpecializationInfo = nullptr,
+ });
+ /*
if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) {
stage_ci.pNext = &subgroup_size_ci;
}
+ */
}
- return device.GetLogical().CreateGraphicsPipeline(VkGraphicsPipelineCreateInfo{
+ pipeline = device.GetLogical().CreateGraphicsPipeline({
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@@ -473,12 +797,31 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
.pDepthStencilState = &depth_stencil_ci,
.pColorBlendState = &color_blend_ci,
.pDynamicState = &dynamic_state_ci,
- .layout = *layout,
- .renderPass = renderpass,
+ .layout = *pipeline_layout,
+ .renderPass = render_pass,
.subpass = 0,
.basePipelineHandle = nullptr,
.basePipelineIndex = 0,
});
}
+void GraphicsPipeline::Validate() {
+ size_t num_images{};
+ for (const auto& info : stage_infos) {
+ for (const auto& desc : info.texture_buffer_descriptors) {
+ num_images += desc.count;
+ }
+ for (const auto& desc : info.image_buffer_descriptors) {
+ num_images += desc.count;
+ }
+ for (const auto& desc : info.texture_descriptors) {
+ num_images += desc.count;
+ }
+ for (const auto& desc : info.image_descriptors) {
+ num_images += desc.count;
+ }
+ }
+ ASSERT(num_images <= MAX_IMAGE_ELEMENTS);
+}
+
} // namespace Vulkan
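
MakePipeline() attaches the optional rasterization extension structs by prepending each one to rasterization_ci.pNext with std::exchange, so any combination of line, conservative-raster and provoking-vertex state can be chained without tracking the tail of the list. A runnable sketch of that prepend idiom using dummy stand-in structs (the real code uses the Vulkan create-info types, which carry their pNext member the same way):

#include <cstdio>
#include <utility>

// Minimal stand-ins for Vulkan-style structs with an intrusive pNext chain.
struct BaseCreateInfo {
    const void* pNext{nullptr};
};
struct LineStateExt {
    const void* pNext{nullptr};
};
struct ConservativeRasterExt {
    const void* pNext{nullptr};
};

// Prepend an extension to the chain: std::exchange stores the old head inside
// the new struct and installs the new struct as the head, so every previously
// chained extension is preserved.
template <typename Extension, typename Base>
void PrependToChain(Base& base, Extension& extension) {
    extension.pNext = std::exchange(base.pNext, &extension);
}

int main() {
    BaseCreateInfo rasterization_ci{};
    LineStateExt line_state{};
    ConservativeRasterExt conservative{};
    PrependToChain(rasterization_ci, line_state);
    PrependToChain(rasterization_ci, conservative);
    // Chain is now: rasterization_ci -> conservative -> line_state -> nullptr
    std::printf("head is conservative: %d\n", rasterization_ci.pNext == &conservative);
    std::printf("conservative links line_state: %d\n", conservative.pNext == &line_state);
}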
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
index 8b6a98fe0..2bd48d697 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -1,30 +1,36 @@
-// Copyright 2019 yuzu Emulator Project
+// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
+#include <algorithm>
#include <array>
-#include <optional>
-#include <vector>
+#include <atomic>
+#include <condition_variable>
+#include <mutex>
+#include <type_traits>
-#include "common/common_types.h"
+#include "common/thread_worker.h"
+#include "shader_recompiler/shader_info.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
+#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
-#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
+#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
-namespace Vulkan {
+namespace VideoCore {
+class ShaderNotify;
+}
-using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+namespace Vulkan {
struct GraphicsPipelineCacheKey {
- VkRenderPass renderpass;
- std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
- FixedPipelineState fixed_state;
+ std::array<u64, 6> unique_hashes;
+ FixedPipelineState state;
- std::size_t Hash() const noexcept;
+ size_t Hash() const noexcept;
bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept;
@@ -32,72 +38,115 @@ struct GraphicsPipelineCacheKey {
return !operator==(rhs);
}
- std::size_t Size() const noexcept {
- return sizeof(renderpass) + sizeof(shaders) + fixed_state.Size();
+ size_t Size() const noexcept {
+ return sizeof(unique_hashes) + state.Size();
}
};
static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
+} // namespace Vulkan
+
+namespace std {
+template <>
+struct hash<Vulkan::GraphicsPipelineCacheKey> {
+ size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept {
+ return k.Hash();
+ }
+};
+} // namespace std
+
+namespace Vulkan {
+
class Device;
-class VKDescriptorPool;
+class RenderPassCache;
class VKScheduler;
class VKUpdateDescriptorQueue;
-using SPIRVProgram = std::array<std::optional<SPIRVShader>, Maxwell::MaxShaderStage>;
+class GraphicsPipeline {
+ static constexpr size_t NUM_STAGES = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
-class VKGraphicsPipeline final {
public:
- explicit VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_,
- VKDescriptorPool& descriptor_pool,
- VKUpdateDescriptorQueue& update_descriptor_queue_,
- const GraphicsPipelineCacheKey& key,
- vk::Span<VkDescriptorSetLayoutBinding> bindings,
- const SPIRVProgram& program, u32 num_color_buffers);
- ~VKGraphicsPipeline();
-
- VkDescriptorSet CommitDescriptorSet();
-
- VkPipeline GetHandle() const {
- return *pipeline;
+ explicit GraphicsPipeline(
+ Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory,
+ VKScheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache,
+ VideoCore::ShaderNotify* shader_notify, const Device& device,
+ DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue,
+ Common::ThreadWorker* worker_thread, RenderPassCache& render_pass_cache,
+ const GraphicsPipelineCacheKey& key, std::array<vk::ShaderModule, NUM_STAGES> stages,
+ const std::array<const Shader::Info*, NUM_STAGES>& infos);
+
+ GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = delete;
+ GraphicsPipeline(GraphicsPipeline&&) noexcept = delete;
+
+ GraphicsPipeline& operator=(const GraphicsPipeline&) = delete;
+ GraphicsPipeline(const GraphicsPipeline&) = delete;
+
+ void AddTransition(GraphicsPipeline* transition);
+
+ void Configure(bool is_indexed) {
+ configure_func(this, is_indexed);
}
- VkPipelineLayout GetLayout() const {
- return *layout;
+ [[nodiscard]] GraphicsPipeline* Next(const GraphicsPipelineCacheKey& current_key) noexcept {
+ if (key == current_key) {
+ return this;
+ }
+ const auto it{std::find(transition_keys.begin(), transition_keys.end(), current_key)};
+ return it != transition_keys.end() ? transitions[std::distance(transition_keys.begin(), it)]
+ : nullptr;
}
- GraphicsPipelineCacheKey GetCacheKey() const {
- return cache_key;
+ [[nodiscard]] bool IsBuilt() const noexcept {
+ return is_built.load(std::memory_order::relaxed);
}
-private:
- vk::DescriptorSetLayout CreateDescriptorSetLayout(
- vk::Span<VkDescriptorSetLayoutBinding> bindings) const;
+ template <typename Spec>
+ static auto MakeConfigureSpecFunc() {
+ return [](GraphicsPipeline* pl, bool is_indexed) { pl->ConfigureImpl<Spec>(is_indexed); };
+ }
- vk::PipelineLayout CreatePipelineLayout() const;
+private:
+ template <typename Spec>
+ void ConfigureImpl(bool is_indexed);
- vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate(
- const SPIRVProgram& program) const;
+ void ConfigureDraw();
- std::vector<vk::ShaderModule> CreateShaderModules(const SPIRVProgram& program) const;
+ void MakePipeline(VkRenderPass render_pass);
- vk::Pipeline CreatePipeline(const SPIRVProgram& program, VkRenderPass renderpass,
- u32 num_color_buffers) const;
+ void Validate();
+ const GraphicsPipelineCacheKey key;
+ Tegra::Engines::Maxwell3D& maxwell3d;
+ Tegra::MemoryManager& gpu_memory;
const Device& device;
+ TextureCache& texture_cache;
+ BufferCache& buffer_cache;
VKScheduler& scheduler;
- const GraphicsPipelineCacheKey cache_key;
- const u64 hash;
+ VKUpdateDescriptorQueue& update_descriptor_queue;
+
+ void (*configure_func)(GraphicsPipeline*, bool){};
+
+ std::vector<GraphicsPipelineCacheKey> transition_keys;
+ std::vector<GraphicsPipeline*> transitions;
+
+ std::array<vk::ShaderModule, NUM_STAGES> spv_modules;
+
+ std::array<Shader::Info, NUM_STAGES> stage_infos;
+ std::array<u32, 5> enabled_uniform_buffer_masks{};
+ VideoCommon::UniformBufferSizes uniform_buffer_sizes{};
vk::DescriptorSetLayout descriptor_set_layout;
DescriptorAllocator descriptor_allocator;
- VKUpdateDescriptorQueue& update_descriptor_queue;
- vk::PipelineLayout layout;
- vk::DescriptorUpdateTemplateKHR descriptor_template;
- std::vector<vk::ShaderModule> modules;
-
+ vk::PipelineLayout pipeline_layout;
+ vk::DescriptorUpdateTemplateKHR descriptor_update_template;
vk::Pipeline pipeline;
+
+ std::condition_variable build_condvar;
+ std::mutex build_mutex;
+ std::atomic_bool is_built{false};
+ bool uses_push_descriptor{false};
};
} // namespace Vulkan
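
Worth noting for readers of the new Next() helper above: transition_keys and transitions are parallel vectors, and the lookup is a plain linear scan that falls back to the pipeline-cache hash map on a miss. A minimal, self-contained sketch of that pattern (Key, Pipeline and TransitionTable are hypothetical stand-ins, not the yuzu types):

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    // Hypothetical stand-ins for GraphicsPipelineCacheKey and GraphicsPipeline.
    struct Key {
        int value{};
        bool operator==(const Key&) const = default;
    };
    struct Pipeline;

    struct TransitionTable {
        std::vector<Key> keys;          // Scanned linearly; stays small in practice.
        std::vector<Pipeline*> targets; // targets[i] belongs to keys[i].

        Pipeline* Find(const Key& key) const noexcept {
            const auto it = std::find(keys.begin(), keys.end(), key);
            if (it == keys.end()) {
                return nullptr; // Caller falls back to the hash-map slow path.
            }
            return targets[static_cast<std::size_t>(std::distance(keys.begin(), it))];
        }
    };

A linear scan is a reasonable fit here because each pipeline only records the handful of pipelines it has transitioned to.
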
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
index db78ce3d9..6852c11b0 100644
--- a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
@@ -2,8 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include <atomic>
-#include <chrono>
+#include <thread>
#include "common/settings.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
@@ -12,8 +11,6 @@
namespace Vulkan {
-using namespace std::chrono_literals;
-
MasterSemaphore::MasterSemaphore(const Device& device) {
static constexpr VkSemaphoreTypeCreateInfoKHR semaphore_type_ci{
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO_KHR,
@@ -34,9 +31,9 @@ MasterSemaphore::MasterSemaphore(const Device& device) {
// Validation layers have a bug where they fail to track resource usage when using timeline
// semaphores and synchronizing with GetSemaphoreCounterValueKHR. To work around this issue, have
// a separate thread waiting for each timeline semaphore value.
- debug_thread = std::thread([this] {
+ debug_thread = std::jthread([this](std::stop_token stop_token) {
u64 counter = 0;
- while (!shutdown) {
+ while (!stop_token.stop_requested()) {
if (semaphore.Wait(counter, 10'000'000)) {
++counter;
}
@@ -44,13 +41,6 @@ MasterSemaphore::MasterSemaphore(const Device& device) {
});
}
-MasterSemaphore::~MasterSemaphore() {
- shutdown = true;
-
- // This thread might not be started
- if (debug_thread.joinable()) {
- debug_thread.join();
- }
-}
+MasterSemaphore::~MasterSemaphore() = default;
} // namespace Vulkan
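
The std::jthread change above works because a jthread's destructor requests a stop and then joins, which is what lets the explicit shutdown flag and the user-defined destructor disappear. A minimal sketch of the same pattern, assuming C++20 and using a generic polling worker rather than the semaphore code:

    #include <chrono>
    #include <stop_token>
    #include <thread>

    class PollingWorker {
    public:
        PollingWorker()
            : thread{[](std::stop_token stop) {
                  // Runs until the owning object is destroyed.
                  while (!stop.stop_requested()) {
                      std::this_thread::sleep_for(std::chrono::milliseconds{10});
                  }
              }} {}
        // No user-defined destructor: ~jthread() calls request_stop() and then join().

    private:
        std::jthread thread;
    };
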
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h
index 4b6d64daa..4f8688118 100644
--- a/src/video_core/renderer_vulkan/vk_master_semaphore.h
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h
@@ -39,9 +39,9 @@ public:
return KnownGpuTick() >= tick;
}
- /// Advance to the logical tick.
- void NextTick() noexcept {
- ++current_tick;
+ /// Advance to the logical tick and return the old one
+ [[nodiscard]] u64 NextTick() noexcept {
+ return current_tick.fetch_add(1, std::memory_order::relaxed);
}
/// Refresh the known GPU tick
@@ -65,11 +65,10 @@ public:
}
private:
- vk::Semaphore semaphore; ///< Timeline semaphore.
- std::atomic<u64> gpu_tick{0}; ///< Current known GPU tick.
- std::atomic<u64> current_tick{1}; ///< Current logical tick.
- std::atomic<bool> shutdown{false}; ///< True when the object is being destroyed.
- std::thread debug_thread; ///< Debug thread to workaround validation layer bugs.
+ vk::Semaphore semaphore; ///< Timeline semaphore.
+ std::atomic<u64> gpu_tick{0}; ///< Current known GPU tick.
+ std::atomic<u64> current_tick{1}; ///< Current logical tick.
+ std::jthread debug_thread; ///< Debug thread to work around validation layer bugs.
};
} // namespace Vulkan
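
The revised NextTick() relies on fetch_add returning the value held before the increment, so every caller receives a distinct tick even when several threads advance the counter concurrently. A trimmed-down sketch (TickCounter is a hypothetical stand-in for MasterSemaphore):

    #include <atomic>
    #include <cstdint>

    class TickCounter {
    public:
        // Returns the tick assigned to this caller, then advances the counter.
        [[nodiscard]] std::uint64_t NextTick() noexcept {
            return current.fetch_add(1, std::memory_order::relaxed);
        }

    private:
        std::atomic<std::uint64_t> current{1};
    };
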
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 8991505ca..57b163247 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -4,444 +4,613 @@
#include <algorithm>
#include <cstddef>
+#include <fstream>
#include <memory>
+#include <thread>
#include <vector>
#include "common/bit_cast.h"
#include "common/cityhash.h"
+#include "common/fs/fs.h"
+#include "common/fs/path_util.h"
#include "common/microprofile.h"
+#include "common/thread_worker.h"
#include "core/core.h"
#include "core/memory.h"
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/maxwell/control_flow.h"
+#include "shader_recompiler/frontend/maxwell/translate_program.h"
+#include "shader_recompiler/program_header.h"
+#include "video_core/dirty_flags.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
+#include "video_core/renderer_vulkan/pipeline_helper.h"
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
-#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
-#include "video_core/shader/compiler_settings.h"
-#include "video_core/shader/memory_util.h"
#include "video_core/shader_cache.h"
+#include "video_core/shader_environment.h"
#include "video_core/shader_notify.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
-
MICROPROFILE_DECLARE(Vulkan_PipelineCache);
-using Tegra::Engines::ShaderType;
-using VideoCommon::Shader::GetShaderAddress;
-using VideoCommon::Shader::GetShaderCode;
-using VideoCommon::Shader::KERNEL_MAIN_OFFSET;
-using VideoCommon::Shader::ProgramCode;
-using VideoCommon::Shader::STAGE_MAIN_OFFSET;
-
namespace {
+using Shader::Backend::SPIRV::EmitSPIRV;
+using Shader::Maxwell::MergeDualVertexPrograms;
+using Shader::Maxwell::TranslateProgram;
+using VideoCommon::ComputeEnvironment;
+using VideoCommon::FileEnvironment;
+using VideoCommon::GenericEnvironment;
+using VideoCommon::GraphicsEnvironment;
+
+constexpr u32 CACHE_VERSION = 5;
+
+template <typename Container>
+auto MakeSpan(Container& container) {
+ return std::span(container.data(), container.size());
+}
-constexpr VkDescriptorType UNIFORM_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
-constexpr VkDescriptorType STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
-constexpr VkDescriptorType UNIFORM_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
-constexpr VkDescriptorType COMBINED_IMAGE_SAMPLER = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
-constexpr VkDescriptorType STORAGE_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
-constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
-
-constexpr VideoCommon::Shader::CompilerSettings compiler_settings{
- .depth = VideoCommon::Shader::CompileDepth::FullDecompile,
- .disable_else_derivation = true,
-};
-
-constexpr std::size_t GetStageFromProgram(std::size_t program) {
- return program == 0 ? 0 : program - 1;
+Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp comparison) {
+ switch (comparison) {
+ case Maxwell::ComparisonOp::Never:
+ case Maxwell::ComparisonOp::NeverOld:
+ return Shader::CompareFunction::Never;
+ case Maxwell::ComparisonOp::Less:
+ case Maxwell::ComparisonOp::LessOld:
+ return Shader::CompareFunction::Less;
+ case Maxwell::ComparisonOp::Equal:
+ case Maxwell::ComparisonOp::EqualOld:
+ return Shader::CompareFunction::Equal;
+ case Maxwell::ComparisonOp::LessEqual:
+ case Maxwell::ComparisonOp::LessEqualOld:
+ return Shader::CompareFunction::LessThanEqual;
+ case Maxwell::ComparisonOp::Greater:
+ case Maxwell::ComparisonOp::GreaterOld:
+ return Shader::CompareFunction::Greater;
+ case Maxwell::ComparisonOp::NotEqual:
+ case Maxwell::ComparisonOp::NotEqualOld:
+ return Shader::CompareFunction::NotEqual;
+ case Maxwell::ComparisonOp::GreaterEqual:
+ case Maxwell::ComparisonOp::GreaterEqualOld:
+ return Shader::CompareFunction::GreaterThanEqual;
+ case Maxwell::ComparisonOp::Always:
+ case Maxwell::ComparisonOp::AlwaysOld:
+ return Shader::CompareFunction::Always;
+ }
+ UNIMPLEMENTED_MSG("Unimplemented comparison op={}", comparison);
+ return {};
}
-constexpr ShaderType GetStageFromProgram(Maxwell::ShaderProgram program) {
- return static_cast<ShaderType>(GetStageFromProgram(static_cast<std::size_t>(program)));
+Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) {
+ if (attr.enabled == 0) {
+ return Shader::AttributeType::Disabled;
+ }
+ switch (attr.Type()) {
+ case Maxwell::VertexAttribute::Type::SignedNorm:
+ case Maxwell::VertexAttribute::Type::UnsignedNorm:
+ case Maxwell::VertexAttribute::Type::UnsignedScaled:
+ case Maxwell::VertexAttribute::Type::SignedScaled:
+ case Maxwell::VertexAttribute::Type::Float:
+ return Shader::AttributeType::Float;
+ case Maxwell::VertexAttribute::Type::SignedInt:
+ return Shader::AttributeType::SignedInt;
+ case Maxwell::VertexAttribute::Type::UnsignedInt:
+ return Shader::AttributeType::UnsignedInt;
+ }
+ return Shader::AttributeType::Float;
}
-ShaderType GetShaderType(Maxwell::ShaderProgram program) {
- switch (program) {
- case Maxwell::ShaderProgram::VertexB:
- return ShaderType::Vertex;
- case Maxwell::ShaderProgram::TesselationControl:
- return ShaderType::TesselationControl;
- case Maxwell::ShaderProgram::TesselationEval:
- return ShaderType::TesselationEval;
- case Maxwell::ShaderProgram::Geometry:
- return ShaderType::Geometry;
- case Maxwell::ShaderProgram::Fragment:
- return ShaderType::Fragment;
- default:
- UNIMPLEMENTED_MSG("program={}", program);
- return ShaderType::Vertex;
+Shader::AttributeType AttributeType(const FixedPipelineState& state, size_t index) {
+ switch (state.DynamicAttributeType(index)) {
+ case 0:
+ return Shader::AttributeType::Disabled;
+ case 1:
+ return Shader::AttributeType::Float;
+ case 2:
+ return Shader::AttributeType::SignedInt;
+ case 3:
+ return Shader::AttributeType::UnsignedInt;
}
+ return Shader::AttributeType::Disabled;
}
-template <VkDescriptorType descriptor_type, class Container>
-void AddBindings(std::vector<VkDescriptorSetLayoutBinding>& bindings, u32& binding,
- VkShaderStageFlags stage_flags, const Container& container) {
- const u32 num_entries = static_cast<u32>(std::size(container));
- for (std::size_t i = 0; i < num_entries; ++i) {
- u32 count = 1;
- if constexpr (descriptor_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
- // Combined image samplers can be arrayed.
- count = container[i].size;
+Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> programs,
+ const GraphicsPipelineCacheKey& key,
+ const Shader::IR::Program& program,
+ const Shader::IR::Program* previous_program) {
+ Shader::RuntimeInfo info;
+ if (previous_program) {
+ info.previous_stage_stores = previous_program->info.stores;
+ if (previous_program->is_geometry_passthrough) {
+ info.previous_stage_stores.mask |= previous_program->info.passthrough.mask;
}
- bindings.push_back({
- .binding = binding++,
- .descriptorType = descriptor_type,
- .descriptorCount = count,
- .stageFlags = stage_flags,
- .pImmutableSamplers = nullptr,
- });
+ } else {
+ info.previous_stage_stores.mask.set();
+ }
+ const Shader::Stage stage{program.stage};
+ const bool has_geometry{key.unique_hashes[4] != 0 && !programs[4].is_geometry_passthrough};
+ const bool gl_ndc{key.state.ndc_minus_one_to_one != 0};
+ const float point_size{Common::BitCast<float>(key.state.point_size)};
+ switch (stage) {
+ case Shader::Stage::VertexB:
+ if (!has_geometry) {
+ if (key.state.topology == Maxwell::PrimitiveTopology::Points) {
+ info.fixed_state_point_size = point_size;
+ }
+ if (key.state.xfb_enabled) {
+ info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
+ }
+ info.convert_depth_mode = gl_ndc;
+ }
+ if (key.state.dynamic_vertex_input) {
+ for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
+ info.generic_input_types[index] = AttributeType(key.state, index);
+ }
+ } else {
+ std::ranges::transform(key.state.attributes, info.generic_input_types.begin(),
+ &CastAttributeType);
+ }
+ break;
+ case Shader::Stage::TessellationEval:
+ // We have to flip tessellation clockwise for some reason...
+ info.tess_clockwise = key.state.tessellation_clockwise == 0;
+ info.tess_primitive = [&key] {
+ const u32 raw{key.state.tessellation_primitive.Value()};
+ switch (static_cast<Maxwell::TessellationPrimitive>(raw)) {
+ case Maxwell::TessellationPrimitive::Isolines:
+ return Shader::TessPrimitive::Isolines;
+ case Maxwell::TessellationPrimitive::Triangles:
+ return Shader::TessPrimitive::Triangles;
+ case Maxwell::TessellationPrimitive::Quads:
+ return Shader::TessPrimitive::Quads;
+ }
+ UNREACHABLE();
+ return Shader::TessPrimitive::Triangles;
+ }();
+ info.tess_spacing = [&] {
+ const u32 raw{key.state.tessellation_spacing};
+ switch (static_cast<Maxwell::TessellationSpacing>(raw)) {
+ case Maxwell::TessellationSpacing::Equal:
+ return Shader::TessSpacing::Equal;
+ case Maxwell::TessellationSpacing::FractionalOdd:
+ return Shader::TessSpacing::FractionalOdd;
+ case Maxwell::TessellationSpacing::FractionalEven:
+ return Shader::TessSpacing::FractionalEven;
+ }
+ UNREACHABLE();
+ return Shader::TessSpacing::Equal;
+ }();
+ break;
+ case Shader::Stage::Geometry:
+ if (program.output_topology == Shader::OutputTopology::PointList) {
+ info.fixed_state_point_size = point_size;
+ }
+ if (key.state.xfb_enabled != 0) {
+ info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
+ }
+ info.convert_depth_mode = gl_ndc;
+ break;
+ case Shader::Stage::Fragment:
+ info.alpha_test_func = MaxwellToCompareFunction(
+ key.state.UnpackComparisonOp(key.state.alpha_test_func.Value()));
+ info.alpha_test_reference = Common::BitCast<float>(key.state.alpha_test_ref);
+ break;
+ default:
+ break;
+ }
+ switch (key.state.topology) {
+ case Maxwell::PrimitiveTopology::Points:
+ info.input_topology = Shader::InputTopology::Points;
+ break;
+ case Maxwell::PrimitiveTopology::Lines:
+ case Maxwell::PrimitiveTopology::LineLoop:
+ case Maxwell::PrimitiveTopology::LineStrip:
+ info.input_topology = Shader::InputTopology::Lines;
+ break;
+ case Maxwell::PrimitiveTopology::Triangles:
+ case Maxwell::PrimitiveTopology::TriangleStrip:
+ case Maxwell::PrimitiveTopology::TriangleFan:
+ case Maxwell::PrimitiveTopology::Quads:
+ case Maxwell::PrimitiveTopology::QuadStrip:
+ case Maxwell::PrimitiveTopology::Polygon:
+ case Maxwell::PrimitiveTopology::Patches:
+ info.input_topology = Shader::InputTopology::Triangles;
+ break;
+ case Maxwell::PrimitiveTopology::LinesAdjacency:
+ case Maxwell::PrimitiveTopology::LineStripAdjacency:
+ info.input_topology = Shader::InputTopology::LinesAdjacency;
+ break;
+ case Maxwell::PrimitiveTopology::TrianglesAdjacency:
+ case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
+ info.input_topology = Shader::InputTopology::TrianglesAdjacency;
+ break;
}
+ info.force_early_z = key.state.early_z != 0;
+ info.y_negate = key.state.y_negate != 0;
+ return info;
}
+} // Anonymous namespace
-u32 FillDescriptorLayout(const ShaderEntries& entries,
- std::vector<VkDescriptorSetLayoutBinding>& bindings,
- Maxwell::ShaderProgram program_type, u32 base_binding) {
- const ShaderType stage = GetStageFromProgram(program_type);
- const VkShaderStageFlags flags = MaxwellToVK::ShaderStage(stage);
-
- u32 binding = base_binding;
- AddBindings<UNIFORM_BUFFER>(bindings, binding, flags, entries.const_buffers);
- AddBindings<STORAGE_BUFFER>(bindings, binding, flags, entries.global_buffers);
- AddBindings<UNIFORM_TEXEL_BUFFER>(bindings, binding, flags, entries.uniform_texels);
- AddBindings<COMBINED_IMAGE_SAMPLER>(bindings, binding, flags, entries.samplers);
- AddBindings<STORAGE_TEXEL_BUFFER>(bindings, binding, flags, entries.storage_texels);
- AddBindings<STORAGE_IMAGE>(bindings, binding, flags, entries.images);
- return binding;
+size_t ComputePipelineCacheKey::Hash() const noexcept {
+ const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
+ return static_cast<size_t>(hash);
}
-} // Anonymous namespace
+bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) const noexcept {
+ return std::memcmp(&rhs, this, sizeof *this) == 0;
+}
-std::size_t GraphicsPipelineCacheKey::Hash() const noexcept {
+size_t GraphicsPipelineCacheKey::Hash() const noexcept {
const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size());
- return static_cast<std::size_t>(hash);
+ return static_cast<size_t>(hash);
}
bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept {
return std::memcmp(&rhs, this, Size()) == 0;
}
-std::size_t ComputePipelineCacheKey::Hash() const noexcept {
- const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
- return static_cast<std::size_t>(hash);
-}
-
-bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) const noexcept {
- return std::memcmp(&rhs, this, sizeof *this) == 0;
+PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_,
+ Tegra::Engines::KeplerCompute& kepler_compute_,
+ Tegra::MemoryManager& gpu_memory_, const Device& device_,
+ VKScheduler& scheduler_, DescriptorPool& descriptor_pool_,
+ VKUpdateDescriptorQueue& update_descriptor_queue_,
+ RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_,
+ TextureCache& texture_cache_, VideoCore::ShaderNotify& shader_notify_)
+ : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_},
+ device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_},
+ update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_},
+ buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, shader_notify{shader_notify_},
+ use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()},
+ workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"),
+ serialization_thread(1, "yuzu:PipelineSerialization") {
+ const auto& float_control{device.FloatControlProperties()};
+ const VkDriverIdKHR driver_id{device.GetDriverID()};
+ profile = Shader::Profile{
+ .supported_spirv = device.IsKhrSpirv1_4Supported() ? 0x00010400U : 0x00010000U,
+ .unified_descriptor_binding = true,
+ .support_descriptor_aliasing = true,
+ .support_int8 = true,
+ .support_int16 = device.IsShaderInt16Supported(),
+ .support_int64 = device.IsShaderInt64Supported(),
+ .support_vertex_instance_id = false,
+ .support_float_controls = true,
+ .support_separate_denorm_behavior = float_control.denormBehaviorIndependence ==
+ VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR,
+ .support_separate_rounding_mode =
+ float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR,
+ .support_fp16_denorm_preserve = float_control.shaderDenormPreserveFloat16 != VK_FALSE,
+ .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE,
+ .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE,
+ .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE,
+ .support_fp16_signed_zero_nan_preserve =
+ float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE,
+ .support_fp32_signed_zero_nan_preserve =
+ float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE,
+ .support_fp64_signed_zero_nan_preserve =
+ float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE,
+ .support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(),
+ .support_vote = true,
+ .support_viewport_index_layer_non_geometry =
+ device.IsExtShaderViewportIndexLayerSupported(),
+ .support_viewport_mask = device.IsNvViewportArray2Supported(),
+ .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(),
+ .support_demote_to_helper_invocation = true,
+ .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(),
+ .support_derivative_control = true,
+ .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(),
+
+ .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(),
+
+ .lower_left_origin_mode = false,
+ .need_declared_frag_colors = false,
+
+ .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR,
+ .has_broken_unsigned_image_offsets = false,
+ .has_broken_signed_operations = false,
+ .has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR,
+ .ignore_nan_fp_comparisons = false,
+ };
+ host_info = Shader::HostTranslateInfo{
+ .support_float16 = device.IsFloat16Supported(),
+ .support_int64 = device.IsShaderInt64Supported(),
+ };
}
-Shader::Shader(Tegra::Engines::ConstBufferEngineInterface& engine_, ShaderType stage_,
- GPUVAddr gpu_addr_, VAddr cpu_addr_, ProgramCode program_code_, u32 main_offset_)
- : gpu_addr(gpu_addr_), program_code(std::move(program_code_)), registry(stage_, engine_),
- shader_ir(program_code, main_offset_, compiler_settings, registry),
- entries(GenerateShaderEntries(shader_ir)) {}
-
-Shader::~Shader() = default;
-
-VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
- Tegra::Engines::Maxwell3D& maxwell3d_,
- Tegra::Engines::KeplerCompute& kepler_compute_,
- Tegra::MemoryManager& gpu_memory_, const Device& device_,
- VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_,
- VKUpdateDescriptorQueue& update_descriptor_queue_)
- : VideoCommon::ShaderCache<Shader>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_},
- kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_},
- scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{
- update_descriptor_queue_} {}
-
-VKPipelineCache::~VKPipelineCache() = default;
+PipelineCache::~PipelineCache() = default;
-std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
- std::array<Shader*, Maxwell::MaxShaderProgram> shaders{};
-
- for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
- const auto program{static_cast<Maxwell::ShaderProgram>(index)};
-
- // Skip stages that are not enabled
- if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
- continue;
- }
-
- const GPUVAddr gpu_addr{GetShaderAddress(maxwell3d, program)};
- const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
- ASSERT(cpu_addr);
-
- Shader* result = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
- if (!result) {
- const u8* const host_ptr{gpu_memory.GetPointer(gpu_addr)};
-
- // No shader found - create a new one
- static constexpr u32 stage_offset = STAGE_MAIN_OFFSET;
- const auto stage = static_cast<ShaderType>(index == 0 ? 0 : index - 1);
- ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, false);
- const std::size_t size_in_bytes = code.size() * sizeof(u64);
-
- auto shader = std::make_unique<Shader>(maxwell3d, stage, gpu_addr, *cpu_addr,
- std::move(code), stage_offset);
- result = shader.get();
+GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() {
+ MICROPROFILE_SCOPE(Vulkan_PipelineCache);
- if (cpu_addr) {
- Register(std::move(shader), *cpu_addr, size_in_bytes);
- } else {
- null_shader = std::move(shader);
- }
+ if (!RefreshStages(graphics_key.unique_hashes)) {
+ current_pipeline = nullptr;
+ return nullptr;
+ }
+ graphics_key.state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported(),
+ device.IsExtVertexInputDynamicStateSupported());
+
+ if (current_pipeline) {
+ GraphicsPipeline* const next{current_pipeline->Next(graphics_key)};
+ if (next) {
+ current_pipeline = next;
+ return BuiltPipeline(current_pipeline);
}
- shaders[index] = result;
}
- return last_shaders = shaders;
+ return CurrentGraphicsPipelineSlowPath();
}
-VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline(
- const GraphicsPipelineCacheKey& key, u32 num_color_buffers,
- VideoCommon::Shader::AsyncShaders& async_shaders) {
+ComputePipeline* PipelineCache::CurrentComputePipeline() {
MICROPROFILE_SCOPE(Vulkan_PipelineCache);
- if (last_graphics_pipeline && last_graphics_key == key) {
- return last_graphics_pipeline;
- }
- last_graphics_key = key;
-
- if (device.UseAsynchronousShaders() && async_shaders.IsShaderAsync(gpu)) {
- std::unique_lock lock{pipeline_cache};
- const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
- if (is_cache_miss) {
- gpu.ShaderNotify().MarkSharderBuilding();
- LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
- const auto [program, bindings] = DecompileShaders(key.fixed_state);
- async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool,
- update_descriptor_queue, bindings, program, key,
- num_color_buffers);
- }
- last_graphics_pipeline = pair->second.get();
- return last_graphics_pipeline;
+ const ShaderInfo* const shader{ComputeShader()};
+ if (!shader) {
+ return nullptr;
}
-
- const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
- auto& entry = pair->second;
- if (is_cache_miss) {
- gpu.ShaderNotify().MarkSharderBuilding();
- LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
- const auto [program, bindings] = DecompileShaders(key.fixed_state);
- entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool,
- update_descriptor_queue, key, bindings,
- program, num_color_buffers);
- gpu.ShaderNotify().MarkShaderComplete();
+ const auto& qmd{kepler_compute.launch_description};
+ const ComputePipelineCacheKey key{
+ .unique_hash = shader->unique_hash,
+ .shared_memory_size = qmd.shared_alloc,
+ .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z},
+ };
+ const auto [pair, is_new]{compute_cache.try_emplace(key)};
+ auto& pipeline{pair->second};
+ if (!is_new) {
+ return pipeline.get();
}
- last_graphics_pipeline = entry.get();
- return last_graphics_pipeline;
+ pipeline = CreateComputePipeline(key, shader);
+ return pipeline.get();
}
-VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) {
- MICROPROFILE_SCOPE(Vulkan_PipelineCache);
-
- const auto [pair, is_cache_miss] = compute_cache.try_emplace(key);
- auto& entry = pair->second;
- if (!is_cache_miss) {
- return *entry;
+void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
+ const VideoCore::DiskResourceLoadCallback& callback) {
+ if (title_id == 0) {
+ return;
}
- LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
-
- const GPUVAddr gpu_addr = key.shader;
-
- const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
- ASSERT(cpu_addr);
+ const auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)};
+ const auto base_dir{shader_dir / fmt::format("{:016x}", title_id)};
+ if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir)) {
+ LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories");
+ return;
+ }
+ pipeline_cache_filename = base_dir / "vulkan.bin";
+
+ struct {
+ std::mutex mutex;
+ size_t total{};
+ size_t built{};
+ bool has_loaded{};
+ } state;
+
+ const auto load_compute{[&](std::ifstream& file, FileEnvironment env) {
+ ComputePipelineCacheKey key;
+ file.read(reinterpret_cast<char*>(&key), sizeof(key));
+
+ workers.QueueWork([this, key, env = std::move(env), &state, &callback]() mutable {
+ ShaderPools pools;
+ auto pipeline{CreateComputePipeline(pools, key, env, false)};
+ std::lock_guard lock{state.mutex};
+ if (pipeline) {
+ compute_cache.emplace(key, std::move(pipeline));
+ }
+ ++state.built;
+ if (state.has_loaded) {
+ callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
+ }
+ });
+ ++state.total;
+ }};
+ const bool extended_dynamic_state = device.IsExtExtendedDynamicStateSupported();
+ const bool dynamic_vertex_input = device.IsExtVertexInputDynamicStateSupported();
+ const auto load_graphics{[&](std::ifstream& file, std::vector<FileEnvironment> envs) {
+ GraphicsPipelineCacheKey key;
+ file.read(reinterpret_cast<char*>(&key), sizeof(key));
+
+ if ((key.state.extended_dynamic_state != 0) != extended_dynamic_state ||
+ (key.state.dynamic_vertex_input != 0) != dynamic_vertex_input) {
+ return;
+ }
+ workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable {
+ ShaderPools pools;
+ boost::container::static_vector<Shader::Environment*, 5> env_ptrs;
+ for (auto& env : envs) {
+ env_ptrs.push_back(&env);
+ }
+ auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)};
- Shader* shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get();
- if (!shader) {
- // No shader found - create a new one
- const auto host_ptr = gpu_memory.GetPointer(gpu_addr);
+ std::lock_guard lock{state.mutex};
+ graphics_cache.emplace(key, std::move(pipeline));
+ ++state.built;
+ if (state.has_loaded) {
+ callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
+ }
+ });
+ ++state.total;
+ }};
+ VideoCommon::LoadPipelines(stop_loading, pipeline_cache_filename, CACHE_VERSION, load_compute,
+ load_graphics);
- ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, true);
- const std::size_t size_in_bytes = code.size() * sizeof(u64);
+ std::unique_lock lock{state.mutex};
+ callback(VideoCore::LoadCallbackStage::Build, 0, state.total);
+ state.has_loaded = true;
+ lock.unlock();
- auto shader_info = std::make_unique<Shader>(kepler_compute, ShaderType::Compute, gpu_addr,
- *cpu_addr, std::move(code), KERNEL_MAIN_OFFSET);
- shader = shader_info.get();
+ workers.WaitForRequests();
+}
- if (cpu_addr) {
- Register(std::move(shader_info), *cpu_addr, size_in_bytes);
- } else {
- null_kernel = std::move(shader_info);
- }
+GraphicsPipeline* PipelineCache::CurrentGraphicsPipelineSlowPath() {
+ const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)};
+ auto& pipeline{pair->second};
+ if (is_new) {
+ pipeline = CreateGraphicsPipeline();
}
-
- const Specialization specialization{
- .base_binding = 0,
- .workgroup_size = key.workgroup_size,
- .shared_memory_size = key.shared_memory_size,
- .point_size = std::nullopt,
- .enabled_attributes = {},
- .attribute_types = {},
- .ndc_minus_one_to_one = false,
- };
- const SPIRVShader spirv_shader{Decompile(device, shader->GetIR(), ShaderType::Compute,
- shader->GetRegistry(), specialization),
- shader->GetEntries()};
- entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool,
- update_descriptor_queue, spirv_shader);
- return *entry;
+ if (!pipeline) {
+ return nullptr;
+ }
+ if (current_pipeline) {
+ current_pipeline->AddTransition(pipeline.get());
+ }
+ current_pipeline = pipeline.get();
+ return BuiltPipeline(current_pipeline);
}
-void VKPipelineCache::EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline) {
- gpu.ShaderNotify().MarkShaderComplete();
- std::unique_lock lock{pipeline_cache};
- graphics_cache.at(pipeline->GetCacheKey()) = std::move(pipeline);
+GraphicsPipeline* PipelineCache::BuiltPipeline(GraphicsPipeline* pipeline) const noexcept {
+ if (pipeline->IsBuilt()) {
+ return pipeline;
+ }
+ if (!use_asynchronous_shaders) {
+ return pipeline;
+ }
+ // If something is using depth, we can assume that games are not rendering anything that
+ // will only be used once.
+ if (maxwell3d.regs.zeta_enable) {
+ return nullptr;
+ }
+ // If games are using a small index count, we can assume these are full-screen quads.
+ // Usually these shaders are only used once for building textures, so we can assume they
+ // cannot be built asynchronously.
+ if (maxwell3d.regs.index_array.count <= 6 || maxwell3d.regs.vertex_buffer.count <= 6) {
+ return pipeline;
+ }
+ return nullptr;
}
-void VKPipelineCache::OnShaderRemoval(Shader* shader) {
- bool finished = false;
- const auto Finish = [&] {
- // TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and
- // flush.
- if (finished) {
- return;
- }
- finished = true;
- scheduler.Finish();
- };
-
- const GPUVAddr invalidated_addr = shader->GetGpuAddr();
- for (auto it = graphics_cache.begin(); it != graphics_cache.end();) {
- auto& entry = it->first;
- if (std::find(entry.shaders.begin(), entry.shaders.end(), invalidated_addr) ==
- entry.shaders.end()) {
- ++it;
+std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
+ ShaderPools& pools, const GraphicsPipelineCacheKey& key,
+ std::span<Shader::Environment* const> envs, bool build_in_parallel) try {
+ LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash());
+ size_t env_index{0};
+ std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs;
+ const bool uses_vertex_a{key.unique_hashes[0] != 0};
+ const bool uses_vertex_b{key.unique_hashes[1] != 0};
+ for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
+ if (key.unique_hashes[index] == 0) {
continue;
}
- Finish();
- it = graphics_cache.erase(it);
+ Shader::Environment& env{*envs[env_index]};
+ ++env_index;
+
+ const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))};
+ Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0);
+ if (!uses_vertex_a || index != 1) {
+ // Normal path
+ programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info);
+ } else {
+ // VertexB path when VertexA is present.
+ auto& program_va{programs[0]};
+ auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
+ programs[index] = MergeDualVertexPrograms(program_va, program_vb, env);
+ }
}
- for (auto it = compute_cache.begin(); it != compute_cache.end();) {
- auto& entry = it->first;
- if (entry.shader != invalidated_addr) {
- ++it;
+ std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{};
+ std::array<vk::ShaderModule, Maxwell::MaxShaderStage> modules;
+
+ const Shader::IR::Program* previous_stage{};
+ Shader::Backend::Bindings binding;
+ for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram;
+ ++index) {
+ if (key.unique_hashes[index] == 0) {
continue;
}
- Finish();
- it = compute_cache.erase(it);
+ UNIMPLEMENTED_IF(index == 0);
+
+ Shader::IR::Program& program{programs[index]};
+ const size_t stage_index{index - 1};
+ infos[stage_index] = &program.info;
+
+ const auto runtime_info{MakeRuntimeInfo(programs, key, program, previous_stage)};
+ const std::vector<u32> code{EmitSPIRV(profile, runtime_info, program, binding)};
+ device.SaveShader(code);
+ modules[stage_index] = BuildShader(device, code);
+ if (device.HasDebuggingToolAttached()) {
+ const std::string name{fmt::format("Shader {:016x}", key.unique_hashes[index])};
+ modules[stage_index].SetObjectNameEXT(name.c_str());
+ }
+ previous_stage = &program;
}
+ Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr};
+ return std::make_unique<GraphicsPipeline>(
+ maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, &shader_notify, device,
+ descriptor_pool, update_descriptor_queue, thread_worker, render_pass_cache, key,
+ std::move(modules), infos);
+
+} catch (const Shader::Exception& exception) {
+ LOG_ERROR(Render_Vulkan, "{}", exception.what());
+ return nullptr;
}
-std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>>
-VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) {
- Specialization specialization;
- if (fixed_state.topology == Maxwell::PrimitiveTopology::Points) {
- float point_size;
- std::memcpy(&point_size, &fixed_state.point_size, sizeof(float));
- specialization.point_size = point_size;
- ASSERT(point_size != 0.0f);
- }
- for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) {
- const auto& attribute = fixed_state.attributes[i];
- specialization.enabled_attributes[i] = attribute.enabled.Value() != 0;
- specialization.attribute_types[i] = attribute.Type();
- }
- specialization.ndc_minus_one_to_one = fixed_state.ndc_minus_one_to_one;
- specialization.early_fragment_tests = fixed_state.early_z;
-
- // Alpha test
- specialization.alpha_test_func =
- FixedPipelineState::UnpackComparisonOp(fixed_state.alpha_test_func.Value());
- specialization.alpha_test_ref = Common::BitCast<float>(fixed_state.alpha_test_ref);
-
- SPIRVProgram program;
- std::vector<VkDescriptorSetLayoutBinding> bindings;
+std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
+ GraphicsEnvironments environments;
+ GetGraphicsEnvironments(environments, graphics_key.unique_hashes);
- for (std::size_t index = 1; index < Maxwell::MaxShaderProgram; ++index) {
- const auto program_enum = static_cast<Maxwell::ShaderProgram>(index);
- // Skip stages that are not enabled
- if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
- continue;
- }
- const GPUVAddr gpu_addr = GetShaderAddress(maxwell3d, program_enum);
- const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
- Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
-
- const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
- const ShaderType program_type = GetShaderType(program_enum);
- const auto& entries = shader->GetEntries();
- program[stage] = {
- Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization),
- entries,
- };
-
- const u32 old_binding = specialization.base_binding;
- specialization.base_binding =
- FillDescriptorLayout(entries, bindings, program_enum, specialization.base_binding);
- ASSERT(old_binding + entries.NumBindings() == specialization.base_binding);
+ main_pools.ReleaseContents();
+ auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), true)};
+ if (!pipeline || pipeline_cache_filename.empty()) {
+ return pipeline;
}
- return {std::move(program), std::move(bindings)};
-}
-
-template <VkDescriptorType descriptor_type, class Container>
-void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u32& binding,
- u32& offset, const Container& container) {
- static constexpr u32 entry_size = static_cast<u32>(sizeof(DescriptorUpdateEntry));
- const u32 count = static_cast<u32>(std::size(container));
-
- if constexpr (descriptor_type == COMBINED_IMAGE_SAMPLER) {
- for (u32 i = 0; i < count; ++i) {
- const u32 num_samplers = container[i].size;
- template_entries.push_back({
- .dstBinding = binding,
- .dstArrayElement = 0,
- .descriptorCount = num_samplers,
- .descriptorType = descriptor_type,
- .offset = offset,
- .stride = entry_size,
- });
-
- ++binding;
- offset += num_samplers * entry_size;
+ serialization_thread.QueueWork([this, key = graphics_key, envs = std::move(environments.envs)] {
+ boost::container::static_vector<const GenericEnvironment*, Maxwell::MaxShaderProgram>
+ env_ptrs;
+ for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
+ if (key.unique_hashes[index] != 0) {
+ env_ptrs.push_back(&envs[index]);
+ }
}
- return;
- }
+ SerializePipeline(key, env_ptrs, pipeline_cache_filename, CACHE_VERSION);
+ });
+ return pipeline;
+}
- if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER ||
- descriptor_type == STORAGE_TEXEL_BUFFER) {
- // Nvidia has a bug where updating multiple texels at once causes the driver to crash.
- // Note: Fixed in driver Windows 443.24, Linux 440.66.15
- for (u32 i = 0; i < count; ++i) {
- template_entries.push_back({
- .dstBinding = binding + i,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = descriptor_type,
- .offset = static_cast<std::size_t>(offset + i * entry_size),
- .stride = entry_size,
- });
- }
- } else if (count > 0) {
- template_entries.push_back({
- .dstBinding = binding,
- .dstArrayElement = 0,
- .descriptorCount = count,
- .descriptorType = descriptor_type,
- .offset = offset,
- .stride = entry_size,
- });
+std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
+ const ComputePipelineCacheKey& key, const ShaderInfo* shader) {
+ const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
+ const auto& qmd{kepler_compute.launch_description};
+ ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start};
+ env.SetCachedSize(shader->size_bytes);
+
+ main_pools.ReleaseContents();
+ auto pipeline{CreateComputePipeline(main_pools, key, env, true)};
+ if (!pipeline || pipeline_cache_filename.empty()) {
+ return pipeline;
}
- offset += count * entry_size;
- binding += count;
+ serialization_thread.QueueWork([this, key, env = std::move(env)] {
+ SerializePipeline(key, std::array<const GenericEnvironment*, 1>{&env},
+ pipeline_cache_filename, CACHE_VERSION);
+ });
+ return pipeline;
}
-void FillDescriptorUpdateTemplateEntries(
- const ShaderEntries& entries, u32& binding, u32& offset,
- std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries) {
- AddEntry<UNIFORM_BUFFER>(template_entries, offset, binding, entries.const_buffers);
- AddEntry<STORAGE_BUFFER>(template_entries, offset, binding, entries.global_buffers);
- AddEntry<UNIFORM_TEXEL_BUFFER>(template_entries, offset, binding, entries.uniform_texels);
- AddEntry<COMBINED_IMAGE_SAMPLER>(template_entries, offset, binding, entries.samplers);
- AddEntry<STORAGE_TEXEL_BUFFER>(template_entries, offset, binding, entries.storage_texels);
- AddEntry<STORAGE_IMAGE>(template_entries, offset, binding, entries.images);
+std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
+ ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env,
+ bool build_in_parallel) try {
+ LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash());
+
+ Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
+ auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
+ const std::vector<u32> code{EmitSPIRV(profile, program)};
+ device.SaveShader(code);
+ vk::ShaderModule spv_module{BuildShader(device, code)};
+ if (device.HasDebuggingToolAttached()) {
+ const auto name{fmt::format("Shader {:016x}", key.unique_hash)};
+ spv_module.SetObjectNameEXT(name.c_str());
+ }
+ Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr};
+ return std::make_unique<ComputePipeline>(device, descriptor_pool, update_descriptor_queue,
+ thread_worker, &shader_notify, program.info,
+ std::move(spv_module));
+
+} catch (const Shader::Exception& exception) {
+ LOG_ERROR(Render_Vulkan, "{}", exception.what());
+ return nullptr;
}
} // namespace Vulkan
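
One construct in the new pipeline-cache code that is easy to misread is the function-try-block: try is attached directly to the bodies of CreateGraphicsPipeline and CreateComputePipeline, so a thrown Shader::Exception is logged and converted into a null pipeline instead of propagating to the caller. A generic sketch of that shape, with placeholder types and a standard exception instead of Shader::Exception:

    #include <cstdio>
    #include <memory>
    #include <stdexcept>

    struct Pipeline {};

    // Hypothetical translation step that may throw on malformed shader input.
    std::unique_ptr<Pipeline> Translate(bool fail) {
        if (fail) {
            throw std::runtime_error("unsupported shader construct");
        }
        return std::make_unique<Pipeline>();
    }

    // Function-try-block: the handler wraps the whole body, logs, and returns null.
    std::unique_ptr<Pipeline> CreatePipeline(bool fail) try {
        return Translate(fail);
    } catch (const std::exception& e) {
        std::fprintf(stderr, "pipeline creation failed: %s\n", e.what());
        return nullptr;
    }
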
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index 89d635a3d..efe5a7ed8 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -6,24 +6,28 @@
#include <array>
#include <cstddef>
+#include <filesystem>
+#include <iosfwd>
#include <memory>
#include <type_traits>
#include <unordered_map>
#include <utility>
#include <vector>
-#include <boost/functional/hash.hpp>
-
#include "common/common_types.h"
-#include "video_core/engines/const_buffer_engine_interface.h"
+#include "common/thread_worker.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/frontend/maxwell/control_flow.h"
+#include "shader_recompiler/host_translate_info.h"
+#include "shader_recompiler/object_pool.h"
+#include "shader_recompiler/profile.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
+#include "video_core/renderer_vulkan/vk_buffer_cache.h"
+#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
-#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
-#include "video_core/shader/async_shaders.h"
-#include "video_core/shader/memory_util.h"
-#include "video_core/shader/registry.h"
-#include "video_core/shader/shader_ir.h"
+#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/shader_cache.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -31,23 +35,24 @@ namespace Core {
class System;
}
-namespace Vulkan {
+namespace Shader::IR {
+struct Program;
+}
-class Device;
-class RasterizerVulkan;
-class VKComputePipeline;
-class VKDescriptorPool;
-class VKScheduler;
-class VKUpdateDescriptorQueue;
+namespace VideoCore {
+class ShaderNotify;
+}
+
+namespace Vulkan {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
struct ComputePipelineCacheKey {
- GPUVAddr shader;
+ u64 unique_hash;
u32 shared_memory_size;
std::array<u32, 3> workgroup_size;
- std::size_t Hash() const noexcept;
+ size_t Hash() const noexcept;
bool operator==(const ComputePipelineCacheKey& rhs) const noexcept;
@@ -64,15 +69,8 @@ static_assert(std::is_trivially_constructible_v<ComputePipelineCacheKey>);
namespace std {
template <>
-struct hash<Vulkan::GraphicsPipelineCacheKey> {
- std::size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept {
- return k.Hash();
- }
-};
-
-template <>
struct hash<Vulkan::ComputePipelineCacheKey> {
- std::size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept {
+ size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept {
return k.Hash();
}
};
@@ -81,94 +79,90 @@ struct hash<Vulkan::ComputePipelineCacheKey> {
namespace Vulkan {
-class Shader {
-public:
- explicit Shader(Tegra::Engines::ConstBufferEngineInterface& engine_,
- Tegra::Engines::ShaderType stage_, GPUVAddr gpu_addr, VAddr cpu_addr_,
- VideoCommon::Shader::ProgramCode program_code, u32 main_offset_);
- ~Shader();
-
- GPUVAddr GetGpuAddr() const {
- return gpu_addr;
- }
-
- VideoCommon::Shader::ShaderIR& GetIR() {
- return shader_ir;
- }
-
- const VideoCommon::Shader::ShaderIR& GetIR() const {
- return shader_ir;
- }
+class ComputePipeline;
+class Device;
+class DescriptorPool;
+class RasterizerVulkan;
+class RenderPassCache;
+class VKScheduler;
+class VKUpdateDescriptorQueue;
- const VideoCommon::Shader::Registry& GetRegistry() const {
- return registry;
- }
+using VideoCommon::ShaderInfo;
- const ShaderEntries& GetEntries() const {
- return entries;
+struct ShaderPools {
+ void ReleaseContents() {
+ flow_block.ReleaseContents();
+ block.ReleaseContents();
+ inst.ReleaseContents();
}
-private:
- GPUVAddr gpu_addr{};
- VideoCommon::Shader::ProgramCode program_code;
- VideoCommon::Shader::Registry registry;
- VideoCommon::Shader::ShaderIR shader_ir;
- ShaderEntries entries;
+ Shader::ObjectPool<Shader::IR::Inst> inst;
+ Shader::ObjectPool<Shader::IR::Block> block;
+ Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block;
};
-class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> {
+class PipelineCache : public VideoCommon::ShaderCache {
public:
- explicit VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu,
- Tegra::Engines::Maxwell3D& maxwell3d,
- Tegra::Engines::KeplerCompute& kepler_compute,
- Tegra::MemoryManager& gpu_memory, const Device& device,
- VKScheduler& scheduler, VKDescriptorPool& descriptor_pool,
- VKUpdateDescriptorQueue& update_descriptor_queue);
- ~VKPipelineCache() override;
+ explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d,
+ Tegra::Engines::KeplerCompute& kepler_compute,
+ Tegra::MemoryManager& gpu_memory, const Device& device,
+ VKScheduler& scheduler, DescriptorPool& descriptor_pool,
+ VKUpdateDescriptorQueue& update_descriptor_queue,
+ RenderPassCache& render_pass_cache, BufferCache& buffer_cache,
+ TextureCache& texture_cache, VideoCore::ShaderNotify& shader_notify_);
+ ~PipelineCache();
+
+ [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline();
- std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders();
+ [[nodiscard]] ComputePipeline* CurrentComputePipeline();
- VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key,
- u32 num_color_buffers,
- VideoCommon::Shader::AsyncShaders& async_shaders);
+ void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
+ const VideoCore::DiskResourceLoadCallback& callback);
- VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
+private:
+ [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipelineSlowPath();
- void EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline);
+ [[nodiscard]] GraphicsPipeline* BuiltPipeline(GraphicsPipeline* pipeline) const noexcept;
-protected:
- void OnShaderRemoval(Shader* shader) final;
+ std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline();
-private:
- std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders(
- const FixedPipelineState& fixed_state);
+ std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline(
+ ShaderPools& pools, const GraphicsPipelineCacheKey& key,
+ std::span<Shader::Environment* const> envs, bool build_in_parallel);
- Tegra::GPU& gpu;
- Tegra::Engines::Maxwell3D& maxwell3d;
- Tegra::Engines::KeplerCompute& kepler_compute;
- Tegra::MemoryManager& gpu_memory;
+ std::unique_ptr<ComputePipeline> CreateComputePipeline(const ComputePipelineCacheKey& key,
+ const ShaderInfo* shader);
+
+ std::unique_ptr<ComputePipeline> CreateComputePipeline(ShaderPools& pools,
+ const ComputePipelineCacheKey& key,
+ Shader::Environment& env,
+ bool build_in_parallel);
const Device& device;
VKScheduler& scheduler;
- VKDescriptorPool& descriptor_pool;
+ DescriptorPool& descriptor_pool;
VKUpdateDescriptorQueue& update_descriptor_queue;
+ RenderPassCache& render_pass_cache;
+ BufferCache& buffer_cache;
+ TextureCache& texture_cache;
+ VideoCore::ShaderNotify& shader_notify;
+ bool use_asynchronous_shaders{};
- std::unique_ptr<Shader> null_shader;
- std::unique_ptr<Shader> null_kernel;
+ GraphicsPipelineCacheKey graphics_key{};
+ GraphicsPipeline* current_pipeline{};
- std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{};
+ std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<ComputePipeline>> compute_cache;
+ std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<GraphicsPipeline>> graphics_cache;
- GraphicsPipelineCacheKey last_graphics_key;
- VKGraphicsPipeline* last_graphics_pipeline = nullptr;
+ ShaderPools main_pools;
- std::mutex pipeline_cache;
- std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<VKGraphicsPipeline>>
- graphics_cache;
- std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<VKComputePipeline>> compute_cache;
-};
+ Shader::Profile profile;
+ Shader::HostTranslateInfo host_info;
-void FillDescriptorUpdateTemplateEntries(
- const ShaderEntries& entries, u32& binding, u32& offset,
- std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries);
+ std::filesystem::path pipeline_cache_filename;
+
+ Common::ThreadWorker workers;
+ Common::ThreadWorker serialization_thread;
+};
} // namespace Vulkan
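
ShaderPools above bundles three object pools so that a single ReleaseContents() call recycles every IR allocation between compilations instead of freeing and re-allocating it. A toy illustration of that reuse idea (SimplePool is not Shader::ObjectPool; a real pool would also re-construct reused objects):

    #include <cstddef>
    #include <deque>

    // Toy pool: objects stay allocated and ReleaseContents() only rewinds the cursor.
    template <typename T>
    class SimplePool {
    public:
        T& Create() {
            if (cursor == storage.size()) {
                storage.emplace_back();
            }
            return storage[cursor++]; // Note: reused objects are not re-initialized here.
        }
        void ReleaseContents() noexcept {
            cursor = 0;
        }

    private:
        std::deque<T> storage; // deque keeps references valid as it grows.
        std::size_t cursor = 0;
    };

    struct Pools {
        SimplePool<int> inst; // Stand-ins for the Inst/Block/Flow::Block pools.
        SimplePool<int> block;
        void ReleaseContents() noexcept {
            inst.ReleaseContents();
            block.ReleaseContents();
        }
    };
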
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index 7cadd5147..c9cb32d71 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -114,14 +114,10 @@ void HostCounter::EndQuery() {
}
u64 HostCounter::BlockingQuery() const {
- if (tick >= cache.GetScheduler().CurrentTick()) {
- cache.GetScheduler().Flush();
- }
-
+ cache.GetScheduler().Wait(tick);
u64 data;
const VkResult query_result = cache.GetDevice().GetLogical().GetQueryResults(
- query.first, query.second, 1, sizeof(data), &data, sizeof(data),
- VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
+ query.first, query.second, 1, sizeof(data), &data, sizeof(data), VK_QUERY_RESULT_64_BIT);
switch (query_result) {
case VK_SUCCESS:
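
The query-cache change replaces VK_QUERY_RESULT_WAIT_BIT with an explicit wait on the scheduler tick that recorded the query, so blocking happens on the emulator's own timeline before the results are read. A hedged sketch of such a tick waiter (hypothetical TickWaiter, not VKScheduler::Wait):

    #include <algorithm>
    #include <condition_variable>
    #include <cstdint>
    #include <mutex>

    class TickWaiter {
    public:
        // Called when the GPU is known to have finished up to `completed`.
        void Signal(std::uint64_t completed) {
            {
                std::lock_guard lock{mutex};
                known_tick = std::max(known_tick, completed);
            }
            cv.notify_all();
        }
        // Blocks the caller until the given tick is known to be finished.
        void Wait(std::uint64_t tick) {
            std::unique_lock lock{mutex};
            cv.wait(lock, [&] { return known_tick >= tick; });
        }

    private:
        std::mutex mutex;
        std::condition_variable cv;
        std::uint64_t known_tick = 0;
    };
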
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 1c9120170..c7a07fdd8 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -24,7 +24,6 @@
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
-#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
@@ -55,11 +54,10 @@ struct DrawParams {
u32 num_instances;
u32 base_vertex;
u32 num_vertices;
+ u32 first_index;
bool is_indexed;
};
-constexpr auto COMPUTE_SHADER_INDEX = static_cast<size_t>(Tegra::Engines::ShaderType::Compute);
-
VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index) {
const auto& src = regs.viewport_transform[index];
const float width = src.scale_x * 2.0f;
@@ -97,118 +95,6 @@ VkRect2D GetScissorState(const Maxwell& regs, size_t index) {
return scissor;
}
-std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses(
- const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) {
- std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses;
- for (size_t i = 0; i < std::size(addresses); ++i) {
- addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0;
- }
- return addresses;
-}
-
-struct TextureHandle {
- constexpr TextureHandle(u32 data, bool via_header_index) {
- const Tegra::Texture::TextureHandle handle{data};
- image = handle.tic_id;
- sampler = via_header_index ? image : handle.tsc_id.Value();
- }
-
- u32 image;
- u32 sampler;
-};
-
-template <typename Engine, typename Entry>
-TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry,
- size_t stage, size_t index = 0) {
- const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage);
- if constexpr (std::is_same_v<Entry, SamplerEntry>) {
- if (entry.is_separated) {
- const u32 buffer_1 = entry.buffer;
- const u32 buffer_2 = entry.secondary_buffer;
- const u32 offset_1 = entry.offset;
- const u32 offset_2 = entry.secondary_offset;
- const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
- const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
- return TextureHandle(handle_1 | handle_2, via_header_index);
- }
- }
- if (entry.is_bindless) {
- const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
- return TextureHandle(raw, via_header_index);
- }
- const u32 buffer = engine.GetBoundBuffer();
- const u64 offset = (entry.offset + index) * sizeof(u32);
- return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
-}
-
-ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
- if (entry.is_buffer) {
- return ImageViewType::e2D;
- }
- switch (entry.type) {
- case Tegra::Shader::TextureType::Texture1D:
- return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D;
- case Tegra::Shader::TextureType::Texture2D:
- return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D;
- case Tegra::Shader::TextureType::Texture3D:
- return ImageViewType::e3D;
- case Tegra::Shader::TextureType::TextureCube:
- return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube;
- }
- UNREACHABLE();
- return ImageViewType::e2D;
-}
-
-ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) {
- switch (entry.type) {
- case Tegra::Shader::ImageType::Texture1D:
- return ImageViewType::e1D;
- case Tegra::Shader::ImageType::Texture1DArray:
- return ImageViewType::e1DArray;
- case Tegra::Shader::ImageType::Texture2D:
- return ImageViewType::e2D;
- case Tegra::Shader::ImageType::Texture2DArray:
- return ImageViewType::e2DArray;
- case Tegra::Shader::ImageType::Texture3D:
- return ImageViewType::e3D;
- case Tegra::Shader::ImageType::TextureBuffer:
- return ImageViewType::Buffer;
- }
- UNREACHABLE();
- return ImageViewType::e2D;
-}
-
-void PushImageDescriptors(const ShaderEntries& entries, TextureCache& texture_cache,
- VKUpdateDescriptorQueue& update_descriptor_queue,
- ImageViewId*& image_view_id_ptr, VkSampler*& sampler_ptr) {
- for ([[maybe_unused]] const auto& entry : entries.uniform_texels) {
- const ImageViewId image_view_id = *image_view_id_ptr++;
- const ImageView& image_view = texture_cache.GetImageView(image_view_id);
- update_descriptor_queue.AddTexelBuffer(image_view.BufferView());
- }
- for (const auto& entry : entries.samplers) {
- for (size_t i = 0; i < entry.size; ++i) {
- const VkSampler sampler = *sampler_ptr++;
- const ImageViewId image_view_id = *image_view_id_ptr++;
- const ImageView& image_view = texture_cache.GetImageView(image_view_id);
- const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry));
- update_descriptor_queue.AddSampledImage(handle, sampler);
- }
- }
- for ([[maybe_unused]] const auto& entry : entries.storage_texels) {
- const ImageViewId image_view_id = *image_view_id_ptr++;
- const ImageView& image_view = texture_cache.GetImageView(image_view_id);
- update_descriptor_queue.AddTexelBuffer(image_view.BufferView());
- }
- for (const auto& entry : entries.images) {
- // TODO: Mark as modified
- const ImageViewId image_view_id = *image_view_id_ptr++;
- const ImageView& image_view = texture_cache.GetImageView(image_view_id);
- const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry));
- update_descriptor_queue.AddImage(handle);
- }
-}
-
DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced,
bool is_indexed) {
DrawParams params{
@@ -216,6 +102,7 @@ DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instan
.num_instances = is_instanced ? num_instances : 1,
.base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first,
.num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count,
+ .first_index = is_indexed ? regs.index_array.first : 0,
.is_indexed = is_indexed,
};
if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) {
@@ -243,21 +130,21 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
blit_image(device, scheduler, state_tracker, descriptor_pool),
astc_decoder_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue,
memory_allocator),
- texture_cache_runtime{device, scheduler, memory_allocator,
- staging_pool, blit_image, astc_decoder_pass},
+ render_pass_cache(device), texture_cache_runtime{device, scheduler,
+ memory_allocator, staging_pool,
+ blit_image, astc_decoder_pass,
+ render_pass_cache},
texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool,
update_descriptor_queue, descriptor_pool),
buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
- pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler,
- descriptor_pool, update_descriptor_queue),
- query_cache{*this, maxwell3d, gpu_memory, device, scheduler},
+ pipeline_cache(*this, maxwell3d, kepler_compute, gpu_memory, device, scheduler,
+ descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache,
+ texture_cache, gpu.ShaderNotify()),
+ query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache},
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler),
- wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) {
+ wfi_event(device.GetLogical().CreateEvent()) {
scheduler.SetQueryCache(query_cache);
- if (device.UseAsynchronousShaders()) {
- async_shaders.AllocateWorkers();
- }
}
RasterizerVulkan::~RasterizerVulkan() = default;
@@ -270,53 +157,30 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
query_cache.UpdateCounters();
- graphics_key.fixed_state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported());
-
- std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
-
- texture_cache.SynchronizeGraphicsDescriptors();
- texture_cache.UpdateRenderTargets(false);
-
- const auto shaders = pipeline_cache.GetShaders();
- graphics_key.shaders = GetShaderAddresses(shaders);
-
- SetupShaderDescriptors(shaders, is_indexed);
-
- const Framebuffer* const framebuffer = texture_cache.GetFramebuffer();
- graphics_key.renderpass = framebuffer->RenderPass();
-
- VKGraphicsPipeline* const pipeline = pipeline_cache.GetGraphicsPipeline(
- graphics_key, framebuffer->NumColorBuffers(), async_shaders);
- if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) {
- // Async graphics pipeline was not ready.
+ GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()};
+ if (!pipeline) {
return;
}
+ std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
+ pipeline->Configure(is_indexed);
BeginTransformFeedback();
- scheduler.RequestRenderpass(framebuffer);
- scheduler.BindGraphicsPipeline(pipeline->GetHandle());
UpdateDynamicStates();
- const auto& regs = maxwell3d.regs;
- const u32 num_instances = maxwell3d.mme_draw.instance_count;
- const DrawParams draw_params = MakeDrawParams(regs, num_instances, is_instanced, is_indexed);
- const VkPipelineLayout pipeline_layout = pipeline->GetLayout();
- const VkDescriptorSet descriptor_set = pipeline->CommitDescriptorSet();
- scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) {
- if (descriptor_set) {
- cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout,
- DESCRIPTOR_SET, descriptor_set, nullptr);
- }
+ const auto& regs{maxwell3d.regs};
+ const u32 num_instances{maxwell3d.mme_draw.instance_count};
+ const DrawParams draw_params{MakeDrawParams(regs, num_instances, is_instanced, is_indexed)};
+ scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) {
if (draw_params.is_indexed) {
- cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, 0,
- draw_params.base_vertex, draw_params.base_instance);
+ cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances,
+ draw_params.first_index, draw_params.base_vertex,
+ draw_params.base_instance);
} else {
cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances,
draw_params.base_vertex, draw_params.base_instance);
}
});
-
EndTransformFeedback();
}
@@ -326,6 +190,7 @@ void RasterizerVulkan::Clear() {
if (!maxwell3d.ShouldExecute()) {
return;
}
+ FlushWork();
query_cache.UpdateCounters();
@@ -357,11 +222,13 @@ void RasterizerVulkan::Clear() {
.height = std::min(clear_rect.rect.extent.height, render_area.height),
};
- if (use_color) {
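+ // Only record the color clear when the selected render target actually exposes a color aspect.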
+ const u32 color_attachment = regs.clear_buffers.RT;
+ const auto attachment_aspect_mask = framebuffer->ImageRanges()[color_attachment].aspectMask;
+ const bool is_color_rt = (attachment_aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) != 0;
+ if (use_color && is_color_rt) {
VkClearValue clear_value;
std::memcpy(clear_value.color.float32, regs.clear_color, sizeof(regs.clear_color));
- const u32 color_attachment = regs.clear_buffers.RT;
scheduler.Record([color_attachment, clear_value, clear_rect](vk::CommandBuffer cmdbuf) {
const VkClearAttachment attachment{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
@@ -393,73 +260,20 @@ void RasterizerVulkan::Clear() {
});
}
-void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
- MICROPROFILE_SCOPE(Vulkan_Compute);
-
- query_cache.UpdateCounters();
+void RasterizerVulkan::DispatchCompute() {
+ FlushWork();
- const auto& launch_desc = kepler_compute.launch_description;
- auto& pipeline = pipeline_cache.GetComputePipeline({
- .shader = code_addr,
- .shared_memory_size = launch_desc.shared_alloc,
- .workgroup_size{
- launch_desc.block_dim_x,
- launch_desc.block_dim_y,
- launch_desc.block_dim_z,
- },
- });
+ ComputePipeline* const pipeline{pipeline_cache.CurrentComputePipeline()};
+ if (!pipeline) {
+ return;
+ }
+ std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex};
+ pipeline->Configure(kepler_compute, gpu_memory, scheduler, buffer_cache, texture_cache);
- // Compute dispatches can't be executed inside a renderpass
+ const auto& qmd{kepler_compute.launch_description};
+ const std::array<u32, 3> dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z};
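+ // Compute dispatches can't be recorded inside a render pass, so request an outside-render-pass context first.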
scheduler.RequestOutsideRenderPassOperationContext();
-
- image_view_indices.clear();
- sampler_handles.clear();
-
- std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
-
- const auto& entries = pipeline.GetEntries();
- buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers);
- buffer_cache.UnbindComputeStorageBuffers();
- u32 ssbo_index = 0;
- for (const auto& buffer : entries.global_buffers) {
- buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset,
- buffer.is_written);
- ++ssbo_index;
- }
- buffer_cache.UpdateComputeBuffers();
-
- texture_cache.SynchronizeComputeDescriptors();
-
- SetupComputeUniformTexels(entries);
- SetupComputeTextures(entries);
- SetupComputeStorageTexels(entries);
- SetupComputeImages(entries);
-
- const std::span indices_span(image_view_indices.data(), image_view_indices.size());
- texture_cache.FillComputeImageViews(indices_span, image_view_ids);
-
- update_descriptor_queue.Acquire();
-
- buffer_cache.BindHostComputeBuffers();
-
- ImageViewId* image_view_id_ptr = image_view_ids.data();
- VkSampler* sampler_ptr = sampler_handles.data();
- PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr,
- sampler_ptr);
-
- const VkPipeline pipeline_handle = pipeline.GetHandle();
- const VkPipelineLayout pipeline_layout = pipeline.GetLayout();
- const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet();
- scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y,
- grid_z = launch_desc.grid_dim_z, pipeline_handle, pipeline_layout,
- descriptor_set](vk::CommandBuffer cmdbuf) {
- cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle);
- if (descriptor_set) {
- cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout,
- DESCRIPTOR_SET, descriptor_set, nullptr);
- }
- cmdbuf.Dispatch(grid_x, grid_y, grid_z);
- });
+ scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); });
}
void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) {
@@ -557,6 +371,13 @@ void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) {
pipeline_cache.OnCPUWrite(addr, size);
}
+void RasterizerVulkan::ModifyGPUMemory(GPUVAddr addr, u64 size) {
+ {
+ std::scoped_lock lock{texture_cache.mutex};
+ texture_cache.UnmapGPUMemory(addr, size);
+ }
+}
+
void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {
if (!gpu.IsAsync()) {
gpu_memory.Write<u32>(addr, value);
@@ -573,6 +394,13 @@ void RasterizerVulkan::SignalSyncPoint(u32 value) {
fence_manager.SignalSyncPoint(value);
}
+void RasterizerVulkan::SignalReference() {
+ if (!gpu.IsAsync()) {
+ return;
+ }
+ fence_manager.SignalOrdering();
+}
+
void RasterizerVulkan::ReleaseFences() {
if (!gpu.IsAsync()) {
return;
@@ -605,10 +433,12 @@ void RasterizerVulkan::WaitForIdle() {
cmdbuf.SetEvent(event, flags);
cmdbuf.WaitEvents(event, flags, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, {}, {}, {});
});
+ SignalReference();
}
void RasterizerVulkan::FragmentBarrier() {
// We already put barriers when a render pass finishes
+ scheduler.RequestOutsideRenderPassOperationContext();
}
void RasterizerVulkan::TiledCacheBarrier() {
@@ -616,10 +446,11 @@ void RasterizerVulkan::TiledCacheBarrier() {
}
void RasterizerVulkan::FlushCommands() {
- if (draw_counter > 0) {
- draw_counter = 0;
- scheduler.Flush();
+ if (draw_counter == 0) {
+ return;
}
+ draw_counter = 0;
+ scheduler.Flush();
}
void RasterizerVulkan::TickFrame() {
@@ -645,6 +476,10 @@ bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surf
return true;
}
+Tegra::Engines::AccelerateDMAInterface& RasterizerVulkan::AccessAccelerateDMA() {
+ return accelerate_dma;
+}
+
bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
VAddr framebuffer_addr, u32 pixel_stride) {
if (!framebuffer_addr) {
@@ -655,13 +490,18 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
if (!image_view) {
return false;
}
- screen_info.image_view = image_view->Handle(VideoCommon::ImageViewType::e2D);
+ screen_info.image_view = image_view->Handle(Shader::TextureType::Color2D);
screen_info.width = image_view->size.width;
screen_info.height = image_view->size.height;
screen_info.is_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format);
return true;
}
+void RasterizerVulkan::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
+ const VideoCore::DiskResourceLoadCallback& callback) {
+ pipeline_cache.LoadDiskResources(title_id, stop_loading, callback);
+}
+
void RasterizerVulkan::FlushWork() {
static constexpr u32 DRAWS_TO_DISPATCH = 4096;
@@ -670,63 +510,27 @@ void RasterizerVulkan::FlushWork() {
if ((++draw_counter & 7) != 7) {
return;
}
-
if (draw_counter < DRAWS_TO_DISPATCH) {
// Send recorded tasks to the worker thread
scheduler.DispatchWork();
return;
}
-
// Otherwise (every certain number of draws) flush execution.
// This submits commands to the Vulkan driver.
scheduler.Flush();
draw_counter = 0;
}
-void RasterizerVulkan::SetupShaderDescriptors(
- const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders, bool is_indexed) {
- image_view_indices.clear();
- sampler_handles.clear();
- for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
- Shader* const shader = shaders[stage + 1];
- if (!shader) {
- continue;
- }
- const ShaderEntries& entries = shader->GetEntries();
- SetupGraphicsUniformTexels(entries, stage);
- SetupGraphicsTextures(entries, stage);
- SetupGraphicsStorageTexels(entries, stage);
- SetupGraphicsImages(entries, stage);
-
- buffer_cache.SetEnabledUniformBuffers(stage, entries.enabled_uniform_buffers);
- buffer_cache.UnbindGraphicsStorageBuffers(stage);
- u32 ssbo_index = 0;
- for (const auto& buffer : entries.global_buffers) {
- buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index,
- buffer.cbuf_offset, buffer.is_written);
- ++ssbo_index;
- }
- }
- const std::span indices_span(image_view_indices.data(), image_view_indices.size());
- buffer_cache.UpdateGraphicsBuffers(is_indexed);
- texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
-
- buffer_cache.BindHostGeometryBuffers(is_indexed);
+AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_) : buffer_cache{buffer_cache_} {}
- update_descriptor_queue.Acquire();
+bool AccelerateDMA::BufferClear(GPUVAddr src_address, u64 amount, u32 value) {
+ std::scoped_lock lock{buffer_cache.mutex};
+ return buffer_cache.DMAClear(src_address, amount, value);
+}
- ImageViewId* image_view_id_ptr = image_view_ids.data();
- VkSampler* sampler_ptr = sampler_handles.data();
- for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
- // Skip VertexA stage
- Shader* const shader = shaders[stage + 1];
- if (!shader) {
- continue;
- }
- buffer_cache.BindHostStageBuffers(stage);
- PushImageDescriptors(shader->GetEntries(), texture_cache, update_descriptor_queue,
- image_view_id_ptr, sampler_ptr);
- }
+bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) {
+ std::scoped_lock lock{buffer_cache.mutex};
+ return buffer_cache.DMACopy(src_address, dest_address, amount);
}
void RasterizerVulkan::UpdateDynamicStates() {
@@ -737,6 +541,7 @@ void RasterizerVulkan::UpdateDynamicStates() {
UpdateBlendConstants(regs);
UpdateDepthBounds(regs);
UpdateStencilFaces(regs);
+ UpdateLineWidth(regs);
if (device.IsExtExtendedDynamicStateSupported()) {
UpdateCullMode(regs);
UpdateDepthBoundsTestEnable(regs);
@@ -746,6 +551,9 @@ void RasterizerVulkan::UpdateDynamicStates() {
UpdateFrontFace(regs);
UpdateStencilOp(regs);
UpdateStencilTestEnable(regs);
+ if (device.IsExtVertexInputDynamicStateSupported()) {
+ UpdateVertexInput(regs);
+ }
}
}
@@ -777,89 +585,6 @@ void RasterizerVulkan::EndTransformFeedback() {
[](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); });
}
-void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) {
- const auto& regs = maxwell3d.regs;
- const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
- for (const auto& entry : entries.uniform_texels) {
- const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage);
- image_view_indices.push_back(handle.image);
- }
-}
-
-void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) {
- const auto& regs = maxwell3d.regs;
- const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
- for (const auto& entry : entries.samplers) {
- for (size_t index = 0; index < entry.size; ++index) {
- const TextureHandle handle =
- GetTextureInfo(maxwell3d, via_header_index, entry, stage, index);
- image_view_indices.push_back(handle.image);
-
- Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler);
- sampler_handles.push_back(sampler->Handle());
- }
- }
-}
-
-void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) {
- const auto& regs = maxwell3d.regs;
- const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
- for (const auto& entry : entries.storage_texels) {
- const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage);
- image_view_indices.push_back(handle.image);
- }
-}
-
-void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) {
- const auto& regs = maxwell3d.regs;
- const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
- for (const auto& entry : entries.images) {
- const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage);
- image_view_indices.push_back(handle.image);
- }
-}
-
-void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) {
- const bool via_header_index = kepler_compute.launch_description.linked_tsc;
- for (const auto& entry : entries.uniform_texels) {
- const TextureHandle handle =
- GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX);
- image_view_indices.push_back(handle.image);
- }
-}
-
-void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
- const bool via_header_index = kepler_compute.launch_description.linked_tsc;
- for (const auto& entry : entries.samplers) {
- for (size_t index = 0; index < entry.size; ++index) {
- const TextureHandle handle = GetTextureInfo(kepler_compute, via_header_index, entry,
- COMPUTE_SHADER_INDEX, index);
- image_view_indices.push_back(handle.image);
-
- Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler);
- sampler_handles.push_back(sampler->Handle());
- }
- }
-}
-
-void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) {
- const bool via_header_index = kepler_compute.launch_description.linked_tsc;
- for (const auto& entry : entries.storage_texels) {
- const TextureHandle handle =
- GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX);
- image_view_indices.push_back(handle.image);
- }
-}
-
-void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
- const bool via_header_index = kepler_compute.launch_description.linked_tsc;
- for (const auto& entry : entries.images) {
- const TextureHandle handle =
- GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX);
- image_view_indices.push_back(handle.image);
- }
-}
-
void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchViewports()) {
return;
@@ -952,6 +677,14 @@ void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs)
}
}
+void RasterizerVulkan::UpdateLineWidth(Tegra::Engines::Maxwell3D::Regs& regs) {
+ if (!state_tracker.TouchLineWidth()) {
+ return;
+ }
+ const float width = regs.line_smooth_enable ? regs.line_width_smooth : regs.line_width_aliased;
+ scheduler.Record([width](vk::CommandBuffer cmdbuf) { cmdbuf.SetLineWidth(width); });
+}
+
void RasterizerVulkan::UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchCullMode()) {
return;
@@ -966,6 +699,11 @@ void RasterizerVulkan::UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Re
if (!state_tracker.TouchDepthBoundsTestEnable()) {
return;
}
+ bool enabled = regs.depth_bounds_enable;
+ if (enabled && !device.IsDepthBoundsSupported()) {
+ LOG_WARNING(Render_Vulkan, "Depth bounds is enabled but not supported");
+ enabled = false;
+ }
- scheduler.Record([enable = regs.depth_bounds_enable](vk::CommandBuffer cmdbuf) {
+ scheduler.Record([enable = enabled](vk::CommandBuffer cmdbuf) {
cmdbuf.SetDepthBoundsTestEnableEXT(enable);
});
@@ -1053,4 +791,62 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs&
});
}
+void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs) {
+ auto& dirty{maxwell3d.dirty.flags};
+ if (!dirty[Dirty::VertexInput]) {
+ return;
+ }
+ dirty[Dirty::VertexInput] = false;
+
+ boost::container::static_vector<VkVertexInputBindingDescription2EXT, 32> bindings;
+ boost::container::static_vector<VkVertexInputAttributeDescription2EXT, 32> attributes;
+
+ // There seems to be a bug on Nvidia's driver where updating only higher attributes ends up
+ // generating dirty state. Track the highest dirty attribute and update all attributes until
+ // that one.
+ size_t highest_dirty_attr{};
+ for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
+ if (dirty[Dirty::VertexAttribute0 + index]) {
+ highest_dirty_attr = index;
+ }
+ }
+ for (size_t index = 0; index < highest_dirty_attr; ++index) {
+ const Maxwell::VertexAttribute attribute{regs.vertex_attrib_format[index]};
+ const u32 binding{attribute.buffer};
+ dirty[Dirty::VertexAttribute0 + index] = false;
+ dirty[Dirty::VertexBinding0 + static_cast<size_t>(binding)] = true;
+ if (!attribute.constant) {
+ attributes.push_back({
+ .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_ATTRIBUTE_DESCRIPTION_2_EXT,
+ .pNext = nullptr,
+ .location = static_cast<u32>(index),
+ .binding = binding,
+ .format = MaxwellToVK::VertexFormat(attribute.type, attribute.size),
+ .offset = attribute.offset,
+ });
+ }
+ }
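+ // Rebuild the binding descriptions for every binding marked dirty, either directly or via one of its attributes above.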
+ for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
+ if (!dirty[Dirty::VertexBinding0 + index]) {
+ continue;
+ }
+ dirty[Dirty::VertexBinding0 + index] = false;
+
+ const u32 binding{static_cast<u32>(index)};
+ const auto& input_binding{regs.vertex_array[binding]};
+ const bool is_instanced{regs.instanced_arrays.IsInstancingEnabled(binding)};
+ bindings.push_back({
+ .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_BINDING_DESCRIPTION_2_EXT,
+ .pNext = nullptr,
+ .binding = binding,
+ .stride = input_binding.stride,
+ .inputRate = is_instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX,
+ .divisor = is_instanced ? input_binding.divisor : 1,
+ });
+ }
+ scheduler.Record([bindings, attributes](vk::CommandBuffer cmdbuf) {
+ cmdbuf.SetVertexInputEXT(bindings, attributes);
+ });
+}
+
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index cb8c5c279..866827247 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -13,6 +13,7 @@
#include <boost/container/static_vector.hpp>
#include "common/common_types.h"
+#include "video_core/engines/maxwell_dma.h"
#include "video_core/rasterizer_accelerated.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_vulkan/blit_image.h"
@@ -20,14 +21,13 @@
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_fence_manager.h"
-#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_query_cache.h"
+#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
-#include "video_core/shader/async_shaders.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -49,6 +49,18 @@ struct VKScreenInfo;
class StateTracker;
+class AccelerateDMA : public Tegra::Engines::AccelerateDMAInterface {
+public:
+ explicit AccelerateDMA(BufferCache& buffer_cache);
+
+ bool BufferCopy(GPUVAddr start_address, GPUVAddr end_address, u64 amount) override;
+
+ bool BufferClear(GPUVAddr src_address, u64 amount, u32 value) override;
+
+private:
+ BufferCache& buffer_cache;
+};
+
class RasterizerVulkan final : public VideoCore::RasterizerAccelerated {
public:
explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
@@ -60,7 +72,7 @@ public:
void Draw(bool is_indexed, bool is_instanced) override;
void Clear() override;
- void DispatchCompute(GPUVAddr code_addr) override;
+ void DispatchCompute() override;
void ResetCounter(VideoCore::QueryType type) override;
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
@@ -72,8 +84,10 @@ public:
void OnCPUWrite(VAddr addr, u64 size) override;
void SyncGuestHost() override;
void UnmapMemory(VAddr addr, u64 size) override;
+ void ModifyGPUMemory(GPUVAddr addr, u64 size) override;
void SignalSemaphore(GPUVAddr addr, u32 value) override;
void SignalSyncPoint(u32 value) override;
+ void SignalReference() override;
void ReleaseFences() override;
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void WaitForIdle() override;
@@ -84,21 +98,11 @@ public:
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) override;
+ Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override;
-
- VideoCommon::Shader::AsyncShaders& GetAsyncShaders() {
- return async_shaders;
- }
-
- const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const {
- return async_shaders;
- }
-
- /// Maximum supported size that a constbuffer can have in bytes.
- static constexpr size_t MaxConstbufferSize = 0x10000;
- static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0,
- "The maximum size of a constbuffer must be a multiple of the size of GLvec4");
+ void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
+ const VideoCore::DiskResourceLoadCallback& callback) override;
private:
static constexpr size_t MAX_TEXTURES = 192;
@@ -109,46 +113,19 @@ private:
void FlushWork();
- /// Setup descriptors in the graphics pipeline.
- void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders,
- bool is_indexed);
-
void UpdateDynamicStates();
void BeginTransformFeedback();
void EndTransformFeedback();
- /// Setup uniform texels in the graphics pipeline.
- void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage);
-
- /// Setup textures in the graphics pipeline.
- void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage);
-
- /// Setup storage texels in the graphics pipeline.
- void SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage);
-
- /// Setup images in the graphics pipeline.
- void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage);
-
- /// Setup texel buffers in the compute pipeline.
- void SetupComputeUniformTexels(const ShaderEntries& entries);
-
- /// Setup textures in the compute pipeline.
- void SetupComputeTextures(const ShaderEntries& entries);
-
- /// Setup storage texels in the compute pipeline.
- void SetupComputeStorageTexels(const ShaderEntries& entries);
-
- /// Setup images in the compute pipeline.
- void SetupComputeImages(const ShaderEntries& entries);
-
void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs);
+ void UpdateLineWidth(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
@@ -159,6 +136,8 @@ private:
void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
+ void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs);
+
Tegra::GPU& gpu;
Tegra::MemoryManager& gpu_memory;
Tegra::Engines::Maxwell3D& maxwell3d;
@@ -171,23 +150,22 @@ private:
VKScheduler& scheduler;
StagingBufferPool staging_pool;
- VKDescriptorPool descriptor_pool;
+ DescriptorPool descriptor_pool;
VKUpdateDescriptorQueue update_descriptor_queue;
BlitImageHelper blit_image;
ASTCDecoderPass astc_decoder_pass;
-
- GraphicsPipelineCacheKey graphics_key;
+ RenderPassCache render_pass_cache;
TextureCacheRuntime texture_cache_runtime;
TextureCache texture_cache;
BufferCacheRuntime buffer_cache_runtime;
BufferCache buffer_cache;
- VKPipelineCache pipeline_cache;
+ PipelineCache pipeline_cache;
VKQueryCache query_cache;
+ AccelerateDMA accelerate_dma;
VKFenceManager fence_manager;
vk::Event wfi_event;
- VideoCommon::Shader::AsyncShaders async_shaders;
boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
std::array<VideoCommon::ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp
new file mode 100644
index 000000000..451ffe019
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp
@@ -0,0 +1,96 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <unordered_map>
+
+#include <boost/container/static_vector.hpp>
+
+#include "video_core/renderer_vulkan/maxwell_to_vk.h"
+#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
+#include "video_core/surface.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
+
+namespace Vulkan {
+namespace {
+using VideoCore::Surface::PixelFormat;
+
+VkAttachmentDescription AttachmentDescription(const Device& device, PixelFormat format,
+ VkSampleCountFlagBits samples) {
+ using MaxwellToVK::SurfaceFormat;
+ return {
+ .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT,
+ .format = SurfaceFormat(device, FormatType::Optimal, true, format).format,
+ .samples = samples,
+ .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
+ };
+}
+} // Anonymous namespace
+
+RenderPassCache::RenderPassCache(const Device& device_) : device{&device_} {}
+
+VkRenderPass RenderPassCache::Get(const RenderPassKey& key) {
+ std::lock_guard lock{mutex};
+ const auto [pair, is_new] = cache.try_emplace(key);
+ if (!is_new) {
+ return *pair->second;
+ }
+ boost::container::static_vector<VkAttachmentDescription, 9> descriptions;
+ std::array<VkAttachmentReference, 8> references{};
+ u32 num_attachments{};
+ u32 num_colors{};
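+ // Only valid color formats get an attachment description; empty slots keep a reference marked VK_ATTACHMENT_UNUSED so attachment locations still line up.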
+ for (size_t index = 0; index < key.color_formats.size(); ++index) {
+ const PixelFormat format{key.color_formats[index]};
+ const bool is_valid{format != PixelFormat::Invalid};
+ references[index] = VkAttachmentReference{
+ .attachment = is_valid ? num_colors : VK_ATTACHMENT_UNUSED,
+ .layout = VK_IMAGE_LAYOUT_GENERAL,
+ };
+ if (is_valid) {
+ descriptions.push_back(AttachmentDescription(*device, format, key.samples));
+ num_attachments = static_cast<u32>(index + 1);
+ ++num_colors;
+ }
+ }
+ const bool has_depth{key.depth_format != PixelFormat::Invalid};
+ VkAttachmentReference depth_reference{};
+ if (key.depth_format != PixelFormat::Invalid) {
+ depth_reference = VkAttachmentReference{
+ .attachment = num_colors,
+ .layout = VK_IMAGE_LAYOUT_GENERAL,
+ };
+ descriptions.push_back(AttachmentDescription(*device, key.depth_format, key.samples));
+ }
+ const VkSubpassDescription subpass{
+ .flags = 0,
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .pInputAttachments = nullptr,
+ .colorAttachmentCount = num_attachments,
+ .pColorAttachments = references.data(),
+ .pResolveAttachments = nullptr,
+ .pDepthStencilAttachment = has_depth ? &depth_reference : nullptr,
+ .preserveAttachmentCount = 0,
+ .pPreserveAttachments = nullptr,
+ };
+ pair->second = device->GetLogical().CreateRenderPass({
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .attachmentCount = static_cast<u32>(descriptions.size()),
+ .pAttachments = descriptions.empty() ? nullptr : descriptions.data(),
+ .subpassCount = 1,
+ .pSubpasses = &subpass,
+ .dependencyCount = 0,
+ .pDependencies = nullptr,
+ });
+ return *pair->second;
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.h b/src/video_core/renderer_vulkan/vk_render_pass_cache.h
new file mode 100644
index 000000000..eaa0ed775
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.h
@@ -0,0 +1,55 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <mutex>
+#include <unordered_map>
+
+#include "video_core/surface.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
+
+namespace Vulkan {
+
+struct RenderPassKey {
+ auto operator<=>(const RenderPassKey&) const noexcept = default;
+
+ std::array<VideoCore::Surface::PixelFormat, 8> color_formats;
+ VideoCore::Surface::PixelFormat depth_format;
+ VkSampleCountFlagBits samples;
+};
+
+} // namespace Vulkan
+
+namespace std {
+template <>
+struct hash<Vulkan::RenderPassKey> {
+ [[nodiscard]] size_t operator()(const Vulkan::RenderPassKey& key) const noexcept {
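+ // Pack the depth format and sample count into the high bits and spread the color formats across the low bits; the map still compares full keys, so hash collisions are harmless.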
+ size_t value = static_cast<size_t>(key.depth_format) << 48;
+ value ^= static_cast<size_t>(key.samples) << 52;
+ for (size_t i = 0; i < key.color_formats.size(); ++i) {
+ value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6);
+ }
+ return value;
+ }
+};
+} // namespace std
+
+namespace Vulkan {
+
+class Device;
+
+class RenderPassCache {
+public:
+ explicit RenderPassCache(const Device& device_);
+
+ VkRenderPass Get(const RenderPassKey& key);
+
+private:
+ const Device* device{};
+ std::unordered_map<RenderPassKey, vk::RenderPass> cache;
+ std::mutex mutex;
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.cpp b/src/video_core/renderer_vulkan/vk_resource_pool.cpp
index a8bf7bda8..2dd514968 100644
--- a/src/video_core/renderer_vulkan/vk_resource_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_resource_pool.cpp
@@ -10,18 +10,16 @@
namespace Vulkan {
ResourcePool::ResourcePool(MasterSemaphore& master_semaphore_, size_t grow_step_)
- : master_semaphore{master_semaphore_}, grow_step{grow_step_} {}
-
-ResourcePool::~ResourcePool() = default;
+ : master_semaphore{&master_semaphore_}, grow_step{grow_step_} {}
size_t ResourcePool::CommitResource() {
// Refresh semaphore to query updated results
- master_semaphore.Refresh();
- const u64 gpu_tick = master_semaphore.KnownGpuTick();
+ master_semaphore->Refresh();
+ const u64 gpu_tick = master_semaphore->KnownGpuTick();
const auto search = [this, gpu_tick](size_t begin, size_t end) -> std::optional<size_t> {
for (size_t iterator = begin; iterator < end; ++iterator) {
if (gpu_tick >= ticks[iterator]) {
- ticks[iterator] = master_semaphore.CurrentTick();
+ ticks[iterator] = master_semaphore->CurrentTick();
return iterator;
}
}
@@ -36,7 +34,7 @@ size_t ResourcePool::CommitResource() {
// Both searches failed, the pool is full; handle it.
const size_t free_resource = ManageOverflow();
- ticks[free_resource] = master_semaphore.CurrentTick();
+ ticks[free_resource] = master_semaphore->CurrentTick();
found = free_resource;
}
}
diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.h b/src/video_core/renderer_vulkan/vk_resource_pool.h
index 9d0bb3b4d..f0b80ad59 100644
--- a/src/video_core/renderer_vulkan/vk_resource_pool.h
+++ b/src/video_core/renderer_vulkan/vk_resource_pool.h
@@ -18,8 +18,16 @@ class MasterSemaphore;
*/
class ResourcePool {
public:
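+ // Default construction plus assignment let owners hold a pool by value and replace it with a fully constructed one later.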
+ explicit ResourcePool() = default;
explicit ResourcePool(MasterSemaphore& master_semaphore, size_t grow_step);
- virtual ~ResourcePool();
+
+ virtual ~ResourcePool() = default;
+
+ ResourcePool& operator=(ResourcePool&&) noexcept = default;
+ ResourcePool(ResourcePool&&) noexcept = default;
+
+ ResourcePool& operator=(const ResourcePool&) = default;
+ ResourcePool(const ResourcePool&) = default;
protected:
size_t CommitResource();
@@ -34,7 +42,7 @@ private:
/// Allocates a new page of resources.
void Grow();
- MasterSemaphore& master_semaphore;
+ MasterSemaphore* master_semaphore{};
size_t grow_step = 0; ///< Number of new resources created after an overflow
size_t hint_iterator = 0; ///< Hint to where the next free resources is likely to be found
std::vector<u64> ticks; ///< Ticks for each resource
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index f35c120b0..4840962de 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -31,7 +31,7 @@ void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) {
command->~Command();
command = next;
}
-
+ submit = false;
command_offset = 0;
first = nullptr;
last = nullptr;
@@ -42,13 +42,16 @@ VKScheduler::VKScheduler(const Device& device_, StateTracker& state_tracker_)
master_semaphore{std::make_unique<MasterSemaphore>(device)},
command_pool{std::make_unique<CommandPool>(*master_semaphore, device)} {
AcquireNewChunk();
- AllocateNewContext();
+ AllocateWorkerCommandBuffer();
worker_thread = std::thread(&VKScheduler::WorkerThread, this);
}
VKScheduler::~VKScheduler() {
- quit = true;
- cv.notify_all();
+ {
+ std::lock_guard lock{work_mutex};
+ quit = true;
+ }
+ work_cv.notify_all();
worker_thread.join();
}
@@ -60,6 +63,7 @@ void VKScheduler::Flush(VkSemaphore semaphore) {
void VKScheduler::Finish(VkSemaphore semaphore) {
const u64 presubmit_tick = CurrentTick();
SubmitExecution(semaphore);
+ WaitWorker();
Wait(presubmit_tick);
AllocateNewContext();
}
@@ -68,20 +72,19 @@ void VKScheduler::WaitWorker() {
MICROPROFILE_SCOPE(Vulkan_WaitForWorker);
DispatchWork();
- bool finished = false;
- do {
- cv.notify_all();
- std::unique_lock lock{mutex};
- finished = chunk_queue.Empty();
- } while (!finished);
+ std::unique_lock lock{work_mutex};
+ wait_cv.wait(lock, [this] { return work_queue.empty(); });
}
void VKScheduler::DispatchWork() {
if (chunk->Empty()) {
return;
}
- chunk_queue.Push(std::move(chunk));
- cv.notify_all();
+ {
+ std::lock_guard lock{work_mutex};
+ work_queue.push(std::move(chunk));
+ }
+ work_cv.notify_one();
AcquireNewChunk();
}
@@ -124,93 +127,101 @@ void VKScheduler::RequestOutsideRenderPassOperationContext() {
EndRenderPass();
}
-void VKScheduler::BindGraphicsPipeline(VkPipeline pipeline) {
+bool VKScheduler::UpdateGraphicsPipeline(GraphicsPipeline* pipeline) {
if (state.graphics_pipeline == pipeline) {
- return;
+ return false;
}
state.graphics_pipeline = pipeline;
- Record([pipeline](vk::CommandBuffer cmdbuf) {
- cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
- });
+ return true;
}
void VKScheduler::WorkerThread() {
- Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
- std::unique_lock lock{mutex};
+ Common::SetCurrentThreadName("yuzu:VulkanWorker");
do {
- cv.wait(lock, [this] { return !chunk_queue.Empty() || quit; });
- if (quit) {
- continue;
+ if (work_queue.empty()) {
+ wait_cv.notify_all();
+ }
+ std::unique_ptr<CommandChunk> work;
+ {
+ std::unique_lock lock{work_mutex};
+ work_cv.wait(lock, [this] { return !work_queue.empty() || quit; });
+ if (quit) {
+ continue;
+ }
+ work = std::move(work_queue.front());
+ work_queue.pop();
+ }
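+ // A chunk that recorded a submit ends and submits the current command buffer while executing, so a fresh one is allocated afterwards.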
+ const bool has_submit = work->HasSubmit();
+ work->ExecuteAll(current_cmdbuf);
+ if (has_submit) {
+ AllocateWorkerCommandBuffer();
}
- auto extracted_chunk = std::move(chunk_queue.Front());
- chunk_queue.Pop();
- extracted_chunk->ExecuteAll(current_cmdbuf);
- chunk_reserve.Push(std::move(extracted_chunk));
+ std::lock_guard reserve_lock{reserve_mutex};
+ chunk_reserve.push_back(std::move(work));
} while (!quit);
}
+void VKScheduler::AllocateWorkerCommandBuffer() {
+ current_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader());
+ current_cmdbuf.Begin({
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+ .pNext = nullptr,
+ .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
+ .pInheritanceInfo = nullptr,
+ });
+}
+
void VKScheduler::SubmitExecution(VkSemaphore semaphore) {
EndPendingOperations();
InvalidateState();
- WaitWorker();
- std::unique_lock lock{mutex};
+ const u64 signal_value = master_semaphore->NextTick();
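+ // Reserve the timeline tick now; the command buffer is ended and submitted later, when this chunk executes on the worker thread.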
+ Record([semaphore, signal_value, this](vk::CommandBuffer cmdbuf) {
+ cmdbuf.End();
- current_cmdbuf.End();
+ const u32 num_signal_semaphores = semaphore ? 2U : 1U;
- const VkSemaphore timeline_semaphore = master_semaphore->Handle();
- const u32 num_signal_semaphores = semaphore ? 2U : 1U;
+ const u64 wait_value = signal_value - 1;
+ const VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
- const u64 signal_value = master_semaphore->CurrentTick();
- const u64 wait_value = signal_value - 1;
- const VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
+ const VkSemaphore timeline_semaphore = master_semaphore->Handle();
+ const std::array signal_values{signal_value, u64(0)};
+ const std::array signal_semaphores{timeline_semaphore, semaphore};
- master_semaphore->NextTick();
-
- const std::array signal_values{signal_value, u64(0)};
- const std::array signal_semaphores{timeline_semaphore, semaphore};
-
- const VkTimelineSemaphoreSubmitInfoKHR timeline_si{
- .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR,
- .pNext = nullptr,
- .waitSemaphoreValueCount = 1,
- .pWaitSemaphoreValues = &wait_value,
- .signalSemaphoreValueCount = num_signal_semaphores,
- .pSignalSemaphoreValues = signal_values.data(),
- };
- const VkSubmitInfo submit_info{
- .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
- .pNext = &timeline_si,
- .waitSemaphoreCount = 1,
- .pWaitSemaphores = &timeline_semaphore,
- .pWaitDstStageMask = &wait_stage_mask,
- .commandBufferCount = 1,
- .pCommandBuffers = current_cmdbuf.address(),
- .signalSemaphoreCount = num_signal_semaphores,
- .pSignalSemaphores = signal_semaphores.data(),
- };
- switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) {
- case VK_SUCCESS:
- break;
- case VK_ERROR_DEVICE_LOST:
- device.ReportLoss();
- [[fallthrough]];
- default:
- vk::Check(result);
- }
+ const VkTimelineSemaphoreSubmitInfoKHR timeline_si{
+ .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR,
+ .pNext = nullptr,
+ .waitSemaphoreValueCount = 1,
+ .pWaitSemaphoreValues = &wait_value,
+ .signalSemaphoreValueCount = num_signal_semaphores,
+ .pSignalSemaphoreValues = signal_values.data(),
+ };
+ const VkSubmitInfo submit_info{
+ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+ .pNext = &timeline_si,
+ .waitSemaphoreCount = 1,
+ .pWaitSemaphores = &timeline_semaphore,
+ .pWaitDstStageMask = &wait_stage_mask,
+ .commandBufferCount = 1,
+ .pCommandBuffers = cmdbuf.address(),
+ .signalSemaphoreCount = num_signal_semaphores,
+ .pSignalSemaphores = signal_semaphores.data(),
+ };
+ switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) {
+ case VK_SUCCESS:
+ break;
+ case VK_ERROR_DEVICE_LOST:
+ device.ReportLoss();
+ [[fallthrough]];
+ default:
+ vk::Check(result);
+ }
+ });
+ chunk->MarkSubmit();
+ DispatchWork();
}
void VKScheduler::AllocateNewContext() {
- std::unique_lock lock{mutex};
-
- current_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader());
- current_cmdbuf.Begin({
- .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
- .pNext = nullptr,
- .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
- .pInheritanceInfo = nullptr,
- });
-
// Enable counters once again. These are disabled when a command buffer is finished.
if (query_cache) {
query_cache->UpdateCounters();
@@ -265,12 +276,13 @@ void VKScheduler::EndRenderPass() {
}
void VKScheduler::AcquireNewChunk() {
- if (chunk_reserve.Empty()) {
+ std::lock_guard lock{reserve_mutex};
+ if (chunk_reserve.empty()) {
chunk = std::make_unique<CommandChunk>();
return;
}
- chunk = std::move(chunk_reserve.Front());
- chunk_reserve.Pop();
+ chunk = std::move(chunk_reserve.back());
+ chunk_reserve.pop_back();
}
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index 3ce48e9d2..cf39a2363 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -8,12 +8,12 @@
#include <condition_variable>
#include <cstddef>
#include <memory>
-#include <stack>
#include <thread>
#include <utility>
+#include <queue>
+
#include "common/alignment.h"
#include "common/common_types.h"
-#include "common/threadsafe_queue.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -22,6 +22,7 @@ namespace Vulkan {
class CommandPool;
class Device;
class Framebuffer;
+class GraphicsPipeline;
class StateTracker;
class VKQueryCache;
@@ -52,8 +53,8 @@ public:
/// of a renderpass.
void RequestOutsideRenderPassOperationContext();
- /// Binds a pipeline to the current execution context.
- void BindGraphicsPipeline(VkPipeline pipeline);
+ /// Updates the graphics pipeline in the current execution context; returns true when it changed.
+ bool UpdateGraphicsPipeline(GraphicsPipeline* pipeline);
/// Invalidates current command buffer state except for render passes
void InvalidateState();
@@ -85,6 +86,10 @@ public:
/// Waits for the given tick to trigger on the GPU.
void Wait(u64 tick) {
+ if (tick >= master_semaphore->CurrentTick()) {
+ // Make sure we are not waiting for the current tick without signalling
+ Flush();
+ }
master_semaphore->Wait(tick);
}
@@ -154,15 +159,24 @@ private:
return true;
}
+ void MarkSubmit() {
+ submit = true;
+ }
+
bool Empty() const {
return command_offset == 0;
}
+ bool HasSubmit() const {
+ return submit;
+ }
+
private:
Command* first = nullptr;
Command* last = nullptr;
size_t command_offset = 0;
+ bool submit = false;
alignas(std::max_align_t) std::array<u8, 0x8000> data{};
};
@@ -170,11 +184,13 @@ private:
VkRenderPass renderpass = nullptr;
VkFramebuffer framebuffer = nullptr;
VkExtent2D render_area = {0, 0};
- VkPipeline graphics_pipeline = nullptr;
+ GraphicsPipeline* graphics_pipeline = nullptr;
};
void WorkerThread();
+ void AllocateWorkerCommandBuffer();
+
void SubmitExecution(VkSemaphore semaphore);
void AllocateNewContext();
@@ -204,11 +220,13 @@ private:
std::array<VkImage, 9> renderpass_images{};
std::array<VkImageSubresourceRange, 9> renderpass_image_ranges{};
- Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_queue;
- Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve;
- std::mutex mutex;
- std::condition_variable cv;
- bool quit = false;
+ std::queue<std::unique_ptr<CommandChunk>> work_queue;
+ std::vector<std::unique_ptr<CommandChunk>> chunk_reserve;
+ std::mutex reserve_mutex;
+ std::mutex work_mutex;
+ std::condition_variable work_cv;
+ std::condition_variable wait_cv;
+ std::atomic_bool quit{};
};
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
deleted file mode 100644
index c6846d886..000000000
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ /dev/null
@@ -1,3166 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <functional>
-#include <limits>
-#include <map>
-#include <optional>
-#include <type_traits>
-#include <unordered_map>
-#include <utility>
-
-#include <fmt/format.h>
-
-#include <sirit/sirit.h>
-
-#include "common/alignment.h"
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "common/logging/log.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/engines/shader_header.h"
-#include "video_core/engines/shader_type.h"
-#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
-#include "video_core/shader/node.h"
-#include "video_core/shader/shader_ir.h"
-#include "video_core/shader/transform_feedback.h"
-#include "video_core/vulkan_common/vulkan_device.h"
-
-namespace Vulkan {
-
-namespace {
-
-using Sirit::Id;
-using Tegra::Engines::ShaderType;
-using Tegra::Shader::Attribute;
-using Tegra::Shader::PixelImap;
-using Tegra::Shader::Register;
-using namespace VideoCommon::Shader;
-
-using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-using Operation = const OperationNode&;
-
-class ASTDecompiler;
-class ExprDecompiler;
-
-// TODO(Rodrigo): Use rasterizer's value
-constexpr u32 MaxConstBufferFloats = 0x4000;
-constexpr u32 MaxConstBufferElements = MaxConstBufferFloats / 4;
-
-constexpr u32 NumInputPatches = 32; // This value seems to be the standard
-
-enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat };
-
-class Expression final {
-public:
- Expression(Id id_, Type type_) : id{id_}, type{type_} {
- ASSERT(type_ != Type::Void);
- }
- Expression() : type{Type::Void} {}
-
- Id id{};
- Type type{};
-};
-static_assert(std::is_standard_layout_v<Expression>);
-
-struct TexelBuffer {
- Id image_type{};
- Id image{};
-};
-
-struct SampledImage {
- Id image_type{};
- Id sampler_type{};
- Id sampler_pointer_type{};
- Id variable{};
-};
-
-struct StorageImage {
- Id image_type{};
- Id image{};
-};
-
-struct AttributeType {
- Type type;
- Id scalar;
- Id vector;
-};
-
-struct VertexIndices {
- std::optional<u32> position;
- std::optional<u32> layer;
- std::optional<u32> viewport;
- std::optional<u32> point_size;
- std::optional<u32> clip_distances;
-};
-
-struct GenericVaryingDescription {
- Id id = nullptr;
- u32 first_element = 0;
- bool is_scalar = false;
-};
-
-spv::Dim GetSamplerDim(const SamplerEntry& sampler) {
- ASSERT(!sampler.is_buffer);
- switch (sampler.type) {
- case Tegra::Shader::TextureType::Texture1D:
- return spv::Dim::Dim1D;
- case Tegra::Shader::TextureType::Texture2D:
- return spv::Dim::Dim2D;
- case Tegra::Shader::TextureType::Texture3D:
- return spv::Dim::Dim3D;
- case Tegra::Shader::TextureType::TextureCube:
- return spv::Dim::Cube;
- default:
- UNIMPLEMENTED_MSG("Unimplemented sampler type={}", sampler.type);
- return spv::Dim::Dim2D;
- }
-}
-
-std::pair<spv::Dim, bool> GetImageDim(const ImageEntry& image) {
- switch (image.type) {
- case Tegra::Shader::ImageType::Texture1D:
- return {spv::Dim::Dim1D, false};
- case Tegra::Shader::ImageType::TextureBuffer:
- return {spv::Dim::Buffer, false};
- case Tegra::Shader::ImageType::Texture1DArray:
- return {spv::Dim::Dim1D, true};
- case Tegra::Shader::ImageType::Texture2D:
- return {spv::Dim::Dim2D, false};
- case Tegra::Shader::ImageType::Texture2DArray:
- return {spv::Dim::Dim2D, true};
- case Tegra::Shader::ImageType::Texture3D:
- return {spv::Dim::Dim3D, false};
- default:
- UNIMPLEMENTED_MSG("Unimplemented image type={}", image.type);
- return {spv::Dim::Dim2D, false};
- }
-}
-
-/// Returns the number of vertices present in a primitive topology.
-u32 GetNumPrimitiveTopologyVertices(Maxwell::PrimitiveTopology primitive_topology) {
- switch (primitive_topology) {
- case Maxwell::PrimitiveTopology::Points:
- return 1;
- case Maxwell::PrimitiveTopology::Lines:
- case Maxwell::PrimitiveTopology::LineLoop:
- case Maxwell::PrimitiveTopology::LineStrip:
- return 2;
- case Maxwell::PrimitiveTopology::Triangles:
- case Maxwell::PrimitiveTopology::TriangleStrip:
- case Maxwell::PrimitiveTopology::TriangleFan:
- return 3;
- case Maxwell::PrimitiveTopology::LinesAdjacency:
- case Maxwell::PrimitiveTopology::LineStripAdjacency:
- return 4;
- case Maxwell::PrimitiveTopology::TrianglesAdjacency:
- case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
- return 6;
- case Maxwell::PrimitiveTopology::Quads:
- UNIMPLEMENTED_MSG("Quads");
- return 3;
- case Maxwell::PrimitiveTopology::QuadStrip:
- UNIMPLEMENTED_MSG("QuadStrip");
- return 3;
- case Maxwell::PrimitiveTopology::Polygon:
- UNIMPLEMENTED_MSG("Polygon");
- return 3;
- case Maxwell::PrimitiveTopology::Patches:
- UNIMPLEMENTED_MSG("Patches");
- return 3;
- default:
- UNREACHABLE();
- return 3;
- }
-}
-
-spv::ExecutionMode GetExecutionMode(Maxwell::TessellationPrimitive primitive) {
- switch (primitive) {
- case Maxwell::TessellationPrimitive::Isolines:
- return spv::ExecutionMode::Isolines;
- case Maxwell::TessellationPrimitive::Triangles:
- return spv::ExecutionMode::Triangles;
- case Maxwell::TessellationPrimitive::Quads:
- return spv::ExecutionMode::Quads;
- }
- UNREACHABLE();
- return spv::ExecutionMode::Triangles;
-}
-
-spv::ExecutionMode GetExecutionMode(Maxwell::TessellationSpacing spacing) {
- switch (spacing) {
- case Maxwell::TessellationSpacing::Equal:
- return spv::ExecutionMode::SpacingEqual;
- case Maxwell::TessellationSpacing::FractionalOdd:
- return spv::ExecutionMode::SpacingFractionalOdd;
- case Maxwell::TessellationSpacing::FractionalEven:
- return spv::ExecutionMode::SpacingFractionalEven;
- }
- UNREACHABLE();
- return spv::ExecutionMode::SpacingEqual;
-}
-
-spv::ExecutionMode GetExecutionMode(Maxwell::PrimitiveTopology input_topology) {
- switch (input_topology) {
- case Maxwell::PrimitiveTopology::Points:
- return spv::ExecutionMode::InputPoints;
- case Maxwell::PrimitiveTopology::Lines:
- case Maxwell::PrimitiveTopology::LineLoop:
- case Maxwell::PrimitiveTopology::LineStrip:
- return spv::ExecutionMode::InputLines;
- case Maxwell::PrimitiveTopology::Triangles:
- case Maxwell::PrimitiveTopology::TriangleStrip:
- case Maxwell::PrimitiveTopology::TriangleFan:
- return spv::ExecutionMode::Triangles;
- case Maxwell::PrimitiveTopology::LinesAdjacency:
- case Maxwell::PrimitiveTopology::LineStripAdjacency:
- return spv::ExecutionMode::InputLinesAdjacency;
- case Maxwell::PrimitiveTopology::TrianglesAdjacency:
- case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
- return spv::ExecutionMode::InputTrianglesAdjacency;
- case Maxwell::PrimitiveTopology::Quads:
- UNIMPLEMENTED_MSG("Quads");
- return spv::ExecutionMode::Triangles;
- case Maxwell::PrimitiveTopology::QuadStrip:
- UNIMPLEMENTED_MSG("QuadStrip");
- return spv::ExecutionMode::Triangles;
- case Maxwell::PrimitiveTopology::Polygon:
- UNIMPLEMENTED_MSG("Polygon");
- return spv::ExecutionMode::Triangles;
- case Maxwell::PrimitiveTopology::Patches:
- UNIMPLEMENTED_MSG("Patches");
- return spv::ExecutionMode::Triangles;
- }
- UNREACHABLE();
- return spv::ExecutionMode::Triangles;
-}
-
-spv::ExecutionMode GetExecutionMode(Tegra::Shader::OutputTopology output_topology) {
- switch (output_topology) {
- case Tegra::Shader::OutputTopology::PointList:
- return spv::ExecutionMode::OutputPoints;
- case Tegra::Shader::OutputTopology::LineStrip:
- return spv::ExecutionMode::OutputLineStrip;
- case Tegra::Shader::OutputTopology::TriangleStrip:
- return spv::ExecutionMode::OutputTriangleStrip;
- default:
- UNREACHABLE();
- return spv::ExecutionMode::OutputPoints;
- }
-}
-
-/// Returns true if an attribute index is one of the 32 generic attributes
-constexpr bool IsGenericAttribute(Attribute::Index attribute) {
- return attribute >= Attribute::Index::Attribute_0 &&
- attribute <= Attribute::Index::Attribute_31;
-}
-
-/// Returns the location of a generic attribute
-u32 GetGenericAttributeLocation(Attribute::Index attribute) {
- ASSERT(IsGenericAttribute(attribute));
- return static_cast<u32>(attribute) - static_cast<u32>(Attribute::Index::Attribute_0);
-}
-
-/// Returns true if an object has to be treated as precise
-bool IsPrecise(Operation operand) {
- const auto& meta{operand.GetMeta()};
- if (std::holds_alternative<MetaArithmetic>(meta)) {
- return std::get<MetaArithmetic>(meta).precise;
- }
- return false;
-}
-
-class SPIRVDecompiler final : public Sirit::Module {
-public:
- explicit SPIRVDecompiler(const Device& device_, const ShaderIR& ir_, ShaderType stage_,
- const Registry& registry_, const Specialization& specialization_)
- : Module(0x00010300), device{device_}, ir{ir_}, stage{stage_}, header{ir_.GetHeader()},
- registry{registry_}, specialization{specialization_} {
- if (stage_ != ShaderType::Compute) {
- transform_feedback = BuildTransformFeedback(registry_.GetGraphicsInfo());
- }
-
- AddCapability(spv::Capability::Shader);
- AddCapability(spv::Capability::UniformAndStorageBuffer16BitAccess);
- AddCapability(spv::Capability::ImageQuery);
- AddCapability(spv::Capability::Image1D);
- AddCapability(spv::Capability::ImageBuffer);
- AddCapability(spv::Capability::ImageGatherExtended);
- AddCapability(spv::Capability::SampledBuffer);
- AddCapability(spv::Capability::StorageImageWriteWithoutFormat);
- AddCapability(spv::Capability::DrawParameters);
- AddCapability(spv::Capability::SubgroupBallotKHR);
- AddCapability(spv::Capability::SubgroupVoteKHR);
- AddExtension("SPV_KHR_16bit_storage");
- AddExtension("SPV_KHR_shader_ballot");
- AddExtension("SPV_KHR_subgroup_vote");
- AddExtension("SPV_KHR_storage_buffer_storage_class");
- AddExtension("SPV_KHR_variable_pointers");
- AddExtension("SPV_KHR_shader_draw_parameters");
-
- if (!transform_feedback.empty()) {
- if (device.IsExtTransformFeedbackSupported()) {
- AddCapability(spv::Capability::TransformFeedback);
- } else {
- LOG_ERROR(Render_Vulkan, "Shader requires transform feedbacks but these are not "
- "supported on this device");
- }
- }
- if (ir.UsesLayer() || ir.UsesViewportIndex()) {
- if (ir.UsesViewportIndex()) {
- AddCapability(spv::Capability::MultiViewport);
- }
- if (stage != ShaderType::Geometry && device.IsExtShaderViewportIndexLayerSupported()) {
- AddExtension("SPV_EXT_shader_viewport_index_layer");
- AddCapability(spv::Capability::ShaderViewportIndexLayerEXT);
- }
- }
- if (device.IsFormatlessImageLoadSupported()) {
- AddCapability(spv::Capability::StorageImageReadWithoutFormat);
- }
- if (device.IsFloat16Supported()) {
- AddCapability(spv::Capability::Float16);
- }
- t_scalar_half = Name(TypeFloat(device_.IsFloat16Supported() ? 16 : 32), "scalar_half");
- t_half = Name(TypeVector(t_scalar_half, 2), "half");
-
- const Id main = Decompile();
-
- switch (stage) {
- case ShaderType::Vertex:
- AddEntryPoint(spv::ExecutionModel::Vertex, main, "main", interfaces);
- break;
- case ShaderType::TesselationControl:
- AddCapability(spv::Capability::Tessellation);
- AddEntryPoint(spv::ExecutionModel::TessellationControl, main, "main", interfaces);
- AddExecutionMode(main, spv::ExecutionMode::OutputVertices,
- header.common2.threads_per_input_primitive);
- break;
- case ShaderType::TesselationEval: {
- const auto& info = registry.GetGraphicsInfo();
- AddCapability(spv::Capability::Tessellation);
- AddEntryPoint(spv::ExecutionModel::TessellationEvaluation, main, "main", interfaces);
- AddExecutionMode(main, GetExecutionMode(info.tessellation_primitive));
- AddExecutionMode(main, GetExecutionMode(info.tessellation_spacing));
- AddExecutionMode(main, info.tessellation_clockwise
- ? spv::ExecutionMode::VertexOrderCw
- : spv::ExecutionMode::VertexOrderCcw);
- break;
- }
- case ShaderType::Geometry: {
- const auto& info = registry.GetGraphicsInfo();
- AddCapability(spv::Capability::Geometry);
- AddEntryPoint(spv::ExecutionModel::Geometry, main, "main", interfaces);
- AddExecutionMode(main, GetExecutionMode(info.primitive_topology));
- AddExecutionMode(main, GetExecutionMode(header.common3.output_topology));
- AddExecutionMode(main, spv::ExecutionMode::OutputVertices,
- header.common4.max_output_vertices);
- // TODO(Rodrigo): Where can we get this info from?
- AddExecutionMode(main, spv::ExecutionMode::Invocations, 1U);
- break;
- }
- case ShaderType::Fragment:
- AddEntryPoint(spv::ExecutionModel::Fragment, main, "main", interfaces);
- AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft);
- if (header.ps.omap.depth) {
- AddExecutionMode(main, spv::ExecutionMode::DepthReplacing);
- }
- if (specialization.early_fragment_tests) {
- AddExecutionMode(main, spv::ExecutionMode::EarlyFragmentTests);
- }
- break;
- case ShaderType::Compute:
- const auto workgroup_size = specialization.workgroup_size;
- AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0],
- workgroup_size[1], workgroup_size[2]);
- AddEntryPoint(spv::ExecutionModel::GLCompute, main, "main", interfaces);
- break;
- }
- }
-
-private:
- Id Decompile() {
- DeclareCommon();
- DeclareVertex();
- DeclareTessControl();
- DeclareTessEval();
- DeclareGeometry();
- DeclareFragment();
- DeclareCompute();
- DeclareRegisters();
- DeclareCustomVariables();
- DeclarePredicates();
- DeclareLocalMemory();
- DeclareSharedMemory();
- DeclareInternalFlags();
- DeclareInputAttributes();
- DeclareOutputAttributes();
-
- u32 binding = specialization.base_binding;
- binding = DeclareConstantBuffers(binding);
- binding = DeclareGlobalBuffers(binding);
- binding = DeclareUniformTexels(binding);
- binding = DeclareSamplers(binding);
- binding = DeclareStorageTexels(binding);
- binding = DeclareImages(binding);
-
- const Id main = OpFunction(t_void, {}, TypeFunction(t_void));
- AddLabel();
-
- if (ir.IsDecompiled()) {
- DeclareFlowVariables();
- DecompileAST();
- } else {
- AllocateLabels();
- DecompileBranchMode();
- }
-
- OpReturn();
- OpFunctionEnd();
-
- return main;
- }
-
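- // Writes initial values for vertex outputs before the shader body runs: Position is
- // cleared and a constant point size is stored when the specialization provides one.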
- void DefinePrologue() {
- if (stage == ShaderType::Vertex) {
- // Clear Position to avoid reading trash on the Z conversion.
- const auto position_index = out_indices.position.value();
- const Id position = AccessElement(t_out_float4, out_vertex, position_index);
- OpStore(position, v_varying_default);
-
- if (specialization.point_size) {
- const u32 point_size_index = out_indices.point_size.value();
- const Id out_point_size = AccessElement(t_out_float, out_vertex, point_size_index);
- OpStore(out_point_size, Constant(t_float, *specialization.point_size));
- }
- }
- }
-
- void DecompileAST();
-
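- // Emits control flow for shaders without a decompiled AST: an outer loop whose body
- // switches on the jmp_to variable to select the next basic block, with SSY/PBK
- // handled through a pair of flow stacks.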
- void DecompileBranchMode() {
- const u32 first_address = ir.GetBasicBlocks().begin()->first;
- const Id loop_label = OpLabel("loop");
- const Id merge_label = OpLabel("merge");
- const Id dummy_label = OpLabel();
- const Id jump_label = OpLabel();
- continue_label = OpLabel("continue");
-
- std::vector<Sirit::Literal> literals;
- std::vector<Id> branch_labels;
- for (const auto& [literal, label] : labels) {
- literals.push_back(literal);
- branch_labels.push_back(label);
- }
-
- jmp_to = OpVariable(TypePointer(spv::StorageClass::Function, t_uint),
- spv::StorageClass::Function, Constant(t_uint, first_address));
- AddLocalVariable(jmp_to);
-
- std::tie(ssy_flow_stack, ssy_flow_stack_top) = CreateFlowStack();
- std::tie(pbk_flow_stack, pbk_flow_stack_top) = CreateFlowStack();
-
- Name(jmp_to, "jmp_to");
- Name(ssy_flow_stack, "ssy_flow_stack");
- Name(ssy_flow_stack_top, "ssy_flow_stack_top");
- Name(pbk_flow_stack, "pbk_flow_stack");
- Name(pbk_flow_stack_top, "pbk_flow_stack_top");
-
- DefinePrologue();
-
- OpBranch(loop_label);
- AddLabel(loop_label);
- OpLoopMerge(merge_label, continue_label, spv::LoopControlMask::MaskNone);
- OpBranch(dummy_label);
-
- AddLabel(dummy_label);
- const Id default_branch = OpLabel();
- const Id jmp_to_load = OpLoad(t_uint, jmp_to);
- OpSelectionMerge(jump_label, spv::SelectionControlMask::MaskNone);
- OpSwitch(jmp_to_load, default_branch, literals, branch_labels);
-
- AddLabel(default_branch);
- OpReturn();
-
- for (const auto& [address, bb] : ir.GetBasicBlocks()) {
- AddLabel(labels.at(address));
-
- VisitBasicBlock(bb);
-
- const auto next_it = labels.lower_bound(address + 1);
- const Id next_label = next_it != labels.end() ? next_it->second : default_branch;
- OpBranch(next_label);
- }
-
- AddLabel(jump_label);
- OpBranch(continue_label);
- AddLabel(continue_label);
- OpBranch(loop_label);
- AddLabel(merge_label);
- }
-
-private:
- friend class ASTDecompiler;
- friend class ExprDecompiler;
-
- static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount);
-
- void AllocateLabels() {
- for (const auto& pair : ir.GetBasicBlocks()) {
- const u32 address = pair.first;
- labels.emplace(address, OpLabel(fmt::format("label_0x{:x}", address)));
- }
- }
-
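- // Declares the subgroup built-ins (local invocation id and the Eq/Ge/Gt/Le/Lt ballot
- // masks) that are shared by every shader stage.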
- void DeclareCommon() {
- thread_id =
- DeclareInputBuiltIn(spv::BuiltIn::SubgroupLocalInvocationId, t_in_uint, "thread_id");
- thread_masks[0] =
- DeclareInputBuiltIn(spv::BuiltIn::SubgroupEqMask, t_in_uint4, "thread_eq_mask");
- thread_masks[1] =
- DeclareInputBuiltIn(spv::BuiltIn::SubgroupGeMask, t_in_uint4, "thread_ge_mask");
- thread_masks[2] =
- DeclareInputBuiltIn(spv::BuiltIn::SubgroupGtMask, t_in_uint4, "thread_gt_mask");
- thread_masks[3] =
- DeclareInputBuiltIn(spv::BuiltIn::SubgroupLeMask, t_in_uint4, "thread_le_mask");
- thread_masks[4] =
- DeclareInputBuiltIn(spv::BuiltIn::SubgroupLtMask, t_in_uint4, "thread_lt_mask");
- }
-
- void DeclareVertex() {
- if (stage != ShaderType::Vertex) {
- return;
- }
- Id out_vertex_struct;
- std::tie(out_vertex_struct, out_indices) = DeclareVertexStruct();
- const Id vertex_ptr = TypePointer(spv::StorageClass::Output, out_vertex_struct);
- out_vertex = OpVariable(vertex_ptr, spv::StorageClass::Output);
- interfaces.push_back(AddGlobalVariable(Name(out_vertex, "out_vertex")));
-
- // Declare input attributes
- vertex_index = DeclareInputBuiltIn(spv::BuiltIn::VertexIndex, t_in_int, "vertex_index");
- instance_index =
- DeclareInputBuiltIn(spv::BuiltIn::InstanceIndex, t_in_int, "instance_index");
- base_vertex = DeclareInputBuiltIn(spv::BuiltIn::BaseVertex, t_in_int, "base_vertex");
- base_instance = DeclareInputBuiltIn(spv::BuiltIn::BaseInstance, t_in_int, "base_instance");
- }
-
- void DeclareTessControl() {
- if (stage != ShaderType::TesselationControl) {
- return;
- }
- DeclareInputVertexArray(NumInputPatches);
- DeclareOutputVertexArray(header.common2.threads_per_input_primitive);
-
- tess_level_outer = DeclareBuiltIn(
- spv::BuiltIn::TessLevelOuter, spv::StorageClass::Output,
- TypePointer(spv::StorageClass::Output, TypeArray(t_float, Constant(t_uint, 4U))),
- "tess_level_outer");
- Decorate(tess_level_outer, spv::Decoration::Patch);
-
- tess_level_inner = DeclareBuiltIn(
- spv::BuiltIn::TessLevelInner, spv::StorageClass::Output,
- TypePointer(spv::StorageClass::Output, TypeArray(t_float, Constant(t_uint, 2U))),
- "tess_level_inner");
- Decorate(tess_level_inner, spv::Decoration::Patch);
-
- invocation_id = DeclareInputBuiltIn(spv::BuiltIn::InvocationId, t_in_int, "invocation_id");
- }
-
- void DeclareTessEval() {
- if (stage != ShaderType::TesselationEval) {
- return;
- }
- DeclareInputVertexArray(NumInputPatches);
- DeclareOutputVertex();
-
- tess_coord = DeclareInputBuiltIn(spv::BuiltIn::TessCoord, t_in_float3, "tess_coord");
- }
-
- void DeclareGeometry() {
- if (stage != ShaderType::Geometry) {
- return;
- }
- const auto& info = registry.GetGraphicsInfo();
- const u32 num_input = GetNumPrimitiveTopologyVertices(info.primitive_topology);
- DeclareInputVertexArray(num_input);
- DeclareOutputVertex();
- }
-
- void DeclareFragment() {
- if (stage != ShaderType::Fragment) {
- return;
- }
-
- for (u32 rt = 0; rt < static_cast<u32>(std::size(frag_colors)); ++rt) {
- if (!IsRenderTargetEnabled(rt)) {
- continue;
- }
- const Id id = AddGlobalVariable(OpVariable(t_out_float4, spv::StorageClass::Output));
- Name(id, fmt::format("frag_color{}", rt));
- Decorate(id, spv::Decoration::Location, rt);
-
- frag_colors[rt] = id;
- interfaces.push_back(id);
- }
-
- if (header.ps.omap.depth) {
- frag_depth = AddGlobalVariable(OpVariable(t_out_float, spv::StorageClass::Output));
- Name(frag_depth, "frag_depth");
- Decorate(frag_depth, spv::Decoration::BuiltIn,
- static_cast<u32>(spv::BuiltIn::FragDepth));
-
- interfaces.push_back(frag_depth);
- }
-
- frag_coord = DeclareInputBuiltIn(spv::BuiltIn::FragCoord, t_in_float4, "frag_coord");
- front_facing = DeclareInputBuiltIn(spv::BuiltIn::FrontFacing, t_in_bool, "front_facing");
- point_coord = DeclareInputBuiltIn(spv::BuiltIn::PointCoord, t_in_float2, "point_coord");
- }
-
- void DeclareCompute() {
- if (stage != ShaderType::Compute) {
- return;
- }
-
- workgroup_id = DeclareInputBuiltIn(spv::BuiltIn::WorkgroupId, t_in_uint3, "workgroup_id");
- local_invocation_id =
- DeclareInputBuiltIn(spv::BuiltIn::LocalInvocationId, t_in_uint3, "local_invocation_id");
- }
-
- void DeclareRegisters() {
- for (const u32 gpr : ir.GetRegisters()) {
- const Id id = OpVariable(t_prv_float, spv::StorageClass::Private, v_float_zero);
- Name(id, fmt::format("gpr_{}", gpr));
- registers.emplace(gpr, AddGlobalVariable(id));
- }
- }
-
- void DeclareCustomVariables() {
- const u32 num_custom_variables = ir.GetNumCustomVariables();
- for (u32 i = 0; i < num_custom_variables; ++i) {
- const Id id = OpVariable(t_prv_float, spv::StorageClass::Private, v_float_zero);
- Name(id, fmt::format("custom_var_{}", i));
- custom_variables.emplace(i, AddGlobalVariable(id));
- }
- }
-
- void DeclarePredicates() {
- for (const auto pred : ir.GetPredicates()) {
- const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
- Name(id, fmt::format("pred_{}", static_cast<u32>(pred)));
- predicates.emplace(pred, AddGlobalVariable(id));
- }
- }
-
- void DeclareFlowVariables() {
- for (u32 i = 0; i < ir.GetASTNumVariables(); i++) {
- const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
- Name(id, fmt::format("flow_var_{}", static_cast<u32>(i)));
- flow_variables.emplace(i, AddGlobalVariable(id));
- }
- }
-
- void DeclareLocalMemory() {
- // TODO(Rodrigo): Unstub kernel local memory size and pass it from a register at
- // specialization time.
- const u64 lmem_size = stage == ShaderType::Compute ? 0x400 : header.GetLocalMemorySize();
- if (lmem_size == 0) {
- return;
- }
- const auto element_count = static_cast<u32>(Common::AlignUp(lmem_size, 4) / 4);
- const Id type_array = TypeArray(t_float, Constant(t_uint, element_count));
- const Id type_pointer = TypePointer(spv::StorageClass::Private, type_array);
- Name(type_pointer, "LocalMemory");
-
- local_memory =
- OpVariable(type_pointer, spv::StorageClass::Private, ConstantNull(type_array));
- AddGlobalVariable(Name(local_memory, "local_memory"));
- }
-
- void DeclareSharedMemory() {
- if (stage != ShaderType::Compute) {
- return;
- }
- t_smem_uint = TypePointer(spv::StorageClass::Workgroup, t_uint);
-
- u32 smem_size = specialization.shared_memory_size * 4;
- if (smem_size == 0) {
- // Avoid declaring an empty array.
- return;
- }
- const u32 limit = device.GetMaxComputeSharedMemorySize();
- if (smem_size > limit) {
- LOG_ERROR(Render_Vulkan, "Shared memory size {} is clamped to host's limit {}",
- smem_size, limit);
- smem_size = limit;
- }
-
- const Id type_array = TypeArray(t_uint, Constant(t_uint, smem_size / 4));
- const Id type_pointer = TypePointer(spv::StorageClass::Workgroup, type_array);
- Name(type_pointer, "SharedMemory");
-
- shared_memory = OpVariable(type_pointer, spv::StorageClass::Workgroup);
- AddGlobalVariable(Name(shared_memory, "shared_memory"));
- }
-
- void DeclareInternalFlags() {
- static constexpr std::array names{"zero", "sign", "carry", "overflow"};
-
- for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) {
- const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
- internal_flags[flag] = AddGlobalVariable(Name(id, names[flag]));
- }
- }
-
- void DeclareInputVertexArray(u32 length) {
- constexpr auto storage = spv::StorageClass::Input;
- std::tie(in_indices, in_vertex) = DeclareVertexArray(storage, "in_indices", length);
- }
-
- void DeclareOutputVertexArray(u32 length) {
- constexpr auto storage = spv::StorageClass::Output;
- std::tie(out_indices, out_vertex) = DeclareVertexArray(storage, "out_indices", length);
- }
-
- std::tuple<VertexIndices, Id> DeclareVertexArray(spv::StorageClass storage_class,
- std::string name, u32 length) {
- const auto [struct_id, indices] = DeclareVertexStruct();
- const Id vertex_array = TypeArray(struct_id, Constant(t_uint, length));
- const Id vertex_ptr = TypePointer(storage_class, vertex_array);
- const Id vertex = OpVariable(vertex_ptr, storage_class);
- AddGlobalVariable(Name(vertex, std::move(name)));
- interfaces.push_back(vertex);
- return {indices, vertex};
- }
-
- void DeclareOutputVertex() {
- Id out_vertex_struct;
- std::tie(out_vertex_struct, out_indices) = DeclareVertexStruct();
- const Id out_vertex_ptr = TypePointer(spv::StorageClass::Output, out_vertex_struct);
- out_vertex = OpVariable(out_vertex_ptr, spv::StorageClass::Output);
- interfaces.push_back(AddGlobalVariable(Name(out_vertex, "out_vertex")));
- }
-
- void DeclareInputAttributes() {
- for (const auto index : ir.GetInputAttributes()) {
- if (!IsGenericAttribute(index)) {
- continue;
- }
- const u32 location = GetGenericAttributeLocation(index);
- if (!IsAttributeEnabled(location)) {
- continue;
- }
- const auto type_descriptor = GetAttributeType(location);
- Id type;
- if (IsInputAttributeArray()) {
- type = GetTypeVectorDefinitionLut(type_descriptor.type).at(3);
- type = TypeArray(type, Constant(t_uint, GetNumInputVertices()));
- type = TypePointer(spv::StorageClass::Input, type);
- } else {
- type = type_descriptor.vector;
- }
- const Id id = OpVariable(type, spv::StorageClass::Input);
- AddGlobalVariable(Name(id, fmt::format("in_attr{}", location)));
- input_attributes.emplace(index, id);
- interfaces.push_back(id);
-
- Decorate(id, spv::Decoration::Location, location);
-
- if (stage != ShaderType::Fragment) {
- continue;
- }
- switch (header.ps.GetPixelImap(location)) {
- case PixelImap::Constant:
- Decorate(id, spv::Decoration::Flat);
- break;
- case PixelImap::Perspective:
- // Default
- break;
- case PixelImap::ScreenLinear:
- Decorate(id, spv::Decoration::NoPerspective);
- break;
- default:
- UNREACHABLE_MSG("Unused attribute being fetched");
- }
- }
- }
-
- void DeclareOutputAttributes() {
- if (stage == ShaderType::Compute || stage == ShaderType::Fragment) {
- return;
- }
-
- UNIMPLEMENTED_IF(registry.GetGraphicsInfo().tfb_enabled && stage != ShaderType::Vertex);
- for (const auto index : ir.GetOutputAttributes()) {
- if (!IsGenericAttribute(index)) {
- continue;
- }
- DeclareOutputAttribute(index);
- }
- }
-
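- // Declares a generic output attribute, splitting it into separately decorated component
- // groups when transform feedback binds individual components to buffers.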
- void DeclareOutputAttribute(Attribute::Index index) {
- static constexpr std::string_view swizzle = "xyzw";
-
- const u32 location = GetGenericAttributeLocation(index);
- u8 element = 0;
- while (element < 4) {
- const std::size_t remainder = 4 - element;
-
- std::size_t num_components = remainder;
- const std::optional tfb = GetTransformFeedbackInfo(index, element);
- if (tfb) {
- num_components = tfb->components;
- }
-
- Id type = GetTypeVectorDefinitionLut(Type::Float).at(num_components - 1);
- Id varying_default = v_varying_default;
- if (IsOutputAttributeArray()) {
- const u32 num = GetNumOutputVertices();
- type = TypeArray(type, Constant(t_uint, num));
- if (device.GetDriverID() != VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR) {
- // Intel's proprietary driver fails to set up defaults for arrayed output
- // attributes.
- varying_default = ConstantComposite(type, std::vector(num, varying_default));
- }
- }
- type = TypePointer(spv::StorageClass::Output, type);
-
- std::string name = fmt::format("out_attr{}", location);
- if (num_components < 4 || element > 0) {
- name = fmt::format("{}_{}", name, swizzle.substr(element, num_components));
- }
-
- const Id id = OpVariable(type, spv::StorageClass::Output, varying_default);
- Name(AddGlobalVariable(id), name);
-
- GenericVaryingDescription description;
- description.id = id;
- description.first_element = element;
- description.is_scalar = num_components == 1;
- for (u32 i = 0; i < num_components; ++i) {
- const u8 offset = static_cast<u8>(static_cast<u32>(index) * 4 + element + i);
- output_attributes.emplace(offset, description);
- }
- interfaces.push_back(id);
-
- Decorate(id, spv::Decoration::Location, location);
- if (element > 0) {
- Decorate(id, spv::Decoration::Component, static_cast<u32>(element));
- }
- if (tfb && device.IsExtTransformFeedbackSupported()) {
- Decorate(id, spv::Decoration::XfbBuffer, static_cast<u32>(tfb->buffer));
- Decorate(id, spv::Decoration::XfbStride, static_cast<u32>(tfb->stride));
- Decorate(id, spv::Decoration::Offset, static_cast<u32>(tfb->offset));
- }
-
- element = static_cast<u8>(static_cast<std::size_t>(element) + num_components);
- }
- }
-
- std::optional<VaryingTFB> GetTransformFeedbackInfo(Attribute::Index index, u8 element = 0) {
- const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
- const auto it = transform_feedback.find(location);
- if (it == transform_feedback.end()) {
- return {};
- }
- return it->second;
- }
-
- u32 DeclareConstantBuffers(u32 binding) {
- for (const auto& [index, size] : ir.GetConstantBuffers()) {
- const Id type = device.IsKhrUniformBufferStandardLayoutSupported() ? t_cbuf_scalar_ubo
- : t_cbuf_std140_ubo;
- const Id id = OpVariable(type, spv::StorageClass::Uniform);
- AddGlobalVariable(Name(id, fmt::format("cbuf_{}", index)));
-
- Decorate(id, spv::Decoration::Binding, binding++);
- Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
- constant_buffers.emplace(index, id);
- }
- return binding;
- }
-
- u32 DeclareGlobalBuffers(u32 binding) {
- for (const auto& [base, usage] : ir.GetGlobalMemory()) {
- const Id id = OpVariable(t_gmem_ssbo, spv::StorageClass::StorageBuffer);
- AddGlobalVariable(
- Name(id, fmt::format("gmem_{}_{}", base.cbuf_index, base.cbuf_offset)));
-
- Decorate(id, spv::Decoration::Binding, binding++);
- Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
- global_buffers.emplace(base, id);
- }
- return binding;
- }
-
- u32 DeclareUniformTexels(u32 binding) {
- for (const auto& sampler : ir.GetSamplers()) {
- if (!sampler.is_buffer) {
- continue;
- }
- ASSERT(!sampler.is_array);
- ASSERT(!sampler.is_shadow);
-
- constexpr auto dim = spv::Dim::Buffer;
- constexpr int depth = 0;
- constexpr int arrayed = 0;
- constexpr bool ms = false;
- constexpr int sampled = 1;
- constexpr auto format = spv::ImageFormat::Unknown;
- const Id image_type = TypeImage(t_float, dim, depth, arrayed, ms, sampled, format);
- const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type);
- const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant);
- AddGlobalVariable(Name(id, fmt::format("sampler_{}", sampler.index)));
- Decorate(id, spv::Decoration::Binding, binding++);
- Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
-
- uniform_texels.emplace(sampler.index, TexelBuffer{image_type, id});
- }
- return binding;
- }
-
- u32 DeclareSamplers(u32 binding) {
- for (const auto& sampler : ir.GetSamplers()) {
- if (sampler.is_buffer) {
- continue;
- }
- const auto dim = GetSamplerDim(sampler);
- const int depth = sampler.is_shadow ? 1 : 0;
- const int arrayed = sampler.is_array ? 1 : 0;
- constexpr bool ms = false;
- constexpr int sampled = 1;
- constexpr auto format = spv::ImageFormat::Unknown;
- const Id image_type = TypeImage(t_float, dim, depth, arrayed, ms, sampled, format);
- const Id sampler_type = TypeSampledImage(image_type);
- const Id sampler_pointer_type =
- TypePointer(spv::StorageClass::UniformConstant, sampler_type);
- const Id type = sampler.is_indexed
- ? TypeArray(sampler_type, Constant(t_uint, sampler.size))
- : sampler_type;
- const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, type);
- const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant);
- AddGlobalVariable(Name(id, fmt::format("sampler_{}", sampler.index)));
- Decorate(id, spv::Decoration::Binding, binding++);
- Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
-
- sampled_images.emplace(
- sampler.index, SampledImage{image_type, sampler_type, sampler_pointer_type, id});
- }
- return binding;
- }
-
- u32 DeclareStorageTexels(u32 binding) {
- for (const auto& image : ir.GetImages()) {
- if (image.type != Tegra::Shader::ImageType::TextureBuffer) {
- continue;
- }
- DeclareImage(image, binding);
- }
- return binding;
- }
-
- u32 DeclareImages(u32 binding) {
- for (const auto& image : ir.GetImages()) {
- if (image.type == Tegra::Shader::ImageType::TextureBuffer) {
- continue;
- }
- DeclareImage(image, binding);
- }
- return binding;
- }
-
- void DeclareImage(const ImageEntry& image, u32& binding) {
- const auto [dim, arrayed] = GetImageDim(image);
- constexpr int depth = 0;
- constexpr bool ms = false;
- constexpr int sampled = 2; // This won't be accessed with a sampler
- const auto format = image.is_atomic ? spv::ImageFormat::R32ui : spv::ImageFormat::Unknown;
- const Id image_type = TypeImage(t_uint, dim, depth, arrayed, ms, sampled, format, {});
- const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type);
- const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant);
- AddGlobalVariable(Name(id, fmt::format("image_{}", image.index)));
-
- Decorate(id, spv::Decoration::Binding, binding++);
- Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
- if (image.is_read && !image.is_written) {
- Decorate(id, spv::Decoration::NonWritable);
- } else if (image.is_written && !image.is_read) {
- Decorate(id, spv::Decoration::NonReadable);
- }
-
- images.emplace(image.index, StorageImage{image_type, id});
- }
-
- bool IsRenderTargetEnabled(u32 rt) const {
- for (u32 component = 0; component < 4; ++component) {
- if (header.ps.IsColorComponentOutputEnabled(rt, component)) {
- return true;
- }
- }
- return false;
- }
-
- bool IsInputAttributeArray() const {
- return stage == ShaderType::TesselationControl || stage == ShaderType::TesselationEval ||
- stage == ShaderType::Geometry;
- }
-
- bool IsOutputAttributeArray() const {
- return stage == ShaderType::TesselationControl;
- }
-
- bool IsAttributeEnabled(u32 location) const {
- return stage != ShaderType::Vertex || specialization.enabled_attributes[location];
- }
-
- u32 GetNumInputVertices() const {
- switch (stage) {
- case ShaderType::Geometry:
- return GetNumPrimitiveTopologyVertices(registry.GetGraphicsInfo().primitive_topology);
- case ShaderType::TesselationControl:
- case ShaderType::TesselationEval:
- return NumInputPatches;
- default:
- UNREACHABLE();
- return 1;
- }
- }
-
- u32 GetNumOutputVertices() const {
- switch (stage) {
- case ShaderType::TesselationControl:
- return header.common2.threads_per_input_primitive;
- default:
- UNREACHABLE();
- return 1;
- }
- }
-
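- // Builds the gl_PerVertex-style block (Position plus optional Layer, ViewportIndex,
- // PointSize and ClipDistance members) used for both vertex inputs and outputs, and
- // returns the struct id together with the member indices used to address it.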
- std::tuple<Id, VertexIndices> DeclareVertexStruct() {
- struct BuiltIn {
- Id type;
- spv::BuiltIn builtin;
- const char* name;
- };
- std::vector<BuiltIn> members;
- members.reserve(4);
-
- const auto AddBuiltIn = [&](Id type, spv::BuiltIn builtin, const char* name) {
- const auto index = static_cast<u32>(members.size());
- members.push_back(BuiltIn{type, builtin, name});
- return index;
- };
-
- VertexIndices indices;
- indices.position = AddBuiltIn(t_float4, spv::BuiltIn::Position, "position");
-
- if (ir.UsesLayer()) {
- if (stage != ShaderType::Vertex || device.IsExtShaderViewportIndexLayerSupported()) {
- indices.layer = AddBuiltIn(t_int, spv::BuiltIn::Layer, "layer");
- } else {
- LOG_ERROR(
- Render_Vulkan,
- "Shader requires Layer but it's not supported on this stage with this device.");
- }
- }
-
- if (ir.UsesViewportIndex()) {
- if (stage != ShaderType::Vertex || device.IsExtShaderViewportIndexLayerSupported()) {
- indices.viewport = AddBuiltIn(t_int, spv::BuiltIn::ViewportIndex, "viewport_index");
- } else {
- LOG_ERROR(Render_Vulkan, "Shader requires ViewportIndex but it's not supported on "
- "this stage with this device.");
- }
- }
-
- if (ir.UsesPointSize() || specialization.point_size) {
- indices.point_size = AddBuiltIn(t_float, spv::BuiltIn::PointSize, "point_size");
- }
-
- const auto& ir_output_attributes = ir.GetOutputAttributes();
- const bool declare_clip_distances = std::any_of(
- ir_output_attributes.begin(), ir_output_attributes.end(), [](const auto& index) {
- return index == Attribute::Index::ClipDistances0123 ||
- index == Attribute::Index::ClipDistances4567;
- });
- if (declare_clip_distances) {
- indices.clip_distances = AddBuiltIn(TypeArray(t_float, Constant(t_uint, 8)),
- spv::BuiltIn::ClipDistance, "clip_distances");
- }
-
- std::vector<Id> member_types;
- member_types.reserve(members.size());
- for (std::size_t i = 0; i < members.size(); ++i) {
- member_types.push_back(members[i].type);
- }
- const Id per_vertex_struct = Name(TypeStruct(member_types), "PerVertex");
- Decorate(per_vertex_struct, spv::Decoration::Block);
-
- for (std::size_t index = 0; index < members.size(); ++index) {
- const auto& member = members[index];
- MemberName(per_vertex_struct, static_cast<u32>(index), member.name);
- MemberDecorate(per_vertex_struct, static_cast<u32>(index), spv::Decoration::BuiltIn,
- static_cast<u32>(member.builtin));
- }
-
- return {per_vertex_struct, indices};
- }
-
- void VisitBasicBlock(const NodeBlock& bb) {
- for (const auto& node : bb) {
- Visit(node);
- }
- }
-
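- // Translates an IR node into a SPIR-V expression, dispatching on the node's alternative
- // (operation, register, immediate, predicate, attribute, constant buffer, memory or
- // conditional).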
- Expression Visit(const Node& node) {
- if (const auto operation = std::get_if<OperationNode>(&*node)) {
- if (const auto amend_index = operation->GetAmendIndex()) {
- [[maybe_unused]] const Type type = Visit(ir.GetAmendNode(*amend_index)).type;
- ASSERT(type == Type::Void);
- }
- const auto operation_index = static_cast<std::size_t>(operation->GetCode());
- const auto decompiler = operation_decompilers[operation_index];
- if (decompiler == nullptr) {
- UNREACHABLE_MSG("Operation decompiler {} not defined", operation_index);
- }
- return (this->*decompiler)(*operation);
- }
-
- if (const auto gpr = std::get_if<GprNode>(&*node)) {
- const u32 index = gpr->GetIndex();
- if (index == Register::ZeroIndex) {
- return {v_float_zero, Type::Float};
- }
- return {OpLoad(t_float, registers.at(index)), Type::Float};
- }
-
- if (const auto cv = std::get_if<CustomVarNode>(&*node)) {
- const u32 index = cv->GetIndex();
- return {OpLoad(t_float, custom_variables.at(index)), Type::Float};
- }
-
- if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
- return {Constant(t_uint, immediate->GetValue()), Type::Uint};
- }
-
- if (const auto predicate = std::get_if<PredicateNode>(&*node)) {
- const auto value = [&]() -> Id {
- switch (const auto index = predicate->GetIndex(); index) {
- case Tegra::Shader::Pred::UnusedIndex:
- return v_true;
- case Tegra::Shader::Pred::NeverExecute:
- return v_false;
- default:
- return OpLoad(t_bool, predicates.at(index));
- }
- }();
- if (predicate->IsNegated()) {
- return {OpLogicalNot(t_bool, value), Type::Bool};
- }
- return {value, Type::Bool};
- }
-
- if (const auto abuf = std::get_if<AbufNode>(&*node)) {
- const auto attribute = abuf->GetIndex();
- const u32 element = abuf->GetElement();
- const auto& buffer = abuf->GetBuffer();
-
- const auto ArrayPass = [&](Id pointer_type, Id composite, std::vector<u32> indices) {
- std::vector<Id> members;
- members.reserve(std::size(indices) + 1);
-
- if (buffer && IsInputAttributeArray()) {
- members.push_back(AsUint(Visit(buffer)));
- }
- for (const u32 index : indices) {
- members.push_back(Constant(t_uint, index));
- }
- return OpAccessChain(pointer_type, composite, members);
- };
-
- switch (attribute) {
- case Attribute::Index::Position: {
- if (stage == ShaderType::Fragment) {
- return {OpLoad(t_float, AccessElement(t_in_float, frag_coord, element)),
- Type::Float};
- }
- const std::vector elements = {in_indices.position.value(), element};
- return {OpLoad(t_float, ArrayPass(t_in_float, in_vertex, elements)), Type::Float};
- }
- case Attribute::Index::PointCoord: {
- switch (element) {
- case 0:
- case 1:
- return {OpCompositeExtract(t_float, OpLoad(t_float2, point_coord), element),
- Type::Float};
- }
- UNIMPLEMENTED_MSG("Unimplemented point coord element={}", element);
- return {v_float_zero, Type::Float};
- }
- case Attribute::Index::TessCoordInstanceIDVertexID:
- // TODO(Subv): Find out what the values are for the first two elements when inside a
- // vertex shader, and what's the value of the fourth element when inside a Tess Eval
- // shader.
- switch (element) {
- case 0:
- case 1:
- return {OpLoad(t_float, AccessElement(t_in_float, tess_coord, element)),
- Type::Float};
- case 2:
- return {
- OpISub(t_int, OpLoad(t_int, instance_index), OpLoad(t_int, base_instance)),
- Type::Int};
- case 3:
- return {OpISub(t_int, OpLoad(t_int, vertex_index), OpLoad(t_int, base_vertex)),
- Type::Int};
- }
- UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element);
- return {Constant(t_uint, 0U), Type::Uint};
- case Attribute::Index::FrontFacing:
- // TODO(Subv): Find out what the values are for the other elements.
- ASSERT(stage == ShaderType::Fragment);
- if (element == 3) {
- const Id is_front_facing = OpLoad(t_bool, front_facing);
- const Id true_value = Constant(t_int, static_cast<s32>(-1));
- const Id false_value = Constant(t_int, 0);
- return {OpSelect(t_int, is_front_facing, true_value, false_value), Type::Int};
- }
- UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element);
- return {v_float_zero, Type::Float};
- default:
- if (!IsGenericAttribute(attribute)) {
- break;
- }
- const u32 location = GetGenericAttributeLocation(attribute);
- if (!IsAttributeEnabled(location)) {
- // Disabled attributes (also known as constant attributes) always return zero.
- return {v_float_zero, Type::Float};
- }
- const auto type_descriptor = GetAttributeType(location);
- const Type type = type_descriptor.type;
- const Id attribute_id = input_attributes.at(attribute);
- const std::vector elements = {element};
- const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements);
- return {OpLoad(GetTypeDefinition(type), pointer), type};
- }
- UNIMPLEMENTED_MSG("Unhandled input attribute: {}", attribute);
- return {v_float_zero, Type::Float};
- }
-
- if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
- const Node& offset = cbuf->GetOffset();
- const Id buffer_id = constant_buffers.at(cbuf->GetIndex());
-
- Id pointer{};
- if (device.IsKhrUniformBufferStandardLayoutSupported()) {
- const Id buffer_offset =
- OpShiftRightLogical(t_uint, AsUint(Visit(offset)), Constant(t_uint, 2U));
- pointer =
- OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0U), buffer_offset);
- } else {
- Id buffer_index{};
- Id buffer_element{};
- if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
- // Direct access
- const u32 offset_imm = immediate->GetValue();
- ASSERT(offset_imm % 4 == 0);
- buffer_index = Constant(t_uint, offset_imm / 16);
- buffer_element = Constant(t_uint, (offset_imm / 4) % 4);
- } else if (std::holds_alternative<OperationNode>(*offset)) {
- // Indirect access
- const Id offset_id = AsUint(Visit(offset));
- const Id unsafe_offset = OpUDiv(t_uint, offset_id, Constant(t_uint, 4));
- const Id final_offset =
- OpUMod(t_uint, unsafe_offset, Constant(t_uint, MaxConstBufferElements - 1));
- buffer_index = OpUDiv(t_uint, final_offset, Constant(t_uint, 4));
- buffer_element = OpUMod(t_uint, final_offset, Constant(t_uint, 4));
- } else {
- UNREACHABLE_MSG("Unmanaged offset node type");
- }
- pointer = OpAccessChain(t_cbuf_float, buffer_id, v_uint_zero, buffer_index,
- buffer_element);
- }
- return {OpLoad(t_float, pointer), Type::Float};
- }
-
- if (const auto gmem = std::get_if<GmemNode>(&*node)) {
- return {OpLoad(t_uint, GetGlobalMemoryPointer(*gmem)), Type::Uint};
- }
-
- if (const auto lmem = std::get_if<LmemNode>(&*node)) {
- Id address = AsUint(Visit(lmem->GetAddress()));
- address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U));
- const Id pointer = OpAccessChain(t_prv_float, local_memory, address);
- return {OpLoad(t_float, pointer), Type::Float};
- }
-
- if (const auto smem = std::get_if<SmemNode>(&*node)) {
- return {OpLoad(t_uint, GetSharedMemoryPointer(*smem)), Type::Uint};
- }
-
- if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
- const Id flag = internal_flags.at(static_cast<std::size_t>(internal_flag->GetFlag()));
- return {OpLoad(t_bool, flag), Type::Bool};
- }
-
- if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
- if (const auto amend_index = conditional->GetAmendIndex()) {
- [[maybe_unused]] const Type type = Visit(ir.GetAmendNode(*amend_index)).type;
- ASSERT(type == Type::Void);
- }
- // It's invalid to call a conditional on nested nodes; use an operation instead
- const Id true_label = OpLabel();
- const Id skip_label = OpLabel();
- const Id condition = AsBool(Visit(conditional->GetCondition()));
- OpSelectionMerge(skip_label, spv::SelectionControlMask::MaskNone);
- OpBranchConditional(condition, true_label, skip_label);
- AddLabel(true_label);
-
- conditional_branch_set = true;
- inside_branch = false;
- VisitBasicBlock(conditional->GetCode());
- conditional_branch_set = false;
- if (!inside_branch) {
- OpBranch(skip_label);
- } else {
- inside_branch = false;
- }
- AddLabel(skip_label);
- return {};
- }
-
- if (const auto comment = std::get_if<CommentNode>(&*node)) {
- if (device.HasDebuggingToolAttached()) {
- // We should insert comments with OpString instead of using named variables
- Name(OpUndef(t_int), comment->GetText());
- }
- return {};
- }
-
- UNREACHABLE();
- return {};
- }
-
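- // Helpers that emit an operation taking one to four operands: each operand is converted
- // to the requested type and precise results are decorated with NoContraction.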
- template <Id (Module::*func)(Id, Id), Type result_type, Type type_a = result_type>
- Expression Unary(Operation operation) {
- const Id type_def = GetTypeDefinition(result_type);
- const Id op_a = As(Visit(operation[0]), type_a);
-
- const Id value = (this->*func)(type_def, op_a);
- if (IsPrecise(operation)) {
- Decorate(value, spv::Decoration::NoContraction);
- }
- return {value, result_type};
- }
-
- template <Id (Module::*func)(Id, Id, Id), Type result_type, Type type_a = result_type,
- Type type_b = type_a>
- Expression Binary(Operation operation) {
- const Id type_def = GetTypeDefinition(result_type);
- const Id op_a = As(Visit(operation[0]), type_a);
- const Id op_b = As(Visit(operation[1]), type_b);
-
- const Id value = (this->*func)(type_def, op_a, op_b);
- if (IsPrecise(operation)) {
- Decorate(value, spv::Decoration::NoContraction);
- }
- return {value, result_type};
- }
-
- template <Id (Module::*func)(Id, Id, Id, Id), Type result_type, Type type_a = result_type,
- Type type_b = type_a, Type type_c = type_b>
- Expression Ternary(Operation operation) {
- const Id type_def = GetTypeDefinition(result_type);
- const Id op_a = As(Visit(operation[0]), type_a);
- const Id op_b = As(Visit(operation[1]), type_b);
- const Id op_c = As(Visit(operation[2]), type_c);
-
- const Id value = (this->*func)(type_def, op_a, op_b, op_c);
- if (IsPrecise(operation)) {
- Decorate(value, spv::Decoration::NoContraction);
- }
- return {value, result_type};
- }
-
- template <Id (Module::*func)(Id, Id, Id, Id, Id), Type result_type, Type type_a = result_type,
- Type type_b = type_a, Type type_c = type_b, Type type_d = type_c>
- Expression Quaternary(Operation operation) {
- const Id type_def = GetTypeDefinition(result_type);
- const Id op_a = As(Visit(operation[0]), type_a);
- const Id op_b = As(Visit(operation[1]), type_b);
- const Id op_c = As(Visit(operation[2]), type_c);
- const Id op_d = As(Visit(operation[3]), type_d);
-
- const Id value = (this->*func)(type_def, op_a, op_b, op_c, op_d);
- if (IsPrecise(operation)) {
- Decorate(value, spv::Decoration::NoContraction);
- }
- return {value, result_type};
- }
-
- Expression Assign(Operation operation) {
- const Node& dest = operation[0];
- const Node& src = operation[1];
-
- Expression target{};
- if (const auto gpr = std::get_if<GprNode>(&*dest)) {
- if (gpr->GetIndex() == Register::ZeroIndex) {
- // Writing to Register::ZeroIndex is a no-op, but we still have to visit its source
- // because it might have side effects.
- Visit(src);
- return {};
- }
- target = {registers.at(gpr->GetIndex()), Type::Float};
-
- } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) {
- const auto& buffer = abuf->GetBuffer();
- const auto ArrayPass = [&](Id pointer_type, Id composite, std::vector<u32> indices) {
- std::vector<Id> members;
- members.reserve(std::size(indices) + 1);
-
- if (buffer && IsOutputAttributeArray()) {
- members.push_back(AsUint(Visit(buffer)));
- }
- for (const u32 index : indices) {
- members.push_back(Constant(t_uint, index));
- }
- return OpAccessChain(pointer_type, composite, members);
- };
-
- target = [&]() -> Expression {
- const u32 element = abuf->GetElement();
- switch (const auto attribute = abuf->GetIndex(); attribute) {
- case Attribute::Index::Position: {
- const u32 index = out_indices.position.value();
- return {ArrayPass(t_out_float, out_vertex, {index, element}), Type::Float};
- }
- case Attribute::Index::LayerViewportPointSize:
- switch (element) {
- case 1: {
- if (!out_indices.layer) {
- return {};
- }
- const u32 index = out_indices.layer.value();
- return {AccessElement(t_out_int, out_vertex, index), Type::Int};
- }
- case 2: {
- if (!out_indices.viewport) {
- return {};
- }
- const u32 index = out_indices.viewport.value();
- return {AccessElement(t_out_int, out_vertex, index), Type::Int};
- }
- case 3: {
- const auto index = out_indices.point_size.value();
- return {AccessElement(t_out_float, out_vertex, index), Type::Float};
- }
- default:
- UNIMPLEMENTED_MSG("LayerViewportPoint element={}", abuf->GetElement());
- return {};
- }
- case Attribute::Index::ClipDistances0123: {
- const u32 index = out_indices.clip_distances.value();
- return {AccessElement(t_out_float, out_vertex, index, element), Type::Float};
- }
- case Attribute::Index::ClipDistances4567: {
- const u32 index = out_indices.clip_distances.value();
- return {AccessElement(t_out_float, out_vertex, index, element + 4),
- Type::Float};
- }
- default:
- if (IsGenericAttribute(attribute)) {
- const u8 offset = static_cast<u8>(static_cast<u8>(attribute) * 4 + element);
- const GenericVaryingDescription description = output_attributes.at(offset);
- const Id composite = description.id;
- std::vector<u32> indices;
- if (!description.is_scalar) {
- indices.push_back(element - description.first_element);
- }
- return {ArrayPass(t_out_float, composite, indices), Type::Float};
- }
- UNIMPLEMENTED_MSG("Unhandled output attribute: {}",
- static_cast<u32>(attribute));
- return {};
- }
- }();
-
- } else if (const auto patch = std::get_if<PatchNode>(&*dest)) {
- target = [&]() -> Expression {
- const u32 offset = patch->GetOffset();
- switch (offset) {
- case 0:
- case 1:
- case 2:
- case 3:
- return {AccessElement(t_out_float, tess_level_outer, offset % 4), Type::Float};
- case 4:
- case 5:
- return {AccessElement(t_out_float, tess_level_inner, offset % 4), Type::Float};
- }
- UNIMPLEMENTED_MSG("Unhandled patch output offset: {}", offset);
- return {};
- }();
-
- } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
- Id address = AsUint(Visit(lmem->GetAddress()));
- address = OpUDiv(t_uint, address, Constant(t_uint, 4));
- target = {OpAccessChain(t_prv_float, local_memory, address), Type::Float};
-
- } else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
- target = {GetSharedMemoryPointer(*smem), Type::Uint};
-
- } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
- target = {GetGlobalMemoryPointer(*gmem), Type::Uint};
-
- } else if (const auto cv = std::get_if<CustomVarNode>(&*dest)) {
- target = {custom_variables.at(cv->GetIndex()), Type::Float};
-
- } else {
- UNIMPLEMENTED();
- }
-
- if (!target.id) {
- // On failure we end up with a null target.id; skip these stores.
- return {};
- }
-
- OpStore(target.id, As(Visit(src), target.type));
- return {};
- }
-
- template <u32 offset>
- Expression FCastHalf(Operation operation) {
- const Id value = AsHalfFloat(Visit(operation[0]));
- return {GetFloatFromHalfScalar(OpCompositeExtract(t_scalar_half, value, offset)),
- Type::Float};
- }
-
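- // FSWZADD: picks sign modifiers for both operands from two constant lookup tables,
- // indexed by the two mask bits selected by (thread_id & 3), then returns
- // op_a * modifier_a + op_b * modifier_b.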
- Expression FSwizzleAdd(Operation operation) {
- const Id minus = Constant(t_float, -1.0f);
- const Id plus = v_float_one;
- const Id zero = v_float_zero;
- const Id lut_a = ConstantComposite(t_float4, minus, plus, minus, zero);
- const Id lut_b = ConstantComposite(t_float4, minus, minus, plus, minus);
-
- Id mask = OpLoad(t_uint, thread_id);
- mask = OpBitwiseAnd(t_uint, mask, Constant(t_uint, 3));
- mask = OpShiftLeftLogical(t_uint, mask, Constant(t_uint, 1));
- mask = OpShiftRightLogical(t_uint, AsUint(Visit(operation[2])), mask);
- mask = OpBitwiseAnd(t_uint, mask, Constant(t_uint, 3));
-
- const Id modifier_a = OpVectorExtractDynamic(t_float, lut_a, mask);
- const Id modifier_b = OpVectorExtractDynamic(t_float, lut_b, mask);
-
- const Id op_a = OpFMul(t_float, AsFloat(Visit(operation[0])), modifier_a);
- const Id op_b = OpFMul(t_float, AsFloat(Visit(operation[1])), modifier_b);
- return {OpFAdd(t_float, op_a, op_b), Type::Float};
- }
-
- Expression HNegate(Operation operation) {
- const bool is_f16 = device.IsFloat16Supported();
- const Id minus_one = Constant(t_scalar_half, is_f16 ? 0xbc00 : 0xbf800000);
- const Id one = Constant(t_scalar_half, is_f16 ? 0x3c00 : 0x3f800000);
- const auto GetNegate = [&](std::size_t index) {
- return OpSelect(t_scalar_half, AsBool(Visit(operation[index])), minus_one, one);
- };
- const Id negation = OpCompositeConstruct(t_half, GetNegate(1), GetNegate(2));
- return {OpFMul(t_half, AsHalfFloat(Visit(operation[0])), negation), Type::HalfFloat};
- }
-
- Expression HClamp(Operation operation) {
- const auto Pack = [&](std::size_t index) {
- const Id scalar = GetHalfScalarFromFloat(AsFloat(Visit(operation[index])));
- return OpCompositeConstruct(t_half, scalar, scalar);
- };
- const Id value = AsHalfFloat(Visit(operation[0]));
- const Id min = Pack(1);
- const Id max = Pack(2);
-
- const Id clamped = OpFClamp(t_half, value, min, max);
- if (IsPrecise(operation)) {
- Decorate(clamped, spv::Decoration::NoContraction);
- }
- return {clamped, Type::HalfFloat};
- }
-
- Expression HCastFloat(Operation operation) {
- const Id value = GetHalfScalarFromFloat(AsFloat(Visit(operation[0])));
- return {OpCompositeConstruct(t_half, value, Constant(t_scalar_half, 0)), Type::HalfFloat};
- }
-
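- // Splats the requested half (or an F32 source converted to half) into both components
- // of a half2 so later code can operate on either lane uniformly; H0_H1 passes the value
- // through unchanged.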
- Expression HUnpack(Operation operation) {
- Expression operand = Visit(operation[0]);
- const auto type = std::get<Tegra::Shader::HalfType>(operation.GetMeta());
- if (type == Tegra::Shader::HalfType::H0_H1) {
- return operand;
- }
- const auto value = [&] {
- switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) {
- case Tegra::Shader::HalfType::F32:
- return GetHalfScalarFromFloat(AsFloat(operand));
- case Tegra::Shader::HalfType::H0_H0:
- return OpCompositeExtract(t_scalar_half, AsHalfFloat(operand), 0);
- case Tegra::Shader::HalfType::H1_H1:
- return OpCompositeExtract(t_scalar_half, AsHalfFloat(operand), 1);
- default:
- UNREACHABLE();
- return ConstantNull(t_half);
- }
- }();
- return {OpCompositeConstruct(t_half, value, value), Type::HalfFloat};
- }
-
- Expression HMergeF32(Operation operation) {
- const Id value = AsHalfFloat(Visit(operation[0]));
- return {GetFloatFromHalfScalar(OpCompositeExtract(t_scalar_half, value, 0)), Type::Float};
- }
-
- template <u32 offset>
- Expression HMergeHN(Operation operation) {
- const Id target = AsHalfFloat(Visit(operation[0]));
- const Id source = AsHalfFloat(Visit(operation[1]));
- const Id object = OpCompositeExtract(t_scalar_half, source, offset);
- return {OpCompositeInsert(t_half, object, target, offset), Type::HalfFloat};
- }
-
- Expression HPack2(Operation operation) {
- const Id low = GetHalfScalarFromFloat(AsFloat(Visit(operation[0])));
- const Id high = GetHalfScalarFromFloat(AsFloat(Visit(operation[1])));
- return {OpCompositeConstruct(t_half, low, high), Type::HalfFloat};
- }
-
- Expression LogicalAddCarry(Operation operation) {
- const Id op_a = AsUint(Visit(operation[0]));
- const Id op_b = AsUint(Visit(operation[1]));
-
- const Id result = OpIAddCarry(TypeStruct({t_uint, t_uint}), op_a, op_b);
- const Id carry = OpCompositeExtract(t_uint, result, 1);
- return {OpINotEqual(t_bool, carry, v_uint_zero), Type::Bool};
- }
-
- Expression LogicalAssign(Operation operation) {
- const Node& dest = operation[0];
- const Node& src = operation[1];
-
- Id target{};
- if (const auto pred = std::get_if<PredicateNode>(&*dest)) {
- ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment");
-
- const auto index = pred->GetIndex();
- switch (index) {
- case Tegra::Shader::Pred::NeverExecute:
- case Tegra::Shader::Pred::UnusedIndex:
- // Writing to these predicates is a no-op
- return {};
- }
- target = predicates.at(index);
-
- } else if (const auto flag = std::get_if<InternalFlagNode>(&*dest)) {
- target = internal_flags.at(static_cast<u32>(flag->GetFlag()));
- }
-
- OpStore(target, AsBool(Visit(src)));
- return {};
- }
-
- Expression LogicalFOrdered(Operation operation) {
- // Emulate SPIR-V's OpOrdered
- const Id op_a = AsFloat(Visit(operation[0]));
- const Id op_b = AsFloat(Visit(operation[1]));
- const Id is_num_a = OpFOrdEqual(t_bool, op_a, op_a);
- const Id is_num_b = OpFOrdEqual(t_bool, op_b, op_b);
- return {OpLogicalAnd(t_bool, is_num_a, is_num_b), Type::Bool};
- }
-
- Expression LogicalFUnordered(Operation operation) {
- // Emulate SPIR-V's OpUnordered
- const Id op_a = AsFloat(Visit(operation[0]));
- const Id op_b = AsFloat(Visit(operation[1]));
- const Id is_nan_a = OpIsNan(t_bool, op_a);
- const Id is_nan_b = OpIsNan(t_bool, op_b);
- return {OpLogicalOr(t_bool, is_nan_a, is_nan_b), Type::Bool};
- }
-
- Id GetTextureSampler(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- ASSERT(!meta.sampler.is_buffer);
-
- const auto& entry = sampled_images.at(meta.sampler.index);
- Id sampler = entry.variable;
- if (meta.sampler.is_indexed) {
- const Id index = AsInt(Visit(meta.index));
- sampler = OpAccessChain(entry.sampler_pointer_type, sampler, index);
- }
- return OpLoad(entry.sampler_type, sampler);
- }
-
- Id GetTextureImage(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- const u32 index = meta.sampler.index;
- if (meta.sampler.is_buffer) {
- const auto& entry = uniform_texels.at(index);
- return OpLoad(entry.image_type, entry.image);
- } else {
- const auto& entry = sampled_images.at(index);
- return OpImage(entry.image_type, GetTextureSampler(operation));
- }
- }
-
- Id GetImage(Operation operation) {
- const auto& meta = std::get<MetaImage>(operation.GetMeta());
- const auto entry = images.at(meta.image.index);
- return OpLoad(entry.image_type, entry.image);
- }
-
- Id AssembleVector(const std::vector<Id>& coords, Type type) {
- const Id coords_type = GetTypeVectorDefinitionLut(type).at(coords.size() - 1);
- return coords.size() == 1 ? coords[0] : OpCompositeConstruct(coords_type, coords);
- }
-
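- // Builds the coordinate vector for a texture or image operation, appending the array
- // layer (converted to float for floating-point coordinates) when the sampler is arrayed.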
- Id GetCoordinates(Operation operation, Type type) {
- std::vector<Id> coords;
- for (std::size_t i = 0; i < operation.GetOperandsCount(); ++i) {
- coords.push_back(As(Visit(operation[i]), type));
- }
- if (const auto meta = std::get_if<MetaTexture>(&operation.GetMeta())) {
- // Add array coordinate for textures
- if (meta->sampler.is_array) {
- Id array = AsInt(Visit(meta->array));
- if (type == Type::Float) {
- array = OpConvertSToF(t_float, array);
- }
- coords.push_back(array);
- }
- }
- return AssembleVector(coords, type);
- }
-
- Id GetOffsetCoordinates(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- std::vector<Id> coords;
- coords.reserve(meta.aoffi.size());
- for (const auto& coord : meta.aoffi) {
- coords.push_back(AsInt(Visit(coord)));
- }
- return AssembleVector(coords, Type::Int);
- }
-
- std::pair<Id, Id> GetDerivatives(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- const auto& derivatives = meta.derivates;
- ASSERT(derivatives.size() % 2 == 0);
-
- const std::size_t components = derivatives.size() / 2;
- std::vector<Id> dx, dy;
- dx.reserve(components);
- dy.reserve(components);
- for (std::size_t index = 0; index < components; ++index) {
- dx.push_back(AsFloat(Visit(derivatives.at(index * 2 + 0))));
- dy.push_back(AsFloat(Visit(derivatives.at(index * 2 + 1))));
- }
- return {AssembleVector(dx, Type::Float), AssembleVector(dy, Type::Float)};
- }
-
- Expression GetTextureElement(Operation operation, Id sample_value, Type type) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- const auto type_def = GetTypeDefinition(type);
- return {OpCompositeExtract(type_def, sample_value, meta.element), type};
- }
-
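- // Samples a texture, using implicit LOD in fragment shaders and an explicit LOD of zero
- // elsewhere; bias and texel offsets are appended as image operands and depth comparison
- // uses the Dref sampling variants.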
- Expression Texture(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
-
- const bool can_implicit = stage == ShaderType::Fragment;
- const Id sampler = GetTextureSampler(operation);
- const Id coords = GetCoordinates(operation, Type::Float);
-
- std::vector<Id> operands;
- spv::ImageOperandsMask mask{};
- if (meta.bias) {
- mask = mask | spv::ImageOperandsMask::Bias;
- operands.push_back(AsFloat(Visit(meta.bias)));
- }
-
- if (!can_implicit) {
- mask = mask | spv::ImageOperandsMask::Lod;
- operands.push_back(v_float_zero);
- }
-
- if (!meta.aoffi.empty()) {
- mask = mask | spv::ImageOperandsMask::Offset;
- operands.push_back(GetOffsetCoordinates(operation));
- }
-
- if (meta.depth_compare) {
- // Depth sampling
- UNIMPLEMENTED_IF(meta.bias);
- const Id dref = AsFloat(Visit(meta.depth_compare));
- if (can_implicit) {
- return {
- OpImageSampleDrefImplicitLod(t_float, sampler, coords, dref, mask, operands),
- Type::Float};
- } else {
- return {
- OpImageSampleDrefExplicitLod(t_float, sampler, coords, dref, mask, operands),
- Type::Float};
- }
- }
-
- Id texture;
- if (can_implicit) {
- texture = OpImageSampleImplicitLod(t_float4, sampler, coords, mask, operands);
- } else {
- texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, operands);
- }
- return GetTextureElement(operation, texture, Type::Float);
- }
-
- Expression TextureLod(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
-
- const Id sampler = GetTextureSampler(operation);
- const Id coords = GetCoordinates(operation, Type::Float);
- const Id lod = AsFloat(Visit(meta.lod));
-
- spv::ImageOperandsMask mask = spv::ImageOperandsMask::Lod;
- std::vector<Id> operands{lod};
-
- if (!meta.aoffi.empty()) {
- mask = mask | spv::ImageOperandsMask::Offset;
- operands.push_back(GetOffsetCoordinates(operation));
- }
-
- if (meta.sampler.is_shadow) {
- const Id dref = AsFloat(Visit(meta.depth_compare));
- return {OpImageSampleDrefExplicitLod(t_float, sampler, coords, dref, mask, operands),
- Type::Float};
- }
- const Id texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, operands);
- return GetTextureElement(operation, texture, Type::Float);
- }
-
- Expression TextureGather(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
-
- const Id coords = GetCoordinates(operation, Type::Float);
-
- spv::ImageOperandsMask mask = spv::ImageOperandsMask::MaskNone;
- std::vector<Id> operands;
- Id texture{};
-
- if (!meta.aoffi.empty()) {
- mask = mask | spv::ImageOperandsMask::Offset;
- operands.push_back(GetOffsetCoordinates(operation));
- }
-
- if (meta.sampler.is_shadow) {
- texture = OpImageDrefGather(t_float4, GetTextureSampler(operation), coords,
- AsFloat(Visit(meta.depth_compare)), mask, operands);
- } else {
- u32 component_value = 0;
- if (meta.component) {
- const auto component = std::get_if<ImmediateNode>(&*meta.component);
- ASSERT_MSG(component, "Component is not an immediate value");
- component_value = component->GetValue();
- }
- texture = OpImageGather(t_float4, GetTextureSampler(operation), coords,
- Constant(t_uint, component_value), mask, operands);
- }
- return GetTextureElement(operation, texture, Type::Float);
- }
-
- Expression TextureQueryDimensions(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- UNIMPLEMENTED_IF(!meta.aoffi.empty());
- UNIMPLEMENTED_IF(meta.depth_compare);
-
- const auto image_id = GetTextureImage(operation);
- if (meta.element == 3) {
- return {OpImageQueryLevels(t_int, image_id), Type::Int};
- }
-
- const Id lod = AsUint(Visit(operation[0]));
- const std::size_t coords_count = [&meta] {
- switch (const auto type = meta.sampler.type) {
- case Tegra::Shader::TextureType::Texture1D:
- return 1;
- case Tegra::Shader::TextureType::Texture2D:
- case Tegra::Shader::TextureType::TextureCube:
- return 2;
- case Tegra::Shader::TextureType::Texture3D:
- return 3;
- default:
- UNREACHABLE_MSG("Invalid texture type={}", type);
- return 2;
- }
- }();
-
- if (meta.element >= coords_count) {
- return {v_float_zero, Type::Float};
- }
-
- const std::array<Id, 3> types = {t_int, t_int2, t_int3};
- const Id sizes = OpImageQuerySizeLod(types.at(coords_count - 1), image_id, lod);
- const Id size = OpCompositeExtract(t_int, sizes, meta.element);
- return {size, Type::Int};
- }
-
- Expression TextureQueryLod(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- UNIMPLEMENTED_IF(!meta.aoffi.empty());
- UNIMPLEMENTED_IF(meta.depth_compare);
-
- if (meta.element >= 2) {
- UNREACHABLE_MSG("Invalid element");
- return {v_float_zero, Type::Float};
- }
- const auto sampler_id = GetTextureSampler(operation);
-
- const Id multiplier = Constant(t_float, 256.0f);
- const Id multipliers = ConstantComposite(t_float2, multiplier, multiplier);
-
- const Id coords = GetCoordinates(operation, Type::Float);
- Id size = OpImageQueryLod(t_float2, sampler_id, coords);
- size = OpFMul(t_float2, size, multipliers);
- size = OpConvertFToS(t_int2, size);
- return GetTextureElement(operation, size, Type::Int);
- }
-
- Expression TexelFetch(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- UNIMPLEMENTED_IF(meta.depth_compare);
-
- const Id image = GetTextureImage(operation);
- const Id coords = GetCoordinates(operation, Type::Int);
-
- spv::ImageOperandsMask mask = spv::ImageOperandsMask::MaskNone;
- std::vector<Id> operands;
- Id fetch;
-
- if (meta.lod && !meta.sampler.is_buffer) {
- mask = mask | spv::ImageOperandsMask::Lod;
- operands.push_back(AsInt(Visit(meta.lod)));
- }
-
- if (!meta.aoffi.empty()) {
- mask = mask | spv::ImageOperandsMask::Offset;
- operands.push_back(GetOffsetCoordinates(operation));
- }
-
- fetch = OpImageFetch(t_float4, image, coords, mask, operands);
- return GetTextureElement(operation, fetch, Type::Float);
- }
-
- Expression TextureGradient(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- UNIMPLEMENTED_IF(!meta.aoffi.empty());
-
- const Id sampler = GetTextureSampler(operation);
- const Id coords = GetCoordinates(operation, Type::Float);
- const auto [dx, dy] = GetDerivatives(operation);
- const std::vector grad = {dx, dy};
-
- static constexpr auto mask = spv::ImageOperandsMask::Grad;
- const Id texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, grad);
- return GetTextureElement(operation, texture, Type::Float);
- }
-
- Expression ImageLoad(Operation operation) {
- if (!device.IsFormatlessImageLoadSupported()) {
- return {v_float_zero, Type::Float};
- }
-
- const auto& meta{std::get<MetaImage>(operation.GetMeta())};
-
- const Id coords = GetCoordinates(operation, Type::Int);
- const Id texel = OpImageRead(t_uint4, GetImage(operation), coords);
-
- return {OpCompositeExtract(t_uint, texel, meta.element), Type::Uint};
- }
-
- Expression ImageStore(Operation operation) {
- const auto meta{std::get<MetaImage>(operation.GetMeta())};
- std::vector<Id> colors;
- for (const auto& value : meta.values) {
- colors.push_back(AsUint(Visit(value)));
- }
-
- const Id coords = GetCoordinates(operation, Type::Int);
- const Id texel = OpCompositeConstruct(t_uint4, colors);
-
- OpImageWrite(GetImage(operation), coords, texel, {});
- return {};
- }
-
- template <Id (Module::*func)(Id, Id, Id, Id, Id)>
- Expression AtomicImage(Operation operation) {
- const auto& meta{std::get<MetaImage>(operation.GetMeta())};
- ASSERT(meta.values.size() == 1);
-
- const Id coordinate = GetCoordinates(operation, Type::Int);
- const Id image = images.at(meta.image.index).image;
- const Id sample = v_uint_zero;
- const Id pointer = OpImageTexelPointer(t_image_uint, image, coordinate, sample);
-
- const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
- const Id semantics = v_uint_zero;
- const Id value = AsUint(Visit(meta.values[0]));
- return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint};
- }
-
- template <Id (Module::*func)(Id, Id, Id, Id, Id)>
- Expression Atomic(Operation operation) {
- Id pointer;
- if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
- pointer = GetSharedMemoryPointer(*smem);
- } else if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) {
- pointer = GetGlobalMemoryPointer(*gmem);
- } else {
- UNREACHABLE();
- return {v_float_zero, Type::Float};
- }
- const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
- const Id semantics = v_uint_zero;
- const Id value = AsUint(Visit(operation[1]));
-
- return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint};
- }
-
- template <Id (Module::*func)(Id, Id, Id, Id, Id)>
- Expression Reduce(Operation operation) {
- Atomic<func>(operation);
- return {};
- }
-
- Expression Branch(Operation operation) {
- const auto& target = std::get<ImmediateNode>(*operation[0]);
- OpStore(jmp_to, Constant(t_uint, target.GetValue()));
- OpBranch(continue_label);
- inside_branch = true;
- if (!conditional_branch_set) {
- AddLabel();
- }
- return {};
- }
-
- Expression BranchIndirect(Operation operation) {
- const Id op_a = AsUint(Visit(operation[0]));
-
- OpStore(jmp_to, op_a);
- OpBranch(continue_label);
- inside_branch = true;
- if (!conditional_branch_set) {
- AddLabel();
- }
- return {};
- }
-
- Expression PushFlowStack(Operation operation) {
- const auto& target = std::get<ImmediateNode>(*operation[0]);
- const auto [flow_stack, flow_stack_top] = GetFlowStack(operation);
- const Id current = OpLoad(t_uint, flow_stack_top);
- const Id next = OpIAdd(t_uint, current, Constant(t_uint, 1));
- const Id access = OpAccessChain(t_func_uint, flow_stack, current);
-
- OpStore(access, Constant(t_uint, target.GetValue()));
- OpStore(flow_stack_top, next);
- return {};
- }
-
- Expression PopFlowStack(Operation operation) {
- const auto [flow_stack, flow_stack_top] = GetFlowStack(operation);
- const Id current = OpLoad(t_uint, flow_stack_top);
- const Id previous = OpISub(t_uint, current, Constant(t_uint, 1));
- const Id access = OpAccessChain(t_func_uint, flow_stack, previous);
- const Id target = OpLoad(t_uint, access);
-
- OpStore(flow_stack_top, previous);
- OpStore(jmp_to, target);
- OpBranch(continue_label);
- inside_branch = true;
- if (!conditional_branch_set) {
- AddLabel();
- }
- return {};
- }
-
- Id MaxwellToSpirvComparison(Maxwell::ComparisonOp compare_op, Id operand_1, Id operand_2) {
- using Compare = Maxwell::ComparisonOp;
- switch (compare_op) {
- case Compare::NeverOld:
- return v_false; // Never let the test pass
- case Compare::LessOld:
- return OpFOrdLessThan(t_bool, operand_1, operand_2);
- case Compare::EqualOld:
- return OpFOrdEqual(t_bool, operand_1, operand_2);
- case Compare::LessEqualOld:
- return OpFOrdLessThanEqual(t_bool, operand_1, operand_2);
- case Compare::GreaterOld:
- return OpFOrdGreaterThan(t_bool, operand_1, operand_2);
- case Compare::NotEqualOld:
- return OpFOrdNotEqual(t_bool, operand_1, operand_2);
- case Compare::GreaterEqualOld:
- return OpFOrdGreaterThanEqual(t_bool, operand_1, operand_2);
- default:
- UNREACHABLE();
- return v_true;
- }
- }
-
- void AlphaTest(Id pointer) {
- if (specialization.alpha_test_func == Maxwell::ComparisonOp::AlwaysOld) {
- return;
- }
- const Id true_label = OpLabel();
- const Id discard_label = OpLabel();
- const Id alpha_reference = Constant(t_float, specialization.alpha_test_ref);
- const Id alpha_value = OpLoad(t_float, pointer);
- const Id condition =
- MaxwellToSpirvComparison(specialization.alpha_test_func, alpha_value, alpha_reference);
-
- OpBranchConditional(condition, true_label, discard_label);
- AddLabel(discard_label);
- OpKill();
- AddLabel(true_label);
- }
-
- void PreExit() {
- if (stage == ShaderType::Vertex && specialization.ndc_minus_one_to_one) {
- const u32 position_index = out_indices.position.value();
- const Id z_pointer = AccessElement(t_out_float, out_vertex, position_index, 2U);
- const Id w_pointer = AccessElement(t_out_float, out_vertex, position_index, 3U);
- Id depth = OpLoad(t_float, z_pointer);
- depth = OpFAdd(t_float, depth, OpLoad(t_float, w_pointer));
- depth = OpFMul(t_float, depth, Constant(t_float, 0.5f));
- OpStore(z_pointer, depth);
- }
- if (stage == ShaderType::Fragment) {
- const auto SafeGetRegister = [this](u32 reg) {
- if (const auto it = registers.find(reg); it != registers.end()) {
- return OpLoad(t_float, it->second);
- }
- return v_float_zero;
- };
-
- UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0,
- "Sample mask write is unimplemented");
-
- // Write the color outputs using the data in the shader registers, disabled
- // rendertargets/components are skipped in the register assignment.
- u32 current_reg = 0;
- for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
- // TODO(Subv): Figure out how dual-source blending is configured in the Switch.
- for (u32 component = 0; component < 4; ++component) {
- if (!header.ps.IsColorComponentOutputEnabled(rt, component)) {
- continue;
- }
- const Id pointer = AccessElement(t_out_float, frag_colors[rt], component);
- OpStore(pointer, SafeGetRegister(current_reg));
- if (rt == 0 && component == 3) {
- AlphaTest(pointer);
- }
- ++current_reg;
- }
- }
- if (header.ps.omap.depth) {
- // The depth output is always 2 registers after the last color output, and
- // current_reg already contains one past the last color register.
- OpStore(frag_depth, SafeGetRegister(current_reg + 1));
- }
- }
- }
-
- Expression Exit(Operation operation) {
- PreExit();
- inside_branch = true;
- if (conditional_branch_set) {
- OpReturn();
- } else {
- const Id dummy = OpLabel();
- OpBranch(dummy);
- AddLabel(dummy);
- OpReturn();
- AddLabel();
- }
- return {};
- }
-
- Expression Discard(Operation operation) {
- inside_branch = true;
- if (conditional_branch_set) {
- OpKill();
- } else {
- const Id dummy = OpLabel();
- OpBranch(dummy);
- AddLabel(dummy);
- OpKill();
- AddLabel();
- }
- return {};
- }
-
- Expression EmitVertex(Operation) {
- OpEmitVertex();
- return {};
- }
-
- Expression EndPrimitive(Operation operation) {
- OpEndPrimitive();
- return {};
- }
-
- Expression InvocationId(Operation) {
- return {OpLoad(t_int, invocation_id), Type::Int};
- }
-
- Expression YNegate(Operation) {
- LOG_WARNING(Render_Vulkan, "(STUBBED)");
- return {Constant(t_float, 1.0f), Type::Float};
- }
-
- template <u32 element>
- Expression LocalInvocationId(Operation) {
- const Id id = OpLoad(t_uint3, local_invocation_id);
- return {OpCompositeExtract(t_uint, id, element), Type::Uint};
- }
-
- template <u32 element>
- Expression WorkGroupId(Operation operation) {
- const Id id = OpLoad(t_uint3, workgroup_id);
- return {OpCompositeExtract(t_uint, id, element), Type::Uint};
- }
-
- Expression BallotThread(Operation operation) {
- const Id predicate = AsBool(Visit(operation[0]));
- const Id ballot = OpSubgroupBallotKHR(t_uint4, predicate);
-
- if (!device.IsWarpSizePotentiallyBiggerThanGuest()) {
- // Guest-like devices can just return the first index.
- return {OpCompositeExtract(t_uint, ballot, 0U), Type::Uint};
- }
-
- // The others will have to return what is local to the current thread.
- // For instance a device with a warp size of 64 will return the upper uint when the current
- // thread is 38.
- const Id tid = OpLoad(t_uint, thread_id);
- const Id thread_index = OpShiftRightLogical(t_uint, tid, Constant(t_uint, 5));
- return {OpVectorExtractDynamic(t_uint, ballot, thread_index), Type::Uint};
- }
-
- template <Id (Module::*func)(Id, Id)>
- Expression Vote(Operation operation) {
- // TODO(Rodrigo): Handle devices with different warp sizes
- const Id predicate = AsBool(Visit(operation[0]));
- return {(this->*func)(t_bool, predicate), Type::Bool};
- }
-
- Expression ThreadId(Operation) {
- return {OpLoad(t_uint, thread_id), Type::Uint};
- }
-
- template <std::size_t index>
- Expression ThreadMask(Operation) {
- // TODO(Rodrigo): Handle devices with different warp sizes
- const Id mask = thread_masks[index];
- return {OpLoad(t_uint, AccessElement(t_in_uint, mask, 0)), Type::Uint};
- }
-
- Expression ShuffleIndexed(Operation operation) {
- const Id value = AsFloat(Visit(operation[0]));
- const Id index = AsUint(Visit(operation[1]));
- return {OpSubgroupReadInvocationKHR(t_float, value, index), Type::Float};
- }
-
- Expression Barrier(Operation) {
- if (!ir.IsDecompiled()) {
- LOG_ERROR(Render_Vulkan, "OpBarrier used by shader is not decompiled");
- return {};
- }
-
- const auto scope = spv::Scope::Workgroup;
- const auto memory = spv::Scope::Workgroup;
- const auto semantics =
- spv::MemorySemanticsMask::WorkgroupMemory | spv::MemorySemanticsMask::AcquireRelease;
- OpControlBarrier(Constant(t_uint, static_cast<u32>(scope)),
- Constant(t_uint, static_cast<u32>(memory)),
- Constant(t_uint, static_cast<u32>(semantics)));
- return {};
- }
-
- template <spv::Scope scope>
- Expression MemoryBarrier(Operation) {
- const auto semantics =
- spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory |
- spv::MemorySemanticsMask::WorkgroupMemory |
- spv::MemorySemanticsMask::AtomicCounterMemory | spv::MemorySemanticsMask::ImageMemory;
-
- OpMemoryBarrier(Constant(t_uint, static_cast<u32>(scope)),
- Constant(t_uint, static_cast<u32>(semantics)));
- return {};
- }
-
- Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type, std::string name) {
- const Id id = OpVariable(type, storage);
- Decorate(id, spv::Decoration::BuiltIn, static_cast<u32>(builtin));
- AddGlobalVariable(Name(id, std::move(name)));
- interfaces.push_back(id);
- return id;
- }
-
- Id DeclareInputBuiltIn(spv::BuiltIn builtin, Id type, std::string name) {
- return DeclareBuiltIn(builtin, spv::StorageClass::Input, type, std::move(name));
- }
-
- template <typename... Args>
- Id AccessElement(Id pointer_type, Id composite, Args... elements_) {
- std::vector<Id> members;
- auto elements = {elements_...};
- for (const auto element : elements) {
- members.push_back(Constant(t_uint, element));
- }
-
- return OpAccessChain(pointer_type, composite, members);
- }
-
- Id As(Expression expr, Type wanted_type) {
- switch (wanted_type) {
- case Type::Bool:
- return AsBool(expr);
- case Type::Bool2:
- return AsBool2(expr);
- case Type::Float:
- return AsFloat(expr);
- case Type::Int:
- return AsInt(expr);
- case Type::Uint:
- return AsUint(expr);
- case Type::HalfFloat:
- return AsHalfFloat(expr);
- default:
- UNREACHABLE();
- return expr.id;
- }
- }
-
- Id AsBool(Expression expr) {
- ASSERT(expr.type == Type::Bool);
- return expr.id;
- }
-
- Id AsBool2(Expression expr) {
- ASSERT(expr.type == Type::Bool2);
- return expr.id;
- }
-
- Id AsFloat(Expression expr) {
- switch (expr.type) {
- case Type::Float:
- return expr.id;
- case Type::Int:
- case Type::Uint:
- return OpBitcast(t_float, expr.id);
- case Type::HalfFloat:
- if (device.IsFloat16Supported()) {
- return OpBitcast(t_float, expr.id);
- }
- return OpBitcast(t_float, OpPackHalf2x16(t_uint, expr.id));
- default:
- UNREACHABLE();
- return expr.id;
- }
- }
-
- Id AsInt(Expression expr) {
- switch (expr.type) {
- case Type::Int:
- return expr.id;
- case Type::Float:
- case Type::Uint:
- return OpBitcast(t_int, expr.id);
- case Type::HalfFloat:
- if (device.IsFloat16Supported()) {
- return OpBitcast(t_int, expr.id);
- }
- return OpPackHalf2x16(t_int, expr.id);
- default:
- UNREACHABLE();
- return expr.id;
- }
- }
-
- Id AsUint(Expression expr) {
- switch (expr.type) {
- case Type::Uint:
- return expr.id;
- case Type::Float:
- case Type::Int:
- return OpBitcast(t_uint, expr.id);
- case Type::HalfFloat:
- if (device.IsFloat16Supported()) {
- return OpBitcast(t_uint, expr.id);
- }
- return OpPackHalf2x16(t_uint, expr.id);
- default:
- UNREACHABLE();
- return expr.id;
- }
- }
-
- Id AsHalfFloat(Expression expr) {
- switch (expr.type) {
- case Type::HalfFloat:
- return expr.id;
- case Type::Float:
- case Type::Int:
- case Type::Uint:
- if (device.IsFloat16Supported()) {
- return OpBitcast(t_half, expr.id);
- }
- return OpUnpackHalf2x16(t_half, AsUint(expr));
- default:
- UNREACHABLE();
- return expr.id;
- }
- }
-
- Id GetHalfScalarFromFloat(Id value) {
- if (device.IsFloat16Supported()) {
- return OpFConvert(t_scalar_half, value);
- }
- return value;
- }
-
- Id GetFloatFromHalfScalar(Id value) {
- if (device.IsFloat16Supported()) {
- return OpFConvert(t_float, value);
- }
- return value;
- }
-
- AttributeType GetAttributeType(u32 location) const {
- if (stage != ShaderType::Vertex) {
- return {Type::Float, t_in_float, t_in_float4};
- }
- switch (specialization.attribute_types.at(location)) {
- case Maxwell::VertexAttribute::Type::SignedNorm:
- case Maxwell::VertexAttribute::Type::UnsignedNorm:
- case Maxwell::VertexAttribute::Type::UnsignedScaled:
- case Maxwell::VertexAttribute::Type::SignedScaled:
- case Maxwell::VertexAttribute::Type::Float:
- return {Type::Float, t_in_float, t_in_float4};
- case Maxwell::VertexAttribute::Type::SignedInt:
- return {Type::Int, t_in_int, t_in_int4};
- case Maxwell::VertexAttribute::Type::UnsignedInt:
- return {Type::Uint, t_in_uint, t_in_uint4};
- default:
- UNREACHABLE();
- return {Type::Float, t_in_float, t_in_float4};
- }
- }
-
- Id GetTypeDefinition(Type type) const {
- switch (type) {
- case Type::Bool:
- return t_bool;
- case Type::Bool2:
- return t_bool2;
- case Type::Float:
- return t_float;
- case Type::Int:
- return t_int;
- case Type::Uint:
- return t_uint;
- case Type::HalfFloat:
- return t_half;
- default:
- UNREACHABLE();
- return {};
- }
- }
-
- std::array<Id, 4> GetTypeVectorDefinitionLut(Type type) const {
- switch (type) {
- case Type::Float:
- return {t_float, t_float2, t_float3, t_float4};
- case Type::Int:
- return {t_int, t_int2, t_int3, t_int4};
- case Type::Uint:
- return {t_uint, t_uint2, t_uint3, t_uint4};
- default:
- UNIMPLEMENTED();
- return {};
- }
- }
-
- std::tuple<Id, Id> CreateFlowStack() {
- // TODO(Rodrigo): Figure out the actual depth of the flow stack, for now it seems unlikely
- // that shaders will use 20 nested SSYs and PBKs.
- constexpr u32 FLOW_STACK_SIZE = 20;
- constexpr auto storage_class = spv::StorageClass::Function;
-
- const Id flow_stack_type = TypeArray(t_uint, Constant(t_uint, FLOW_STACK_SIZE));
- const Id stack = OpVariable(TypePointer(storage_class, flow_stack_type), storage_class,
- ConstantNull(flow_stack_type));
- const Id top = OpVariable(t_func_uint, storage_class, Constant(t_uint, 0));
- AddLocalVariable(stack);
- AddLocalVariable(top);
- return std::tie(stack, top);
- }
-
- std::pair<Id, Id> GetFlowStack(Operation operation) {
- const auto stack_class = std::get<MetaStackClass>(operation.GetMeta());
- switch (stack_class) {
- case MetaStackClass::Ssy:
- return {ssy_flow_stack, ssy_flow_stack_top};
- case MetaStackClass::Pbk:
- return {pbk_flow_stack, pbk_flow_stack_top};
- }
- UNREACHABLE();
- return {};
- }
-
- Id GetGlobalMemoryPointer(const GmemNode& gmem) {
- const Id real = AsUint(Visit(gmem.GetRealAddress()));
- const Id base = AsUint(Visit(gmem.GetBaseAddress()));
- const Id diff = OpISub(t_uint, real, base);
- const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2));
- const Id buffer = global_buffers.at(gmem.GetDescriptor());
- return OpAccessChain(t_gmem_uint, buffer, Constant(t_uint, 0), offset);
- }
-
- Id GetSharedMemoryPointer(const SmemNode& smem) {
- ASSERT(stage == ShaderType::Compute);
- Id address = AsUint(Visit(smem.GetAddress()));
- address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U));
- return OpAccessChain(t_smem_uint, shared_memory, address);
- }
-
- static constexpr std::array operation_decompilers = {
- &SPIRVDecompiler::Assign,
-
- &SPIRVDecompiler::Ternary<&Module::OpSelect, Type::Float, Type::Bool, Type::Float,
- Type::Float>,
-
- &SPIRVDecompiler::Binary<&Module::OpFAdd, Type::Float>,
- &SPIRVDecompiler::Binary<&Module::OpFMul, Type::Float>,
- &SPIRVDecompiler::Binary<&Module::OpFDiv, Type::Float>,
- &SPIRVDecompiler::Ternary<&Module::OpFma, Type::Float>,
- &SPIRVDecompiler::Unary<&Module::OpFNegate, Type::Float>,
- &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::Float>,
- &SPIRVDecompiler::Ternary<&Module::OpFClamp, Type::Float>,
- &SPIRVDecompiler::FCastHalf<0>,
- &SPIRVDecompiler::FCastHalf<1>,
- &SPIRVDecompiler::Binary<&Module::OpFMin, Type::Float>,
- &SPIRVDecompiler::Binary<&Module::OpFMax, Type::Float>,
- &SPIRVDecompiler::Unary<&Module::OpCos, Type::Float>,
- &SPIRVDecompiler::Unary<&Module::OpSin, Type::Float>,
- &SPIRVDecompiler::Unary<&Module::OpExp2, Type::Float>,
- &SPIRVDecompiler::Unary<&Module::OpLog2, Type::Float>,
- &SPIRVDecompiler::Unary<&Module::OpInverseSqrt, Type::Float>,
- &SPIRVDecompiler::Unary<&Module::OpSqrt, Type::Float>,
- &SPIRVDecompiler::Unary<&Module::OpRoundEven, Type::Float>,
- &SPIRVDecompiler::Unary<&Module::OpFloor, Type::Float>,
- &SPIRVDecompiler::Unary<&Module::OpCeil, Type::Float>,
- &SPIRVDecompiler::Unary<&Module::OpTrunc, Type::Float>,
- &SPIRVDecompiler::Unary<&Module::OpConvertSToF, Type::Float, Type::Int>,
- &SPIRVDecompiler::Unary<&Module::OpConvertUToF, Type::Float, Type::Uint>,
- &SPIRVDecompiler::FSwizzleAdd,
-
- &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Int>,
- &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Int>,
- &SPIRVDecompiler::Binary<&Module::OpSDiv, Type::Int>,
- &SPIRVDecompiler::Unary<&Module::OpSNegate, Type::Int>,
- &SPIRVDecompiler::Unary<&Module::OpSAbs, Type::Int>,
- &SPIRVDecompiler::Binary<&Module::OpSMin, Type::Int>,
- &SPIRVDecompiler::Binary<&Module::OpSMax, Type::Int>,
-
- &SPIRVDecompiler::Unary<&Module::OpConvertFToS, Type::Int, Type::Float>,
- &SPIRVDecompiler::Unary<&Module::OpBitcast, Type::Int, Type::Uint>,
- &SPIRVDecompiler::Binary<&Module::OpShiftLeftLogical, Type::Int, Type::Int, Type::Uint>,
- &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Int, Type::Int, Type::Uint>,
- &SPIRVDecompiler::Binary<&Module::OpShiftRightArithmetic, Type::Int, Type::Int, Type::Uint>,
- &SPIRVDecompiler::Binary<&Module::OpBitwiseAnd, Type::Int>,
- &SPIRVDecompiler::Binary<&Module::OpBitwiseOr, Type::Int>,
- &SPIRVDecompiler::Binary<&Module::OpBitwiseXor, Type::Int>,
- &SPIRVDecompiler::Unary<&Module::OpNot, Type::Int>,
- &SPIRVDecompiler::Quaternary<&Module::OpBitFieldInsert, Type::Int>,
- &SPIRVDecompiler::Ternary<&Module::OpBitFieldSExtract, Type::Int>,
- &SPIRVDecompiler::Unary<&Module::OpBitCount, Type::Int>,
- &SPIRVDecompiler::Unary<&Module::OpFindSMsb, Type::Int>,
-
- &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Uint>,
- &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Uint>,
- &SPIRVDecompiler::Binary<&Module::OpUDiv, Type::Uint>,
- &SPIRVDecompiler::Binary<&Module::OpUMin, Type::Uint>,
- &SPIRVDecompiler::Binary<&Module::OpUMax, Type::Uint>,
- &SPIRVDecompiler::Unary<&Module::OpConvertFToU, Type::Uint, Type::Float>,
- &SPIRVDecompiler::Unary<&Module::OpBitcast, Type::Uint, Type::Int>,
- &SPIRVDecompiler::Binary<&Module::OpShiftLeftLogical, Type::Uint>,
- &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Uint>,
- &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Uint>,
- &SPIRVDecompiler::Binary<&Module::OpBitwiseAnd, Type::Uint>,
- &SPIRVDecompiler::Binary<&Module::OpBitwiseOr, Type::Uint>,
- &SPIRVDecompiler::Binary<&Module::OpBitwiseXor, Type::Uint>,
- &SPIRVDecompiler::Unary<&Module::OpNot, Type::Uint>,
- &SPIRVDecompiler::Quaternary<&Module::OpBitFieldInsert, Type::Uint>,
- &SPIRVDecompiler::Ternary<&Module::OpBitFieldUExtract, Type::Uint>,
- &SPIRVDecompiler::Unary<&Module::OpBitCount, Type::Uint>,
- &SPIRVDecompiler::Unary<&Module::OpFindUMsb, Type::Uint>,
-
- &SPIRVDecompiler::Binary<&Module::OpFAdd, Type::HalfFloat>,
- &SPIRVDecompiler::Binary<&Module::OpFMul, Type::HalfFloat>,
- &SPIRVDecompiler::Ternary<&Module::OpFma, Type::HalfFloat>,
- &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::HalfFloat>,
- &SPIRVDecompiler::HNegate,
- &SPIRVDecompiler::HClamp,
- &SPIRVDecompiler::HCastFloat,
- &SPIRVDecompiler::HUnpack,
- &SPIRVDecompiler::HMergeF32,
- &SPIRVDecompiler::HMergeHN<0>,
- &SPIRVDecompiler::HMergeHN<1>,
- &SPIRVDecompiler::HPack2,
-
- &SPIRVDecompiler::LogicalAssign,
- &SPIRVDecompiler::Binary<&Module::OpLogicalAnd, Type::Bool>,
- &SPIRVDecompiler::Binary<&Module::OpLogicalOr, Type::Bool>,
- &SPIRVDecompiler::Binary<&Module::OpLogicalNotEqual, Type::Bool>,
- &SPIRVDecompiler::Unary<&Module::OpLogicalNot, Type::Bool>,
- &SPIRVDecompiler::Binary<&Module::OpVectorExtractDynamic, Type::Bool, Type::Bool2,
- Type::Uint>,
- &SPIRVDecompiler::Unary<&Module::OpAll, Type::Bool, Type::Bool2>,
-
- &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::Float>,
- &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::Float>,
- &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool, Type::Float>,
- &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool, Type::Float>,
- &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool, Type::Float>,
- &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool, Type::Float>,
- &SPIRVDecompiler::LogicalFOrdered,
- &SPIRVDecompiler::LogicalFUnordered,
- &SPIRVDecompiler::Binary<&Module::OpFUnordLessThan, Type::Bool, Type::Float>,
- &SPIRVDecompiler::Binary<&Module::OpFUnordEqual, Type::Bool, Type::Float>,
- &SPIRVDecompiler::Binary<&Module::OpFUnordLessThanEqual, Type::Bool, Type::Float>,
- &SPIRVDecompiler::Binary<&Module::OpFUnordGreaterThan, Type::Bool, Type::Float>,
- &SPIRVDecompiler::Binary<&Module::OpFUnordNotEqual, Type::Bool, Type::Float>,
- &SPIRVDecompiler::Binary<&Module::OpFUnordGreaterThanEqual, Type::Bool, Type::Float>,
-
- &SPIRVDecompiler::Binary<&Module::OpSLessThan, Type::Bool, Type::Int>,
- &SPIRVDecompiler::Binary<&Module::OpIEqual, Type::Bool, Type::Int>,
- &SPIRVDecompiler::Binary<&Module::OpSLessThanEqual, Type::Bool, Type::Int>,
- &SPIRVDecompiler::Binary<&Module::OpSGreaterThan, Type::Bool, Type::Int>,
- &SPIRVDecompiler::Binary<&Module::OpINotEqual, Type::Bool, Type::Int>,
- &SPIRVDecompiler::Binary<&Module::OpSGreaterThanEqual, Type::Bool, Type::Int>,
-
- &SPIRVDecompiler::Binary<&Module::OpULessThan, Type::Bool, Type::Uint>,
- &SPIRVDecompiler::Binary<&Module::OpIEqual, Type::Bool, Type::Uint>,
- &SPIRVDecompiler::Binary<&Module::OpULessThanEqual, Type::Bool, Type::Uint>,
- &SPIRVDecompiler::Binary<&Module::OpUGreaterThan, Type::Bool, Type::Uint>,
- &SPIRVDecompiler::Binary<&Module::OpINotEqual, Type::Bool, Type::Uint>,
- &SPIRVDecompiler::Binary<&Module::OpUGreaterThanEqual, Type::Bool, Type::Uint>,
-
- &SPIRVDecompiler::LogicalAddCarry,
-
- &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool2, Type::HalfFloat>,
- &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool2, Type::HalfFloat>,
- &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool2, Type::HalfFloat>,
- &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool2, Type::HalfFloat>,
- &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool2, Type::HalfFloat>,
- &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool2, Type::HalfFloat>,
- // TODO(Rodrigo): Should these use the OpFUnord* variants?
- &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool2, Type::HalfFloat>,
- &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool2, Type::HalfFloat>,
- &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool2, Type::HalfFloat>,
- &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool2, Type::HalfFloat>,
- &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool2, Type::HalfFloat>,
- &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool2, Type::HalfFloat>,
-
- &SPIRVDecompiler::Texture,
- &SPIRVDecompiler::TextureLod,
- &SPIRVDecompiler::TextureGather,
- &SPIRVDecompiler::TextureQueryDimensions,
- &SPIRVDecompiler::TextureQueryLod,
- &SPIRVDecompiler::TexelFetch,
- &SPIRVDecompiler::TextureGradient,
-
- &SPIRVDecompiler::ImageLoad,
- &SPIRVDecompiler::ImageStore,
- &SPIRVDecompiler::AtomicImage<&Module::OpAtomicIAdd>,
- &SPIRVDecompiler::AtomicImage<&Module::OpAtomicAnd>,
- &SPIRVDecompiler::AtomicImage<&Module::OpAtomicOr>,
- &SPIRVDecompiler::AtomicImage<&Module::OpAtomicXor>,
- &SPIRVDecompiler::AtomicImage<&Module::OpAtomicExchange>,
-
- &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>,
- &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>,
- &SPIRVDecompiler::Atomic<&Module::OpAtomicUMin>,
- &SPIRVDecompiler::Atomic<&Module::OpAtomicUMax>,
- &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd>,
- &SPIRVDecompiler::Atomic<&Module::OpAtomicOr>,
- &SPIRVDecompiler::Atomic<&Module::OpAtomicXor>,
-
- &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>,
- &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>,
- &SPIRVDecompiler::Atomic<&Module::OpAtomicSMin>,
- &SPIRVDecompiler::Atomic<&Module::OpAtomicSMax>,
- &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd>,
- &SPIRVDecompiler::Atomic<&Module::OpAtomicOr>,
- &SPIRVDecompiler::Atomic<&Module::OpAtomicXor>,
-
- &SPIRVDecompiler::Reduce<&Module::OpAtomicIAdd>,
- &SPIRVDecompiler::Reduce<&Module::OpAtomicUMin>,
- &SPIRVDecompiler::Reduce<&Module::OpAtomicUMax>,
- &SPIRVDecompiler::Reduce<&Module::OpAtomicAnd>,
- &SPIRVDecompiler::Reduce<&Module::OpAtomicOr>,
- &SPIRVDecompiler::Reduce<&Module::OpAtomicXor>,
-
- &SPIRVDecompiler::Reduce<&Module::OpAtomicIAdd>,
- &SPIRVDecompiler::Reduce<&Module::OpAtomicSMin>,
- &SPIRVDecompiler::Reduce<&Module::OpAtomicSMax>,
- &SPIRVDecompiler::Reduce<&Module::OpAtomicAnd>,
- &SPIRVDecompiler::Reduce<&Module::OpAtomicOr>,
- &SPIRVDecompiler::Reduce<&Module::OpAtomicXor>,
-
- &SPIRVDecompiler::Branch,
- &SPIRVDecompiler::BranchIndirect,
- &SPIRVDecompiler::PushFlowStack,
- &SPIRVDecompiler::PopFlowStack,
- &SPIRVDecompiler::Exit,
- &SPIRVDecompiler::Discard,
-
- &SPIRVDecompiler::EmitVertex,
- &SPIRVDecompiler::EndPrimitive,
-
- &SPIRVDecompiler::InvocationId,
- &SPIRVDecompiler::YNegate,
- &SPIRVDecompiler::LocalInvocationId<0>,
- &SPIRVDecompiler::LocalInvocationId<1>,
- &SPIRVDecompiler::LocalInvocationId<2>,
- &SPIRVDecompiler::WorkGroupId<0>,
- &SPIRVDecompiler::WorkGroupId<1>,
- &SPIRVDecompiler::WorkGroupId<2>,
-
- &SPIRVDecompiler::BallotThread,
- &SPIRVDecompiler::Vote<&Module::OpSubgroupAllKHR>,
- &SPIRVDecompiler::Vote<&Module::OpSubgroupAnyKHR>,
- &SPIRVDecompiler::Vote<&Module::OpSubgroupAllEqualKHR>,
-
- &SPIRVDecompiler::ThreadId,
- &SPIRVDecompiler::ThreadMask<0>, // Eq
- &SPIRVDecompiler::ThreadMask<1>, // Ge
- &SPIRVDecompiler::ThreadMask<2>, // Gt
- &SPIRVDecompiler::ThreadMask<3>, // Le
- &SPIRVDecompiler::ThreadMask<4>, // Lt
- &SPIRVDecompiler::ShuffleIndexed,
-
- &SPIRVDecompiler::Barrier,
- &SPIRVDecompiler::MemoryBarrier<spv::Scope::Workgroup>,
- &SPIRVDecompiler::MemoryBarrier<spv::Scope::Device>,
- };
- static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
-
- const Device& device;
- const ShaderIR& ir;
- const ShaderType stage;
- const Tegra::Shader::Header header;
- const Registry& registry;
- const Specialization& specialization;
- std::unordered_map<u8, VaryingTFB> transform_feedback;
-
- const Id t_void = Name(TypeVoid(), "void");
-
- const Id t_bool = Name(TypeBool(), "bool");
- const Id t_bool2 = Name(TypeVector(t_bool, 2), "bool2");
-
- const Id t_int = Name(TypeInt(32, true), "int");
- const Id t_int2 = Name(TypeVector(t_int, 2), "int2");
- const Id t_int3 = Name(TypeVector(t_int, 3), "int3");
- const Id t_int4 = Name(TypeVector(t_int, 4), "int4");
-
- const Id t_uint = Name(TypeInt(32, false), "uint");
- const Id t_uint2 = Name(TypeVector(t_uint, 2), "uint2");
- const Id t_uint3 = Name(TypeVector(t_uint, 3), "uint3");
- const Id t_uint4 = Name(TypeVector(t_uint, 4), "uint4");
-
- const Id t_float = Name(TypeFloat(32), "float");
- const Id t_float2 = Name(TypeVector(t_float, 2), "float2");
- const Id t_float3 = Name(TypeVector(t_float, 3), "float3");
- const Id t_float4 = Name(TypeVector(t_float, 4), "float4");
-
- const Id t_prv_bool = Name(TypePointer(spv::StorageClass::Private, t_bool), "prv_bool");
- const Id t_prv_float = Name(TypePointer(spv::StorageClass::Private, t_float), "prv_float");
-
- const Id t_func_uint = Name(TypePointer(spv::StorageClass::Function, t_uint), "func_uint");
-
- const Id t_in_bool = Name(TypePointer(spv::StorageClass::Input, t_bool), "in_bool");
- const Id t_in_int = Name(TypePointer(spv::StorageClass::Input, t_int), "in_int");
- const Id t_in_int4 = Name(TypePointer(spv::StorageClass::Input, t_int4), "in_int4");
- const Id t_in_uint = Name(TypePointer(spv::StorageClass::Input, t_uint), "in_uint");
- const Id t_in_uint3 = Name(TypePointer(spv::StorageClass::Input, t_uint3), "in_uint3");
- const Id t_in_uint4 = Name(TypePointer(spv::StorageClass::Input, t_uint4), "in_uint4");
- const Id t_in_float = Name(TypePointer(spv::StorageClass::Input, t_float), "in_float");
- const Id t_in_float2 = Name(TypePointer(spv::StorageClass::Input, t_float2), "in_float2");
- const Id t_in_float3 = Name(TypePointer(spv::StorageClass::Input, t_float3), "in_float3");
- const Id t_in_float4 = Name(TypePointer(spv::StorageClass::Input, t_float4), "in_float4");
-
- const Id t_out_int = Name(TypePointer(spv::StorageClass::Output, t_int), "out_int");
-
- const Id t_out_float = Name(TypePointer(spv::StorageClass::Output, t_float), "out_float");
- const Id t_out_float4 = Name(TypePointer(spv::StorageClass::Output, t_float4), "out_float4");
-
- const Id t_cbuf_float = TypePointer(spv::StorageClass::Uniform, t_float);
- const Id t_cbuf_std140 = Decorate(
- Name(TypeArray(t_float4, Constant(t_uint, MaxConstBufferElements)), "CbufStd140Array"),
- spv::Decoration::ArrayStride, 16U);
- const Id t_cbuf_scalar = Decorate(
- Name(TypeArray(t_float, Constant(t_uint, MaxConstBufferFloats)), "CbufScalarArray"),
- spv::Decoration::ArrayStride, 4U);
- const Id t_cbuf_std140_struct = MemberDecorate(
- Decorate(TypeStruct(t_cbuf_std140), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
- const Id t_cbuf_scalar_struct = MemberDecorate(
- Decorate(TypeStruct(t_cbuf_scalar), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
- const Id t_cbuf_std140_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_std140_struct);
- const Id t_cbuf_scalar_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_scalar_struct);
-
- Id t_smem_uint{};
-
- const Id t_gmem_uint = TypePointer(spv::StorageClass::StorageBuffer, t_uint);
- const Id t_gmem_array =
- Name(Decorate(TypeRuntimeArray(t_uint), spv::Decoration::ArrayStride, 4U), "GmemArray");
- const Id t_gmem_struct = MemberDecorate(
- Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
- const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct);
-
- const Id t_image_uint = TypePointer(spv::StorageClass::Image, t_uint);
-
- const Id v_float_zero = Constant(t_float, 0.0f);
- const Id v_float_one = Constant(t_float, 1.0f);
- const Id v_uint_zero = Constant(t_uint, 0);
-
- // Nvidia uses these defaults for varyings (e.g. position and generic attributes)
- const Id v_varying_default =
- ConstantComposite(t_float4, v_float_zero, v_float_zero, v_float_zero, v_float_one);
-
- const Id v_true = ConstantTrue(t_bool);
- const Id v_false = ConstantFalse(t_bool);
-
- Id t_scalar_half{};
- Id t_half{};
-
- Id out_vertex{};
- Id in_vertex{};
- std::map<u32, Id> registers;
- std::map<u32, Id> custom_variables;
- std::map<Tegra::Shader::Pred, Id> predicates;
- std::map<u32, Id> flow_variables;
- Id local_memory{};
- Id shared_memory{};
- std::array<Id, INTERNAL_FLAGS_COUNT> internal_flags{};
- std::map<Attribute::Index, Id> input_attributes;
- std::unordered_map<u8, GenericVaryingDescription> output_attributes;
- std::map<u32, Id> constant_buffers;
- std::map<GlobalMemoryBase, Id> global_buffers;
- std::map<u32, TexelBuffer> uniform_texels;
- std::map<u32, SampledImage> sampled_images;
- std::map<u32, StorageImage> images;
-
- std::array<Id, Maxwell::NumRenderTargets> frag_colors{};
- Id instance_index{};
- Id vertex_index{};
- Id base_instance{};
- Id base_vertex{};
- Id frag_depth{};
- Id frag_coord{};
- Id front_facing{};
- Id point_coord{};
- Id tess_level_outer{};
- Id tess_level_inner{};
- Id tess_coord{};
- Id invocation_id{};
- Id workgroup_id{};
- Id local_invocation_id{};
- Id thread_id{};
- std::array<Id, 5> thread_masks{}; // eq, ge, gt, le, lt
-
- VertexIndices in_indices;
- VertexIndices out_indices;
-
- std::vector<Id> interfaces;
-
- Id jmp_to{};
- Id ssy_flow_stack_top{};
- Id pbk_flow_stack_top{};
- Id ssy_flow_stack{};
- Id pbk_flow_stack{};
- Id continue_label{};
- std::map<u32, Id> labels;
-
- bool conditional_branch_set{};
- bool inside_branch{};
-};
-
-class ExprDecompiler {
-public:
- explicit ExprDecompiler(SPIRVDecompiler& decomp_) : decomp{decomp_} {}
-
- Id operator()(const ExprAnd& expr) {
- const Id type_def = decomp.GetTypeDefinition(Type::Bool);
- const Id op1 = Visit(expr.operand1);
- const Id op2 = Visit(expr.operand2);
- return decomp.OpLogicalAnd(type_def, op1, op2);
- }
-
- Id operator()(const ExprOr& expr) {
- const Id type_def = decomp.GetTypeDefinition(Type::Bool);
- const Id op1 = Visit(expr.operand1);
- const Id op2 = Visit(expr.operand2);
- return decomp.OpLogicalOr(type_def, op1, op2);
- }
-
- Id operator()(const ExprNot& expr) {
- const Id type_def = decomp.GetTypeDefinition(Type::Bool);
- const Id op1 = Visit(expr.operand1);
- return decomp.OpLogicalNot(type_def, op1);
- }
-
- Id operator()(const ExprPredicate& expr) {
- const auto pred = static_cast<Tegra::Shader::Pred>(expr.predicate);
- return decomp.OpLoad(decomp.t_bool, decomp.predicates.at(pred));
- }
-
- Id operator()(const ExprCondCode& expr) {
- return decomp.AsBool(decomp.Visit(decomp.ir.GetConditionCode(expr.cc)));
- }
-
- Id operator()(const ExprVar& expr) {
- return decomp.OpLoad(decomp.t_bool, decomp.flow_variables.at(expr.var_index));
- }
-
- Id operator()(const ExprBoolean& expr) {
- return expr.value ? decomp.v_true : decomp.v_false;
- }
-
- Id operator()(const ExprGprEqual& expr) {
- const Id target = decomp.Constant(decomp.t_uint, expr.value);
- Id gpr = decomp.OpLoad(decomp.t_float, decomp.registers.at(expr.gpr));
- gpr = decomp.OpBitcast(decomp.t_uint, gpr);
- return decomp.OpIEqual(decomp.t_bool, gpr, target);
- }
-
- Id Visit(const Expr& node) {
- return std::visit(*this, *node);
- }
-
-private:
- SPIRVDecompiler& decomp;
-};
-
-class ASTDecompiler {
-public:
- explicit ASTDecompiler(SPIRVDecompiler& decomp_) : decomp{decomp_} {}
-
- void operator()(const ASTProgram& ast) {
- ASTNode current = ast.nodes.GetFirst();
- while (current) {
- Visit(current);
- current = current->GetNext();
- }
- }
-
- void operator()(const ASTIfThen& ast) {
- ExprDecompiler expr_parser{decomp};
- const Id condition = expr_parser.Visit(ast.condition);
- const Id then_label = decomp.OpLabel();
- const Id endif_label = decomp.OpLabel();
- decomp.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone);
- decomp.OpBranchConditional(condition, then_label, endif_label);
- decomp.AddLabel(then_label);
- ASTNode current = ast.nodes.GetFirst();
- while (current) {
- Visit(current);
- current = current->GetNext();
- }
- decomp.OpBranch(endif_label);
- decomp.AddLabel(endif_label);
- }
-
- void operator()([[maybe_unused]] const ASTIfElse& ast) {
- UNREACHABLE();
- }
-
- void operator()([[maybe_unused]] const ASTBlockEncoded& ast) {
- UNREACHABLE();
- }
-
- void operator()(const ASTBlockDecoded& ast) {
- decomp.VisitBasicBlock(ast.nodes);
- }
-
- void operator()(const ASTVarSet& ast) {
- ExprDecompiler expr_parser{decomp};
- const Id condition = expr_parser.Visit(ast.condition);
- decomp.OpStore(decomp.flow_variables.at(ast.index), condition);
- }
-
- void operator()([[maybe_unused]] const ASTLabel& ast) {
- // Do nothing
- }
-
- void operator()([[maybe_unused]] const ASTGoto& ast) {
- UNREACHABLE();
- }
-
- void operator()(const ASTDoWhile& ast) {
- const Id loop_label = decomp.OpLabel();
- const Id endloop_label = decomp.OpLabel();
- const Id loop_start_block = decomp.OpLabel();
- const Id loop_continue_block = decomp.OpLabel();
- current_loop_exit = endloop_label;
- decomp.OpBranch(loop_label);
- decomp.AddLabel(loop_label);
- decomp.OpLoopMerge(endloop_label, loop_continue_block, spv::LoopControlMask::MaskNone);
- decomp.OpBranch(loop_start_block);
- decomp.AddLabel(loop_start_block);
- ASTNode current = ast.nodes.GetFirst();
- while (current) {
- Visit(current);
- current = current->GetNext();
- }
- decomp.OpBranch(loop_continue_block);
- decomp.AddLabel(loop_continue_block);
- ExprDecompiler expr_parser{decomp};
- const Id condition = expr_parser.Visit(ast.condition);
- decomp.OpBranchConditional(condition, loop_label, endloop_label);
- decomp.AddLabel(endloop_label);
- }
-
- void operator()(const ASTReturn& ast) {
- if (!VideoCommon::Shader::ExprIsTrue(ast.condition)) {
- ExprDecompiler expr_parser{decomp};
- const Id condition = expr_parser.Visit(ast.condition);
- const Id then_label = decomp.OpLabel();
- const Id endif_label = decomp.OpLabel();
- decomp.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone);
- decomp.OpBranchConditional(condition, then_label, endif_label);
- decomp.AddLabel(then_label);
- if (ast.kills) {
- decomp.OpKill();
- } else {
- decomp.PreExit();
- decomp.OpReturn();
- }
- decomp.AddLabel(endif_label);
- } else {
- const Id next_block = decomp.OpLabel();
- decomp.OpBranch(next_block);
- decomp.AddLabel(next_block);
- if (ast.kills) {
- decomp.OpKill();
- } else {
- decomp.PreExit();
- decomp.OpReturn();
- }
- decomp.AddLabel(decomp.OpLabel());
- }
- }
-
- void operator()(const ASTBreak& ast) {
- if (!VideoCommon::Shader::ExprIsTrue(ast.condition)) {
- ExprDecompiler expr_parser{decomp};
- const Id condition = expr_parser.Visit(ast.condition);
- const Id then_label = decomp.OpLabel();
- const Id endif_label = decomp.OpLabel();
- decomp.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone);
- decomp.OpBranchConditional(condition, then_label, endif_label);
- decomp.AddLabel(then_label);
- decomp.OpBranch(current_loop_exit);
- decomp.AddLabel(endif_label);
- } else {
- const Id next_block = decomp.OpLabel();
- decomp.OpBranch(next_block);
- decomp.AddLabel(next_block);
- decomp.OpBranch(current_loop_exit);
- decomp.AddLabel(decomp.OpLabel());
- }
- }
-
- void Visit(const ASTNode& node) {
- std::visit(*this, *node->GetInnerData());
- }
-
-private:
- SPIRVDecompiler& decomp;
- Id current_loop_exit{};
-};
-
-void SPIRVDecompiler::DecompileAST() {
- const u32 num_flow_variables = ir.GetASTNumVariables();
- for (u32 i = 0; i < num_flow_variables; i++) {
- const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
- Name(id, fmt::format("flow_var_{}", i));
- flow_variables.emplace(i, AddGlobalVariable(id));
- }
-
- DefinePrologue();
-
- const ASTNode program = ir.GetASTProgram();
- ASTDecompiler decompiler{*this};
- decompiler.Visit(program);
-
- const Id next_block = OpLabel();
- OpBranch(next_block);
- AddLabel(next_block);
-}
-
-} // Anonymous namespace
-
-ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) {
- ShaderEntries entries;
- for (const auto& cbuf : ir.GetConstantBuffers()) {
- entries.const_buffers.emplace_back(cbuf.second, cbuf.first);
- }
- for (const auto& [base, usage] : ir.GetGlobalMemory()) {
- entries.global_buffers.emplace_back(GlobalBufferEntry{
- .cbuf_index = base.cbuf_index,
- .cbuf_offset = base.cbuf_offset,
- .is_written = usage.is_written,
- });
- }
- for (const auto& sampler : ir.GetSamplers()) {
- if (sampler.is_buffer) {
- entries.uniform_texels.emplace_back(sampler);
- } else {
- entries.samplers.emplace_back(sampler);
- }
- }
- for (const auto& image : ir.GetImages()) {
- if (image.type == Tegra::Shader::ImageType::TextureBuffer) {
- entries.storage_texels.emplace_back(image);
- } else {
- entries.images.emplace_back(image);
- }
- }
- for (const auto& attribute : ir.GetInputAttributes()) {
- if (IsGenericAttribute(attribute)) {
- entries.attributes.insert(GetGenericAttributeLocation(attribute));
- }
- }
- for (const auto& buffer : entries.const_buffers) {
- entries.enabled_uniform_buffers |= 1U << buffer.GetIndex();
- }
- entries.clip_distances = ir.GetClipDistances();
- entries.shader_length = ir.GetLength();
- entries.uses_warps = ir.UsesWarps();
- return entries;
-}
-
-std::vector<u32> Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
- ShaderType stage, const VideoCommon::Shader::Registry& registry,
- const Specialization& specialization) {
- return SPIRVDecompiler(device, ir, stage, registry, specialization).Assemble();
-}
-
-} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
deleted file mode 100644
index 5d94132a5..000000000
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h
+++ /dev/null
@@ -1,99 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <array>
-#include <set>
-#include <vector>
-
-#include "common/common_types.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/engines/shader_type.h"
-#include "video_core/shader/registry.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace Vulkan {
-
-class Device;
-
-using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-using UniformTexelEntry = VideoCommon::Shader::SamplerEntry;
-using SamplerEntry = VideoCommon::Shader::SamplerEntry;
-using StorageTexelEntry = VideoCommon::Shader::ImageEntry;
-using ImageEntry = VideoCommon::Shader::ImageEntry;
-
-constexpr u32 DESCRIPTOR_SET = 0;
-
-class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
-public:
- explicit constexpr ConstBufferEntry(const ConstBuffer& entry_, u32 index_)
- : ConstBuffer{entry_}, index{index_} {}
-
- constexpr u32 GetIndex() const {
- return index;
- }
-
-private:
- u32 index{};
-};
-
-struct GlobalBufferEntry {
- u32 cbuf_index{};
- u32 cbuf_offset{};
- bool is_written{};
-};
-
-struct ShaderEntries {
- u32 NumBindings() const {
- return static_cast<u32>(const_buffers.size() + global_buffers.size() +
- uniform_texels.size() + samplers.size() + storage_texels.size() +
- images.size());
- }
-
- std::vector<ConstBufferEntry> const_buffers;
- std::vector<GlobalBufferEntry> global_buffers;
- std::vector<UniformTexelEntry> uniform_texels;
- std::vector<SamplerEntry> samplers;
- std::vector<StorageTexelEntry> storage_texels;
- std::vector<ImageEntry> images;
- std::set<u32> attributes;
- std::array<bool, Maxwell::NumClipDistances> clip_distances{};
- std::size_t shader_length{};
- u32 enabled_uniform_buffers{};
- bool uses_warps{};
-};
-
-struct Specialization final {
- u32 base_binding{};
-
- // Compute specific
- std::array<u32, 3> workgroup_size{};
- u32 shared_memory_size{};
-
- // Graphics specific
- std::optional<float> point_size;
- std::bitset<Maxwell::NumVertexAttributes> enabled_attributes;
- std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{};
- bool ndc_minus_one_to_one{};
- bool early_fragment_tests{};
- float alpha_test_ref{};
- Maxwell::ComparisonOp alpha_test_func{};
-};
-// Old gcc versions don't consider this trivially copyable.
-// static_assert(std::is_trivially_copyable_v<Specialization>);
-
-struct SPIRVShader {
- std::vector<u32> code;
- ShaderEntries entries;
-};
-
-ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir);
-
-std::vector<u32> Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
- Tegra::Engines::ShaderType stage,
- const VideoCommon::Shader::Registry& registry,
- const Specialization& specialization);
-
-} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
index 7a1232497..5d5329abf 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -12,6 +12,7 @@
#include "common/assert.h"
#include "common/bit_util.h"
#include "common/common_types.h"
+#include "common/literals.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/vulkan_common/vulkan_device.h"
@@ -19,12 +20,15 @@
namespace Vulkan {
namespace {
+
+using namespace Common::Literals;
+
// Maximum potential alignment of a Vulkan buffer
constexpr VkDeviceSize MAX_ALIGNMENT = 256;
// Maximum size to put elements in the stream buffer
-constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8 * 1024 * 1024;
+constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8_MiB;
// Stream buffer size in bytes
-constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
+constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128_MiB;
constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS;
constexpr VkMemoryPropertyFlags HOST_FLAGS =
@@ -57,11 +61,15 @@ std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& p
return std::nullopt;
}
-u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask) {
- // Try to find a DEVICE_LOCAL_BIT type, Nvidia and AMD have a dedicated heap for this
- std::optional<u32> type = FindMemoryTypeIndex(props, type_mask, STREAM_FLAGS);
- if (type) {
- return *type;
+u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask,
+ bool try_device_local) {
+ std::optional<u32> type;
+ if (try_device_local) {
+ // Try to find a DEVICE_LOCAL_BIT type, Nvidia and AMD have a dedicated heap for this
+ type = FindMemoryTypeIndex(props, type_mask, STREAM_FLAGS);
+ if (type) {
+ return *type;
+ }
}
// Otherwise try without the DEVICE_LOCAL_BIT
type = FindMemoryTypeIndex(props, type_mask, HOST_FLAGS);
@@ -87,7 +95,7 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem
.flags = 0,
.size = STREAM_BUFFER_SIZE,
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
- VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
+ VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
@@ -111,12 +119,21 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem
.buffer = *stream_buffer,
};
const auto memory_properties = device.GetPhysical().GetMemoryProperties();
- stream_memory = dev.AllocateMemory(VkMemoryAllocateInfo{
+ VkMemoryAllocateInfo stream_memory_info{
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.pNext = make_dedicated ? &dedicated_info : nullptr,
.allocationSize = requirements.size,
- .memoryTypeIndex = FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits),
- });
+ .memoryTypeIndex =
+ FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits, true),
+ };
+ stream_memory = dev.TryAllocateMemory(stream_memory_info);
+ if (!stream_memory) {
+ LOG_INFO(Render_Vulkan, "Dynamic memory allocation failed, trying with system memory");
+ stream_memory_info.memoryTypeIndex =
+ FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits, false);
+ stream_memory = dev.AllocateMemory(stream_memory_info);
+ }
+
if (device.HasDebuggingToolAttached()) {
stream_memory.SetObjectNameEXT("Stream Buffer Memory");
}
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
index 956f86845..e3b7dd61c 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
@@ -29,9 +29,10 @@ using Flags = Maxwell3D::DirtyState::Flags;
Flags MakeInvalidationFlags() {
static constexpr int INVALIDATION_FLAGS[]{
- Viewports, Scissors, DepthBias, BlendConstants, DepthBounds,
- StencilProperties, CullMode, DepthBoundsEnable, DepthTestEnable, DepthWriteEnable,
- DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, VertexBuffers,
+ Viewports, Scissors, DepthBias, BlendConstants, DepthBounds,
+ StencilProperties, LineWidth, CullMode, DepthBoundsEnable, DepthTestEnable,
+ DepthWriteEnable, DepthCompareOp, FrontFace, StencilOp, StencilTestEnable,
+ VertexBuffers, VertexInput,
};
Flags flags{};
for (const int flag : INVALIDATION_FLAGS) {
@@ -40,6 +41,12 @@ Flags MakeInvalidationFlags() {
for (int index = VertexBuffer0; index <= VertexBuffer31; ++index) {
flags[index] = true;
}
+ for (int index = VertexAttribute0; index <= VertexAttribute31; ++index) {
+ flags[index] = true;
+ }
+ for (int index = VertexBinding0; index <= VertexBinding31; ++index) {
+ flags[index] = true;
+ }
return flags;
}
@@ -79,6 +86,11 @@ void SetupDirtyStencilProperties(Tables& tables) {
table[OFF(stencil_back_func_mask)] = StencilProperties;
}
+void SetupDirtyLineWidth(Tables& tables) {
+ tables[0][OFF(line_width_smooth)] = LineWidth;
+ tables[0][OFF(line_width_aliased)] = LineWidth;
+}
+
void SetupDirtyCullMode(Tables& tables) {
auto& table = tables[0];
table[OFF(cull_face)] = CullMode;
@@ -134,31 +146,38 @@ void SetupDirtyBlending(Tables& tables) {
FillBlock(tables[0], OFF(independent_blend), NUM(independent_blend), Blending);
}
-void SetupDirtyInstanceDivisors(Tables& tables) {
- static constexpr size_t divisor_offset = 3;
- for (size_t index = 0; index < Regs::NumVertexArrays; ++index) {
- tables[0][OFF(instanced_arrays) + index] = InstanceDivisors;
- tables[0][OFF(vertex_array) + index * NUM(vertex_array[0]) + divisor_offset] =
- InstanceDivisors;
+void SetupDirtyViewportSwizzles(Tables& tables) {
+ static constexpr size_t swizzle_offset = 6;
+ for (size_t index = 0; index < Regs::NumViewports; ++index) {
+ tables[0][OFF(viewport_transform) + index * NUM(viewport_transform[0]) + swizzle_offset] =
+ ViewportSwizzles;
}
}
void SetupDirtyVertexAttributes(Tables& tables) {
- FillBlock(tables[0], OFF(vertex_attrib_format), NUM(vertex_attrib_format), VertexAttributes);
+ for (size_t i = 0; i < Regs::NumVertexAttributes; ++i) {
+ const size_t offset = OFF(vertex_attrib_format) + i * NUM(vertex_attrib_format[0]);
+ FillBlock(tables[0], offset, NUM(vertex_attrib_format[0]), VertexAttribute0 + i);
+ }
+ FillBlock(tables[1], OFF(vertex_attrib_format), Regs::NumVertexAttributes, VertexInput);
}
-void SetupDirtyViewportSwizzles(Tables& tables) {
- static constexpr size_t swizzle_offset = 6;
- for (size_t index = 0; index < Regs::NumViewports; ++index) {
- tables[0][OFF(viewport_transform) + index * NUM(viewport_transform[0]) + swizzle_offset] =
- ViewportSwizzles;
+void SetupDirtyVertexBindings(Tables& tables) {
+ // Do NOT include stride here, it's implicit in VertexBuffer
+ static constexpr size_t divisor_offset = 3;
+ for (size_t i = 0; i < Regs::NumVertexArrays; ++i) {
+ const u8 flag = static_cast<u8>(VertexBinding0 + i);
+ tables[0][OFF(instanced_arrays) + i] = VertexInput;
+ tables[1][OFF(instanced_arrays) + i] = flag;
+ tables[0][OFF(vertex_array) + i * NUM(vertex_array[0]) + divisor_offset] = VertexInput;
+ tables[1][OFF(vertex_array) + i * NUM(vertex_array[0]) + divisor_offset] = flag;
}
}
} // Anonymous namespace
StateTracker::StateTracker(Tegra::GPU& gpu)
: flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} {
- auto& tables = gpu.Maxwell3D().dirty.tables;
+ auto& tables{gpu.Maxwell3D().dirty.tables};
SetupDirtyFlags(tables);
SetupDirtyViewports(tables);
SetupDirtyScissors(tables);
@@ -166,6 +185,7 @@ StateTracker::StateTracker(Tegra::GPU& gpu)
SetupDirtyBlendConstants(tables);
SetupDirtyDepthBounds(tables);
SetupDirtyStencilProperties(tables);
+ SetupDirtyLineWidth(tables);
SetupDirtyCullMode(tables);
SetupDirtyDepthBoundsEnable(tables);
SetupDirtyDepthTestEnable(tables);
@@ -175,9 +195,9 @@ StateTracker::StateTracker(Tegra::GPU& gpu)
SetupDirtyStencilOp(tables);
SetupDirtyStencilTestEnable(tables);
SetupDirtyBlending(tables);
- SetupDirtyInstanceDivisors(tables);
- SetupDirtyVertexAttributes(tables);
SetupDirtyViewportSwizzles(tables);
+ SetupDirtyVertexAttributes(tables);
+ SetupDirtyVertexBindings(tables);
}
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h
index 84e918a71..5f78f6950 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.h
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.h
@@ -19,12 +19,19 @@ namespace Dirty {
enum : u8 {
First = VideoCommon::Dirty::LastCommonEntry,
+ VertexInput,
+ VertexAttribute0,
+ VertexAttribute31 = VertexAttribute0 + 31,
+ VertexBinding0,
+ VertexBinding31 = VertexBinding0 + 31,
+
Viewports,
Scissors,
DepthBias,
BlendConstants,
DepthBounds,
StencilProperties,
+ LineWidth,
CullMode,
DepthBoundsEnable,
@@ -36,11 +43,9 @@ enum : u8 {
StencilTestEnable,
Blending,
- InstanceDivisors,
- VertexAttributes,
ViewportSwizzles,
- Last
+ Last,
};
static_assert(Last <= std::numeric_limits<u8>::max());
@@ -89,6 +94,10 @@ public:
return Exchange(Dirty::StencilProperties, false);
}
+ bool TouchLineWidth() const {
+ return Exchange(Dirty::LineWidth, false);
+ }
+
bool TouchCullMode() {
return Exchange(Dirty::CullMode, false);
}
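
The new TouchLineWidth accessor follows the same Exchange pattern as the other Touch* helpers: report the dirty flag once, then clear it so a redundant dynamic-state command is skipped next time. A minimal sketch of that pattern, assuming the flags live in a plain std::bitset rather than Maxwell3D's real dirty-flag storage:

// Minimal sketch of the Touch/Exchange pattern used by the state tracker.
#include <bitset>
#include <cstdio>

enum : unsigned char { LineWidth = 7, CullMode = 8 };

std::bitset<32> flags;

bool Exchange(unsigned char id, bool new_value) {
    const bool old_value = flags.test(id); // read the current dirty state
    flags.set(id, new_value);              // and unconditionally overwrite it
    return old_value;
}

bool TouchLineWidth() {
    return Exchange(LineWidth, false);     // report once, then consider it clean
}

int main() {
    flags.set(LineWidth);
    const bool first = TouchLineWidth();
    const bool second = TouchLineWidth();
    std::printf("%d %d\n", first, second); // prints "1 0"
}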
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
deleted file mode 100644
index a09fe084e..000000000
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
+++ /dev/null
@@ -1,165 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <limits>
-#include <optional>
-#include <tuple>
-#include <vector>
-
-#include "common/alignment.h"
-#include "common/assert.h"
-#include "video_core/renderer_vulkan/vk_scheduler.h"
-#include "video_core/renderer_vulkan/vk_stream_buffer.h"
-#include "video_core/vulkan_common/vulkan_device.h"
-#include "video_core/vulkan_common/vulkan_wrapper.h"
-
-namespace Vulkan {
-
-namespace {
-
-constexpr VkBufferUsageFlags BUFFER_USAGE =
- VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
- VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
-
-constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
-constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
-
-constexpr u64 PREFERRED_STREAM_BUFFER_SIZE = 256 * 1024 * 1024;
-
-/// Find a memory type with the passed requirements
-std::optional<u32> FindMemoryType(const VkPhysicalDeviceMemoryProperties& properties,
- VkMemoryPropertyFlags wanted,
- u32 filter = std::numeric_limits<u32>::max()) {
- for (u32 i = 0; i < properties.memoryTypeCount; ++i) {
- const auto flags = properties.memoryTypes[i].propertyFlags;
- if ((flags & wanted) == wanted && (filter & (1U << i)) != 0) {
- return i;
- }
- }
- return std::nullopt;
-}
-
-/// Get the preferred host visible memory type.
-u32 GetMemoryType(const VkPhysicalDeviceMemoryProperties& properties,
- u32 filter = std::numeric_limits<u32>::max()) {
- // Prefer device local host visible allocations. Both AMD and Nvidia now provide one.
- // Otherwise search for a host visible allocation.
- static constexpr auto HOST_MEMORY =
- VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
- static constexpr auto DYNAMIC_MEMORY = HOST_MEMORY | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
-
- std::optional preferred_type = FindMemoryType(properties, DYNAMIC_MEMORY);
- if (!preferred_type) {
- preferred_type = FindMemoryType(properties, HOST_MEMORY);
- ASSERT_MSG(preferred_type, "No host visible and coherent memory type found");
- }
- return preferred_type.value_or(0);
-}
-
-} // Anonymous namespace
-
-VKStreamBuffer::VKStreamBuffer(const Device& device_, VKScheduler& scheduler_)
- : device{device_}, scheduler{scheduler_} {
- CreateBuffers();
- ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE);
- ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE);
-}
-
-VKStreamBuffer::~VKStreamBuffer() = default;
-
-std::pair<u8*, u64> VKStreamBuffer::Map(u64 size, u64 alignment) {
- ASSERT(size <= stream_buffer_size);
- mapped_size = size;
-
- if (alignment > 0) {
- offset = Common::AlignUp(offset, alignment);
- }
-
- WaitPendingOperations(offset);
-
- if (offset + size > stream_buffer_size) {
- // The buffer would overflow; save the number of used watches and reset the state.
- invalidation_mark = current_watch_cursor;
- current_watch_cursor = 0;
- offset = 0;
-
- // Swap watches and reset waiting cursors.
- std::swap(previous_watches, current_watches);
- wait_cursor = 0;
- wait_bound = 0;
-
- // Ensure that we don't wait for uncommitted fences.
- scheduler.Flush();
- }
-
- return std::make_pair(memory.Map(offset, size), offset);
-}
-
-void VKStreamBuffer::Unmap(u64 size) {
- ASSERT_MSG(size <= mapped_size, "Reserved size is too small");
-
- memory.Unmap();
-
- offset += size;
-
- if (current_watch_cursor + 1 >= current_watches.size()) {
- // Ensure that there are enough watches.
- ReserveWatches(current_watches, WATCHES_RESERVE_CHUNK);
- }
- auto& watch = current_watches[current_watch_cursor++];
- watch.upper_bound = offset;
- watch.tick = scheduler.CurrentTick();
-}
-
-void VKStreamBuffer::CreateBuffers() {
- const auto memory_properties = device.GetPhysical().GetMemoryProperties();
- const u32 preferred_type = GetMemoryType(memory_properties);
- const u32 preferred_heap = memory_properties.memoryTypes[preferred_type].heapIndex;
-
- // Subtract some bytes from the preferred heap size to avoid running out of memory.
- const VkDeviceSize heap_size = memory_properties.memoryHeaps[preferred_heap].size;
- // As per DXVK's example, using `heap_size / 2`
- const VkDeviceSize allocable_size = heap_size / 2;
- buffer = device.GetLogical().CreateBuffer({
- .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
- .pNext = nullptr,
- .flags = 0,
- .size = std::min(PREFERRED_STREAM_BUFFER_SIZE, allocable_size),
- .usage = BUFFER_USAGE,
- .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
- .queueFamilyIndexCount = 0,
- .pQueueFamilyIndices = nullptr,
- });
-
- const auto requirements = device.GetLogical().GetBufferMemoryRequirements(*buffer);
- const u32 required_flags = requirements.memoryTypeBits;
- stream_buffer_size = static_cast<u64>(requirements.size);
-
- memory = device.GetLogical().AllocateMemory({
- .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
- .pNext = nullptr,
- .allocationSize = requirements.size,
- .memoryTypeIndex = GetMemoryType(memory_properties, required_flags),
- });
- buffer.BindMemory(*memory, 0);
-}
-
-void VKStreamBuffer::ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size) {
- watches.resize(watches.size() + grow_size);
-}
-
-void VKStreamBuffer::WaitPendingOperations(u64 requested_upper_bound) {
- if (!invalidation_mark) {
- return;
- }
- while (requested_upper_bound < wait_bound && wait_cursor < *invalidation_mark) {
- auto& watch = previous_watches[wait_cursor];
- wait_bound = watch.upper_bound;
- scheduler.Wait(watch.tick);
- ++wait_cursor;
- }
-}
-
-} // namespace Vulkan
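
The deleted VKStreamBuffer implemented a single ring allocation whose cursor is aligned on Map and advanced on Unmap, wrapping to zero when a request would overflow. A rough sketch of just that offset arithmetic, under the assumption of a fixed buffer size and with the Vulkan objects and fence watches left out:

// Sketch of the ring-allocation arithmetic the deleted Map/Unmap pair performed
// (assumption: a fixed-size buffer; watches and scheduler flushes are omitted).
#include <cstdint>
#include <cstdio>

constexpr std::uint64_t STREAM_BUFFER_SIZE = 1 << 20;
std::uint64_t offset = 0;

std::uint64_t AlignUp(std::uint64_t value, std::uint64_t align) {
    return (value + align - 1) / align * align;
}

// Returns the offset of a freshly reserved region, wrapping to 0 on overflow.
std::uint64_t Map(std::uint64_t size, std::uint64_t alignment) {
    if (alignment > 0) {
        offset = AlignUp(offset, alignment);
    }
    if (offset + size > STREAM_BUFFER_SIZE) {
        offset = 0; // the real code also swapped watches and flushed the scheduler here
    }
    const std::uint64_t mapped_offset = offset;
    offset += size; // in the real class, Unmap() advanced the cursor by the written size
    return mapped_offset;
}

int main() {
    std::printf("%llu %llu\n",
                static_cast<unsigned long long>(Map(100, 256)),
                static_cast<unsigned long long>(Map(100, 256)));
}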
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h
deleted file mode 100644
index 2e9c8cb46..000000000
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.h
+++ /dev/null
@@ -1,76 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <optional>
-#include <utility>
-#include <vector>
-
-#include "common/common_types.h"
-#include "video_core/vulkan_common/vulkan_wrapper.h"
-
-namespace Vulkan {
-
-class Device;
-class VKFenceWatch;
-class VKScheduler;
-
-class VKStreamBuffer final {
-public:
- explicit VKStreamBuffer(const Device& device, VKScheduler& scheduler);
- ~VKStreamBuffer();
-
- /**
- * Reserves a region of memory from the stream buffer.
- * @param size Size to reserve.
- * @returns A pair of a raw memory pointer (with offset added), and the buffer offset
- */
- std::pair<u8*, u64> Map(u64 size, u64 alignment);
-
- /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
- void Unmap(u64 size);
-
- VkBuffer Handle() const noexcept {
- return *buffer;
- }
-
- u64 Address() const noexcept {
- return 0;
- }
-
-private:
- struct Watch {
- u64 tick{};
- u64 upper_bound{};
- };
-
- /// Creates Vulkan buffer handles, committing the required memory.
- void CreateBuffers();
-
- /// Increases the amount of watches available.
- void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size);
-
- void WaitPendingOperations(u64 requested_upper_bound);
-
- const Device& device; ///< Vulkan device manager.
- VKScheduler& scheduler; ///< Command scheduler.
-
- vk::Buffer buffer; ///< Mapped buffer.
- vk::DeviceMemory memory; ///< Memory allocation.
- u64 stream_buffer_size{}; ///< Stream buffer size.
-
- u64 offset{}; ///< Buffer iterator.
- u64 mapped_size{}; ///< Size reserved for the current copy.
-
- std::vector<Watch> current_watches; ///< Watches recorded in the current iteration.
- std::size_t current_watch_cursor{}; ///< Count of watches, reset on invalidation.
- std::optional<std::size_t> invalidation_mark; ///< Number of watches used in the previous cycle.
-
- std::vector<Watch> previous_watches; ///< Watches used in the previous iteration.
- std::size_t wait_cursor{}; ///< Last watch being waited for completion.
- u64 wait_bound{}; ///< Highest offset being watched for completion.
-};
-
-} // namespace Vulkan
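
The watch bookkeeping declared above is what made the wrap-around safe: every Unmap recorded the end offset and the scheduler tick of the write, and after a wrap the previous cycle's watches guarded the regions about to be reused. A simplified sketch of that idea, where integer ticks stand in for VKScheduler fences and WaitForTick is a hypothetical stand-in for VKScheduler::Wait:

// Sketch of the fence-watch idea behind the deleted stream buffer.
#include <cstdint>
#include <cstdio>
#include <vector>

struct Watch {
    std::uint64_t upper_bound = 0; // end offset of a region written in the previous cycle
    std::uint64_t tick = 0;        // scheduler tick that must complete before reuse
};

std::uint64_t completed_tick = 0;

void WaitForTick(std::uint64_t tick) {
    if (tick > completed_tick) {
        completed_tick = tick; // a real scheduler would block on the GPU here
    }
}

// After the ring wraps, wait on the watches covering the region about to be overwritten.
void WaitPendingOperations(const std::vector<Watch>& previous,
                           std::uint64_t requested_upper_bound) {
    for (const Watch& watch : previous) {
        if (watch.upper_bound <= requested_upper_bound) {
            WaitForTick(watch.tick);
        }
    }
}

int main() {
    const std::vector<Watch> previous{{256, 1}, {512, 2}, {1024, 3}};
    WaitPendingOperations(previous, 600);
    std::printf("waited up to tick %llu\n", static_cast<unsigned long long>(completed_tick));
}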
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp
index dfd5c65ba..d990eefba 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.cpp
+++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp
@@ -65,6 +65,9 @@ VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKSchedul
VKSwapchain::~VKSwapchain() = default;
void VKSwapchain::Create(u32 width, u32 height, bool srgb) {
+ is_outdated = false;
+ is_suboptimal = false;
+
const auto physical_device = device.GetPhysical();
const auto capabilities{physical_device.GetSurfaceCapabilitiesKHR(surface)};
if (capabilities.maxImageExtent.width == 0 || capabilities.maxImageExtent.height == 0) {
@@ -82,21 +85,31 @@ void VKSwapchain::Create(u32 width, u32 height, bool srgb) {
resource_ticks.resize(image_count);
}
-bool VKSwapchain::AcquireNextImage() {
- const VkResult result =
- device.GetLogical().AcquireNextImageKHR(*swapchain, std::numeric_limits<u64>::max(),
- *present_semaphores[frame_index], {}, &image_index);
-
+void VKSwapchain::AcquireNextImage() {
+ const VkResult result = device.GetLogical().AcquireNextImageKHR(
+ *swapchain, std::numeric_limits<u64>::max(), *present_semaphores[frame_index],
+ VK_NULL_HANDLE, &image_index);
+ switch (result) {
+ case VK_SUCCESS:
+ break;
+ case VK_SUBOPTIMAL_KHR:
+ is_suboptimal = true;
+ break;
+ case VK_ERROR_OUT_OF_DATE_KHR:
+ is_outdated = true;
+ break;
+ default:
+ LOG_ERROR(Render_Vulkan, "vkAcquireNextImageKHR returned {}", vk::ToString(result));
+ break;
+ }
scheduler.Wait(resource_ticks[image_index]);
- return result == VK_SUCCESS || result == VK_SUBOPTIMAL_KHR;
+ resource_ticks[image_index] = scheduler.CurrentTick();
}
-bool VKSwapchain::Present(VkSemaphore render_semaphore) {
+void VKSwapchain::Present(VkSemaphore render_semaphore) {
const VkSemaphore present_semaphore{*present_semaphores[frame_index]};
const std::array<VkSemaphore, 2> semaphores{present_semaphore, render_semaphore};
const auto present_queue{device.GetPresentQueue()};
- bool recreated = false;
-
const VkPresentInfoKHR present_info{
.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
.pNext = nullptr,
@@ -107,7 +120,6 @@ bool VKSwapchain::Present(VkSemaphore render_semaphore) {
.pImageIndices = &image_index,
.pResults = nullptr,
};
-
switch (const VkResult result = present_queue.Present(present_info)) {
case VK_SUCCESS:
break;
@@ -115,24 +127,16 @@ bool VKSwapchain::Present(VkSemaphore render_semaphore) {
LOG_DEBUG(Render_Vulkan, "Suboptimal swapchain");
break;
case VK_ERROR_OUT_OF_DATE_KHR:
- if (current_width > 0 && current_height > 0) {
- Create(current_width, current_height, current_srgb);
- recreated = true;
- }
+ is_outdated = true;
break;
default:
LOG_CRITICAL(Render_Vulkan, "Failed to present with error {}", vk::ToString(result));
break;
}
-
- resource_ticks[image_index] = scheduler.CurrentTick();
- frame_index = (frame_index + 1) % static_cast<u32>(image_count);
- return recreated;
-}
-
-bool VKSwapchain::HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const {
- // TODO(Rodrigo): Handle framebuffer pixel format changes
- return framebuffer.width != current_width || framebuffer.height != current_height;
+ ++frame_index;
+ if (frame_index >= image_count) {
+ frame_index = 0;
+ }
}
void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width,
@@ -148,7 +152,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,
if (capabilities.maxImageCount > 0 && requested_image_count > capabilities.maxImageCount) {
requested_image_count = capabilities.maxImageCount;
}
-
VkSwapchainCreateInfoKHR swapchain_ci{
.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR,
.pNext = nullptr,
@@ -169,7 +172,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,
.clipped = VK_FALSE,
.oldSwapchain = nullptr,
};
-
const u32 graphics_family{device.GetGraphicsFamily()};
const u32 present_family{device.GetPresentFamily()};
const std::array<u32, 2> queue_indices{graphics_family, present_family};
@@ -178,7 +180,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,
swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size());
swapchain_ci.pQueueFamilyIndices = queue_indices.data();
}
-
// Request the size again to reduce the possibility of a TOCTOU race condition.
const auto updated_capabilities = physical_device.GetSurfaceCapabilitiesKHR(surface);
swapchain_ci.imageExtent = ChooseSwapExtent(updated_capabilities, width, height);
@@ -186,8 +187,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,
swapchain = device.GetLogical().CreateSwapchainKHR(swapchain_ci);
extent = swapchain_ci.imageExtent;
- current_width = extent.width;
- current_height = extent.height;
current_srgb = srgb;
images = swapchain.GetImages();
@@ -197,8 +196,8 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,
void VKSwapchain::CreateSemaphores() {
present_semaphores.resize(image_count);
- std::generate(present_semaphores.begin(), present_semaphores.end(),
- [this] { return device.GetLogical().CreateSemaphore(); });
+ std::ranges::generate(present_semaphores,
+ [this] { return device.GetLogical().CreateSemaphore(); });
}
void VKSwapchain::CreateImageViews() {
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h
index adc8d27cf..35c2cdc14 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.h
+++ b/src/video_core/renderer_vulkan/vk_swapchain.h
@@ -28,14 +28,25 @@ public:
void Create(u32 width, u32 height, bool srgb);
/// Acquires the next image in the swapchain, waits as needed.
- bool AcquireNextImage();
+ void AcquireNextImage();
- /// Presents the rendered image to the swapchain. Returns true when the swapchain had to be
- /// recreated. Takes responsibility for the ownership of the fence.
- bool Present(VkSemaphore render_semaphore);
+ /// Presents the rendered image to the swapchain.
+ void Present(VkSemaphore render_semaphore);
- /// Returns true when the framebuffer layout has changed.
- bool HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const;
+ /// Returns true when the color space has changed.
+ bool HasColorSpaceChanged(bool is_srgb) const {
+ return current_srgb != is_srgb;
+ }
+
+ /// Returns true when the swapchain is outdated.
+ bool IsOutDated() const {
+ return is_outdated;
+ }
+
+ /// Returns true when the swapchain is suboptimal.
+ bool IsSubOptimal() const {
+ return is_suboptimal;
+ }
VkExtent2D GetSize() const {
return extent;
@@ -61,10 +72,6 @@ public:
return image_format;
}
- bool GetSrgbState() const {
- return current_srgb;
- }
-
private:
void CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height,
bool srgb);
@@ -92,9 +99,9 @@ private:
VkFormat image_format{};
VkExtent2D extent{};
- u32 current_width{};
- u32 current_height{};
bool current_srgb{};
+ bool is_outdated{};
+ bool is_suboptimal{};
};
} // namespace Vulkan
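
With AcquireNextImage and Present no longer returning a recreation flag, the caller is expected to poll IsOutDated, IsSubOptimal, and HasColorSpaceChanged and recreate the swapchain itself. A hedged sketch of such a caller loop; Swapchain here is a simplified stand-in, not yuzu's renderer loop:

// Stand-in caller loop reacting to the new swapchain state queries.
#include <cstdio>

struct Swapchain {
    bool outdated = true; // pretend the first frame starts with a stale swapchain
    bool suboptimal = false;
    bool srgb = false;

    void Create(unsigned width, unsigned height, bool is_srgb) {
        outdated = false;
        suboptimal = false;
        srgb = is_srgb;
        std::printf("recreated %ux%u srgb=%d\n", width, height, is_srgb);
    }
    bool IsOutDated() const { return outdated; }
    bool IsSubOptimal() const { return suboptimal; }
    bool HasColorSpaceChanged(bool is_srgb) const { return srgb != is_srgb; }

    void AcquireNextImage() {}
    void Present() {}
};

int main() {
    Swapchain swapchain;
    const bool wants_srgb = true;
    for (int frame = 0; frame < 3; ++frame) {
        // Recreate before acquiring whenever any of the "stale" conditions hold.
        if (swapchain.IsOutDated() || swapchain.IsSubOptimal() ||
            swapchain.HasColorSpaceChanged(wants_srgb)) {
            swapchain.Create(1280, 720, wants_srgb);
        }
        swapchain.AcquireNextImage();
        swapchain.Present();
    }
}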
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 52860b4cf..8e029bcb3 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -15,6 +15,7 @@
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_compute_pass.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
+#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
@@ -34,19 +35,6 @@ using VideoCommon::SubresourceRange;
using VideoCore::Surface::IsPixelFormatASTC;
namespace {
-
-constexpr std::array ATTACHMENT_REFERENCES{
- VkAttachmentReference{0, VK_IMAGE_LAYOUT_GENERAL},
- VkAttachmentReference{1, VK_IMAGE_LAYOUT_GENERAL},
- VkAttachmentReference{2, VK_IMAGE_LAYOUT_GENERAL},
- VkAttachmentReference{3, VK_IMAGE_LAYOUT_GENERAL},
- VkAttachmentReference{4, VK_IMAGE_LAYOUT_GENERAL},
- VkAttachmentReference{5, VK_IMAGE_LAYOUT_GENERAL},
- VkAttachmentReference{6, VK_IMAGE_LAYOUT_GENERAL},
- VkAttachmentReference{7, VK_IMAGE_LAYOUT_GENERAL},
- VkAttachmentReference{8, VK_IMAGE_LAYOUT_GENERAL},
-};
-
constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
if (color == std::array<float, 4>{0, 0, 0, 0}) {
return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
@@ -174,25 +162,6 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
return device.GetLogical().CreateImage(MakeImageCreateInfo(device, info));
}
-[[nodiscard]] vk::Buffer MakeBuffer(const Device& device, const ImageInfo& info) {
- if (info.type != ImageType::Buffer) {
- return vk::Buffer{};
- }
- const size_t bytes_per_block = VideoCore::Surface::BytesPerBlock(info.format);
- return device.GetLogical().CreateBuffer(VkBufferCreateInfo{
- .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
- .pNext = nullptr,
- .flags = 0,
- .size = info.size.width * bytes_per_block,
- .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
- VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
- VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT,
- .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
- .queueFamilyIndexCount = 0,
- .pQueueFamilyIndices = nullptr,
- });
-}
-
[[nodiscard]] VkImageAspectFlags ImageAspectMask(PixelFormat format) {
switch (VideoCore::Surface::GetFormatType(format)) {
case VideoCore::Surface::SurfaceType::ColorTexture:
@@ -226,23 +195,6 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
}
}
-[[nodiscard]] VkAttachmentDescription AttachmentDescription(const Device& device,
- const ImageView* image_view) {
- using MaxwellToVK::SurfaceFormat;
- const PixelFormat pixel_format = image_view->format;
- return VkAttachmentDescription{
- .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT,
- .format = SurfaceFormat(device, FormatType::Optimal, true, pixel_format).format,
- .samples = image_view->Samples(),
- .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
- .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
- .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
- .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
- .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
- .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
- };
-}
-
[[nodiscard]] VkComponentSwizzle ComponentSwizzle(SwizzleSource swizzle) {
switch (swizzle) {
case SwizzleSource::Zero:
@@ -263,6 +215,30 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
return VK_COMPONENT_SWIZZLE_ZERO;
}
+[[nodiscard]] VkImageViewType ImageViewType(Shader::TextureType type) {
+ switch (type) {
+ case Shader::TextureType::Color1D:
+ return VK_IMAGE_VIEW_TYPE_1D;
+ case Shader::TextureType::Color2D:
+ return VK_IMAGE_VIEW_TYPE_2D;
+ case Shader::TextureType::ColorCube:
+ return VK_IMAGE_VIEW_TYPE_CUBE;
+ case Shader::TextureType::Color3D:
+ return VK_IMAGE_VIEW_TYPE_3D;
+ case Shader::TextureType::ColorArray1D:
+ return VK_IMAGE_VIEW_TYPE_1D_ARRAY;
+ case Shader::TextureType::ColorArray2D:
+ return VK_IMAGE_VIEW_TYPE_2D_ARRAY;
+ case Shader::TextureType::ColorArrayCube:
+ return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY;
+ case Shader::TextureType::Buffer:
+ UNREACHABLE_MSG("Texture buffers can't be image views");
+ return VK_IMAGE_VIEW_TYPE_1D;
+ }
+ UNREACHABLE_MSG("Invalid image view type={}", type);
+ return VK_IMAGE_VIEW_TYPE_2D;
+}
+
[[nodiscard]] VkImageViewType ImageViewType(VideoCommon::ImageViewType type) {
switch (type) {
case VideoCommon::ImageViewType::e1D:
@@ -280,7 +256,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
case VideoCommon::ImageViewType::CubeArray:
return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY;
case VideoCommon::ImageViewType::Rect:
- LOG_WARNING(Render_Vulkan, "Unnormalized image view type not supported");
+ UNIMPLEMENTED_MSG("Rect image view");
return VK_IMAGE_VIEW_TYPE_2D;
case VideoCommon::ImageViewType::Buffer:
UNREACHABLE_MSG("Texture buffers can't be image views");
@@ -327,7 +303,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
};
}
-[[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies(
+[[maybe_unused]] [[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies(
std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) {
std::vector<VkBufferCopy> result(copies.size());
std::ranges::transform(
@@ -587,6 +563,28 @@ struct RangedBarrierRange {
}
};
+[[nodiscard]] VkFormat Format(Shader::ImageFormat format) {
+ switch (format) {
+ case Shader::ImageFormat::Typeless:
+ break;
+ case Shader::ImageFormat::R8_SINT:
+ return VK_FORMAT_R8_SINT;
+ case Shader::ImageFormat::R8_UINT:
+ return VK_FORMAT_R8_UINT;
+ case Shader::ImageFormat::R16_UINT:
+ return VK_FORMAT_R16_UINT;
+ case Shader::ImageFormat::R16_SINT:
+ return VK_FORMAT_R16_SINT;
+ case Shader::ImageFormat::R32_UINT:
+ return VK_FORMAT_R32_UINT;
+ case Shader::ImageFormat::R32G32_UINT:
+ return VK_FORMAT_R32G32_UINT;
+ case Shader::ImageFormat::R32G32B32A32_UINT:
+ return VK_FORMAT_R32G32B32A32_UINT;
+ }
+ UNREACHABLE_MSG("Invalid image format={}", format);
+ return VK_FORMAT_R32_UINT;
+}
} // Anonymous namespace
void TextureCacheRuntime::Finish() {
@@ -608,7 +606,10 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst
const VkImageAspectFlags aspect_mask = ImageAspectMask(src.format);
const bool is_dst_msaa = dst.Samples() != VK_SAMPLE_COUNT_1_BIT;
const bool is_src_msaa = src.Samples() != VK_SAMPLE_COUNT_1_BIT;
- ASSERT(aspect_mask == ImageAspectMask(dst.format));
+ if (aspect_mask != ImageAspectMask(dst.format)) {
+ UNIMPLEMENTED_MSG("Incompatible blit from format {} to {}", src.format, dst.format);
+ return;
+ }
if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT && !is_src_msaa && !is_dst_msaa) {
blit_image_helper.BlitColor(dst_framebuffer, src, dst_region, src_region, filter,
operation);
@@ -622,7 +623,7 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst
return;
}
}
- ASSERT(src.ImageFormat() == dst.ImageFormat());
+ ASSERT(src.format == dst.format);
ASSERT(!(is_dst_msaa && !is_src_msaa));
ASSERT(operation == Fermi2D::Operation::SrcCopy);
@@ -762,7 +763,7 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
dst_range.AddLayers(copy.dstSubresource);
src_range.AddLayers(copy.srcSubresource);
}
- const std::array read_barriers{
+ const std::array pre_barriers{
VkImageMemoryBarrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
@@ -771,7 +772,7 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
- .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = src_image,
@@ -792,42 +793,56 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
.subresourceRange = dst_range.SubresourceRange(aspect_mask),
},
};
- const VkImageMemoryBarrier write_barrier{
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .pNext = nullptr,
- .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
- .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT |
- VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
- VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
- VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
- VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
- VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
- .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
- .newLayout = VK_IMAGE_LAYOUT_GENERAL,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = dst_image,
- .subresourceRange = dst_range.SubresourceRange(aspect_mask),
+ const std::array post_barriers{
+ VkImageMemoryBarrier{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+ .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = src_image,
+ .subresourceRange = src_range.SubresourceRange(aspect_mask),
+ },
+ VkImageMemoryBarrier{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT |
+ VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
+ VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
+ VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
+ VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
+ VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
+ .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+ .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = dst_image,
+ .subresourceRange = dst_range.SubresourceRange(aspect_mask),
+ },
};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
- 0, {}, {}, read_barriers);
- cmdbuf.CopyImage(src_image, VK_IMAGE_LAYOUT_GENERAL, dst_image,
+ 0, {}, {}, pre_barriers);
+ cmdbuf.CopyImage(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_image,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, vk_copies);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
- 0, write_barrier);
+ 0, {}, {}, post_barriers);
});
}
+u64 TextureCacheRuntime::GetDeviceLocalMemory() const {
+ return device.GetDeviceLocalMemory();
+}
+
Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_addr_,
VAddr cpu_addr_)
: VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime.scheduler},
- image(MakeImage(runtime.device, info)), buffer(MakeBuffer(runtime.device, info)),
+ image(MakeImage(runtime.device, info)),
+ commit(runtime.memory_allocator.Commit(image, MemoryUsage::DeviceLocal)),
aspect_mask(ImageAspectMask(info.format)) {
- if (image) {
- commit = runtime.memory_allocator.Commit(image, MemoryUsage::DeviceLocal);
- } else {
- commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
- }
if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) {
if (Settings::values.accelerate_astc.GetValue()) {
flags |= VideoCommon::ImageFlagBits::AcceleratedUpload;
@@ -836,11 +851,7 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_
}
}
if (runtime.device.HasDebuggingToolAttached()) {
- if (image) {
- image.SetObjectNameEXT(VideoCommon::Name(*this).c_str());
- } else {
- buffer.SetObjectNameEXT(VideoCommon::Name(*this).c_str());
- }
+ image.SetObjectNameEXT(VideoCommon::Name(*this).c_str());
}
static constexpr VkImageViewUsageCreateInfo storage_image_view_usage_create_info{
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
@@ -876,6 +887,8 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_
}
}
+Image::~Image() = default;
+
void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
// TODO: Move this to another API
scheduler->RequestOutsideRenderPassOperationContext();
@@ -890,21 +903,9 @@ void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImag
});
}
-void Image::UploadMemory(const StagingBufferRef& map,
- std::span<const VideoCommon::BufferCopy> copies) {
- // TODO: Move this to another API
- scheduler->RequestOutsideRenderPassOperationContext();
- std::vector vk_copies = TransformBufferCopies(copies, map.offset);
- const VkBuffer src_buffer = map.buffer;
- const VkBuffer dst_buffer = *buffer;
- scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) {
- // TODO: Barriers
- cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies);
- });
-}
-
void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask);
+ scheduler->RequestOutsideRenderPassOperationContext();
scheduler->Record([buffer = map.buffer, image = *image, aspect_mask = aspect_mask,
vk_copies](vk::CommandBuffer cmdbuf) {
const VkImageMemoryBarrier read_barrier{
@@ -960,8 +961,9 @@ void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferIm
ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info,
ImageId image_id_, Image& image)
: VideoCommon::ImageViewBase{info, image.info, image_id_}, device{&runtime.device},
- image_handle{image.Handle()}, image_format{image.info.format}, samples{ConvertSampleCount(
- image.info.num_samples)} {
+ image_handle{image.Handle()}, samples{ConvertSampleCount(image.info.num_samples)} {
+ using Shader::TextureType;
+
const VkImageAspectFlags aspect_mask = ImageViewAspectMask(info);
std::array<SwizzleSource, 4> swizzle{
SwizzleSource::R,
@@ -999,57 +1001,54 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
},
.subresourceRange = MakeSubresourceRange(aspect_mask, info.range),
};
- const auto create = [&](VideoCommon::ImageViewType view_type, std::optional<u32> num_layers) {
+ const auto create = [&](TextureType tex_type, std::optional<u32> num_layers) {
VkImageViewCreateInfo ci{create_info};
- ci.viewType = ImageViewType(view_type);
+ ci.viewType = ImageViewType(tex_type);
if (num_layers) {
ci.subresourceRange.layerCount = *num_layers;
}
vk::ImageView handle = device->GetLogical().CreateImageView(ci);
if (device->HasDebuggingToolAttached()) {
- handle.SetObjectNameEXT(VideoCommon::Name(*this, view_type).c_str());
+ handle.SetObjectNameEXT(VideoCommon::Name(*this).c_str());
}
- image_views[static_cast<size_t>(view_type)] = std::move(handle);
+ image_views[static_cast<size_t>(tex_type)] = std::move(handle);
};
switch (info.type) {
case VideoCommon::ImageViewType::e1D:
case VideoCommon::ImageViewType::e1DArray:
- create(VideoCommon::ImageViewType::e1D, 1);
- create(VideoCommon::ImageViewType::e1DArray, std::nullopt);
- render_target = Handle(VideoCommon::ImageViewType::e1DArray);
+ create(TextureType::Color1D, 1);
+ create(TextureType::ColorArray1D, std::nullopt);
+ render_target = Handle(TextureType::ColorArray1D);
break;
case VideoCommon::ImageViewType::e2D:
case VideoCommon::ImageViewType::e2DArray:
- create(VideoCommon::ImageViewType::e2D, 1);
- create(VideoCommon::ImageViewType::e2DArray, std::nullopt);
- render_target = Handle(VideoCommon::ImageViewType::e2DArray);
+ create(TextureType::Color2D, 1);
+ create(TextureType::ColorArray2D, std::nullopt);
+ render_target = Handle(Shader::TextureType::ColorArray2D);
break;
case VideoCommon::ImageViewType::e3D:
- create(VideoCommon::ImageViewType::e3D, std::nullopt);
- render_target = Handle(VideoCommon::ImageViewType::e3D);
+ create(TextureType::Color3D, std::nullopt);
+ render_target = Handle(Shader::TextureType::Color3D);
break;
case VideoCommon::ImageViewType::Cube:
case VideoCommon::ImageViewType::CubeArray:
- create(VideoCommon::ImageViewType::Cube, 6);
- create(VideoCommon::ImageViewType::CubeArray, std::nullopt);
+ create(TextureType::ColorCube, 6);
+ create(TextureType::ColorArrayCube, std::nullopt);
break;
case VideoCommon::ImageViewType::Rect:
UNIMPLEMENTED();
break;
case VideoCommon::ImageViewType::Buffer:
- buffer_view = device->GetLogical().CreateBufferView(VkBufferViewCreateInfo{
- .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
- .pNext = nullptr,
- .flags = 0,
- .buffer = image.Buffer(),
- .format = format_info.format,
- .offset = 0, // TODO: Redesign buffer cache to support this
- .range = image.guest_size_bytes,
- });
+ UNREACHABLE();
break;
}
}
+ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
+ const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_)
+ : VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_},
+ buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {}
+
ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams& params)
: VideoCommon::ImageViewBase{params} {}
@@ -1057,7 +1056,8 @@ VkImageView ImageView::DepthView() {
if (depth_view) {
return *depth_view;
}
- depth_view = MakeDepthStencilView(VK_IMAGE_ASPECT_DEPTH_BIT);
+ const auto& info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format);
+ depth_view = MakeView(info.format, VK_IMAGE_ASPECT_DEPTH_BIT);
return *depth_view;
}
@@ -1065,18 +1065,38 @@ VkImageView ImageView::StencilView() {
if (stencil_view) {
return *stencil_view;
}
- stencil_view = MakeDepthStencilView(VK_IMAGE_ASPECT_STENCIL_BIT);
+ const auto& info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format);
+ stencil_view = MakeView(info.format, VK_IMAGE_ASPECT_STENCIL_BIT);
return *stencil_view;
}
-vk::ImageView ImageView::MakeDepthStencilView(VkImageAspectFlags aspect_mask) {
+VkImageView ImageView::StorageView(Shader::TextureType texture_type,
+ Shader::ImageFormat image_format) {
+ if (image_format == Shader::ImageFormat::Typeless) {
+ return Handle(texture_type);
+ }
+ const bool is_signed{image_format == Shader::ImageFormat::R8_SINT ||
+ image_format == Shader::ImageFormat::R16_SINT};
+ if (!storage_views) {
+ storage_views = std::make_unique<StorageViews>();
+ }
+ auto& views{is_signed ? storage_views->signeds : storage_views->unsigneds};
+ auto& view{views[static_cast<size_t>(texture_type)]};
+ if (view) {
+ return *view;
+ }
+ view = MakeView(Format(image_format), VK_IMAGE_ASPECT_COLOR_BIT);
+ return *view;
+}
+
+vk::ImageView ImageView::MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask) {
return device->GetLogical().CreateImageView({
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.image = image_handle,
.viewType = ImageViewType(type),
- .format = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format).format,
+ .format = vk_format,
.components{
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
@@ -1140,7 +1160,6 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t
Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers,
ImageView* depth_buffer, const VideoCommon::RenderTargets& key) {
- std::vector<VkAttachmentDescription> descriptions;
std::vector<VkImageView> attachments;
RenderPassKey renderpass_key{};
s32 num_layers = 1;
@@ -1151,7 +1170,6 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
renderpass_key.color_formats[index] = PixelFormat::Invalid;
continue;
}
- descriptions.push_back(AttachmentDescription(runtime.device, color_buffer));
attachments.push_back(color_buffer->RenderTarget());
renderpass_key.color_formats[index] = color_buffer->format;
num_layers = std::max(num_layers, color_buffer->range.extent.layers);
@@ -1161,10 +1179,7 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
++num_images;
}
const size_t num_colors = attachments.size();
- const VkAttachmentReference* depth_attachment =
- depth_buffer ? &ATTACHMENT_REFERENCES[num_colors] : nullptr;
if (depth_buffer) {
- descriptions.push_back(AttachmentDescription(runtime.device, depth_buffer));
attachments.push_back(depth_buffer->RenderTarget());
renderpass_key.depth_format = depth_buffer->format;
num_layers = std::max(num_layers, depth_buffer->range.extent.layers);
@@ -1177,40 +1192,14 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
}
renderpass_key.samples = samples;
- const auto& device = runtime.device.GetLogical();
- const auto [cache_pair, is_new] = runtime.renderpass_cache.try_emplace(renderpass_key);
- if (is_new) {
- const VkSubpassDescription subpass{
- .flags = 0,
- .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
- .inputAttachmentCount = 0,
- .pInputAttachments = nullptr,
- .colorAttachmentCount = static_cast<u32>(num_colors),
- .pColorAttachments = num_colors != 0 ? ATTACHMENT_REFERENCES.data() : nullptr,
- .pResolveAttachments = nullptr,
- .pDepthStencilAttachment = depth_attachment,
- .preserveAttachmentCount = 0,
- .pPreserveAttachments = nullptr,
- };
- cache_pair->second = device.CreateRenderPass(VkRenderPassCreateInfo{
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
- .pNext = nullptr,
- .flags = 0,
- .attachmentCount = static_cast<u32>(descriptions.size()),
- .pAttachments = descriptions.data(),
- .subpassCount = 1,
- .pSubpasses = &subpass,
- .dependencyCount = 0,
- .pDependencies = nullptr,
- });
- }
- renderpass = *cache_pair->second;
+ renderpass = runtime.render_pass_cache.Get(renderpass_key);
+
render_area = VkExtent2D{
.width = key.size.width,
.height = key.size.height,
};
num_color_buffers = static_cast<u32>(num_colors);
- framebuffer = device.CreateFramebuffer(VkFramebufferCreateInfo{
+ framebuffer = runtime.device.GetLogical().CreateFramebuffer({
.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 4a57d378b..0b73d55f8 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -7,6 +7,7 @@
#include <compare>
#include <span>
+#include "shader_recompiler/shader_info.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/texture_cache/texture_cache.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
@@ -26,35 +27,10 @@ class Device;
class Image;
class ImageView;
class Framebuffer;
+class RenderPassCache;
class StagingBufferPool;
class VKScheduler;
-struct RenderPassKey {
- constexpr auto operator<=>(const RenderPassKey&) const noexcept = default;
-
- std::array<PixelFormat, NUM_RT> color_formats;
- PixelFormat depth_format;
- VkSampleCountFlagBits samples;
-};
-
-} // namespace Vulkan
-
-namespace std {
-template <>
-struct hash<Vulkan::RenderPassKey> {
- [[nodiscard]] constexpr size_t operator()(const Vulkan::RenderPassKey& key) const noexcept {
- size_t value = static_cast<size_t>(key.depth_format) << 48;
- value ^= static_cast<size_t>(key.samples) << 52;
- for (size_t i = 0; i < key.color_formats.size(); ++i) {
- value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6);
- }
- return value;
- }
-};
-} // namespace std
-
-namespace Vulkan {
-
struct TextureCacheRuntime {
const Device& device;
VKScheduler& scheduler;
@@ -62,13 +38,13 @@ struct TextureCacheRuntime {
StagingBufferPool& staging_buffer_pool;
BlitImageHelper& blit_image_helper;
ASTCDecoderPass& astc_decoder_pass;
- std::unordered_map<RenderPassKey, vk::RenderPass> renderpass_cache{};
+ RenderPassCache& render_pass_cache;
void Finish();
- [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
+ StagingBufferRef UploadStagingBuffer(size_t size);
- [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size);
+ StagingBufferRef DownloadStagingBuffer(size_t size);
void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
const Region2D& dst_region, const Region2D& src_region,
@@ -79,7 +55,7 @@ struct TextureCacheRuntime {
void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view);
- [[nodiscard]] bool CanAccelerateImageUpload(Image&) const noexcept {
+ bool CanAccelerateImageUpload(Image&) const noexcept {
return false;
}
@@ -97,6 +73,8 @@ struct TextureCacheRuntime {
// All known Vulkan drivers can natively handle BGR textures
return true;
}
+
+ u64 GetDeviceLocalMemory() const;
};
class Image : public VideoCommon::ImageBase {
@@ -104,11 +82,17 @@ public:
explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
VAddr cpu_addr);
+ ~Image();
+
+ Image(const Image&) = delete;
+ Image& operator=(const Image&) = delete;
+
+ Image(Image&&) = default;
+ Image& operator=(Image&&) = default;
+
void UploadMemory(const StagingBufferRef& map,
std::span<const VideoCommon::BufferImageCopy> copies);
- void UploadMemory(const StagingBufferRef& map, std::span<const VideoCommon::BufferCopy> copies);
-
void DownloadMemory(const StagingBufferRef& map,
std::span<const VideoCommon::BufferImageCopy> copies);
@@ -116,10 +100,6 @@ public:
return *image;
}
- [[nodiscard]] VkBuffer Buffer() const noexcept {
- return *buffer;
- }
-
[[nodiscard]] VkImageAspectFlags AspectMask() const noexcept {
return aspect_mask;
}
@@ -136,7 +116,6 @@ public:
private:
VKScheduler* scheduler;
vk::Image image;
- vk::Buffer buffer;
MemoryCommit commit;
vk::ImageView image_view;
std::vector<vk::ImageView> storage_image_views;
@@ -147,18 +126,19 @@ private:
class ImageView : public VideoCommon::ImageViewBase {
public:
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
+ explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&,
+ const VideoCommon::ImageViewInfo&, GPUVAddr);
explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&);
[[nodiscard]] VkImageView DepthView();
[[nodiscard]] VkImageView StencilView();
- [[nodiscard]] VkImageView Handle(VideoCommon::ImageViewType query_type) const noexcept {
- return *image_views[static_cast<size_t>(query_type)];
- }
+ [[nodiscard]] VkImageView StorageView(Shader::TextureType texture_type,
+ Shader::ImageFormat image_format);
- [[nodiscard]] VkBufferView BufferView() const noexcept {
- return *buffer_view;
+ [[nodiscard]] VkImageView Handle(Shader::TextureType texture_type) const noexcept {
+ return *image_views[static_cast<size_t>(texture_type)];
}
[[nodiscard]] VkImage ImageHandle() const noexcept {
@@ -169,26 +149,36 @@ public:
return render_target;
}
- [[nodiscard]] PixelFormat ImageFormat() const noexcept {
- return image_format;
- }
-
[[nodiscard]] VkSampleCountFlagBits Samples() const noexcept {
return samples;
}
+ [[nodiscard]] GPUVAddr GpuAddr() const noexcept {
+ return gpu_addr;
+ }
+
+ [[nodiscard]] u32 BufferSize() const noexcept {
+ return buffer_size;
+ }
+
private:
- [[nodiscard]] vk::ImageView MakeDepthStencilView(VkImageAspectFlags aspect_mask);
+ struct StorageViews {
+ std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> signeds;
+ std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> unsigneds;
+ };
+
+ [[nodiscard]] vk::ImageView MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask);
const Device* device = nullptr;
- std::array<vk::ImageView, VideoCommon::NUM_IMAGE_VIEW_TYPES> image_views;
+ std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> image_views;
+ std::unique_ptr<StorageViews> storage_views;
vk::ImageView depth_view;
vk::ImageView stencil_view;
- vk::BufferView buffer_view;
VkImage image_handle = VK_NULL_HANDLE;
VkImageView render_target = VK_NULL_HANDLE;
- PixelFormat image_format = PixelFormat::Invalid;
VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT;
+ GPUVAddr gpu_addr = 0;
+ u32 buffer_size = 0;
};
class ImageAlloc : public VideoCommon::ImageAllocBase {};
@@ -257,6 +247,7 @@ struct TextureCacheParams {
static constexpr bool ENABLE_VALIDATION = true;
static constexpr bool FRAMEBUFFER_BLITS = false;
static constexpr bool HAS_EMULATED_COPIES = false;
+ static constexpr bool HAS_DEVICE_MEMORY_INFO = true;
using Runtime = Vulkan::TextureCacheRuntime;
using Image = Vulkan::Image;
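
HAS_DEVICE_MEMORY_INFO together with TextureCacheRuntime::GetDeviceLocalMemory lets the common texture cache size its working set from the reported device-local heap instead of a hard-coded value. A small illustrative sketch of how such a budget could be derived; the 2 GiB fallback and the 4/5 ratio are assumptions for illustration, not the values the cache actually uses:

// Hedged sketch: turning a device-local memory query into an eviction budget.
#include <cstdint>
#include <cstdio>

std::uint64_t GetDeviceLocalMemory() {
    return 6ULL * 1024 * 1024 * 1024; // stand-in for the Vulkan runtime query
}

std::uint64_t ComputeBudget(bool has_device_memory_info) {
    constexpr std::uint64_t fallback = 2ULL * 1024 * 1024 * 1024;
    const std::uint64_t total = has_device_memory_info ? GetDeviceLocalMemory() : fallback;
    return total / 5 * 4; // keep a safety margin below the reported total
}

int main() {
    std::printf("budget: %llu MiB\n",
                static_cast<unsigned long long>(ComputeBudget(true) >> 20));
}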
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
index dc45fdcb1..0df3a7fe9 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
@@ -15,7 +15,9 @@
namespace Vulkan {
VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const Device& device_, VKScheduler& scheduler_)
- : device{device_}, scheduler{scheduler_} {}
+ : device{device_}, scheduler{scheduler_} {
+ payload_cursor = payload.data();
+}
VKUpdateDescriptorQueue::~VKUpdateDescriptorQueue() = default;
@@ -36,13 +38,4 @@ void VKUpdateDescriptorQueue::Acquire() {
upload_start = payload_cursor;
}
-void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template,
- VkDescriptorSet set) {
- const void* const data = upload_start;
- const vk::Device* const logical = &device.GetLogical();
- scheduler.Record([data, logical, set, update_template](vk::CommandBuffer) {
- logical->UpdateDescriptorSet(set, update_template, data);
- });
-}
-
} // namespace Vulkan
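
Removing VKUpdateDescriptorQueue::Send shifts responsibility to the caller: it now reads UpdateData() and applies the descriptor template update itself instead of having the queue record it on the scheduler. A simplified sketch of that hand-off, with stand-in types in place of the Vulkan handles:

// Sketch of the descriptor payload hand-off after removing Send().
#include <array>
#include <cstdio>

struct DescriptorUpdateEntry { int value = 0; };

struct UpdateDescriptorQueue {
    std::array<DescriptorUpdateEntry, 8> payload{};
    DescriptorUpdateEntry* payload_cursor = payload.data();
    const DescriptorUpdateEntry* upload_start = payload.data();

    void Acquire() { upload_start = payload_cursor; }           // mark this draw's data
    const DescriptorUpdateEntry* UpdateData() const { return upload_start; }
    void Add(int value) { *(payload_cursor++) = DescriptorUpdateEntry{value}; }
};

// Stand-in for a template-based descriptor update consuming the packed entries.
void UpdateDescriptorSet(const DescriptorUpdateEntry* data) {
    std::printf("first entry: %d\n", data->value);
}

int main() {
    UpdateDescriptorQueue queue;
    queue.Acquire();                          // where this draw's descriptor data begins
    queue.Add(42);                            // e.g. AddSampledImage(...)
    UpdateDescriptorSet(queue.UpdateData());  // the caller applies the update directly
}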
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h
index d35e77c44..d7de4c490 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.h
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h
@@ -39,7 +39,9 @@ public:
void Acquire();
- void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set);
+ const DescriptorUpdateEntry* UpdateData() const noexcept {
+ return upload_start;
+ }
void AddSampledImage(VkImageView image_view, VkSampler sampler) {
*(payload_cursor++) = VkDescriptorImageInfo{
diff --git a/src/video_core/shader/ast.cpp b/src/video_core/shader/ast.cpp
deleted file mode 100644
index db11144c7..000000000
--- a/src/video_core/shader/ast.cpp
+++ /dev/null
@@ -1,752 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <string>
-#include <string_view>
-
-#include <fmt/format.h>
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/shader/ast.h"
-#include "video_core/shader/expr.h"
-
-namespace VideoCommon::Shader {
-
-ASTZipper::ASTZipper() = default;
-
-void ASTZipper::Init(const ASTNode new_first, const ASTNode parent) {
- ASSERT(new_first->manager == nullptr);
- first = new_first;
- last = new_first;
-
- ASTNode current = first;
- while (current) {
- current->manager = this;
- current->parent = parent;
- last = current;
- current = current->next;
- }
-}
-
-void ASTZipper::PushBack(const ASTNode new_node) {
- ASSERT(new_node->manager == nullptr);
- new_node->previous = last;
- if (last) {
- last->next = new_node;
- }
- new_node->next.reset();
- last = new_node;
- if (!first) {
- first = new_node;
- }
- new_node->manager = this;
-}
-
-void ASTZipper::PushFront(const ASTNode new_node) {
- ASSERT(new_node->manager == nullptr);
- new_node->previous.reset();
- new_node->next = first;
- if (first) {
- first->previous = new_node;
- }
- if (last == first) {
- last = new_node;
- }
- first = new_node;
- new_node->manager = this;
-}
-
-void ASTZipper::InsertAfter(const ASTNode new_node, const ASTNode at_node) {
- ASSERT(new_node->manager == nullptr);
- if (!at_node) {
- PushFront(new_node);
- return;
- }
- const ASTNode next = at_node->next;
- if (next) {
- next->previous = new_node;
- }
- new_node->previous = at_node;
- if (at_node == last) {
- last = new_node;
- }
- new_node->next = next;
- at_node->next = new_node;
- new_node->manager = this;
-}
-
-void ASTZipper::InsertBefore(const ASTNode new_node, const ASTNode at_node) {
- ASSERT(new_node->manager == nullptr);
- if (!at_node) {
- PushBack(new_node);
- return;
- }
- const ASTNode previous = at_node->previous;
- if (previous) {
- previous->next = new_node;
- }
- new_node->next = at_node;
- if (at_node == first) {
- first = new_node;
- }
- new_node->previous = previous;
- at_node->previous = new_node;
- new_node->manager = this;
-}
-
-void ASTZipper::DetachTail(ASTNode node) {
- ASSERT(node->manager == this);
- if (node == first) {
- first.reset();
- last.reset();
- return;
- }
-
- last = node->previous;
- last->next.reset();
- node->previous.reset();
-
- ASTNode current = std::move(node);
- while (current) {
- current->manager = nullptr;
- current->parent.reset();
- current = current->next;
- }
-}
-
-void ASTZipper::DetachSegment(const ASTNode start, const ASTNode end) {
- ASSERT(start->manager == this && end->manager == this);
- if (start == end) {
- DetachSingle(start);
- return;
- }
- const ASTNode prev = start->previous;
- const ASTNode post = end->next;
- if (!prev) {
- first = post;
- } else {
- prev->next = post;
- }
- if (!post) {
- last = prev;
- } else {
- post->previous = prev;
- }
- start->previous.reset();
- end->next.reset();
- ASTNode current = start;
- bool found = false;
- while (current) {
- current->manager = nullptr;
- current->parent.reset();
- found |= current == end;
- current = current->next;
- }
- ASSERT(found);
-}
-
-void ASTZipper::DetachSingle(const ASTNode node) {
- ASSERT(node->manager == this);
- const ASTNode prev = node->previous;
- const ASTNode post = node->next;
- node->previous.reset();
- node->next.reset();
- if (!prev) {
- first = post;
- } else {
- prev->next = post;
- }
- if (!post) {
- last = prev;
- } else {
- post->previous = prev;
- }
-
- node->manager = nullptr;
- node->parent.reset();
-}
-
-void ASTZipper::Remove(const ASTNode node) {
- ASSERT(node->manager == this);
- const ASTNode next = node->next;
- const ASTNode previous = node->previous;
- if (previous) {
- previous->next = next;
- }
- if (next) {
- next->previous = previous;
- }
- node->parent.reset();
- node->manager = nullptr;
- if (node == last) {
- last = previous;
- }
- if (node == first) {
- first = next;
- }
-}
-
-class ExprPrinter final {
-public:
- void operator()(const ExprAnd& expr) {
- inner += "( ";
- std::visit(*this, *expr.operand1);
- inner += " && ";
- std::visit(*this, *expr.operand2);
- inner += ')';
- }
-
- void operator()(const ExprOr& expr) {
- inner += "( ";
- std::visit(*this, *expr.operand1);
- inner += " || ";
- std::visit(*this, *expr.operand2);
- inner += ')';
- }
-
- void operator()(const ExprNot& expr) {
- inner += "!";
- std::visit(*this, *expr.operand1);
- }
-
- void operator()(const ExprPredicate& expr) {
- inner += fmt::format("P{}", expr.predicate);
- }
-
- void operator()(const ExprCondCode& expr) {
- inner += fmt::format("CC{}", expr.cc);
- }
-
- void operator()(const ExprVar& expr) {
- inner += fmt::format("V{}", expr.var_index);
- }
-
- void operator()(const ExprBoolean& expr) {
- inner += expr.value ? "true" : "false";
- }
-
- void operator()(const ExprGprEqual& expr) {
- inner += fmt::format("(gpr_{} == {})", expr.gpr, expr.value);
- }
-
- const std::string& GetResult() const {
- return inner;
- }
-
-private:
- std::string inner;
-};
-
-class ASTPrinter {
-public:
- void operator()(const ASTProgram& ast) {
- scope++;
- inner += "program {\n";
- ASTNode current = ast.nodes.GetFirst();
- while (current) {
- Visit(current);
- current = current->GetNext();
- }
- inner += "}\n";
- scope--;
- }
-
- void operator()(const ASTIfThen& ast) {
- ExprPrinter expr_parser{};
- std::visit(expr_parser, *ast.condition);
- inner += fmt::format("{}if ({}) {{\n", Indent(), expr_parser.GetResult());
- scope++;
- ASTNode current = ast.nodes.GetFirst();
- while (current) {
- Visit(current);
- current = current->GetNext();
- }
- scope--;
- inner += fmt::format("{}}}\n", Indent());
- }
-
- void operator()(const ASTIfElse& ast) {
- inner += Indent();
- inner += "else {\n";
-
- scope++;
- ASTNode current = ast.nodes.GetFirst();
- while (current) {
- Visit(current);
- current = current->GetNext();
- }
- scope--;
-
- inner += Indent();
- inner += "}\n";
- }
-
- void operator()(const ASTBlockEncoded& ast) {
- inner += fmt::format("{}Block({}, {});\n", Indent(), ast.start, ast.end);
- }
-
- void operator()([[maybe_unused]] const ASTBlockDecoded& ast) {
- inner += Indent();
- inner += "Block;\n";
- }
-
- void operator()(const ASTVarSet& ast) {
- ExprPrinter expr_parser{};
- std::visit(expr_parser, *ast.condition);
- inner += fmt::format("{}V{} := {};\n", Indent(), ast.index, expr_parser.GetResult());
- }
-
- void operator()(const ASTLabel& ast) {
- inner += fmt::format("Label_{}:\n", ast.index);
- }
-
- void operator()(const ASTGoto& ast) {
- ExprPrinter expr_parser{};
- std::visit(expr_parser, *ast.condition);
- inner +=
- fmt::format("{}({}) -> goto Label_{};\n", Indent(), expr_parser.GetResult(), ast.label);
- }
-
- void operator()(const ASTDoWhile& ast) {
- ExprPrinter expr_parser{};
- std::visit(expr_parser, *ast.condition);
- inner += fmt::format("{}do {{\n", Indent());
- scope++;
- ASTNode current = ast.nodes.GetFirst();
- while (current) {
- Visit(current);
- current = current->GetNext();
- }
- scope--;
- inner += fmt::format("{}}} while ({});\n", Indent(), expr_parser.GetResult());
- }
-
- void operator()(const ASTReturn& ast) {
- ExprPrinter expr_parser{};
- std::visit(expr_parser, *ast.condition);
- inner += fmt::format("{}({}) -> {};\n", Indent(), expr_parser.GetResult(),
- ast.kills ? "discard" : "exit");
- }
-
- void operator()(const ASTBreak& ast) {
- ExprPrinter expr_parser{};
- std::visit(expr_parser, *ast.condition);
- inner += fmt::format("{}({}) -> break;\n", Indent(), expr_parser.GetResult());
- }
-
- void Visit(const ASTNode& node) {
- std::visit(*this, *node->GetInnerData());
- }
-
- const std::string& GetResult() const {
- return inner;
- }
-
-private:
- std::string_view Indent() {
- if (space_segment_scope == scope) {
- return space_segment;
- }
-
- // Ensure that we don't exceed our view.
- ASSERT(scope * 2 < spaces.size());
-
- space_segment = spaces.substr(0, scope * 2);
- space_segment_scope = scope;
- return space_segment;
- }
-
- std::string inner{};
- std::string_view space_segment;
-
- u32 scope{};
- u32 space_segment_scope{};
-
- static constexpr std::string_view spaces{" "};
-};
-
-std::string ASTManager::Print() const {
- ASTPrinter printer{};
- printer.Visit(main_node);
- return printer.GetResult();
-}
-
-ASTManager::ASTManager(bool do_full_decompile, bool disable_else_derivation_)
- : full_decompile{do_full_decompile}, disable_else_derivation{disable_else_derivation_} {}
-
-ASTManager::~ASTManager() {
- Clear();
-}
-
-void ASTManager::Init() {
- main_node = ASTBase::Make<ASTProgram>(ASTNode{});
- program = std::get_if<ASTProgram>(main_node->GetInnerData());
- false_condition = MakeExpr<ExprBoolean>(false);
-}
-
-void ASTManager::DeclareLabel(u32 address) {
- const auto pair = labels_map.emplace(address, labels_count);
- if (pair.second) {
- labels_count++;
- labels.resize(labels_count);
- }
-}
-
-void ASTManager::InsertLabel(u32 address) {
- const u32 index = labels_map[address];
- const ASTNode label = ASTBase::Make<ASTLabel>(main_node, index);
- labels[index] = label;
- program->nodes.PushBack(label);
-}
-
-void ASTManager::InsertGoto(Expr condition, u32 address) {
- const u32 index = labels_map[address];
- const ASTNode goto_node = ASTBase::Make<ASTGoto>(main_node, std::move(condition), index);
- gotos.push_back(goto_node);
- program->nodes.PushBack(goto_node);
-}
-
-void ASTManager::InsertBlock(u32 start_address, u32 end_address) {
- ASTNode block = ASTBase::Make<ASTBlockEncoded>(main_node, start_address, end_address);
- program->nodes.PushBack(std::move(block));
-}
-
-void ASTManager::InsertReturn(Expr condition, bool kills) {
- ASTNode node = ASTBase::Make<ASTReturn>(main_node, std::move(condition), kills);
- program->nodes.PushBack(std::move(node));
-}
-
-// The decompile algorithm is based on
-// "Taming control flow: A structured approach to eliminating goto statements"
-// by AM Erosa, LJ Hendren 1994. In general, the idea is to get gotos to be
-// on the same structured level as the label which they jump to. This is done
-// through outward/inward movements and lifting. Once they are at the same
-// level, you can enclose them in an "if" structure or a "do-while" structure.
-void ASTManager::Decompile() {
- auto it = gotos.begin();
- while (it != gotos.end()) {
- const ASTNode goto_node = *it;
- const auto label_index = goto_node->GetGotoLabel();
- if (!label_index) {
- return;
- }
- const ASTNode label = labels[*label_index];
- if (!full_decompile) {
- // We only decompile backward jumps
- if (!IsBackwardsJump(goto_node, label)) {
- it++;
- continue;
- }
- }
- if (IndirectlyRelated(goto_node, label)) {
- while (!DirectlyRelated(goto_node, label)) {
- MoveOutward(goto_node);
- }
- }
- if (DirectlyRelated(goto_node, label)) {
- u32 goto_level = goto_node->GetLevel();
- const u32 label_level = label->GetLevel();
- while (label_level < goto_level) {
- MoveOutward(goto_node);
- goto_level--;
- }
- // TODO(Blinkhawk): Implement Lifting and Inward Movements
- }
- if (label->GetParent() == goto_node->GetParent()) {
- bool is_loop = false;
- ASTNode current = goto_node->GetPrevious();
- while (current) {
- if (current == label) {
- is_loop = true;
- break;
- }
- current = current->GetPrevious();
- }
-
- if (is_loop) {
- EncloseDoWhile(goto_node, label);
- } else {
- EncloseIfThen(goto_node, label);
- }
- it = gotos.erase(it);
- continue;
- }
- it++;
- }
- if (full_decompile) {
- for (const ASTNode& label : labels) {
- auto& manager = label->GetManager();
- manager.Remove(label);
- }
- labels.clear();
- } else {
- auto label_it = labels.begin();
- while (label_it != labels.end()) {
- bool can_remove = true;
- ASTNode label = *label_it;
- for (const ASTNode& goto_node : gotos) {
- const auto label_index = goto_node->GetGotoLabel();
- if (!label_index) {
- return;
- }
- ASTNode& glabel = labels[*label_index];
- if (glabel == label) {
- can_remove = false;
- break;
- }
- }
- if (can_remove) {
- label->MarkLabelUnused();
- }
- // Advance to the next label so the scan terminates.
- ++label_it;
- }
- }
-}
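// Editor's note: an illustrative (editor-added) before/after of what the pass above does to a
// single backward goto once the goto and its label share a parent; the syntax only loosely
// mirrors ASTPrinter output.
//
//     Before:                          After EncloseDoWhile():
//         label_0:                         do {
//         Block(0x20, 0x5F);                   Block(0x20, 0x5F);
//         goto (P0) label_0;               } while (P0);
//
// A forward goto over a range is instead wrapped by EncloseIfThen() in "if (!P0) { ... }", and a
// goto that is not yet directly related to its label is first pushed up with MoveOutward(),
// which introduces a synthetic variable plus a break (in loops) or a guarding if (in branches).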
-
-bool ASTManager::IsBackwardsJump(ASTNode goto_node, ASTNode label_node) const {
- u32 goto_level = goto_node->GetLevel();
- u32 label_level = label_node->GetLevel();
- while (goto_level > label_level) {
- goto_level--;
- goto_node = goto_node->GetParent();
- }
- while (label_level > goto_level) {
- label_level--;
- label_node = label_node->GetParent();
- }
- while (goto_node->GetParent() != label_node->GetParent()) {
- goto_node = goto_node->GetParent();
- label_node = label_node->GetParent();
- }
- ASTNode current = goto_node->GetPrevious();
- while (current) {
- if (current == label_node) {
- return true;
- }
- current = current->GetPrevious();
- }
- return false;
-}
-
-bool ASTManager::IndirectlyRelated(const ASTNode& first, const ASTNode& second) const {
- return !(first->GetParent() == second->GetParent() || DirectlyRelated(first, second));
-}
-
-bool ASTManager::DirectlyRelated(const ASTNode& first, const ASTNode& second) const {
- if (first->GetParent() == second->GetParent()) {
- return false;
- }
- const u32 first_level = first->GetLevel();
- const u32 second_level = second->GetLevel();
- u32 min_level;
- u32 max_level;
- ASTNode max;
- ASTNode min;
- if (first_level > second_level) {
- min_level = second_level;
- min = second;
- max_level = first_level;
- max = first;
- } else {
- min_level = first_level;
- min = first;
- max_level = second_level;
- max = second;
- }
-
- while (max_level > min_level) {
- max_level--;
- max = max->GetParent();
- }
-
- return min->GetParent() == max->GetParent();
-}
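// Editor's note: a hypothetical example of the relationship queries above. Given the tree
//
//     Program
//     ├── IfThen A ── Goto g      (level 2)
//     └── DoWhile B ── Label l    (level 2)
//
// g and l have different parents and levelling the deeper node does not make the parents match,
// so DirectlyRelated(g, l) is false and IndirectlyRelated(g, l) is true. After one MoveOutward(g),
// g hangs directly from Program (level 1); walking l up one level reaches B, whose parent is also
// Program, so DirectlyRelated(g, l) becomes true.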
-
-void ASTManager::ShowCurrentState(std::string_view state) const {
- LOG_CRITICAL(HW_GPU, "\nState {}:\n\n{}\n", state, Print());
- SanityCheck();
-}
-
-void ASTManager::SanityCheck() const {
- for (const auto& label : labels) {
- if (!label->GetParent()) {
- LOG_CRITICAL(HW_GPU, "Sanity Check Failed");
- }
- }
-}
-
-void ASTManager::EncloseDoWhile(ASTNode goto_node, ASTNode label) {
- ASTZipper& zipper = goto_node->GetManager();
- const ASTNode loop_start = label->GetNext();
- if (loop_start == goto_node) {
- zipper.Remove(goto_node);
- return;
- }
- const ASTNode parent = label->GetParent();
- const Expr condition = goto_node->GetGotoCondition();
- zipper.DetachSegment(loop_start, goto_node);
- const ASTNode do_while_node = ASTBase::Make<ASTDoWhile>(parent, condition);
- ASTZipper* sub_zipper = do_while_node->GetSubNodes();
- sub_zipper->Init(loop_start, do_while_node);
- zipper.InsertAfter(do_while_node, label);
- sub_zipper->Remove(goto_node);
-}
-
-void ASTManager::EncloseIfThen(ASTNode goto_node, ASTNode label) {
- ASTZipper& zipper = goto_node->GetManager();
- const ASTNode if_end = label->GetPrevious();
- if (if_end == goto_node) {
- zipper.Remove(goto_node);
- return;
- }
- const ASTNode prev = goto_node->GetPrevious();
- const Expr condition = goto_node->GetGotoCondition();
- bool do_else = false;
- if (!disable_else_derivation && prev->IsIfThen()) {
- const Expr if_condition = prev->GetIfCondition();
- do_else = ExprAreEqual(if_condition, condition);
- }
- const ASTNode parent = label->GetParent();
- zipper.DetachSegment(goto_node, if_end);
- ASTNode if_node;
- if (do_else) {
- if_node = ASTBase::Make<ASTIfElse>(parent);
- } else {
- Expr neg_condition = MakeExprNot(condition);
- if_node = ASTBase::Make<ASTIfThen>(parent, neg_condition);
- }
- ASTZipper* sub_zipper = if_node->GetSubNodes();
- sub_zipper->Init(goto_node, if_node);
- zipper.InsertAfter(if_node, prev);
- sub_zipper->Remove(goto_node);
-}
-
-void ASTManager::MoveOutward(ASTNode goto_node) {
- ASTZipper& zipper = goto_node->GetManager();
- const ASTNode parent = goto_node->GetParent();
- ASTZipper& zipper2 = parent->GetManager();
- const ASTNode grandpa = parent->GetParent();
- const bool is_loop = parent->IsLoop();
- const bool is_else = parent->IsIfElse();
- const bool is_if = parent->IsIfThen();
-
- const ASTNode prev = goto_node->GetPrevious();
- const ASTNode post = goto_node->GetNext();
-
- const Expr condition = goto_node->GetGotoCondition();
- zipper.DetachSingle(goto_node);
- if (is_loop) {
- const u32 var_index = NewVariable();
- const Expr var_condition = MakeExpr<ExprVar>(var_index);
- const ASTNode var_node = ASTBase::Make<ASTVarSet>(parent, var_index, condition);
- const ASTNode var_node_init = ASTBase::Make<ASTVarSet>(parent, var_index, false_condition);
- zipper2.InsertBefore(var_node_init, parent);
- zipper.InsertAfter(var_node, prev);
- goto_node->SetGotoCondition(var_condition);
- const ASTNode break_node = ASTBase::Make<ASTBreak>(parent, var_condition);
- zipper.InsertAfter(break_node, var_node);
- } else if (is_if || is_else) {
- const u32 var_index = NewVariable();
- const Expr var_condition = MakeExpr<ExprVar>(var_index);
- const ASTNode var_node = ASTBase::Make<ASTVarSet>(parent, var_index, condition);
- const ASTNode var_node_init = ASTBase::Make<ASTVarSet>(parent, var_index, false_condition);
- if (is_if) {
- zipper2.InsertBefore(var_node_init, parent);
- } else {
- zipper2.InsertBefore(var_node_init, parent->GetPrevious());
- }
- zipper.InsertAfter(var_node, prev);
- goto_node->SetGotoCondition(var_condition);
- if (post) {
- zipper.DetachTail(post);
- const ASTNode if_node = ASTBase::Make<ASTIfThen>(parent, MakeExprNot(var_condition));
- ASTZipper* sub_zipper = if_node->GetSubNodes();
- sub_zipper->Init(post, if_node);
- zipper.InsertAfter(if_node, var_node);
- }
- } else {
- UNREACHABLE();
- }
- const ASTNode next = parent->GetNext();
- if (is_if && next && next->IsIfElse()) {
- zipper2.InsertAfter(goto_node, next);
- goto_node->SetParent(grandpa);
- return;
- }
- zipper2.InsertAfter(goto_node, parent);
- goto_node->SetParent(grandpa);
-}
-
-class ASTClearer {
-public:
- ASTClearer() = default;
-
- void operator()(const ASTProgram& ast) {
- ASTNode current = ast.nodes.GetFirst();
- while (current) {
- Visit(current);
- current = current->GetNext();
- }
- }
-
- void operator()(const ASTIfThen& ast) {
- ASTNode current = ast.nodes.GetFirst();
- while (current) {
- Visit(current);
- current = current->GetNext();
- }
- }
-
- void operator()(const ASTIfElse& ast) {
- ASTNode current = ast.nodes.GetFirst();
- while (current) {
- Visit(current);
- current = current->GetNext();
- }
- }
-
- void operator()([[maybe_unused]] const ASTBlockEncoded& ast) {}
-
- void operator()(ASTBlockDecoded& ast) {
- ast.nodes.clear();
- }
-
- void operator()([[maybe_unused]] const ASTVarSet& ast) {}
-
- void operator()([[maybe_unused]] const ASTLabel& ast) {}
-
- void operator()([[maybe_unused]] const ASTGoto& ast) {}
-
- void operator()(const ASTDoWhile& ast) {
- ASTNode current = ast.nodes.GetFirst();
- while (current) {
- Visit(current);
- current = current->GetNext();
- }
- }
-
- void operator()([[maybe_unused]] const ASTReturn& ast) {}
-
- void operator()([[maybe_unused]] const ASTBreak& ast) {}
-
- void Visit(const ASTNode& node) {
- std::visit(*this, *node->GetInnerData());
- node->Clear();
- }
-};
-
-void ASTManager::Clear() {
- if (!main_node) {
- return;
- }
- ASTClearer clearer{};
- clearer.Visit(main_node);
- main_node.reset();
- program = nullptr;
- labels_map.clear();
- labels.clear();
- gotos.clear();
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/ast.h b/src/video_core/shader/ast.h
deleted file mode 100644
index dc49b369e..000000000
--- a/src/video_core/shader/ast.h
+++ /dev/null
@@ -1,398 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <functional>
-#include <list>
-#include <memory>
-#include <optional>
-#include <string>
-#include <unordered_map>
-#include <vector>
-
-#include "video_core/shader/expr.h"
-#include "video_core/shader/node.h"
-
-namespace VideoCommon::Shader {
-
-class ASTBase;
-class ASTBlockDecoded;
-class ASTBlockEncoded;
-class ASTBreak;
-class ASTDoWhile;
-class ASTGoto;
-class ASTIfElse;
-class ASTIfThen;
-class ASTLabel;
-class ASTProgram;
-class ASTReturn;
-class ASTVarSet;
-
-using ASTData = std::variant<ASTProgram, ASTIfThen, ASTIfElse, ASTBlockEncoded, ASTBlockDecoded,
- ASTVarSet, ASTGoto, ASTLabel, ASTDoWhile, ASTReturn, ASTBreak>;
-
-using ASTNode = std::shared_ptr<ASTBase>;
-
-enum class ASTZipperType : u32 {
- Program,
- IfThen,
- IfElse,
- Loop,
-};
-
-class ASTZipper final {
-public:
- explicit ASTZipper();
-
- void Init(ASTNode first, ASTNode parent);
-
- ASTNode GetFirst() const {
- return first;
- }
-
- ASTNode GetLast() const {
- return last;
- }
-
- void PushBack(ASTNode new_node);
- void PushFront(ASTNode new_node);
- void InsertAfter(ASTNode new_node, ASTNode at_node);
- void InsertBefore(ASTNode new_node, ASTNode at_node);
- void DetachTail(ASTNode node);
- void DetachSingle(ASTNode node);
- void DetachSegment(ASTNode start, ASTNode end);
- void Remove(ASTNode node);
-
- ASTNode first;
- ASTNode last;
-};
-
-class ASTProgram {
-public:
- ASTZipper nodes{};
-};
-
-class ASTIfThen {
-public:
- explicit ASTIfThen(Expr condition_) : condition{std::move(condition_)} {}
- Expr condition;
- ASTZipper nodes{};
-};
-
-class ASTIfElse {
-public:
- ASTZipper nodes{};
-};
-
-class ASTBlockEncoded {
-public:
- explicit ASTBlockEncoded(u32 start_, u32 end_) : start{start_}, end{end_} {}
- u32 start;
- u32 end;
-};
-
-class ASTBlockDecoded {
-public:
- explicit ASTBlockDecoded(NodeBlock&& new_nodes_) : nodes(std::move(new_nodes_)) {}
- NodeBlock nodes;
-};
-
-class ASTVarSet {
-public:
- explicit ASTVarSet(u32 index_, Expr condition_)
- : index{index_}, condition{std::move(condition_)} {}
-
- u32 index;
- Expr condition;
-};
-
-class ASTLabel {
-public:
- explicit ASTLabel(u32 index_) : index{index_} {}
- u32 index;
- bool unused{};
-};
-
-class ASTGoto {
-public:
- explicit ASTGoto(Expr condition_, u32 label_)
- : condition{std::move(condition_)}, label{label_} {}
-
- Expr condition;
- u32 label;
-};
-
-class ASTDoWhile {
-public:
- explicit ASTDoWhile(Expr condition_) : condition{std::move(condition_)} {}
- Expr condition;
- ASTZipper nodes{};
-};
-
-class ASTReturn {
-public:
- explicit ASTReturn(Expr condition_, bool kills_)
- : condition{std::move(condition_)}, kills{kills_} {}
-
- Expr condition;
- bool kills;
-};
-
-class ASTBreak {
-public:
- explicit ASTBreak(Expr condition_) : condition{std::move(condition_)} {}
- Expr condition;
-};
-
-class ASTBase {
-public:
- explicit ASTBase(ASTNode parent_, ASTData data_)
- : data{std::move(data_)}, parent{std::move(parent_)} {}
-
- template <class U, class... Args>
- static ASTNode Make(ASTNode parent, Args&&... args) {
- return std::make_shared<ASTBase>(std::move(parent),
- ASTData(U(std::forward<Args>(args)...)));
- }
-
- void SetParent(ASTNode new_parent) {
- parent = std::move(new_parent);
- }
-
- ASTNode& GetParent() {
- return parent;
- }
-
- const ASTNode& GetParent() const {
- return parent;
- }
-
- u32 GetLevel() const {
- u32 level = 0;
- auto next_parent = parent;
- while (next_parent) {
- next_parent = next_parent->GetParent();
- level++;
- }
- return level;
- }
-
- ASTData* GetInnerData() {
- return &data;
- }
-
- const ASTData* GetInnerData() const {
- return &data;
- }
-
- ASTNode GetNext() const {
- return next;
- }
-
- ASTNode GetPrevious() const {
- return previous;
- }
-
- ASTZipper& GetManager() {
- return *manager;
- }
-
- const ASTZipper& GetManager() const {
- return *manager;
- }
-
- std::optional<u32> GetGotoLabel() const {
- if (const auto* inner = std::get_if<ASTGoto>(&data)) {
- return {inner->label};
- }
- return std::nullopt;
- }
-
- Expr GetGotoCondition() const {
- if (const auto* inner = std::get_if<ASTGoto>(&data)) {
- return inner->condition;
- }
- return nullptr;
- }
-
- void MarkLabelUnused() {
- if (auto* inner = std::get_if<ASTLabel>(&data)) {
- inner->unused = true;
- }
- }
-
- bool IsLabelUnused() const {
- if (const auto* inner = std::get_if<ASTLabel>(&data)) {
- return inner->unused;
- }
- return true;
- }
-
- std::optional<u32> GetLabelIndex() const {
- if (const auto* inner = std::get_if<ASTLabel>(&data)) {
- return {inner->index};
- }
- return std::nullopt;
- }
-
- Expr GetIfCondition() const {
- if (const auto* inner = std::get_if<ASTIfThen>(&data)) {
- return inner->condition;
- }
- return nullptr;
- }
-
- void SetGotoCondition(Expr new_condition) {
- if (auto* inner = std::get_if<ASTGoto>(&data)) {
- inner->condition = std::move(new_condition);
- }
- }
-
- bool IsIfThen() const {
- return std::holds_alternative<ASTIfThen>(data);
- }
-
- bool IsIfElse() const {
- return std::holds_alternative<ASTIfElse>(data);
- }
-
- bool IsBlockEncoded() const {
- return std::holds_alternative<ASTBlockEncoded>(data);
- }
-
- void TransformBlockEncoded(NodeBlock&& nodes) {
- data = ASTBlockDecoded(std::move(nodes));
- }
-
- bool IsLoop() const {
- return std::holds_alternative<ASTDoWhile>(data);
- }
-
- ASTZipper* GetSubNodes() {
- if (std::holds_alternative<ASTProgram>(data)) {
- return &std::get_if<ASTProgram>(&data)->nodes;
- }
- if (std::holds_alternative<ASTIfThen>(data)) {
- return &std::get_if<ASTIfThen>(&data)->nodes;
- }
- if (std::holds_alternative<ASTIfElse>(data)) {
- return &std::get_if<ASTIfElse>(&data)->nodes;
- }
- if (std::holds_alternative<ASTDoWhile>(data)) {
- return &std::get_if<ASTDoWhile>(&data)->nodes;
- }
- return nullptr;
- }
-
- void Clear() {
- next.reset();
- previous.reset();
- parent.reset();
- manager = nullptr;
- }
-
-private:
- friend class ASTZipper;
-
- ASTData data;
- ASTNode parent;
- ASTNode next;
- ASTNode previous;
- ASTZipper* manager{};
-};
-
-class ASTManager final {
-public:
- explicit ASTManager(bool do_full_decompile, bool disable_else_derivation_);
- ~ASTManager();
-
- ASTManager(const ASTManager& o) = delete;
- ASTManager& operator=(const ASTManager& other) = delete;
-
- ASTManager(ASTManager&& other) noexcept = default;
- ASTManager& operator=(ASTManager&& other) noexcept = default;
-
- void Init();
-
- void DeclareLabel(u32 address);
-
- void InsertLabel(u32 address);
-
- void InsertGoto(Expr condition, u32 address);
-
- void InsertBlock(u32 start_address, u32 end_address);
-
- void InsertReturn(Expr condition, bool kills);
-
- std::string Print() const;
-
- void Decompile();
-
- void ShowCurrentState(std::string_view state) const;
-
- void SanityCheck() const;
-
- void Clear();
-
- bool IsFullyDecompiled() const {
- if (full_decompile) {
- return gotos.empty();
- }
-
- for (ASTNode goto_node : gotos) {
- auto label_index = goto_node->GetGotoLabel();
- if (!label_index) {
- return false;
- }
- ASTNode glabel = labels[*label_index];
- if (IsBackwardsJump(goto_node, glabel)) {
- return false;
- }
- }
- return true;
- }
-
- ASTNode GetProgram() const {
- return main_node;
- }
-
- u32 GetVariables() const {
- return variables;
- }
-
- const std::vector<ASTNode>& GetLabels() const {
- return labels;
- }
-
-private:
- bool IsBackwardsJump(ASTNode goto_node, ASTNode label_node) const;
-
- bool IndirectlyRelated(const ASTNode& first, const ASTNode& second) const;
-
- bool DirectlyRelated(const ASTNode& first, const ASTNode& second) const;
-
- void EncloseDoWhile(ASTNode goto_node, ASTNode label);
-
- void EncloseIfThen(ASTNode goto_node, ASTNode label);
-
- void MoveOutward(ASTNode goto_node);
-
- u32 NewVariable() {
- return variables++;
- }
-
- bool full_decompile{};
- bool disable_else_derivation{};
- std::unordered_map<u32, u32> labels_map{};
- u32 labels_count{};
- std::vector<ASTNode> labels{};
- std::list<ASTNode> gotos{};
- u32 variables{};
- ASTProgram* program{};
- ASTNode main_node{};
- Expr false_condition{};
-};
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp
deleted file mode 100644
index 02adcf9c7..000000000
--- a/src/video_core/shader/async_shaders.cpp
+++ /dev/null
@@ -1,234 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <condition_variable>
-#include <mutex>
-#include <thread>
-#include <vector>
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/renderer_base.h"
-#include "video_core/renderer_opengl/gl_shader_cache.h"
-#include "video_core/shader/async_shaders.h"
-
-namespace VideoCommon::Shader {
-
-AsyncShaders::AsyncShaders(Core::Frontend::EmuWindow& emu_window_) : emu_window(emu_window_) {}
-
-AsyncShaders::~AsyncShaders() {
- KillWorkers();
-}
-
-void AsyncShaders::AllocateWorkers() {
- // Use at least one thread
- u32 num_workers = 1;
-
- // Deduce how many more threads we can use
- const u32 thread_count = std::thread::hardware_concurrency();
- if (thread_count >= 8) {
- // Add one worker once 8 hardware threads are available, plus one more for every 2 threads beyond 8
- num_workers += 1 + (thread_count - 8) / 2;
- }
-
- // If we already have the desired number of workers, there is nothing to do
- if (num_workers == worker_threads.size()) {
- return;
- }
-
- // If workers already exist, clear them
- if (!worker_threads.empty()) {
- FreeWorkers();
- }
-
- // Create workers
- for (std::size_t i = 0; i < num_workers; i++) {
- context_list.push_back(emu_window.CreateSharedContext());
- worker_threads.emplace_back(&AsyncShaders::ShaderCompilerThread, this,
- context_list[i].get());
- }
-}
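// Editor's note: worked examples of the worker-count formula above. num_workers starts at 1 and,
// when hardware_concurrency() reports at least 8 threads, grows by 1 + (thread_count - 8) / 2:
//
//     thread_count = 4   ->  1 worker   (below the threshold)
//     thread_count = 8   ->  1 + 1 + 0 = 2 workers
//     thread_count = 12  ->  1 + 1 + 2 = 4 workers
//     thread_count = 16  ->  1 + 1 + 4 = 6 workers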
-
-void AsyncShaders::FreeWorkers() {
- // Mark all threads to quit
- is_thread_exiting.store(true);
- cv.notify_all();
- for (auto& thread : worker_threads) {
- thread.join();
- }
- // Clear our shared contexts
- context_list.clear();
-
- // Clear our worker threads
- worker_threads.clear();
-}
-
-void AsyncShaders::KillWorkers() {
- is_thread_exiting.store(true);
- cv.notify_all();
- for (auto& thread : worker_threads) {
- thread.detach();
- }
- // Clear our shared contexts
- context_list.clear();
-
- // Clear our worker threads
- worker_threads.clear();
-}
-
-bool AsyncShaders::HasWorkQueued() const {
- return !pending_queue.empty();
-}
-
-bool AsyncShaders::HasCompletedWork() const {
- std::shared_lock lock{completed_mutex};
- return !finished_work.empty();
-}
-
-bool AsyncShaders::IsShaderAsync(const Tegra::GPU& gpu) const {
- const auto& regs = gpu.Maxwell3D().regs;
-
- // If the depth buffer is in use, we can assume the game is not rendering anything that will
- // only be used once.
- if (regs.zeta_enable) {
- return true;
- }
-
- // If the game is using a small index count, we can assume these are full-screen quads. These
- // shaders are usually only used once to build textures, so we assume they cannot be built
- // asynchronously.
- if (regs.index_array.count <= 6 || regs.vertex_buffer.count <= 6) {
- return false;
- }
-
- return true;
-}
-
-std::vector<AsyncShaders::Result> AsyncShaders::GetCompletedWork() {
- std::vector<Result> results;
- {
- std::unique_lock lock{completed_mutex};
- results = std::move(finished_work);
- finished_work.clear();
- }
- return results;
-}
-
-void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device,
- Tegra::Engines::ShaderType shader_type, u64 uid,
- std::vector<u64> code, std::vector<u64> code_b,
- u32 main_offset, CompilerSettings compiler_settings,
- const Registry& registry, VAddr cpu_addr) {
- std::unique_lock lock(queue_mutex);
- pending_queue.push({
- .backend = device.UseAssemblyShaders() ? Backend::GLASM : Backend::OpenGL,
- .device = &device,
- .shader_type = shader_type,
- .uid = uid,
- .code = std::move(code),
- .code_b = std::move(code_b),
- .main_offset = main_offset,
- .compiler_settings = compiler_settings,
- .registry = registry,
- .cpu_address = cpu_addr,
- .pp_cache = nullptr,
- .vk_device = nullptr,
- .scheduler = nullptr,
- .descriptor_pool = nullptr,
- .update_descriptor_queue = nullptr,
- .bindings{},
- .program{},
- .key{},
- .num_color_buffers = 0,
- });
- cv.notify_one();
-}
-
-void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache,
- const Vulkan::Device& device, Vulkan::VKScheduler& scheduler,
- Vulkan::VKDescriptorPool& descriptor_pool,
- Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
- std::vector<VkDescriptorSetLayoutBinding> bindings,
- Vulkan::SPIRVProgram program,
- Vulkan::GraphicsPipelineCacheKey key, u32 num_color_buffers) {
- std::unique_lock lock(queue_mutex);
- pending_queue.push({
- .backend = Backend::Vulkan,
- .device = nullptr,
- .shader_type{},
- .uid = 0,
- .code{},
- .code_b{},
- .main_offset = 0,
- .compiler_settings{},
- .registry{},
- .cpu_address = 0,
- .pp_cache = pp_cache,
- .vk_device = &device,
- .scheduler = &scheduler,
- .descriptor_pool = &descriptor_pool,
- .update_descriptor_queue = &update_descriptor_queue,
- .bindings = std::move(bindings),
- .program = std::move(program),
- .key = key,
- .num_color_buffers = num_color_buffers,
- });
- cv.notify_one();
-}
-
-void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) {
- while (!is_thread_exiting.load(std::memory_order_relaxed)) {
- std::unique_lock lock{queue_mutex};
- cv.wait(lock, [this] { return HasWorkQueued() || is_thread_exiting; });
- if (is_thread_exiting) {
- return;
- }
-
- // The wait predicate may have been satisfied by work that another worker already took;
- // re-check the queue (still under the lock) before popping.
- if (pending_queue.empty()) {
- continue;
- }
-
- // Pull work from queue
- WorkerParams work = std::move(pending_queue.front());
- pending_queue.pop();
- lock.unlock();
-
- if (work.backend == Backend::OpenGL || work.backend == Backend::GLASM) {
- const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, *work.registry);
- const auto scope = context->Acquire();
- auto program =
- OpenGL::BuildShader(*work.device, work.shader_type, work.uid, ir, *work.registry);
- Result result{};
- result.backend = work.backend;
- result.cpu_address = work.cpu_address;
- result.uid = work.uid;
- result.code = std::move(work.code);
- result.code_b = std::move(work.code_b);
- result.shader_type = work.shader_type;
-
- if (work.backend == Backend::OpenGL) {
- result.program.opengl = std::move(program->source_program);
- } else if (work.backend == Backend::GLASM) {
- result.program.glasm = std::move(program->assembly_program);
- }
-
- {
- std::unique_lock complete_lock(completed_mutex);
- finished_work.push_back(std::move(result));
- }
- } else if (work.backend == Backend::Vulkan) {
- auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>(
- *work.vk_device, *work.scheduler, *work.descriptor_pool,
- *work.update_descriptor_queue, work.key, work.bindings, work.program,
- work.num_color_buffers);
-
- work.pp_cache->EmplacePipeline(std::move(pipeline));
- }
- }
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h
deleted file mode 100644
index 7fdff6e56..000000000
--- a/src/video_core/shader/async_shaders.h
+++ /dev/null
@@ -1,138 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <condition_variable>
-#include <memory>
-#include <shared_mutex>
-#include <thread>
-
-#include <glad/glad.h>
-
-#include "common/common_types.h"
-#include "video_core/renderer_opengl/gl_device.h"
-#include "video_core/renderer_opengl/gl_resource_manager.h"
-#include "video_core/renderer_opengl/gl_shader_decompiler.h"
-#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
-#include "video_core/renderer_vulkan/vk_scheduler.h"
-#include "video_core/vulkan_common/vulkan_device.h"
-
-namespace Core::Frontend {
-class EmuWindow;
-class GraphicsContext;
-} // namespace Core::Frontend
-
-namespace Tegra {
-class GPU;
-}
-
-namespace Vulkan {
-class VKPipelineCache;
-}
-
-namespace VideoCommon::Shader {
-
-class AsyncShaders {
-public:
- enum class Backend {
- OpenGL,
- GLASM,
- Vulkan,
- };
-
- struct ResultPrograms {
- OpenGL::OGLProgram opengl;
- OpenGL::OGLAssemblyProgram glasm;
- };
-
- struct Result {
- u64 uid;
- VAddr cpu_address;
- Backend backend;
- ResultPrograms program;
- std::vector<u64> code;
- std::vector<u64> code_b;
- Tegra::Engines::ShaderType shader_type;
- };
-
- explicit AsyncShaders(Core::Frontend::EmuWindow& emu_window_);
- ~AsyncShaders();
-
- /// Start up shader worker threads
- void AllocateWorkers();
-
- /// Clear the shader queue and kill all worker threads
- void FreeWorkers();
-
- /// Force end all threads
- void KillWorkers();
-
- /// Check to see if any shaders have actually been compiled
- [[nodiscard]] bool HasCompletedWork() const;
-
- /// Deduce if a shader can be built on another thread or MUST be built in sync. We cannot build
- /// every shader async as some shaders are only built and executed once. We try to "guess" which
- /// shaders will be used only once.
- [[nodiscard]] bool IsShaderAsync(const Tegra::GPU& gpu) const;
-
- /// Pulls completed compiled shaders
- [[nodiscard]] std::vector<Result> GetCompletedWork();
-
- void QueueOpenGLShader(const OpenGL::Device& device, Tegra::Engines::ShaderType shader_type,
- u64 uid, std::vector<u64> code, std::vector<u64> code_b, u32 main_offset,
- CompilerSettings compiler_settings, const Registry& registry,
- VAddr cpu_addr);
-
- void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::Device& device,
- Vulkan::VKScheduler& scheduler,
- Vulkan::VKDescriptorPool& descriptor_pool,
- Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
- std::vector<VkDescriptorSetLayoutBinding> bindings,
- Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key,
- u32 num_color_buffers);
-
-private:
- void ShaderCompilerThread(Core::Frontend::GraphicsContext* context);
-
- /// Check our worker queue to see if we have any work queued already
- [[nodiscard]] bool HasWorkQueued() const;
-
- struct WorkerParams {
- Backend backend;
- // For OGL
- const OpenGL::Device* device;
- Tegra::Engines::ShaderType shader_type;
- u64 uid;
- std::vector<u64> code;
- std::vector<u64> code_b;
- u32 main_offset;
- CompilerSettings compiler_settings;
- std::optional<Registry> registry;
- VAddr cpu_address;
-
- // For Vulkan
- Vulkan::VKPipelineCache* pp_cache;
- const Vulkan::Device* vk_device;
- Vulkan::VKScheduler* scheduler;
- Vulkan::VKDescriptorPool* descriptor_pool;
- Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue;
- std::vector<VkDescriptorSetLayoutBinding> bindings;
- Vulkan::SPIRVProgram program;
- Vulkan::GraphicsPipelineCacheKey key;
- u32 num_color_buffers;
- };
-
- std::condition_variable cv;
- mutable std::mutex queue_mutex;
- mutable std::shared_mutex completed_mutex;
- std::atomic<bool> is_thread_exiting{};
- std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list;
- std::vector<std::thread> worker_threads;
- std::queue<WorkerParams> pending_queue;
- std::vector<Result> finished_work;
- Core::Frontend::EmuWindow& emu_window;
-};
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/compiler_settings.cpp b/src/video_core/shader/compiler_settings.cpp
deleted file mode 100644
index cddcbd4f0..000000000
--- a/src/video_core/shader/compiler_settings.cpp
+++ /dev/null
@@ -1,26 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "video_core/shader/compiler_settings.h"
-
-namespace VideoCommon::Shader {
-
-std::string CompileDepthAsString(const CompileDepth cd) {
- switch (cd) {
- case CompileDepth::BruteForce:
- return "Brute Force Compile";
- case CompileDepth::FlowStack:
- return "Simple Flow Stack Mode";
- case CompileDepth::NoFlowStack:
- return "Remove Flow Stack";
- case CompileDepth::DecompileBackwards:
- return "Decompile Backward Jumps";
- case CompileDepth::FullDecompile:
- return "Full Decompilation";
- default:
- return "Unknown Compiler Process";
- }
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/compiler_settings.h b/src/video_core/shader/compiler_settings.h
deleted file mode 100644
index 916018c01..000000000
--- a/src/video_core/shader/compiler_settings.h
+++ /dev/null
@@ -1,26 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include "video_core/engines/shader_bytecode.h"
-
-namespace VideoCommon::Shader {
-
-enum class CompileDepth : u32 {
- BruteForce = 0,
- FlowStack = 1,
- NoFlowStack = 2,
- DecompileBackwards = 3,
- FullDecompile = 4,
-};
-
-std::string CompileDepthAsString(CompileDepth cd);
-
-struct CompilerSettings {
- CompileDepth depth{CompileDepth::NoFlowStack};
- bool disable_else_derivation{true};
-};
-
-} // namespace VideoCommon::Shader
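// Editor's note: a minimal, hypothetical sketch (not part of the original header) of how these
// settings are consumed; ScanFlow() and its argument types come from control_flow.h, which is
// removed further below, and the wrapper name is invented for illustration.
static std::unique_ptr<ShaderCharacteristics> ScanWithFullDecompile(const ProgramCode& program_code,
                                                                    u32 main_offset,
                                                                    Registry& registry) {
    CompilerSettings settings{};
    settings.depth = CompileDepth::FullDecompile;  // try to remove every goto
    settings.disable_else_derivation = false;      // allow if/else recovery
    LOG_DEBUG(HW_GPU, "Compiling with: {}", CompileDepthAsString(settings.depth));
    return ScanFlow(program_code, main_offset, settings, registry);
}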
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
deleted file mode 100644
index 43d965f2f..000000000
--- a/src/video_core/shader/control_flow.cpp
+++ /dev/null
@@ -1,751 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <list>
-#include <map>
-#include <set>
-#include <stack>
-#include <unordered_map>
-#include <vector>
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/shader/ast.h"
-#include "video_core/shader/control_flow.h"
-#include "video_core/shader/memory_util.h"
-#include "video_core/shader/registry.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-namespace {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-
-constexpr s32 unassigned_branch = -2;
-
-struct Query {
- u32 address{};
- std::stack<u32> ssy_stack{};
- std::stack<u32> pbk_stack{};
-};
-
-struct BlockStack {
- BlockStack() = default;
- explicit BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {}
- std::stack<u32> ssy_stack{};
- std::stack<u32> pbk_stack{};
-};
-
-template <typename T, typename... Args>
-BlockBranchInfo MakeBranchInfo(Args&&... args) {
- static_assert(std::is_convertible_v<T, BranchData>);
- return std::make_shared<BranchData>(T(std::forward<Args>(args)...));
-}
-
-bool BlockBranchIsIgnored(BlockBranchInfo first) {
- bool ignore = false;
- if (std::holds_alternative<SingleBranch>(*first)) {
- const auto branch = std::get_if<SingleBranch>(first.get());
- ignore = branch->ignore;
- }
- return ignore;
-}
-
-struct BlockInfo {
- u32 start{};
- u32 end{};
- bool visited{};
- BlockBranchInfo branch{};
-
- bool IsInside(const u32 address) const {
- return start <= address && address <= end;
- }
-};
-
-struct CFGRebuildState {
- explicit CFGRebuildState(const ProgramCode& program_code_, u32 start_, Registry& registry_)
- : program_code{program_code_}, registry{registry_}, start{start_} {}
-
- const ProgramCode& program_code;
- Registry& registry;
- u32 start{};
- std::vector<BlockInfo> block_info;
- std::list<u32> inspect_queries;
- std::list<Query> queries;
- std::unordered_map<u32, u32> registered;
- std::set<u32> labels;
- std::map<u32, u32> ssy_labels;
- std::map<u32, u32> pbk_labels;
- std::unordered_map<u32, BlockStack> stacks;
- ASTManager* manager{};
-};
-
-enum class BlockCollision : u32 { None, Found, Inside };
-
-std::pair<BlockCollision, u32> TryGetBlock(CFGRebuildState& state, u32 address) {
- const auto& blocks = state.block_info;
- for (u32 index = 0; index < blocks.size(); index++) {
- if (blocks[index].start == address) {
- return {BlockCollision::Found, index};
- }
- if (blocks[index].IsInside(address)) {
- return {BlockCollision::Inside, index};
- }
- }
- return {BlockCollision::None, 0xFFFFFFFF};
-}
-
-struct ParseInfo {
- BlockBranchInfo branch_info{};
- u32 end_address{};
-};
-
-BlockInfo& CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) {
- auto& it = state.block_info.emplace_back();
- it.start = start;
- it.end = end;
- const u32 index = static_cast<u32>(state.block_info.size() - 1);
- state.registered.insert({start, index});
- return it;
-}
-
-Pred GetPredicate(u32 index, bool negated) {
- return static_cast<Pred>(static_cast<u64>(index) + (negated ? 8ULL : 0ULL));
-}
-
-enum class ParseResult : u32 {
- ControlCaught,
- BlockEnd,
- AbnormalFlow,
-};
-
-struct BranchIndirectInfo {
- u32 buffer{};
- u32 offset{};
- u32 entries{};
- s32 relative_position{};
-};
-
-struct BufferInfo {
- u32 index;
- u32 offset;
-};
-
-std::optional<std::pair<s32, u64>> GetBRXInfo(const CFGRebuildState& state, u32& pos) {
- const Instruction instr = state.program_code[pos];
- const auto opcode = OpCode::Decode(instr);
- if (opcode->get().GetId() != OpCode::Id::BRX) {
- return std::nullopt;
- }
- if (instr.brx.constant_buffer != 0) {
- return std::nullopt;
- }
- --pos;
- return std::make_pair(instr.brx.GetBranchExtend(), instr.gpr8.Value());
-}
-
-template <typename Result, typename TestCallable, typename PackCallable>
-// requires std::predicate<TestCallable, Instruction, const OpCode::Matcher&>
-// requires std::invocable<PackCallable, Instruction, const OpCode::Matcher&>
-std::optional<Result> TrackInstruction(const CFGRebuildState& state, u32& pos, TestCallable test,
- PackCallable pack) {
- for (; pos >= state.start; --pos) {
- if (IsSchedInstruction(pos, state.start)) {
- continue;
- }
- const Instruction instr = state.program_code[pos];
- const auto opcode = OpCode::Decode(instr);
- if (!opcode) {
- continue;
- }
- if (test(instr, opcode->get())) {
- --pos;
- return std::make_optional(pack(instr, opcode->get()));
- }
- }
- return std::nullopt;
-}
-
-std::optional<std::pair<BufferInfo, u64>> TrackLDC(const CFGRebuildState& state, u32& pos,
- u64 brx_tracked_register) {
- return TrackInstruction<std::pair<BufferInfo, u64>>(
- state, pos,
- [brx_tracked_register](auto instr, const auto& opcode) {
- return opcode.GetId() == OpCode::Id::LD_C &&
- instr.gpr0.Value() == brx_tracked_register &&
- instr.ld_c.type.Value() == Tegra::Shader::UniformType::Single;
- },
- [](auto instr, const auto& opcode) {
- const BufferInfo info = {static_cast<u32>(instr.cbuf36.index.Value()),
- static_cast<u32>(instr.cbuf36.GetOffset())};
- return std::make_pair(info, instr.gpr8.Value());
- });
-}
-
-std::optional<u64> TrackSHLRegister(const CFGRebuildState& state, u32& pos,
- u64 ldc_tracked_register) {
- return TrackInstruction<u64>(
- state, pos,
- [ldc_tracked_register](auto instr, const auto& opcode) {
- return opcode.GetId() == OpCode::Id::SHL_IMM &&
- instr.gpr0.Value() == ldc_tracked_register;
- },
- [](auto instr, const auto&) { return instr.gpr8.Value(); });
-}
-
-std::optional<u32> TrackIMNMXValue(const CFGRebuildState& state, u32& pos,
- u64 shl_tracked_register) {
- return TrackInstruction<u32>(
- state, pos,
- [shl_tracked_register](auto instr, const auto& opcode) {
- return opcode.GetId() == OpCode::Id::IMNMX_IMM &&
- instr.gpr0.Value() == shl_tracked_register;
- },
- [](auto instr, const auto&) {
- return static_cast<u32>(instr.alu.GetSignedImm20_20() + 1);
- });
-}
-
-std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& state, u32 pos) {
- const auto brx_info = GetBRXInfo(state, pos);
- if (!brx_info) {
- return std::nullopt;
- }
- const auto [relative_position, brx_tracked_register] = *brx_info;
-
- const auto ldc_info = TrackLDC(state, pos, brx_tracked_register);
- if (!ldc_info) {
- return std::nullopt;
- }
- const auto [buffer_info, ldc_tracked_register] = *ldc_info;
-
- const auto shl_tracked_register = TrackSHLRegister(state, pos, ldc_tracked_register);
- if (!shl_tracked_register) {
- return std::nullopt;
- }
-
- const auto entries = TrackIMNMXValue(state, pos, *shl_tracked_register);
- if (!entries) {
- return std::nullopt;
- }
-
- return BranchIndirectInfo{buffer_info.index, buffer_info.offset, *entries, relative_position};
-}
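// Editor's note: the backward pattern matched above, shown with a hypothetical instruction
// sequence (registers, immediates and offsets invented). Tracking starts at the BRX and walks
// upward through the destination/source registers:
//
//     IMNMX.IMM R2, R3, 15, PT;   // clamps the case index      -> entries = 15 + 1
//     SHL.IMM   R2, R2, 0x2;      // scales the index           -> register feeding the load
//     LD_C      R4, c[0x1][R2];   // loads the jump offset      -> buffer index / table offset
//     BRX       R4, -0x90;        // indirect branch            -> relative_position
//
// If any link in this chain cannot be found, the track fails and ParseCode() reports
// ParseResult::AbnormalFlow, falling back to brute-force compilation.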
-
-std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) {
- u32 offset = static_cast<u32>(address);
- const u32 end_address = static_cast<u32>(state.program_code.size());
- ParseInfo parse_info{};
- SingleBranch single_branch{};
-
- const auto insert_label = [](CFGRebuildState& rebuild_state, u32 label_address) {
- const auto pair = rebuild_state.labels.emplace(label_address);
- if (pair.second) {
- rebuild_state.inspect_queries.push_back(label_address);
- }
- };
-
- while (true) {
- if (offset >= end_address) {
- // ASSERT_OR_EXECUTE can't be used, as it ignores the break
- ASSERT_MSG(false, "Shader passed the current limit!");
-
- single_branch.address = exit_branch;
- single_branch.ignore = false;
- break;
- }
- if (state.registered.contains(offset)) {
- single_branch.address = offset;
- single_branch.ignore = true;
- break;
- }
- if (IsSchedInstruction(offset, state.start)) {
- offset++;
- continue;
- }
- const Instruction instr = {state.program_code[offset]};
- const auto opcode = OpCode::Decode(instr);
- if (!opcode || opcode->get().GetType() != OpCode::Type::Flow) {
- offset++;
- continue;
- }
-
- switch (opcode->get().GetId()) {
- case OpCode::Id::EXIT: {
- const auto pred_index = static_cast<u32>(instr.pred.pred_index);
- single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
- if (single_branch.condition.predicate == Pred::NeverExecute) {
- offset++;
- continue;
- }
- const ConditionCode cc = instr.flow_condition_code;
- single_branch.condition.cc = cc;
- if (cc == ConditionCode::F) {
- offset++;
- continue;
- }
- single_branch.address = exit_branch;
- single_branch.kill = false;
- single_branch.is_sync = false;
- single_branch.is_brk = false;
- single_branch.ignore = false;
- parse_info.end_address = offset;
- parse_info.branch_info = MakeBranchInfo<SingleBranch>(
- single_branch.condition, single_branch.address, single_branch.kill,
- single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
-
- return {ParseResult::ControlCaught, parse_info};
- }
- case OpCode::Id::BRA: {
- if (instr.bra.constant_buffer != 0) {
- return {ParseResult::AbnormalFlow, parse_info};
- }
- const auto pred_index = static_cast<u32>(instr.pred.pred_index);
- single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
- if (single_branch.condition.predicate == Pred::NeverExecute) {
- offset++;
- continue;
- }
- const ConditionCode cc = instr.flow_condition_code;
- single_branch.condition.cc = cc;
- if (cc == ConditionCode::F) {
- offset++;
- continue;
- }
- const u32 branch_offset = offset + instr.bra.GetBranchTarget();
- if (branch_offset == 0) {
- single_branch.address = exit_branch;
- } else {
- single_branch.address = branch_offset;
- }
- insert_label(state, branch_offset);
- single_branch.kill = false;
- single_branch.is_sync = false;
- single_branch.is_brk = false;
- single_branch.ignore = false;
- parse_info.end_address = offset;
- parse_info.branch_info = MakeBranchInfo<SingleBranch>(
- single_branch.condition, single_branch.address, single_branch.kill,
- single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
-
- return {ParseResult::ControlCaught, parse_info};
- }
- case OpCode::Id::SYNC: {
- const auto pred_index = static_cast<u32>(instr.pred.pred_index);
- single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
- if (single_branch.condition.predicate == Pred::NeverExecute) {
- offset++;
- continue;
- }
- const ConditionCode cc = instr.flow_condition_code;
- single_branch.condition.cc = cc;
- if (cc == ConditionCode::F) {
- offset++;
- continue;
- }
- single_branch.address = unassigned_branch;
- single_branch.kill = false;
- single_branch.is_sync = true;
- single_branch.is_brk = false;
- single_branch.ignore = false;
- parse_info.end_address = offset;
- parse_info.branch_info = MakeBranchInfo<SingleBranch>(
- single_branch.condition, single_branch.address, single_branch.kill,
- single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
-
- return {ParseResult::ControlCaught, parse_info};
- }
- case OpCode::Id::BRK: {
- const auto pred_index = static_cast<u32>(instr.pred.pred_index);
- single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
- if (single_branch.condition.predicate == Pred::NeverExecute) {
- offset++;
- continue;
- }
- const ConditionCode cc = instr.flow_condition_code;
- single_branch.condition.cc = cc;
- if (cc == ConditionCode::F) {
- offset++;
- continue;
- }
- single_branch.address = unassigned_branch;
- single_branch.kill = false;
- single_branch.is_sync = false;
- single_branch.is_brk = true;
- single_branch.ignore = false;
- parse_info.end_address = offset;
- parse_info.branch_info = MakeBranchInfo<SingleBranch>(
- single_branch.condition, single_branch.address, single_branch.kill,
- single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
-
- return {ParseResult::ControlCaught, parse_info};
- }
- case OpCode::Id::KIL: {
- const auto pred_index = static_cast<u32>(instr.pred.pred_index);
- single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
- if (single_branch.condition.predicate == Pred::NeverExecute) {
- offset++;
- continue;
- }
- const ConditionCode cc = instr.flow_condition_code;
- single_branch.condition.cc = cc;
- if (cc == ConditionCode::F) {
- offset++;
- continue;
- }
- single_branch.address = exit_branch;
- single_branch.kill = true;
- single_branch.is_sync = false;
- single_branch.is_brk = false;
- single_branch.ignore = false;
- parse_info.end_address = offset;
- parse_info.branch_info = MakeBranchInfo<SingleBranch>(
- single_branch.condition, single_branch.address, single_branch.kill,
- single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
-
- return {ParseResult::ControlCaught, parse_info};
- }
- case OpCode::Id::SSY: {
- const u32 target = offset + instr.bra.GetBranchTarget();
- insert_label(state, target);
- state.ssy_labels.emplace(offset, target);
- break;
- }
- case OpCode::Id::PBK: {
- const u32 target = offset + instr.bra.GetBranchTarget();
- insert_label(state, target);
- state.pbk_labels.emplace(offset, target);
- break;
- }
- case OpCode::Id::BRX: {
- const auto tmp = TrackBranchIndirectInfo(state, offset);
- if (!tmp) {
- LOG_WARNING(HW_GPU, "BRX Track Unsuccesful");
- return {ParseResult::AbnormalFlow, parse_info};
- }
-
- const auto result = *tmp;
- const s32 pc_target = offset + result.relative_position;
- std::vector<CaseBranch> branches;
- for (u32 i = 0; i < result.entries; i++) {
- auto key = state.registry.ObtainKey(result.buffer, result.offset + i * 4);
- if (!key) {
- return {ParseResult::AbnormalFlow, parse_info};
- }
- u32 value = *key;
- u32 target = static_cast<u32>((value >> 3) + pc_target);
- insert_label(state, target);
- branches.emplace_back(value, target);
- }
- parse_info.end_address = offset;
- parse_info.branch_info = MakeBranchInfo<MultiBranch>(
- static_cast<u32>(instr.gpr8.Value()), std::move(branches));
-
- return {ParseResult::ControlCaught, parse_info};
- }
- default:
- break;
- }
-
- offset++;
- }
- single_branch.kill = false;
- single_branch.is_sync = false;
- single_branch.is_brk = false;
- parse_info.end_address = offset - 1;
- parse_info.branch_info = MakeBranchInfo<SingleBranch>(
- single_branch.condition, single_branch.address, single_branch.kill, single_branch.is_sync,
- single_branch.is_brk, single_branch.ignore);
- return {ParseResult::BlockEnd, parse_info};
-}
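// Editor's note: a compact summary (editor's addition) of how the flow opcodes handled above
// populate the resulting branch info:
//
//     EXIT    -> SingleBranch{address = exit_branch}
//     BRA     -> SingleBranch{address = target, or exit_branch when the target is 0}
//     SYNC    -> SingleBranch{address = unassigned_branch, is_sync = true}  (resolved from the SSY stack)
//     BRK     -> SingleBranch{address = unassigned_branch, is_brk = true}   (resolved from the PBK stack)
//     KIL     -> SingleBranch{address = exit_branch, kill = true}
//     SSY/PBK -> only record their target label; parsing continues
//     BRX     -> MultiBranch over the tracked constant-buffer jump table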
-
-bool TryInspectAddress(CFGRebuildState& state) {
- if (state.inspect_queries.empty()) {
- return false;
- }
-
- const u32 address = state.inspect_queries.front();
- state.inspect_queries.pop_front();
- const auto [result, block_index] = TryGetBlock(state, address);
- switch (result) {
- case BlockCollision::Found: {
- return true;
- }
- case BlockCollision::Inside: {
- // This case is the tricky one:
- // We need to split the block into 2 separate blocks
- const u32 end = state.block_info[block_index].end;
- BlockInfo& new_block = CreateBlockInfo(state, address, end);
- BlockInfo& current_block = state.block_info[block_index];
- current_block.end = address - 1;
- new_block.branch = std::move(current_block.branch);
- BlockBranchInfo forward_branch = MakeBranchInfo<SingleBranch>();
- const auto branch = std::get_if<SingleBranch>(forward_branch.get());
- branch->address = address;
- branch->ignore = true;
- current_block.branch = std::move(forward_branch);
- return true;
- }
- default:
- break;
- }
- const auto [parse_result, parse_info] = ParseCode(state, address);
- if (parse_result == ParseResult::AbnormalFlow) {
- // On AbnormalFlow, return false to abort the CFG reconstruction
- return false;
- }
-
- BlockInfo& block_info = CreateBlockInfo(state, address, parse_info.end_address);
- block_info.branch = parse_info.branch_info;
- if (std::holds_alternative<SingleBranch>(*block_info.branch)) {
- const auto branch = std::get_if<SingleBranch>(block_info.branch.get());
- if (branch->condition.IsUnconditional()) {
- return true;
- }
- const u32 fallthrough_address = parse_info.end_address + 1;
- state.inspect_queries.push_front(fallthrough_address);
- return true;
- }
- return true;
-}
-
-bool TryQuery(CFGRebuildState& state) {
- const auto gather_labels = [](std::stack<u32>& cc, std::map<u32, u32>& labels,
- BlockInfo& block) {
- auto gather_start = labels.lower_bound(block.start);
- const auto gather_end = labels.upper_bound(block.end);
- while (gather_start != gather_end) {
- cc.push(gather_start->second);
- ++gather_start;
- }
- };
- if (state.queries.empty()) {
- return false;
- }
-
- Query& q = state.queries.front();
- const u32 block_index = state.registered[q.address];
- BlockInfo& block = state.block_info[block_index];
- // If the block has already been visited, check that the stacks match. Otherwise, gather the
- // ssy/pbk labels into the current stack, check whether the branch at the end of the block
- // consumes a label, and schedule new queries accordingly.
- if (block.visited) {
- BlockStack& stack = state.stacks[q.address];
- const bool all_okay = (stack.ssy_stack.empty() || q.ssy_stack == stack.ssy_stack) &&
- (stack.pbk_stack.empty() || q.pbk_stack == stack.pbk_stack);
- state.queries.pop_front();
- return all_okay;
- }
- block.visited = true;
- state.stacks.insert_or_assign(q.address, BlockStack{q});
-
- Query q2(q);
- state.queries.pop_front();
- gather_labels(q2.ssy_stack, state.ssy_labels, block);
- gather_labels(q2.pbk_stack, state.pbk_labels, block);
- if (std::holds_alternative<SingleBranch>(*block.branch)) {
- auto* branch = std::get_if<SingleBranch>(block.branch.get());
- if (!branch->condition.IsUnconditional()) {
- q2.address = block.end + 1;
- state.queries.push_back(q2);
- }
-
- auto& conditional_query = state.queries.emplace_back(q2);
- if (branch->is_sync) {
- if (branch->address == unassigned_branch) {
- branch->address = conditional_query.ssy_stack.top();
- }
- conditional_query.ssy_stack.pop();
- }
- if (branch->is_brk) {
- if (branch->address == unassigned_branch) {
- branch->address = conditional_query.pbk_stack.top();
- }
- conditional_query.pbk_stack.pop();
- }
- conditional_query.address = branch->address;
- return true;
- }
-
- const auto* multi_branch = std::get_if<MultiBranch>(block.branch.get());
- for (const auto& branch_case : multi_branch->branches) {
- auto& conditional_query = state.queries.emplace_back(q2);
- conditional_query.address = branch_case.address;
- }
-
- return true;
-}
-
-void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) {
- const auto get_expr = [](const Condition& cond) -> Expr {
- Expr result;
- if (cond.cc != ConditionCode::T) {
- result = MakeExpr<ExprCondCode>(cond.cc);
- }
- if (cond.predicate != Pred::UnusedIndex) {
- u32 pred = static_cast<u32>(cond.predicate);
- bool negate = false;
- if (pred > 7) {
- negate = true;
- pred -= 8;
- }
- Expr extra = MakeExpr<ExprPredicate>(pred);
- if (negate) {
- extra = MakeExpr<ExprNot>(std::move(extra));
- }
- if (result) {
- return MakeExpr<ExprAnd>(std::move(extra), std::move(result));
- }
- return extra;
- }
- if (result) {
- return result;
- }
- return MakeExpr<ExprBoolean>(true);
- };
-
- if (std::holds_alternative<SingleBranch>(*branch_info)) {
- const auto* branch = std::get_if<SingleBranch>(branch_info.get());
- if (branch->address < 0) {
- if (branch->kill) {
- mm.InsertReturn(get_expr(branch->condition), true);
- return;
- }
- mm.InsertReturn(get_expr(branch->condition), false);
- return;
- }
- mm.InsertGoto(get_expr(branch->condition), branch->address);
- return;
- }
- const auto* multi_branch = std::get_if<MultiBranch>(branch_info.get());
- for (const auto& branch_case : multi_branch->branches) {
- mm.InsertGoto(MakeExpr<ExprGprEqual>(multi_branch->gpr, branch_case.cmp_value),
- branch_case.address);
- }
-}
-
-void DecompileShader(CFGRebuildState& state) {
- state.manager->Init();
- for (auto label : state.labels) {
- state.manager->DeclareLabel(label);
- }
- for (const auto& block : state.block_info) {
- if (state.labels.contains(block.start)) {
- state.manager->InsertLabel(block.start);
- }
- const bool ignore = BlockBranchIsIgnored(block.branch);
- const u32 end = ignore ? block.end + 1 : block.end;
- state.manager->InsertBlock(block.start, end);
- if (!ignore) {
- InsertBranch(*state.manager, block.branch);
- }
- }
- state.manager->Decompile();
-}
-
-} // Anonymous namespace
-
-std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
- const CompilerSettings& settings,
- Registry& registry) {
- auto result_out = std::make_unique<ShaderCharacteristics>();
- if (settings.depth == CompileDepth::BruteForce) {
- result_out->settings.depth = CompileDepth::BruteForce;
- return result_out;
- }
-
- CFGRebuildState state{program_code, start_address, registry};
- // Inspect Code and generate blocks
- state.labels.clear();
- state.labels.emplace(start_address);
- state.inspect_queries.push_back(state.start);
- while (!state.inspect_queries.empty()) {
- if (!TryInspectAddress(state)) {
- result_out->settings.depth = CompileDepth::BruteForce;
- return result_out;
- }
- }
-
- bool use_flow_stack = true;
-
- bool decompiled = false;
-
- if (settings.depth != CompileDepth::FlowStack) {
- // Decompile Stacks
- state.queries.push_back(Query{state.start, {}, {}});
- decompiled = true;
- while (!state.queries.empty()) {
- if (!TryQuery(state)) {
- decompiled = false;
- break;
- }
- }
- }
-
- use_flow_stack = !decompiled;
-
- // Sort and organize results
- std::sort(state.block_info.begin(), state.block_info.end(),
- [](const BlockInfo& a, const BlockInfo& b) -> bool { return a.start < b.start; });
- if (decompiled && settings.depth != CompileDepth::NoFlowStack) {
- ASTManager manager{settings.depth != CompileDepth::DecompileBackwards,
- settings.disable_else_derivation};
- state.manager = &manager;
- DecompileShader(state);
- decompiled = state.manager->IsFullyDecompiled();
- if (!decompiled) {
- if (settings.depth == CompileDepth::FullDecompile) {
- LOG_CRITICAL(HW_GPU, "Failed to remove all the gotos!:");
- } else {
- LOG_CRITICAL(HW_GPU, "Failed to remove all backward gotos!:");
- }
- state.manager->ShowCurrentState("Of Shader");
- state.manager->Clear();
- } else {
- auto characteristics = std::make_unique<ShaderCharacteristics>();
- characteristics->start = start_address;
- characteristics->settings.depth = settings.depth;
- characteristics->manager = std::move(manager);
- characteristics->end = state.block_info.back().end + 1;
- return characteristics;
- }
- }
-
- result_out->start = start_address;
- result_out->settings.depth =
- use_flow_stack ? CompileDepth::FlowStack : CompileDepth::NoFlowStack;
- result_out->blocks.clear();
- for (auto& block : state.block_info) {
- ShaderBlock new_block{};
- new_block.start = block.start;
- new_block.end = block.end;
- new_block.ignore_branch = BlockBranchIsIgnored(block.branch);
- if (!new_block.ignore_branch) {
- new_block.branch = block.branch;
- }
- result_out->end = std::max(result_out->end, block.end);
- result_out->blocks.push_back(new_block);
- }
- if (!use_flow_stack) {
- result_out->labels = std::move(state.labels);
- return result_out;
- }
-
- auto back = result_out->blocks.begin();
- auto next = std::next(back);
- while (next != result_out->blocks.end()) {
- if (!state.labels.contains(next->start) && next->start == back->end + 1) {
- back->end = next->end;
- next = result_out->blocks.erase(next);
- continue;
- }
- back = next;
- ++next;
- }
-
- return result_out;
-}
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h
deleted file mode 100644
index 37bf96492..000000000
--- a/src/video_core/shader/control_flow.h
+++ /dev/null
@@ -1,117 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <list>
-#include <optional>
-#include <set>
-#include <variant>
-
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/ast.h"
-#include "video_core/shader/compiler_settings.h"
-#include "video_core/shader/registry.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::ConditionCode;
-using Tegra::Shader::Pred;
-
-constexpr s32 exit_branch = -1;
-
-struct Condition {
- Pred predicate{Pred::UnusedIndex};
- ConditionCode cc{ConditionCode::T};
-
- bool IsUnconditional() const {
- return predicate == Pred::UnusedIndex && cc == ConditionCode::T;
- }
-
- bool operator==(const Condition& other) const {
- return std::tie(predicate, cc) == std::tie(other.predicate, other.cc);
- }
-
- bool operator!=(const Condition& other) const {
- return !operator==(other);
- }
-};
-
-class SingleBranch {
-public:
- SingleBranch() = default;
- explicit SingleBranch(Condition condition_, s32 address_, bool kill_, bool is_sync_,
- bool is_brk_, bool ignore_)
- : condition{condition_}, address{address_}, kill{kill_}, is_sync{is_sync_}, is_brk{is_brk_},
- ignore{ignore_} {}
-
- bool operator==(const SingleBranch& b) const {
- return std::tie(condition, address, kill, is_sync, is_brk, ignore) ==
- std::tie(b.condition, b.address, b.kill, b.is_sync, b.is_brk, b.ignore);
- }
-
- bool operator!=(const SingleBranch& b) const {
- return !operator==(b);
- }
-
- Condition condition{};
- s32 address{exit_branch};
- bool kill{};
- bool is_sync{};
- bool is_brk{};
- bool ignore{};
-};
-
-struct CaseBranch {
- explicit CaseBranch(u32 cmp_value_, u32 address_) : cmp_value{cmp_value_}, address{address_} {}
- u32 cmp_value;
- u32 address;
-};
-
-class MultiBranch {
-public:
- explicit MultiBranch(u32 gpr_, std::vector<CaseBranch>&& branches_)
- : gpr{gpr_}, branches{std::move(branches_)} {}
-
- u32 gpr{};
- std::vector<CaseBranch> branches{};
-};
-
-using BranchData = std::variant<SingleBranch, MultiBranch>;
-using BlockBranchInfo = std::shared_ptr<BranchData>;
-
-bool BlockBranchInfoAreEqual(BlockBranchInfo first, BlockBranchInfo second);
-
-struct ShaderBlock {
- u32 start{};
- u32 end{};
- bool ignore_branch{};
- BlockBranchInfo branch{};
-
- bool operator==(const ShaderBlock& sb) const {
- return std::tie(start, end, ignore_branch) ==
- std::tie(sb.start, sb.end, sb.ignore_branch) &&
- BlockBranchInfoAreEqual(branch, sb.branch);
- }
-
- bool operator!=(const ShaderBlock& sb) const {
- return !operator==(sb);
- }
-};
-
-struct ShaderCharacteristics {
- std::list<ShaderBlock> blocks{};
- std::set<u32> labels{};
- u32 start{};
- u32 end{};
- ASTManager manager{true, true};
- CompilerSettings settings{};
-};
-
-std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
- const CompilerSettings& settings,
- Registry& registry);
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
deleted file mode 100644
index 6576d1208..000000000
--- a/src/video_core/shader/decode.cpp
+++ /dev/null
@@ -1,368 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <cstring>
-#include <limits>
-#include <set>
-
-#include <fmt/format.h>
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/engines/shader_header.h"
-#include "video_core/shader/control_flow.h"
-#include "video_core/shader/memory_util.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-
-namespace {
-
-void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver,
- const std::list<SamplerEntry>& used_samplers) {
- if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) {
- return;
- }
- u32 count{};
- std::vector<u32> bound_offsets;
- for (const auto& sampler : used_samplers) {
- if (sampler.is_bindless) {
- continue;
- }
- ++count;
- bound_offsets.emplace_back(sampler.offset);
- }
- if (count > 1) {
- gpu_driver.DeduceTextureHandlerSize(std::move(bound_offsets));
- }
-}
-
-std::optional<u32> TryDeduceSamplerSize(const SamplerEntry& sampler_to_deduce,
- VideoCore::GuestDriverProfile& gpu_driver,
- const std::list<SamplerEntry>& used_samplers) {
- const u32 base_offset = sampler_to_deduce.offset;
- u32 max_offset{std::numeric_limits<u32>::max()};
- for (const auto& sampler : used_samplers) {
- if (sampler.is_bindless) {
- continue;
- }
- if (sampler.offset > base_offset) {
- max_offset = std::min(sampler.offset, max_offset);
- }
- }
- if (max_offset == std::numeric_limits<u32>::max()) {
- return std::nullopt;
- }
- return ((max_offset - base_offset) * 4) / gpu_driver.GetTextureHandlerSize();
-}
-
-} // Anonymous namespace
-
-class ASTDecoder {
-public:
- explicit ASTDecoder(ShaderIR& ir_) : ir(ir_) {}
-
- void operator()(ASTProgram& ast) {
- ASTNode current = ast.nodes.GetFirst();
- while (current) {
- Visit(current);
- current = current->GetNext();
- }
- }
-
- void operator()(ASTIfThen& ast) {
- ASTNode current = ast.nodes.GetFirst();
- while (current) {
- Visit(current);
- current = current->GetNext();
- }
- }
-
- void operator()(ASTIfElse& ast) {
- ASTNode current = ast.nodes.GetFirst();
- while (current) {
- Visit(current);
- current = current->GetNext();
- }
- }
-
- void operator()(ASTBlockEncoded& ast) {}
-
- void operator()(ASTBlockDecoded& ast) {}
-
- void operator()(ASTVarSet& ast) {}
-
- void operator()(ASTLabel& ast) {}
-
- void operator()(ASTGoto& ast) {}
-
- void operator()(ASTDoWhile& ast) {
- ASTNode current = ast.nodes.GetFirst();
- while (current) {
- Visit(current);
- current = current->GetNext();
- }
- }
-
- void operator()(ASTReturn& ast) {}
-
- void operator()(ASTBreak& ast) {}
-
- void Visit(ASTNode& node) {
- std::visit(*this, *node->GetInnerData());
- if (node->IsBlockEncoded()) {
- auto block = std::get_if<ASTBlockEncoded>(node->GetInnerData());
- NodeBlock bb = ir.DecodeRange(block->start, block->end);
- node->TransformBlockEncoded(std::move(bb));
- }
- }
-
-private:
- ShaderIR& ir;
-};
-
-void ShaderIR::Decode() {
- std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
-
- decompiled = false;
- auto info = ScanFlow(program_code, main_offset, settings, registry);
- auto& shader_info = *info;
- coverage_begin = shader_info.start;
- coverage_end = shader_info.end;
- switch (shader_info.settings.depth) {
- case CompileDepth::FlowStack: {
- for (const auto& block : shader_info.blocks) {
- basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)});
- }
- break;
- }
- case CompileDepth::NoFlowStack: {
- disable_flow_stack = true;
- const auto insert_block = [this](NodeBlock& nodes, u32 label) {
- if (label == static_cast<u32>(exit_branch)) {
- return;
- }
- basic_blocks.insert({label, nodes});
- };
- const auto& blocks = shader_info.blocks;
- NodeBlock current_block;
- u32 current_label = static_cast<u32>(exit_branch);
- for (const auto& block : blocks) {
- if (shader_info.labels.contains(block.start)) {
- insert_block(current_block, current_label);
- current_block.clear();
- current_label = block.start;
- }
- if (!block.ignore_branch) {
- DecodeRangeInner(current_block, block.start, block.end);
- InsertControlFlow(current_block, block);
- } else {
- DecodeRangeInner(current_block, block.start, block.end + 1);
- }
- }
- insert_block(current_block, current_label);
- break;
- }
- case CompileDepth::DecompileBackwards:
- case CompileDepth::FullDecompile: {
- program_manager = std::move(shader_info.manager);
- disable_flow_stack = true;
- decompiled = true;
- ASTDecoder decoder{*this};
- ASTNode program = GetASTProgram();
- decoder.Visit(program);
- break;
- }
- default:
- LOG_CRITICAL(HW_GPU, "Unknown decompilation mode!");
- [[fallthrough]];
- case CompileDepth::BruteForce: {
- const auto shader_end = static_cast<u32>(program_code.size());
- coverage_begin = main_offset;
- coverage_end = shader_end;
- for (u32 label = main_offset; label < shader_end; ++label) {
- basic_blocks.insert({label, DecodeRange(label, label + 1)});
- }
- break;
- }
- }
- if (settings.depth != shader_info.settings.depth) {
- LOG_WARNING(
- HW_GPU, "Decompiling to this setting \"{}\" failed, downgrading to this setting \"{}\"",
- CompileDepthAsString(settings.depth), CompileDepthAsString(shader_info.settings.depth));
- }
-}
-
-NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) {
- NodeBlock basic_block;
- DecodeRangeInner(basic_block, begin, end);
- return basic_block;
-}
-
-void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) {
- for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) {
- pc = DecodeInstr(bb, pc);
- }
-}
-
-void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) {
- const auto apply_conditions = [&](const Condition& cond, Node n) -> Node {
- Node result = n;
- if (cond.cc != ConditionCode::T) {
- result = Conditional(GetConditionCode(cond.cc), {result});
- }
- if (cond.predicate != Pred::UnusedIndex) {
- u32 pred = static_cast<u32>(cond.predicate);
- const bool is_neg = pred > 7;
- if (is_neg) {
- pred -= 8;
- }
- result = Conditional(GetPredicate(pred, is_neg), {result});
- }
- return result;
- };
- if (std::holds_alternative<SingleBranch>(*block.branch)) {
- auto branch = std::get_if<SingleBranch>(block.branch.get());
- if (branch->address < 0) {
- if (branch->kill) {
- Node n = Operation(OperationCode::Discard);
- n = apply_conditions(branch->condition, n);
- bb.push_back(n);
- global_code.push_back(n);
- return;
- }
- Node n = Operation(OperationCode::Exit);
- n = apply_conditions(branch->condition, n);
- bb.push_back(n);
- global_code.push_back(n);
- return;
- }
- Node n = Operation(OperationCode::Branch, Immediate(branch->address));
- n = apply_conditions(branch->condition, n);
- bb.push_back(n);
- global_code.push_back(n);
- return;
- }
- auto multi_branch = std::get_if<MultiBranch>(block.branch.get());
- Node op_a = GetRegister(multi_branch->gpr);
- for (auto& branch_case : multi_branch->branches) {
- Node n = Operation(OperationCode::Branch, Immediate(branch_case.address));
- Node op_b = Immediate(branch_case.cmp_value);
- Node condition =
- GetPredicateComparisonInteger(Tegra::Shader::PredCondition::EQ, false, op_a, op_b);
- auto result = Conditional(condition, {n});
- bb.push_back(result);
- global_code.push_back(result);
- }
-}
-
-u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
- // Ignore sched instructions when generating code.
- if (IsSchedInstruction(pc, main_offset)) {
- return pc + 1;
- }
-
- const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
- const u32 nv_address = ConvertAddressToNvidiaSpace(pc);
-
- // Decoding failure
- if (!opcode) {
- UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value);
- bb.push_back(Comment(fmt::format("{:05x} Unimplemented Shader instruction (0x{:016x})",
- nv_address, instr.value)));
- return pc + 1;
- }
-
- bb.push_back(Comment(
- fmt::format("{:05x} {} (0x{:016x})", nv_address, opcode->get().GetName(), instr.value)));
-
- using Tegra::Shader::Pred;
- UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute,
- "NeverExecute predicate not implemented");
-
- static const std::map<OpCode::Type, u32 (ShaderIR::*)(NodeBlock&, u32)> decoders = {
- {OpCode::Type::Arithmetic, &ShaderIR::DecodeArithmetic},
- {OpCode::Type::ArithmeticImmediate, &ShaderIR::DecodeArithmeticImmediate},
- {OpCode::Type::Bfe, &ShaderIR::DecodeBfe},
- {OpCode::Type::Bfi, &ShaderIR::DecodeBfi},
- {OpCode::Type::Shift, &ShaderIR::DecodeShift},
- {OpCode::Type::ArithmeticInteger, &ShaderIR::DecodeArithmeticInteger},
- {OpCode::Type::ArithmeticIntegerImmediate, &ShaderIR::DecodeArithmeticIntegerImmediate},
- {OpCode::Type::ArithmeticHalf, &ShaderIR::DecodeArithmeticHalf},
- {OpCode::Type::ArithmeticHalfImmediate, &ShaderIR::DecodeArithmeticHalfImmediate},
- {OpCode::Type::Ffma, &ShaderIR::DecodeFfma},
- {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2},
- {OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
- {OpCode::Type::Warp, &ShaderIR::DecodeWarp},
- {OpCode::Type::Memory, &ShaderIR::DecodeMemory},
- {OpCode::Type::Texture, &ShaderIR::DecodeTexture},
- {OpCode::Type::Image, &ShaderIR::DecodeImage},
- {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate},
- {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate},
- {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate},
- {OpCode::Type::PredicateSetRegister, &ShaderIR::DecodePredicateSetRegister},
- {OpCode::Type::PredicateSetPredicate, &ShaderIR::DecodePredicateSetPredicate},
- {OpCode::Type::RegisterSetPredicate, &ShaderIR::DecodeRegisterSetPredicate},
- {OpCode::Type::FloatSet, &ShaderIR::DecodeFloatSet},
- {OpCode::Type::IntegerSet, &ShaderIR::DecodeIntegerSet},
- {OpCode::Type::HalfSet, &ShaderIR::DecodeHalfSet},
- {OpCode::Type::Video, &ShaderIR::DecodeVideo},
- {OpCode::Type::Xmad, &ShaderIR::DecodeXmad},
- };
-
- std::vector<Node> tmp_block;
- if (const auto decoder = decoders.find(opcode->get().GetType()); decoder != decoders.end()) {
- pc = (this->*decoder->second)(tmp_block, pc);
- } else {
- pc = DecodeOther(tmp_block, pc);
- }
-
-    // Some instructions (like SSY) don't have a predicate field; they are always executed
-    // unconditionally.
- const bool can_be_predicated = OpCode::IsPredicatedInstruction(opcode->get().GetId());
- const auto pred_index = static_cast<u32>(instr.pred.pred_index);
-
- if (can_be_predicated && pred_index != static_cast<u32>(Pred::UnusedIndex)) {
- const Node conditional =
- Conditional(GetPredicate(pred_index, instr.negate_pred != 0), std::move(tmp_block));
- global_code.push_back(conditional);
- bb.push_back(conditional);
- } else {
- for (auto& node : tmp_block) {
- global_code.push_back(node);
- bb.push_back(node);
- }
- }
-
- return pc + 1;
-}
-
-void ShaderIR::PostDecode() {
- // Deduce texture handler size if needed
- auto gpu_driver = registry.AccessGuestDriverProfile();
- DeduceTextureHandlerSize(gpu_driver, used_samplers);
-    // Deduce indexed sampler sizes
- if (!uses_indexed_samplers) {
- return;
- }
- for (auto& sampler : used_samplers) {
- if (!sampler.is_indexed) {
- continue;
- }
- if (const auto size = TryDeduceSamplerSize(sampler, gpu_driver, used_samplers)) {
- sampler.size = *size;
- } else {
- LOG_CRITICAL(HW_GPU, "Failed to deduce size of indexed sampler");
- sampler.size = 1;
- }
- }
-}
-
-} // namespace VideoCommon::Shader
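For reference, the TryDeduceSamplerSize helper removed above infers how many entries an indexed sampler spans from the distance to the next bound sampler offset. A minimal standalone sketch of that arithmetic, assuming offsets are expressed in 32-bit words and the handler size in bytes (DeduceIndexedSamplerSize and its parameters are illustrative names, not part of the deleted API):

    #include <algorithm>
    #include <cstdint>
    #include <limits>
    #include <optional>
    #include <vector>

    // Sketch: deduce the element count of an indexed sampler from the gap between its
    // constant-buffer offset and the closest bound sampler that follows it.
    std::optional<std::uint32_t> DeduceIndexedSamplerSize(
        std::uint32_t base_offset, const std::vector<std::uint32_t>& bound_offsets,
        std::uint32_t texture_handler_size) {
        std::uint32_t max_offset = std::numeric_limits<std::uint32_t>::max();
        for (const std::uint32_t offset : bound_offsets) {
            if (offset > base_offset) {
                max_offset = std::min(offset, max_offset); // closest sampler after the base
            }
        }
        if (max_offset == std::numeric_limits<std::uint32_t>::max()) {
            return std::nullopt; // no sampler bound after this one; the size cannot be deduced
        }
        // The factor of 4 converts the word-sized offset gap into bytes before dividing
        // by the handler size, mirroring the expression in the deleted code.
        return ((max_offset - base_offset) * 4) / texture_handler_size;
    }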
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
deleted file mode 100644
index 15eb700e7..000000000
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ /dev/null
@@ -1,166 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "common/logging/log.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-using Tegra::Shader::SubOp;
-
-u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
-
- Node op_a = GetRegister(instr.gpr8);
-
- Node op_b = [&] {
- if (instr.is_b_imm) {
- return GetImmediate19(instr);
- } else if (instr.is_b_gpr) {
- return GetRegister(instr.gpr20);
- } else {
- return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
- }
- }();
-
- switch (opcode->get().GetId()) {
- case OpCode::Id::MOV_C:
- case OpCode::Id::MOV_R: {
-        // MOV has neither 'abs' nor 'neg' bits.
- SetRegister(bb, instr.gpr0, op_b);
- break;
- }
- case OpCode::Id::FMUL_C:
- case OpCode::Id::FMUL_R:
- case OpCode::Id::FMUL_IMM: {
- // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit.
- if (instr.fmul.tab5cb8_2 != 0) {
- LOG_DEBUG(HW_GPU, "FMUL tab5cb8_2({}) is not implemented",
- instr.fmul.tab5cb8_2.Value());
- }
- if (instr.fmul.tab5c68_0 != 1) {
- LOG_DEBUG(HW_GPU, "FMUL tab5cb8_0({}) is not implemented",
- instr.fmul.tab5c68_0.Value());
- }
-
- op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b);
-
- static constexpr std::array FmulPostFactor = {
- 1.000f, // None
- 0.500f, // Divide 2
- 0.250f, // Divide 4
- 0.125f, // Divide 8
- 8.000f, // Mul 8
- 4.000f, // Mul 4
- 2.000f, // Mul 2
- };
-
- if (instr.fmul.postfactor != 0) {
- op_a = Operation(OperationCode::FMul, NO_PRECISE, op_a,
- Immediate(FmulPostFactor[instr.fmul.postfactor]));
- }
-
- // TODO(Rodrigo): Should precise be used when there's a postfactor?
- Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b);
-
- value = GetSaturatedFloat(value, instr.alu.saturate_d);
-
- SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
- SetRegister(bb, instr.gpr0, value);
- break;
- }
- case OpCode::Id::FADD_C:
- case OpCode::Id::FADD_R:
- case OpCode::Id::FADD_IMM: {
- op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
- op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
-
- Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b);
- value = GetSaturatedFloat(value, instr.alu.saturate_d);
-
- SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
- SetRegister(bb, instr.gpr0, value);
- break;
- }
- case OpCode::Id::MUFU: {
- op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
-
- Node value = [&]() {
- switch (instr.sub_op) {
- case SubOp::Cos:
- return Operation(OperationCode::FCos, PRECISE, op_a);
- case SubOp::Sin:
- return Operation(OperationCode::FSin, PRECISE, op_a);
- case SubOp::Ex2:
- return Operation(OperationCode::FExp2, PRECISE, op_a);
- case SubOp::Lg2:
- return Operation(OperationCode::FLog2, PRECISE, op_a);
- case SubOp::Rcp:
- return Operation(OperationCode::FDiv, PRECISE, Immediate(1.0f), op_a);
- case SubOp::Rsq:
- return Operation(OperationCode::FInverseSqrt, PRECISE, op_a);
- case SubOp::Sqrt:
- return Operation(OperationCode::FSqrt, PRECISE, op_a);
- default:
- UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}", instr.sub_op.Value());
- return Immediate(0);
- }
- }();
- value = GetSaturatedFloat(value, instr.alu.saturate_d);
-
- SetRegister(bb, instr.gpr0, value);
- break;
- }
- case OpCode::Id::FMNMX_C:
- case OpCode::Id::FMNMX_R:
- case OpCode::Id::FMNMX_IMM: {
- op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
- op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
-
- const Node condition = GetPredicate(instr.alu.fmnmx.pred, instr.alu.fmnmx.negate_pred != 0);
-
- const Node min = Operation(OperationCode::FMin, NO_PRECISE, op_a, op_b);
- const Node max = Operation(OperationCode::FMax, NO_PRECISE, op_a, op_b);
- const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max);
-
- SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
- SetRegister(bb, instr.gpr0, value);
- break;
- }
- case OpCode::Id::FCMP_RR:
- case OpCode::Id::FCMP_RC:
- case OpCode::Id::FCMP_IMMR: {
- UNIMPLEMENTED_IF(instr.fcmp.ftz == 0);
- Node op_c = GetRegister(instr.gpr39);
- Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f));
- SetRegister(
- bb, instr.gpr0,
- Operation(OperationCode::Select, std::move(comp), std::move(op_a), std::move(op_b)));
- break;
- }
- case OpCode::Id::RRO_C:
- case OpCode::Id::RRO_R:
- case OpCode::Id::RRO_IMM: {
- LOG_DEBUG(HW_GPU, "(STUBBED) RRO used");
-
- // Currently RRO is only implemented as a register move.
- op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
- SetRegister(bb, instr.gpr0, op_b);
- break;
- }
- default:
- UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName());
- }
-
- return pc;
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp
deleted file mode 100644
index 88103fede..000000000
--- a/src/video_core/shader/decode/arithmetic_half.cpp
+++ /dev/null
@@ -1,101 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "common/logging/log.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::HalfType;
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-
-u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
-
- bool negate_a = false;
- bool negate_b = false;
- bool absolute_a = false;
- bool absolute_b = false;
-
- switch (opcode->get().GetId()) {
- case OpCode::Id::HADD2_R:
- if (instr.alu_half.ftz == 0) {
- LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
- }
- negate_a = ((instr.value >> 43) & 1) != 0;
- negate_b = ((instr.value >> 31) & 1) != 0;
- absolute_a = ((instr.value >> 44) & 1) != 0;
- absolute_b = ((instr.value >> 30) & 1) != 0;
- break;
- case OpCode::Id::HADD2_C:
- if (instr.alu_half.ftz == 0) {
- LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
- }
- negate_a = ((instr.value >> 43) & 1) != 0;
- negate_b = ((instr.value >> 56) & 1) != 0;
- absolute_a = ((instr.value >> 44) & 1) != 0;
- absolute_b = ((instr.value >> 54) & 1) != 0;
- break;
- case OpCode::Id::HMUL2_R:
- negate_a = ((instr.value >> 43) & 1) != 0;
- absolute_a = ((instr.value >> 44) & 1) != 0;
- absolute_b = ((instr.value >> 30) & 1) != 0;
- break;
- case OpCode::Id::HMUL2_C:
- negate_b = ((instr.value >> 31) & 1) != 0;
- absolute_a = ((instr.value >> 44) & 1) != 0;
- absolute_b = ((instr.value >> 54) & 1) != 0;
- break;
- default:
- UNREACHABLE();
- break;
- }
-
- Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a);
- op_a = GetOperandAbsNegHalf(op_a, absolute_a, negate_a);
-
- auto [type_b, op_b] = [this, instr, opcode]() -> std::pair<HalfType, Node> {
- switch (opcode->get().GetId()) {
- case OpCode::Id::HADD2_C:
- case OpCode::Id::HMUL2_C:
- return {HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
- case OpCode::Id::HADD2_R:
- case OpCode::Id::HMUL2_R:
- return {instr.alu_half.type_b, GetRegister(instr.gpr20)};
- default:
- UNREACHABLE();
- return {HalfType::F32, Immediate(0)};
- }
- }();
- op_b = UnpackHalfFloat(op_b, type_b);
- op_b = GetOperandAbsNegHalf(op_b, absolute_b, negate_b);
-
- Node value = [this, opcode, op_a, op_b = op_b] {
- switch (opcode->get().GetId()) {
- case OpCode::Id::HADD2_C:
- case OpCode::Id::HADD2_R:
- return Operation(OperationCode::HAdd, PRECISE, op_a, op_b);
- case OpCode::Id::HMUL2_C:
- case OpCode::Id::HMUL2_R:
- return Operation(OperationCode::HMul, PRECISE, op_a, op_b);
- default:
- UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName());
- return Immediate(0);
- }
- }();
- value = GetSaturatedHalfFloat(value, instr.alu_half.saturate);
- value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge);
-
- SetRegister(bb, instr.gpr0, value);
-
- return pc;
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
deleted file mode 100644
index d179b9873..000000000
--- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp
+++ /dev/null
@@ -1,54 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "common/logging/log.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-
-u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
-
- if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) {
- if (instr.alu_half_imm.ftz == 0) {
- LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
- }
- } else {
- if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::FTZ) {
- LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
- }
- }
-
- Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a);
- op_a = GetOperandAbsNegHalf(op_a, instr.alu_half_imm.abs_a, instr.alu_half_imm.negate_a);
-
- const Node op_b = UnpackHalfImmediate(instr, true);
-
- Node value = [&]() {
- switch (opcode->get().GetId()) {
- case OpCode::Id::HADD2_IMM:
- return Operation(OperationCode::HAdd, PRECISE, op_a, op_b);
- case OpCode::Id::HMUL2_IMM:
- return Operation(OperationCode::HMul, PRECISE, op_a, op_b);
- default:
- UNREACHABLE();
- return Immediate(0);
- }
- }();
-
- value = GetSaturatedHalfFloat(value, instr.alu_half_imm.saturate);
- value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half_imm.merge);
- SetRegister(bb, instr.gpr0, value);
- return pc;
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/arithmetic_immediate.cpp b/src/video_core/shader/decode/arithmetic_immediate.cpp
deleted file mode 100644
index f1875967c..000000000
--- a/src/video_core/shader/decode/arithmetic_immediate.cpp
+++ /dev/null
@@ -1,53 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-
-u32 ShaderIR::DecodeArithmeticImmediate(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
-
- switch (opcode->get().GetId()) {
- case OpCode::Id::MOV32_IMM: {
- SetRegister(bb, instr.gpr0, GetImmediate32(instr));
- break;
- }
- case OpCode::Id::FMUL32_IMM: {
- Node value =
- Operation(OperationCode::FMul, PRECISE, GetRegister(instr.gpr8), GetImmediate32(instr));
- value = GetSaturatedFloat(value, instr.fmul32.saturate);
-
- SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc);
- SetRegister(bb, instr.gpr0, value);
- break;
- }
- case OpCode::Id::FADD32I: {
- const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fadd32i.abs_a,
- instr.fadd32i.negate_a);
- const Node op_b = GetOperandAbsNegFloat(GetImmediate32(instr), instr.fadd32i.abs_b,
- instr.fadd32i.negate_b);
-
- const Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b);
- SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc);
- SetRegister(bb, instr.gpr0, value);
- break;
- }
- default:
- UNIMPLEMENTED_MSG("Unhandled arithmetic immediate instruction: {}",
- opcode->get().GetName());
- }
-
- return pc;
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
deleted file mode 100644
index 7b5bb7003..000000000
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ /dev/null
@@ -1,375 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::IAdd3Height;
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-using Tegra::Shader::Pred;
-using Tegra::Shader::Register;
-
-u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
-
- Node op_a = GetRegister(instr.gpr8);
- Node op_b = [&]() {
- if (instr.is_b_imm) {
- return Immediate(instr.alu.GetSignedImm20_20());
- } else if (instr.is_b_gpr) {
- return GetRegister(instr.gpr20);
- } else {
- return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
- }
- }();
-
- switch (opcode->get().GetId()) {
- case OpCode::Id::IADD_C:
- case OpCode::Id::IADD_R:
- case OpCode::Id::IADD_IMM: {
- UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD.SAT");
- UNIMPLEMENTED_IF_MSG(instr.iadd.x && instr.generates_cc, "IADD.X Rd.CC");
-
- op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true);
- op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true);
-
- Node value = Operation(OperationCode::UAdd, op_a, op_b);
-
- if (instr.iadd.x) {
- Node carry = GetInternalFlag(InternalFlag::Carry);
- Node x = Operation(OperationCode::Select, std::move(carry), Immediate(1), Immediate(0));
- value = Operation(OperationCode::UAdd, std::move(value), std::move(x));
- }
-
- if (instr.generates_cc) {
- const Node i0 = Immediate(0);
-
- Node zero = Operation(OperationCode::LogicalIEqual, value, i0);
- Node sign = Operation(OperationCode::LogicalILessThan, value, i0);
- Node carry = Operation(OperationCode::LogicalAddCarry, op_a, op_b);
-
- Node pos_a = Operation(OperationCode::LogicalIGreaterThan, op_a, i0);
- Node pos_b = Operation(OperationCode::LogicalIGreaterThan, op_b, i0);
- Node pos = Operation(OperationCode::LogicalAnd, std::move(pos_a), std::move(pos_b));
- Node overflow = Operation(OperationCode::LogicalAnd, pos, sign);
-
- SetInternalFlag(bb, InternalFlag::Zero, std::move(zero));
- SetInternalFlag(bb, InternalFlag::Sign, std::move(sign));
- SetInternalFlag(bb, InternalFlag::Carry, std::move(carry));
- SetInternalFlag(bb, InternalFlag::Overflow, std::move(overflow));
- }
- SetRegister(bb, instr.gpr0, std::move(value));
- break;
- }
- case OpCode::Id::IADD3_C:
- case OpCode::Id::IADD3_R:
- case OpCode::Id::IADD3_IMM: {
- Node op_c = GetRegister(instr.gpr39);
-
- const auto ApplyHeight = [&](IAdd3Height height, Node value) {
- switch (height) {
- case IAdd3Height::None:
- return value;
- case IAdd3Height::LowerHalfWord:
- return BitfieldExtract(value, 0, 16);
- case IAdd3Height::UpperHalfWord:
- return BitfieldExtract(value, 16, 16);
- default:
- UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}", height);
- return Immediate(0);
- }
- };
-
- if (opcode->get().GetId() == OpCode::Id::IADD3_R) {
- op_a = ApplyHeight(instr.iadd3.height_a, op_a);
- op_b = ApplyHeight(instr.iadd3.height_b, op_b);
- op_c = ApplyHeight(instr.iadd3.height_c, op_c);
- }
-
- op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd3.neg_a, true);
- op_b = GetOperandAbsNegInteger(op_b, false, instr.iadd3.neg_b, true);
- op_c = GetOperandAbsNegInteger(op_c, false, instr.iadd3.neg_c, true);
-
- const Node value = [&] {
- Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b);
- if (opcode->get().GetId() != OpCode::Id::IADD3_R) {
- return Operation(OperationCode::IAdd, NO_PRECISE, add_ab, op_c);
- }
- const Node shifted = [&] {
- switch (instr.iadd3.mode) {
- case Tegra::Shader::IAdd3Mode::RightShift:
-                    // TODO(tech4me): According to
-                    // https://envytools.readthedocs.io/en/latest/hw/graph/maxwell/cuda/int.html?highlight=iadd3
-                    // the addition between op_a and op_b should be done in uint33; more
-                    // investigation is required.
- return Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, add_ab,
- Immediate(16));
- case Tegra::Shader::IAdd3Mode::LeftShift:
- return Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, add_ab,
- Immediate(16));
- default:
- return add_ab;
- }
- }();
- return Operation(OperationCode::IAdd, NO_PRECISE, shifted, op_c);
- }();
-
- SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
- SetRegister(bb, instr.gpr0, value);
- break;
- }
- case OpCode::Id::ISCADD_C:
- case OpCode::Id::ISCADD_R:
- case OpCode::Id::ISCADD_IMM: {
- UNIMPLEMENTED_IF_MSG(instr.generates_cc,
- "Condition codes generation in ISCADD is not implemented");
-
- op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true);
- op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true);
-
- const Node shift = Immediate(static_cast<u32>(instr.alu_integer.shift_amount));
- const Node shifted_a = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, shift);
- const Node value = Operation(OperationCode::IAdd, NO_PRECISE, shifted_a, op_b);
-
- SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
- SetRegister(bb, instr.gpr0, value);
- break;
- }
- case OpCode::Id::POPC_C:
- case OpCode::Id::POPC_R:
- case OpCode::Id::POPC_IMM: {
- if (instr.popc.invert) {
- op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);
- }
- const Node value = Operation(OperationCode::IBitCount, PRECISE, op_b);
- SetRegister(bb, instr.gpr0, value);
- break;
- }
- case OpCode::Id::FLO_R:
- case OpCode::Id::FLO_C:
- case OpCode::Id::FLO_IMM: {
- Node value;
- if (instr.flo.invert) {
- op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b));
- }
- if (instr.flo.is_signed) {
- value = Operation(OperationCode::IBitMSB, NO_PRECISE, std::move(op_b));
- } else {
- value = Operation(OperationCode::UBitMSB, NO_PRECISE, std::move(op_b));
- }
- if (instr.flo.sh) {
- value =
- Operation(OperationCode::UBitwiseXor, NO_PRECISE, std::move(value), Immediate(31));
- }
- SetRegister(bb, instr.gpr0, std::move(value));
- break;
- }
- case OpCode::Id::SEL_C:
- case OpCode::Id::SEL_R:
- case OpCode::Id::SEL_IMM: {
- const Node condition = GetPredicate(instr.sel.pred, instr.sel.neg_pred != 0);
- const Node value = Operation(OperationCode::Select, PRECISE, condition, op_a, op_b);
- SetRegister(bb, instr.gpr0, value);
- break;
- }
- case OpCode::Id::ICMP_CR:
- case OpCode::Id::ICMP_R:
- case OpCode::Id::ICMP_RC:
- case OpCode::Id::ICMP_IMM: {
- const Node zero = Immediate(0);
-
- const auto [op_rhs, test] = [&]() -> std::pair<Node, Node> {
- switch (opcode->get().GetId()) {
- case OpCode::Id::ICMP_CR:
- return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
- GetRegister(instr.gpr39)};
- case OpCode::Id::ICMP_R:
- return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
- case OpCode::Id::ICMP_RC:
- return {GetRegister(instr.gpr39),
- GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
- case OpCode::Id::ICMP_IMM:
- return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)};
- default:
- UNREACHABLE();
- return {zero, zero};
- }
- }();
- const Node op_lhs = GetRegister(instr.gpr8);
- const Node comparison =
- GetPredicateComparisonInteger(instr.icmp.cond, instr.icmp.is_signed != 0, test, zero);
- SetRegister(bb, instr.gpr0, Operation(OperationCode::Select, comparison, op_lhs, op_rhs));
- break;
- }
- case OpCode::Id::LOP_C:
- case OpCode::Id::LOP_R:
- case OpCode::Id::LOP_IMM: {
- if (instr.alu.lop.invert_a)
- op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a);
- if (instr.alu.lop.invert_b)
- op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);
-
- WriteLogicOperation(bb, instr.gpr0, instr.alu.lop.operation, op_a, op_b,
- instr.alu.lop.pred_result_mode, instr.alu.lop.pred48,
- instr.generates_cc);
- break;
- }
- case OpCode::Id::LOP3_C:
- case OpCode::Id::LOP3_R:
- case OpCode::Id::LOP3_IMM: {
- const Node op_c = GetRegister(instr.gpr39);
- const Node lut = [&]() {
- if (opcode->get().GetId() == OpCode::Id::LOP3_R) {
- return Immediate(instr.alu.lop3.GetImmLut28());
- } else {
- return Immediate(instr.alu.lop3.GetImmLut48());
- }
- }();
-
- WriteLop3Instruction(bb, instr.gpr0, op_a, op_b, op_c, lut, instr.generates_cc);
- break;
- }
- case OpCode::Id::IMNMX_C:
- case OpCode::Id::IMNMX_R:
- case OpCode::Id::IMNMX_IMM: {
- UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None);
-
- const bool is_signed = instr.imnmx.is_signed;
-
- const Node condition = GetPredicate(instr.imnmx.pred, instr.imnmx.negate_pred != 0);
- const Node min = SignedOperation(OperationCode::IMin, is_signed, NO_PRECISE, op_a, op_b);
- const Node max = SignedOperation(OperationCode::IMax, is_signed, NO_PRECISE, op_a, op_b);
- const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max);
-
- SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
- SetRegister(bb, instr.gpr0, value);
- break;
- }
- case OpCode::Id::LEA_R2:
- case OpCode::Id::LEA_R1:
- case OpCode::Id::LEA_IMM:
- case OpCode::Id::LEA_RZ:
- case OpCode::Id::LEA_HI: {
- auto [op_a_, op_b_, op_c_] = [&]() -> std::tuple<Node, Node, Node> {
- switch (opcode->get().GetId()) {
- case OpCode::Id::LEA_R2: {
- return {GetRegister(instr.gpr20), GetRegister(instr.gpr39),
- Immediate(static_cast<u32>(instr.lea.r2.entry_a))};
- }
- case OpCode::Id::LEA_R1: {
- const bool neg = instr.lea.r1.neg != 0;
- return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
- GetRegister(instr.gpr20),
- Immediate(static_cast<u32>(instr.lea.r1.entry_a))};
- }
- case OpCode::Id::LEA_IMM: {
- const bool neg = instr.lea.imm.neg != 0;
- return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
- Immediate(static_cast<u32>(instr.lea.imm.entry_a)),
- Immediate(static_cast<u32>(instr.lea.imm.entry_b))};
- }
- case OpCode::Id::LEA_RZ: {
- const bool neg = instr.lea.rz.neg != 0;
- return {GetConstBuffer(instr.lea.rz.cb_index, instr.lea.rz.cb_offset),
- GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
- Immediate(static_cast<u32>(instr.lea.rz.entry_a))};
- }
- case OpCode::Id::LEA_HI:
- default:
- UNIMPLEMENTED_MSG("Unhandled LEA subinstruction: {}", opcode->get().GetName());
-
- return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)), GetRegister(instr.gpr8),
- Immediate(static_cast<u32>(instr.lea.imm.entry_b))};
- }
- }();
-
- UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex),
- "Unhandled LEA Predicate");
-
- Node value =
- Operation(OperationCode::ILogicalShiftLeft, std::move(op_a_), std::move(op_c_));
- value = Operation(OperationCode::IAdd, std::move(op_b_), std::move(value));
- SetRegister(bb, instr.gpr0, std::move(value));
-
- break;
- }
- default:
- UNIMPLEMENTED_MSG("Unhandled ArithmeticInteger instruction: {}", opcode->get().GetName());
- }
-
- return pc;
-}
-
-void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Node op_b, Node op_c,
- Node imm_lut, bool sets_cc) {
- const Node lop3_fast = [&](const Node na, const Node nb, const Node nc, const Node ttbl) {
- Node value = Immediate(0);
- const ImmediateNode imm = std::get<ImmediateNode>(*ttbl);
- if (imm.GetValue() & 0x01) {
- const Node a = Operation(OperationCode::IBitwiseNot, na);
- const Node b = Operation(OperationCode::IBitwiseNot, nb);
- const Node c = Operation(OperationCode::IBitwiseNot, nc);
- Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b);
- r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c);
- value = Operation(OperationCode::IBitwiseOr, value, r);
- }
- if (imm.GetValue() & 0x02) {
- const Node a = Operation(OperationCode::IBitwiseNot, na);
- const Node b = Operation(OperationCode::IBitwiseNot, nb);
- Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b);
- r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc);
- value = Operation(OperationCode::IBitwiseOr, value, r);
- }
- if (imm.GetValue() & 0x04) {
- const Node a = Operation(OperationCode::IBitwiseNot, na);
- const Node c = Operation(OperationCode::IBitwiseNot, nc);
- Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb);
- r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c);
- value = Operation(OperationCode::IBitwiseOr, value, r);
- }
- if (imm.GetValue() & 0x08) {
- const Node a = Operation(OperationCode::IBitwiseNot, na);
- Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb);
- r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc);
- value = Operation(OperationCode::IBitwiseOr, value, r);
- }
- if (imm.GetValue() & 0x10) {
- const Node b = Operation(OperationCode::IBitwiseNot, nb);
- const Node c = Operation(OperationCode::IBitwiseNot, nc);
- Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b);
- r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c);
- value = Operation(OperationCode::IBitwiseOr, value, r);
- }
- if (imm.GetValue() & 0x20) {
- const Node b = Operation(OperationCode::IBitwiseNot, nb);
- Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b);
- r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc);
- value = Operation(OperationCode::IBitwiseOr, value, r);
- }
- if (imm.GetValue() & 0x40) {
- const Node c = Operation(OperationCode::IBitwiseNot, nc);
- Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb);
- r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c);
- value = Operation(OperationCode::IBitwiseOr, value, r);
- }
- if (imm.GetValue() & 0x80) {
- Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb);
- r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc);
- value = Operation(OperationCode::IBitwiseOr, value, r);
- }
- return value;
- }(op_a, op_b, op_c, imm_lut);
-
- SetInternalFlagsFromInteger(bb, lop3_fast, sets_cc);
- SetRegister(bb, dest, lop3_fast);
-}
-
-} // namespace VideoCommon::Shader
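The WriteLop3Instruction helper removed above expands the 8-bit LOP3 immediate as a truth table: bit ((a << 2) | (b << 1) | c) of the LUT decides whether that minterm contributes to the result. A scalar sketch of the same expansion (Lop3 is an illustrative helper name, not part of the emulator's IR):

    #include <cstdint>

    // Sketch: LUT bit i is set when the minterm with a = bit 2 of i, b = bit 1 of i,
    // c = bit 0 of i should produce 1, matching the eight cases handled above.
    std::uint32_t Lop3(std::uint32_t a, std::uint32_t b, std::uint32_t c, std::uint32_t lut) {
        std::uint32_t result = 0;
        for (int i = 0; i < 8; ++i) {
            if (((lut >> i) & 1) == 0) {
                continue; // this minterm is not selected by the lookup table
            }
            const std::uint32_t term =
                ((i & 4) ? a : ~a) & ((i & 2) ? b : ~b) & ((i & 1) ? c : ~c);
            result |= term;
        }
        return result;
    }

For example, a LUT of 0xF0 selects exactly the minterms where a is set, so Lop3(a, b, c, 0xF0) == a; 0xC0 yields a & b, and 0xFE yields a | b | c.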
diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
deleted file mode 100644
index 73580277a..000000000
--- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
+++ /dev/null
@@ -1,99 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::LogicOperation;
-using Tegra::Shader::OpCode;
-using Tegra::Shader::Pred;
-using Tegra::Shader::PredicateResultMode;
-using Tegra::Shader::Register;
-
-u32 ShaderIR::DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
-
- Node op_a = GetRegister(instr.gpr8);
- Node op_b = Immediate(static_cast<s32>(instr.alu.imm20_32));
-
- switch (opcode->get().GetId()) {
- case OpCode::Id::IADD32I: {
- UNIMPLEMENTED_IF_MSG(instr.iadd32i.saturate, "IADD32I saturation is not implemented");
-
- op_a = GetOperandAbsNegInteger(std::move(op_a), false, instr.iadd32i.negate_a != 0, true);
-
- Node value = Operation(OperationCode::IAdd, PRECISE, std::move(op_a), std::move(op_b));
-
- SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc != 0);
- SetRegister(bb, instr.gpr0, std::move(value));
- break;
- }
- case OpCode::Id::LOP32I: {
- if (instr.alu.lop32i.invert_a) {
- op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_a));
- }
-
- if (instr.alu.lop32i.invert_b) {
- op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b));
- }
-
- WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, std::move(op_a),
- std::move(op_b), PredicateResultMode::None, Pred::UnusedIndex,
- instr.op_32.generates_cc != 0);
- break;
- }
- default:
- UNIMPLEMENTED_MSG("Unhandled ArithmeticIntegerImmediate instruction: {}",
- opcode->get().GetName());
- }
-
- return pc;
-}
-
-void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation logic_op, Node op_a,
- Node op_b, PredicateResultMode predicate_mode, Pred predicate,
- bool sets_cc) {
- Node result = [&] {
- switch (logic_op) {
- case LogicOperation::And:
- return Operation(OperationCode::IBitwiseAnd, PRECISE, std::move(op_a), std::move(op_b));
- case LogicOperation::Or:
- return Operation(OperationCode::IBitwiseOr, PRECISE, std::move(op_a), std::move(op_b));
- case LogicOperation::Xor:
- return Operation(OperationCode::IBitwiseXor, PRECISE, std::move(op_a), std::move(op_b));
- case LogicOperation::PassB:
- return op_b;
- default:
- UNIMPLEMENTED_MSG("Unimplemented logic operation={}", logic_op);
- return Immediate(0);
- }
- }();
-
- SetInternalFlagsFromInteger(bb, result, sets_cc);
- SetRegister(bb, dest, result);
-
- // Write the predicate value depending on the predicate mode.
- switch (predicate_mode) {
- case PredicateResultMode::None:
- // Do nothing.
- return;
- case PredicateResultMode::NotZero: {
- // Set the predicate to true if the result is not zero.
- Node compare = Operation(OperationCode::LogicalINotEqual, std::move(result), Immediate(0));
- SetPredicate(bb, static_cast<u64>(predicate), std::move(compare));
- break;
- }
- default:
- UNIMPLEMENTED_MSG("Unimplemented predicate result mode: {}", predicate_mode);
- }
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp
deleted file mode 100644
index 8e3b46e8e..000000000
--- a/src/video_core/shader/decode/bfe.cpp
+++ /dev/null
@@ -1,77 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-
-u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
-
- Node op_a = GetRegister(instr.gpr8);
- Node op_b = [&] {
- switch (opcode->get().GetId()) {
- case OpCode::Id::BFE_R:
- return GetRegister(instr.gpr20);
- case OpCode::Id::BFE_C:
- return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
- case OpCode::Id::BFE_IMM:
- return Immediate(instr.alu.GetSignedImm20_20());
- default:
- UNREACHABLE();
- return Immediate(0);
- }
- }();
-
-    UNIMPLEMENTED_IF_MSG(instr.bfe.rd_cc, "Condition codes in BFE are not implemented");
-
- const bool is_signed = instr.bfe.is_signed;
-
-    // Uses the reverse parallel method from
-    // https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
-    // Note for later: implement a faster method if possible.
- if (instr.bfe.brev) {
- const auto swap = [&](u32 s, u32 mask) {
- Node v1 =
- SignedOperation(OperationCode::ILogicalShiftRight, is_signed, op_a, Immediate(s));
- if (mask != 0) {
- v1 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v1),
- Immediate(mask));
- }
- Node v2 = op_a;
- if (mask != 0) {
- v2 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v2),
- Immediate(mask));
- }
- v2 = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, std::move(v2),
- Immediate(s));
- return SignedOperation(OperationCode::IBitwiseOr, is_signed, std::move(v1),
- std::move(v2));
- };
- op_a = swap(1, 0x55555555U);
- op_a = swap(2, 0x33333333U);
- op_a = swap(4, 0x0F0F0F0FU);
- op_a = swap(8, 0x00FF00FFU);
- op_a = swap(16, 0);
- }
-
- const auto offset = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b,
- Immediate(0), Immediate(8));
- const auto bits = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b,
- Immediate(8), Immediate(8));
- auto result = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_a, offset, bits);
- SetRegister(bb, instr.gpr0, std::move(result));
-
- return pc;
-}
-
-} // namespace VideoCommon::Shader
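The swap(1)/swap(2)/swap(4)/swap(8)/swap(16) sequence removed above for BFE.BREV follows the "reverse parallel" method from the linked bit-hacks page. As a standalone sketch, the same technique on a plain 32-bit integer looks like this (ReverseBits is an illustrative name):

    #include <cstdint>

    // Reverse the bits of a 32-bit word by swapping progressively larger groups.
    std::uint32_t ReverseBits(std::uint32_t v) {
        v = ((v >> 1) & 0x55555555u) | ((v & 0x55555555u) << 1);  // swap odd and even bits
        v = ((v >> 2) & 0x33333333u) | ((v & 0x33333333u) << 2);  // swap consecutive pairs
        v = ((v >> 4) & 0x0F0F0F0Fu) | ((v & 0x0F0F0F0Fu) << 4);  // swap nibbles
        v = ((v >> 8) & 0x00FF00FFu) | ((v & 0x00FF00FFu) << 8);  // swap bytes
        v = (v >> 16) | (v << 16);                                // swap 16-bit halves
        return v;
    }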
diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp
deleted file mode 100644
index 70d1c055b..000000000
--- a/src/video_core/shader/decode/bfi.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-
-u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
-
- const auto [packed_shift, base] = [&]() -> std::pair<Node, Node> {
- switch (opcode->get().GetId()) {
- case OpCode::Id::BFI_RC:
- return {GetRegister(instr.gpr39),
- GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
- case OpCode::Id::BFI_IMM_R:
- return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)};
- default:
- UNREACHABLE();
- return {Immediate(0), Immediate(0)};
- }
- }();
- const Node insert = GetRegister(instr.gpr8);
- const Node offset = BitfieldExtract(packed_shift, 0, 8);
- const Node bits = BitfieldExtract(packed_shift, 8, 8);
-
- const Node value =
- Operation(OperationCode::UBitfieldInsert, PRECISE, base, insert, offset, bits);
-
- SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
- SetRegister(bb, instr.gpr0, value);
-
- return pc;
-}
-
-} // namespace VideoCommon::Shader
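The UBitfieldInsert operation emitted above takes its offset from bits [0, 8) and its width from bits [8, 16) of the packed shift operand. A scalar sketch of what such an insert computes, assuming offset + bits <= 32 (BitfieldInsert is an illustrative helper, not the emulator's IR node):

    #include <cstdint>

    // Replace `bits` bits of `base`, starting at `offset`, with the low bits of `insert`.
    std::uint32_t BitfieldInsert(std::uint32_t base, std::uint32_t insert, std::uint32_t offset,
                                 std::uint32_t bits) {
        if (bits == 0) {
            return base;
        }
        const std::uint32_t mask = (bits >= 32) ? ~0u : ((1u << bits) - 1u);
        return (base & ~(mask << offset)) | ((insert & mask) << offset);
    }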
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
deleted file mode 100644
index fea7a54df..000000000
--- a/src/video_core/shader/decode/conversion.cpp
+++ /dev/null
@@ -1,321 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <limits>
-#include <optional>
-#include <utility>
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-using Tegra::Shader::Register;
-
-namespace {
-
-constexpr OperationCode GetFloatSelector(u64 selector) {
- return selector == 0 ? OperationCode::FCastHalf0 : OperationCode::FCastHalf1;
-}
-
-constexpr u32 SizeInBits(Register::Size size) {
- switch (size) {
- case Register::Size::Byte:
- return 8;
- case Register::Size::Short:
- return 16;
- case Register::Size::Word:
- return 32;
- case Register::Size::Long:
- return 64;
- }
- return 0;
-}
-
-constexpr std::optional<std::pair<s32, s32>> IntegerSaturateBounds(Register::Size src_size,
- Register::Size dst_size,
- bool src_signed,
- bool dst_signed) {
- const u32 dst_bits = SizeInBits(dst_size);
- if (src_size == Register::Size::Word && dst_size == Register::Size::Word) {
- if (src_signed == dst_signed) {
- return std::nullopt;
- }
- return std::make_pair(0, std::numeric_limits<s32>::max());
- }
- if (dst_signed) {
-        // Signed destination: clamp to, for instance, [-128, 127] for an 8-bit destination
- return std::make_pair(-(1 << (dst_bits - 1)), (1 << (dst_bits - 1)) - 1);
- } else {
- // Unsigned destination
- if (dst_bits == 32) {
-            // Avoid shifting by 32; that is undefined behavior
- return std::make_pair(0, s32(std::numeric_limits<u32>::max()));
- }
- return std::make_pair(0, (1 << dst_bits) - 1);
- }
-}
-
-} // Anonymous namespace
-
-u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
-
- switch (opcode->get().GetId()) {
- case OpCode::Id::I2I_R:
- case OpCode::Id::I2I_C:
- case OpCode::Id::I2I_IMM: {
- const bool src_signed = instr.conversion.is_input_signed;
- const bool dst_signed = instr.conversion.is_output_signed;
- const Register::Size src_size = instr.conversion.src_size;
- const Register::Size dst_size = instr.conversion.dst_size;
- const u32 selector = static_cast<u32>(instr.conversion.int_src.selector);
-
- Node value = [this, instr, opcode] {
- switch (opcode->get().GetId()) {
- case OpCode::Id::I2I_R:
- return GetRegister(instr.gpr20);
- case OpCode::Id::I2I_C:
- return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
- case OpCode::Id::I2I_IMM:
- return Immediate(instr.alu.GetSignedImm20_20());
- default:
- UNREACHABLE();
- return Immediate(0);
- }
- }();
-
- // Ensure the source selector is valid
- switch (instr.conversion.src_size) {
- case Register::Size::Byte:
- break;
- case Register::Size::Short:
- ASSERT(selector == 0 || selector == 2);
- break;
- default:
- ASSERT(selector == 0);
- break;
- }
-
- if (src_size != Register::Size::Word || selector != 0) {
- value = SignedOperation(OperationCode::IBitfieldExtract, src_signed, std::move(value),
- Immediate(selector * 8), Immediate(SizeInBits(src_size)));
- }
-
- value = GetOperandAbsNegInteger(std::move(value), instr.conversion.abs_a,
- instr.conversion.negate_a, src_signed);
-
- if (instr.alu.saturate_d) {
- if (src_signed && !dst_signed) {
- Node is_negative = Operation(OperationCode::LogicalUGreaterEqual, value,
- Immediate(1 << (SizeInBits(src_size) - 1)));
- value = Operation(OperationCode::Select, std::move(is_negative), Immediate(0),
- std::move(value));
-
-                // Simplify generated expressions; this can be removed without semantic impact
- SetTemporary(bb, 0, std::move(value));
- value = GetTemporary(0);
-
- if (dst_size != Register::Size::Word) {
- const Node limit = Immediate((1 << SizeInBits(dst_size)) - 1);
- Node is_large =
- Operation(OperationCode::LogicalUGreaterThan, std::move(value), limit);
- value = Operation(OperationCode::Select, std::move(is_large), limit,
- std::move(value));
- }
- } else if (const std::optional bounds =
- IntegerSaturateBounds(src_size, dst_size, src_signed, dst_signed)) {
- value = SignedOperation(OperationCode::IMax, src_signed, std::move(value),
- Immediate(bounds->first));
- value = SignedOperation(OperationCode::IMin, src_signed, std::move(value),
- Immediate(bounds->second));
- }
- } else if (dst_size != Register::Size::Word) {
-            // No saturation; we only have to mask the result
- Node mask = Immediate((1 << SizeInBits(dst_size)) - 1);
- value = Operation(OperationCode::UBitwiseAnd, std::move(value), std::move(mask));
- }
-
- SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
- SetRegister(bb, instr.gpr0, std::move(value));
- break;
- }
- case OpCode::Id::I2F_R:
- case OpCode::Id::I2F_C:
- case OpCode::Id::I2F_IMM: {
- UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long);
- UNIMPLEMENTED_IF_MSG(instr.generates_cc,
- "Condition codes generation in I2F is not implemented");
-
- Node value = [&] {
- switch (opcode->get().GetId()) {
- case OpCode::Id::I2F_R:
- return GetRegister(instr.gpr20);
- case OpCode::Id::I2F_C:
- return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
- case OpCode::Id::I2F_IMM:
- return Immediate(instr.alu.GetSignedImm20_20());
- default:
- UNREACHABLE();
- return Immediate(0);
- }
- }();
-
- const bool input_signed = instr.conversion.is_input_signed;
-
- if (const u32 offset = static_cast<u32>(instr.conversion.int_src.selector); offset > 0) {
- ASSERT(instr.conversion.src_size == Register::Size::Byte ||
- instr.conversion.src_size == Register::Size::Short);
- if (instr.conversion.src_size == Register::Size::Short) {
- ASSERT(offset == 0 || offset == 2);
- }
- value = SignedOperation(OperationCode::ILogicalShiftRight, input_signed,
- std::move(value), Immediate(offset * 8));
- }
-
- value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed);
- value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, false, input_signed);
- value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value);
- value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a);
-
- SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
-
- if (instr.conversion.dst_size == Register::Size::Short) {
- value = Operation(OperationCode::HCastFloat, PRECISE, value);
- }
-
- SetRegister(bb, instr.gpr0, value);
- break;
- }
- case OpCode::Id::F2F_R:
- case OpCode::Id::F2F_C:
- case OpCode::Id::F2F_IMM: {
- UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long);
- UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long);
- UNIMPLEMENTED_IF_MSG(instr.generates_cc,
- "Condition codes generation in F2F is not implemented");
-
- Node value = [&]() {
- switch (opcode->get().GetId()) {
- case OpCode::Id::F2F_R:
- return GetRegister(instr.gpr20);
- case OpCode::Id::F2F_C:
- return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
- case OpCode::Id::F2F_IMM:
- return GetImmediate19(instr);
- default:
- UNREACHABLE();
- return Immediate(0);
- }
- }();
-
- if (instr.conversion.src_size == Register::Size::Short) {
- value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE,
- std::move(value));
- } else {
- ASSERT(instr.conversion.float_src.selector == 0);
- }
-
- value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
-
- value = [&] {
- if (instr.conversion.src_size != instr.conversion.dst_size) {
-                // Rounding operations only matter when the source and destination conversion
-                // sizes are the same.
- return value;
- }
- switch (instr.conversion.f2f.GetRoundingMode()) {
- case Tegra::Shader::F2fRoundingOp::None:
- return value;
- case Tegra::Shader::F2fRoundingOp::Round:
- return Operation(OperationCode::FRoundEven, value);
- case Tegra::Shader::F2fRoundingOp::Floor:
- return Operation(OperationCode::FFloor, value);
- case Tegra::Shader::F2fRoundingOp::Ceil:
- return Operation(OperationCode::FCeil, value);
- case Tegra::Shader::F2fRoundingOp::Trunc:
- return Operation(OperationCode::FTrunc, value);
- default:
- UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}",
- instr.conversion.f2f.rounding.Value());
- return value;
- }
- }();
- value = GetSaturatedFloat(value, instr.alu.saturate_d);
-
- SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
-
- if (instr.conversion.dst_size == Register::Size::Short) {
- value = Operation(OperationCode::HCastFloat, PRECISE, value);
- }
-
- SetRegister(bb, instr.gpr0, value);
- break;
- }
- case OpCode::Id::F2I_R:
- case OpCode::Id::F2I_C:
- case OpCode::Id::F2I_IMM: {
- UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long);
- UNIMPLEMENTED_IF_MSG(instr.generates_cc,
- "Condition codes generation in F2I is not implemented");
- Node value = [&]() {
- switch (opcode->get().GetId()) {
- case OpCode::Id::F2I_R:
- return GetRegister(instr.gpr20);
- case OpCode::Id::F2I_C:
- return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
- case OpCode::Id::F2I_IMM:
- return GetImmediate19(instr);
- default:
- UNREACHABLE();
- return Immediate(0);
- }
- }();
-
- if (instr.conversion.src_size == Register::Size::Short) {
- value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE,
- std::move(value));
- } else {
- ASSERT(instr.conversion.float_src.selector == 0);
- }
-
- value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
-
- value = [&]() {
- switch (instr.conversion.f2i.rounding) {
- case Tegra::Shader::F2iRoundingOp::RoundEven:
- return Operation(OperationCode::FRoundEven, PRECISE, value);
- case Tegra::Shader::F2iRoundingOp::Floor:
- return Operation(OperationCode::FFloor, PRECISE, value);
- case Tegra::Shader::F2iRoundingOp::Ceil:
- return Operation(OperationCode::FCeil, PRECISE, value);
- case Tegra::Shader::F2iRoundingOp::Trunc:
- return Operation(OperationCode::FTrunc, PRECISE, value);
- default:
- UNIMPLEMENTED_MSG("Unimplemented F2I rounding mode {}",
- instr.conversion.f2i.rounding.Value());
- return Immediate(0);
- }
- }();
- const bool is_signed = instr.conversion.is_output_signed;
- value = SignedOperation(OperationCode::ICastFloat, is_signed, PRECISE, value);
- value = ConvertIntegerSize(value, instr.conversion.dst_size, is_signed);
-
- SetRegister(bb, instr.gpr0, value);
- break;
- }
- default:
- UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName());
- }
-
- return pc;
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp
deleted file mode 100644
index 5973588d6..000000000
--- a/src/video_core/shader/decode/ffma.cpp
+++ /dev/null
@@ -1,62 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-
-u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
-
- UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented");
- if (instr.ffma.tab5980_0 != 1) {
- LOG_DEBUG(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value());
- }
- if (instr.ffma.tab5980_1 != 0) {
- LOG_DEBUG(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value());
- }
-
- const Node op_a = GetRegister(instr.gpr8);
-
- auto [op_b, op_c] = [&]() -> std::tuple<Node, Node> {
- switch (opcode->get().GetId()) {
- case OpCode::Id::FFMA_CR: {
- return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
- GetRegister(instr.gpr39)};
- }
- case OpCode::Id::FFMA_RR:
- return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
- case OpCode::Id::FFMA_RC: {
- return {GetRegister(instr.gpr39),
- GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
- }
- case OpCode::Id::FFMA_IMM:
- return {GetImmediate19(instr), GetRegister(instr.gpr39)};
- default:
- UNIMPLEMENTED_MSG("Unhandled FFMA instruction: {}", opcode->get().GetName());
- return {Immediate(0), Immediate(0)};
- }
- }();
-
- op_b = GetOperandAbsNegFloat(op_b, false, instr.ffma.negate_b);
- op_c = GetOperandAbsNegFloat(op_c, false, instr.ffma.negate_c);
-
- Node value = Operation(OperationCode::FFma, PRECISE, op_a, op_b, op_c);
- value = GetSaturatedFloat(value, instr.alu.saturate_d);
-
- SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
- SetRegister(bb, instr.gpr0, value);
-
- return pc;
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp
deleted file mode 100644
index 5614e8a0d..000000000
--- a/src/video_core/shader/decode/float_set.cpp
+++ /dev/null
@@ -1,58 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-
-u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
-
- const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fset.abs_a != 0,
- instr.fset.neg_a != 0);
-
- Node op_b = [&]() {
- if (instr.is_b_imm) {
- return GetImmediate19(instr);
- } else if (instr.is_b_gpr) {
- return GetRegister(instr.gpr20);
- } else {
- return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
- }
- }();
-
- op_b = GetOperandAbsNegFloat(op_b, instr.fset.abs_b != 0, instr.fset.neg_b != 0);
-
- // The fset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the
- // condition is true, and to 0 otherwise.
- const Node second_pred = GetPredicate(instr.fset.pred39, instr.fset.neg_pred != 0);
-
- const OperationCode combiner = GetPredicateCombiner(instr.fset.op);
- const Node first_pred = GetPredicateComparisonFloat(instr.fset.cond, op_a, op_b);
-
- const Node predicate = Operation(combiner, first_pred, second_pred);
-
- const Node true_value = instr.fset.bf ? Immediate(1.0f) : Immediate(-1);
- const Node false_value = instr.fset.bf ? Immediate(0.0f) : Immediate(0);
- const Node value =
- Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value);
-
- if (instr.fset.bf) {
- SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
- } else {
- SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
- }
- SetRegister(bb, instr.gpr0, value);
-
- return pc;
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp
deleted file mode 100644
index 200c2c983..000000000
--- a/src/video_core/shader/decode/float_set_predicate.cpp
+++ /dev/null
@@ -1,57 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-using Tegra::Shader::Pred;
-
-u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
-
- Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0,
- instr.fsetp.neg_a != 0);
- Node op_b = [&]() {
- if (instr.is_b_imm) {
- return GetImmediate19(instr);
- } else if (instr.is_b_gpr) {
- return GetRegister(instr.gpr20);
- } else {
- return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
- }
- }();
- op_b = GetOperandAbsNegFloat(std::move(op_b), instr.fsetp.abs_b, instr.fsetp.neg_b);
-
- // We can't use the constant predicate as destination.
- ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
-
- const Node predicate =
- GetPredicateComparisonFloat(instr.fsetp.cond, std::move(op_a), std::move(op_b));
- const Node second_pred = GetPredicate(instr.fsetp.pred39, instr.fsetp.neg_pred != 0);
-
- const OperationCode combiner = GetPredicateCombiner(instr.fsetp.op);
- const Node value = Operation(combiner, predicate, second_pred);
-
- // Set the primary predicate to the result of Predicate OP SecondPredicate
- SetPredicate(bb, instr.fsetp.pred3, value);
-
- if (instr.fsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
- // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
- // if enabled
- const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate);
- const Node second_value = Operation(combiner, negated_pred, second_pred);
- SetPredicate(bb, instr.fsetp.pred0, second_value);
- }
-
- return pc;
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp
deleted file mode 100644
index fa83108cd..000000000
--- a/src/video_core/shader/decode/half_set.cpp
+++ /dev/null
@@ -1,115 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <array>
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "common/logging/log.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using std::move;
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-using Tegra::Shader::PredCondition;
-
-u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
-
- PredCondition cond{};
- bool bf = false;
- bool ftz = false;
- bool neg_a = false;
- bool abs_a = false;
- bool neg_b = false;
- bool abs_b = false;
- switch (opcode->get().GetId()) {
- case OpCode::Id::HSET2_C:
- case OpCode::Id::HSET2_IMM:
- cond = instr.hsetp2.cbuf_and_imm.cond;
- bf = instr.Bit(53);
- ftz = instr.Bit(54);
- neg_a = instr.Bit(43);
- abs_a = instr.Bit(44);
- neg_b = instr.Bit(56);
- abs_b = instr.Bit(54);
- break;
- case OpCode::Id::HSET2_R:
- cond = instr.hsetp2.reg.cond;
- bf = instr.Bit(49);
- ftz = instr.Bit(50);
- neg_a = instr.Bit(43);
- abs_a = instr.Bit(44);
- neg_b = instr.Bit(31);
- abs_b = instr.Bit(30);
- break;
- default:
- UNREACHABLE();
- }
-
- Node op_b = [this, instr, opcode] {
- switch (opcode->get().GetId()) {
- case OpCode::Id::HSET2_C:
-            // Report as unimplemented since this path is untested.
- UNIMPLEMENTED_MSG("HSET2_C is not implemented");
- return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
- case OpCode::Id::HSET2_R:
- return GetRegister(instr.gpr20);
- case OpCode::Id::HSET2_IMM:
- return UnpackHalfImmediate(instr, true);
- default:
- UNREACHABLE();
- return Node{};
- }
- }();
-
- if (!ftz) {
- LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
- }
-
- Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a);
- op_a = GetOperandAbsNegHalf(op_a, abs_a, neg_a);
-
- switch (opcode->get().GetId()) {
- case OpCode::Id::HSET2_R:
- op_b = GetOperandAbsNegHalf(move(op_b), abs_b, neg_b);
- [[fallthrough]];
- case OpCode::Id::HSET2_C:
- op_b = UnpackHalfFloat(move(op_b), instr.hset2.type_b);
- break;
- default:
- break;
- }
-
- Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred);
-
- Node comparison_pair = GetPredicateComparisonHalf(cond, op_a, op_b);
-
- const OperationCode combiner = GetPredicateCombiner(instr.hset2.op);
-
- // HSET2 operates on each half float in the pack.
- std::array<Node, 2> values;
- for (u32 i = 0; i < 2; ++i) {
- const u32 raw_value = bf ? 0x3c00 : 0xffff;
- Node true_value = Immediate(raw_value << (i * 16));
- Node false_value = Immediate(0);
-
- Node comparison = Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i));
- Node predicate = Operation(combiner, comparison, second_pred);
- values[i] =
- Operation(OperationCode::Select, predicate, move(true_value), move(false_value));
- }
-
- Node value = Operation(OperationCode::UBitwiseOr, values[0], values[1]);
- SetRegister(bb, instr.gpr0, move(value));
-
- return pc;
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp
deleted file mode 100644
index 310655619..000000000
--- a/src/video_core/shader/decode/half_set_predicate.cpp
+++ /dev/null
@@ -1,80 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "common/logging/log.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-using Tegra::Shader::Pred;
-
-u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
-
- if (instr.hsetp2.ftz != 0) {
- LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
- }
-
- Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a);
- op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a);
-
- Tegra::Shader::PredCondition cond{};
- bool h_and{};
- Node op_b{};
- switch (opcode->get().GetId()) {
- case OpCode::Id::HSETP2_C:
- cond = instr.hsetp2.cbuf_and_imm.cond;
- h_and = instr.hsetp2.cbuf_and_imm.h_and;
- op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
- instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b);
- // F32 is hardcoded in hardware
- op_b = UnpackHalfFloat(std::move(op_b), Tegra::Shader::HalfType::F32);
- break;
- case OpCode::Id::HSETP2_IMM:
- cond = instr.hsetp2.cbuf_and_imm.cond;
- h_and = instr.hsetp2.cbuf_and_imm.h_and;
- op_b = UnpackHalfImmediate(instr, true);
- break;
- case OpCode::Id::HSETP2_R:
- cond = instr.hsetp2.reg.cond;
- h_and = instr.hsetp2.reg.h_and;
- op_b =
- GetOperandAbsNegHalf(UnpackHalfFloat(GetRegister(instr.gpr20), instr.hsetp2.reg.type_b),
- instr.hsetp2.reg.abs_b, instr.hsetp2.reg.negate_b);
- break;
- default:
- UNREACHABLE();
- op_b = Immediate(0);
- }
-
- const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op);
- const Node combined_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred);
-
- const auto Write = [&](u64 dest, Node src) {
- SetPredicate(bb, dest, Operation(combiner, std::move(src), combined_pred));
- };
-
- const Node comparison = GetPredicateComparisonHalf(cond, op_a, op_b);
- const u64 first = instr.hsetp2.pred3;
- const u64 second = instr.hsetp2.pred0;
- if (h_and) {
- Node joined = Operation(OperationCode::LogicalAnd2, comparison);
- Write(first, joined);
- Write(second, Operation(OperationCode::LogicalNegate, std::move(joined)));
- } else {
- Write(first, Operation(OperationCode::LogicalPick2, comparison, Immediate(0U)));
- Write(second, Operation(OperationCode::LogicalPick2, comparison, Immediate(1U)));
- }
-
- return pc;
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp
deleted file mode 100644
index 5b44cb79c..000000000
--- a/src/video_core/shader/decode/hfma2.cpp
+++ /dev/null
@@ -1,73 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <tuple>
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::HalfPrecision;
-using Tegra::Shader::HalfType;
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-
-u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
-
- if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) {
- DEBUG_ASSERT(instr.hfma2.rr.precision == HalfPrecision::None);
- } else {
- DEBUG_ASSERT(instr.hfma2.precision == HalfPrecision::None);
- }
-
- constexpr auto identity = HalfType::H0_H1;
- bool neg_b{}, neg_c{};
- auto [saturate, type_b, op_b, type_c,
- op_c] = [&]() -> std::tuple<bool, HalfType, Node, HalfType, Node> {
- switch (opcode->get().GetId()) {
- case OpCode::Id::HFMA2_CR:
- neg_b = instr.hfma2.negate_b;
- neg_c = instr.hfma2.negate_c;
- return {instr.hfma2.saturate, HalfType::F32,
- GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
- instr.hfma2.type_reg39, GetRegister(instr.gpr39)};
- case OpCode::Id::HFMA2_RC:
- neg_b = instr.hfma2.negate_b;
- neg_c = instr.hfma2.negate_c;
- return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39),
- HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
- case OpCode::Id::HFMA2_RR:
- neg_b = instr.hfma2.rr.negate_b;
- neg_c = instr.hfma2.rr.negate_c;
- return {instr.hfma2.rr.saturate, instr.hfma2.type_b, GetRegister(instr.gpr20),
- instr.hfma2.rr.type_c, GetRegister(instr.gpr39)};
- case OpCode::Id::HFMA2_IMM_R:
- neg_c = instr.hfma2.negate_c;
- return {instr.hfma2.saturate, identity, UnpackHalfImmediate(instr, true),
- instr.hfma2.type_reg39, GetRegister(instr.gpr39)};
- default:
- return {false, identity, Immediate(0), identity, Immediate(0)};
- }
- }();
-
- const Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hfma2.type_a);
- op_b = GetOperandAbsNegHalf(UnpackHalfFloat(op_b, type_b), false, neg_b);
- op_c = GetOperandAbsNegHalf(UnpackHalfFloat(op_c, type_c), false, neg_c);
-
- Node value = Operation(OperationCode::HFma, PRECISE, op_a, op_b, op_c);
- value = GetSaturatedHalfFloat(value, saturate);
- value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge);
-
- SetRegister(bb, instr.gpr0, value);
-
- return pc;
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
deleted file mode 100644
index 5470e8cf4..000000000
--- a/src/video_core/shader/decode/image.cpp
+++ /dev/null
@@ -1,536 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <vector>
-#include <fmt/format.h>
-
-#include "common/assert.h"
-#include "common/bit_field.h"
-#include "common/common_types.h"
-#include "common/logging/log.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-#include "video_core/textures/texture.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-using Tegra::Shader::PredCondition;
-using Tegra::Shader::StoreType;
-using Tegra::Texture::ComponentType;
-using Tegra::Texture::TextureFormat;
-using Tegra::Texture::TICEntry;
-
-namespace {
-
-ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
- std::size_t component) {
- const TextureFormat format{descriptor.format};
- switch (format) {
- case TextureFormat::R16G16B16A16:
- case TextureFormat::R32G32B32A32:
- case TextureFormat::R32G32B32:
- case TextureFormat::R32G32:
- case TextureFormat::R16G16:
- case TextureFormat::R32:
- case TextureFormat::R16:
- case TextureFormat::R8:
- case TextureFormat::R1:
- if (component == 0) {
- return descriptor.r_type;
- }
- if (component == 1) {
- return descriptor.g_type;
- }
- if (component == 2) {
- return descriptor.b_type;
- }
- if (component == 3) {
- return descriptor.a_type;
- }
- break;
- case TextureFormat::A8R8G8B8:
- if (component == 0) {
- return descriptor.a_type;
- }
- if (component == 1) {
- return descriptor.r_type;
- }
- if (component == 2) {
- return descriptor.g_type;
- }
- if (component == 3) {
- return descriptor.b_type;
- }
- break;
- case TextureFormat::A2B10G10R10:
- case TextureFormat::A4B4G4R4:
- case TextureFormat::A5B5G5R1:
- case TextureFormat::A1B5G5R5:
- if (component == 0) {
- return descriptor.a_type;
- }
- if (component == 1) {
- return descriptor.b_type;
- }
- if (component == 2) {
- return descriptor.g_type;
- }
- if (component == 3) {
- return descriptor.r_type;
- }
- break;
- case TextureFormat::R32_B24G8:
- if (component == 0) {
- return descriptor.r_type;
- }
- if (component == 1) {
- return descriptor.b_type;
- }
- if (component == 2) {
- return descriptor.g_type;
- }
- break;
- case TextureFormat::B5G6R5:
- case TextureFormat::B6G5R5:
- case TextureFormat::B10G11R11:
- if (component == 0) {
- return descriptor.b_type;
- }
- if (component == 1) {
- return descriptor.g_type;
- }
- if (component == 2) {
- return descriptor.r_type;
- }
- break;
- case TextureFormat::R24G8:
- case TextureFormat::R8G24:
- case TextureFormat::R8G8:
- case TextureFormat::G4R4:
- if (component == 0) {
- return descriptor.g_type;
- }
- if (component == 1) {
- return descriptor.r_type;
- }
- break;
- default:
- break;
- }
- UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
- return ComponentType::FLOAT;
-}
-
-bool IsComponentEnabled(std::size_t component_mask, std::size_t component) {
- constexpr u8 R = 0b0001;
- constexpr u8 G = 0b0010;
- constexpr u8 B = 0b0100;
- constexpr u8 A = 0b1000;
- constexpr std::array<u8, 16> mask = {
- 0, (R), (G), (R | G), (B), (R | B), (G | B), (R | G | B),
- (A), (R | A), (G | A), (R | G | A), (B | A), (R | B | A), (G | B | A), (R | G | B | A)};
- return std::bitset<4>{mask.at(component_mask)}.test(component);
-}
-
-u32 GetComponentSize(TextureFormat format, std::size_t component) {
- switch (format) {
- case TextureFormat::R32G32B32A32:
- return 32;
- case TextureFormat::R16G16B16A16:
- return 16;
- case TextureFormat::R32G32B32:
- return component <= 2 ? 32 : 0;
- case TextureFormat::R32G32:
- return component <= 1 ? 32 : 0;
- case TextureFormat::R16G16:
- return component <= 1 ? 16 : 0;
- case TextureFormat::R32:
- return component == 0 ? 32 : 0;
- case TextureFormat::R16:
- return component == 0 ? 16 : 0;
- case TextureFormat::R8:
- return component == 0 ? 8 : 0;
- case TextureFormat::R1:
- return component == 0 ? 1 : 0;
- case TextureFormat::A8R8G8B8:
- return 8;
- case TextureFormat::A2B10G10R10:
- return (component == 3 || component == 2 || component == 1) ? 10 : 2;
- case TextureFormat::A4B4G4R4:
- return 4;
- case TextureFormat::A5B5G5R1:
- return (component == 0 || component == 1 || component == 2) ? 5 : 1;
- case TextureFormat::A1B5G5R5:
- return (component == 1 || component == 2 || component == 3) ? 5 : 1;
- case TextureFormat::R32_B24G8:
- if (component == 0) {
- return 32;
- }
- if (component == 1) {
- return 24;
- }
- if (component == 2) {
- return 8;
- }
- return 0;
- case TextureFormat::B5G6R5:
- if (component == 0 || component == 2) {
- return 5;
- }
- if (component == 1) {
- return 6;
- }
- return 0;
- case TextureFormat::B6G5R5:
- if (component == 1 || component == 2) {
- return 5;
- }
- if (component == 0) {
- return 6;
- }
- return 0;
- case TextureFormat::B10G11R11:
- if (component == 1 || component == 2) {
- return 11;
- }
- if (component == 0) {
- return 10;
- }
- return 0;
- case TextureFormat::R24G8:
- if (component == 0) {
- return 8;
- }
- if (component == 1) {
- return 24;
- }
- return 0;
- case TextureFormat::R8G24:
- if (component == 0) {
- return 24;
- }
- if (component == 1) {
- return 8;
- }
- return 0;
- case TextureFormat::R8G8:
- return (component == 0 || component == 1) ? 8 : 0;
- case TextureFormat::G4R4:
- return (component == 0 || component == 1) ? 4 : 0;
- default:
- UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
- return 0;
- }
-}
-
-std::size_t GetImageComponentMask(TextureFormat format) {
- constexpr u8 R = 0b0001;
- constexpr u8 G = 0b0010;
- constexpr u8 B = 0b0100;
- constexpr u8 A = 0b1000;
- switch (format) {
- case TextureFormat::R32G32B32A32:
- case TextureFormat::R16G16B16A16:
- case TextureFormat::A8R8G8B8:
- case TextureFormat::A2B10G10R10:
- case TextureFormat::A4B4G4R4:
- case TextureFormat::A5B5G5R1:
- case TextureFormat::A1B5G5R5:
- return std::size_t{R | G | B | A};
- case TextureFormat::R32G32B32:
- case TextureFormat::R32_B24G8:
- case TextureFormat::B5G6R5:
- case TextureFormat::B6G5R5:
- case TextureFormat::B10G11R11:
- return std::size_t{R | G | B};
- case TextureFormat::R32G32:
- case TextureFormat::R16G16:
- case TextureFormat::R24G8:
- case TextureFormat::R8G24:
- case TextureFormat::R8G8:
- case TextureFormat::G4R4:
- return std::size_t{R | G};
- case TextureFormat::R32:
- case TextureFormat::R16:
- case TextureFormat::R8:
- case TextureFormat::R1:
- return std::size_t{R};
- default:
- UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
- return std::size_t{R | G | B | A};
- }
-}
-
-std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) {
- switch (image_type) {
- case Tegra::Shader::ImageType::Texture1D:
- case Tegra::Shader::ImageType::TextureBuffer:
- return 1;
- case Tegra::Shader::ImageType::Texture1DArray:
- case Tegra::Shader::ImageType::Texture2D:
- return 2;
- case Tegra::Shader::ImageType::Texture2DArray:
- case Tegra::Shader::ImageType::Texture3D:
- return 3;
- }
- UNREACHABLE();
- return 1;
-}
-} // Anonymous namespace
-
-std::pair<Node, bool> ShaderIR::GetComponentValue(ComponentType component_type, u32 component_size,
- Node original_value) {
- switch (component_type) {
- case ComponentType::SNORM: {
- // range [-1.0, 1.0]
- auto cnv_value = Operation(OperationCode::FMul, original_value,
- Immediate(static_cast<float>(1 << component_size) / 2.f - 1.f));
- cnv_value = Operation(OperationCode::ICastFloat, std::move(cnv_value));
- return {BitfieldExtract(std::move(cnv_value), 0, component_size), true};
- }
- case ComponentType::SINT:
- case ComponentType::UNORM: {
- bool is_signed = component_type == ComponentType::SINT;
- // range [0.0, 1.0]
- auto cnv_value = Operation(OperationCode::FMul, original_value,
- Immediate(static_cast<float>(1 << component_size) - 1.f));
- return {SignedOperation(OperationCode::ICastFloat, is_signed, std::move(cnv_value)),
- is_signed};
- }
- case ComponentType::UINT: // range [0, (1 << component_size) - 1]
- return {std::move(original_value), false};
- case ComponentType::FLOAT:
- if (component_size == 16) {
- return {Operation(OperationCode::HCastFloat, original_value), true};
- } else {
- return {std::move(original_value), true};
- }
- default:
- UNIMPLEMENTED_MSG("Unimplemented component type={}", component_type);
- return {std::move(original_value), true};
- }
-}
-
-u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
-
- const auto GetCoordinates = [this, instr](Tegra::Shader::ImageType image_type) {
- std::vector<Node> coords;
- const std::size_t num_coords{GetImageTypeNumCoordinates(image_type)};
- coords.reserve(num_coords);
- for (std::size_t i = 0; i < num_coords; ++i) {
- coords.push_back(GetRegister(instr.gpr8.Value() + i));
- }
- return coords;
- };
-
- switch (opcode->get().GetId()) {
- case OpCode::Id::SULD: {
- UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store !=
- Tegra::Shader::OutOfBoundsStore::Ignore);
-
- const auto type{instr.suldst.image_type};
- auto& image{instr.suldst.is_immediate ? GetImage(instr.image, type)
- : GetBindlessImage(instr.gpr39, type)};
- image.MarkRead();
-
- if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::P) {
- u32 indexer = 0;
- for (u32 element = 0; element < 4; ++element) {
- if (!instr.suldst.IsComponentEnabled(element)) {
- continue;
- }
- MetaImage meta{image, {}, element};
- Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type));
- SetTemporary(bb, indexer++, std::move(value));
- }
- for (u32 i = 0; i < indexer; ++i) {
- SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
- }
- } else if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::D_BA) {
- UNIMPLEMENTED_IF(instr.suldst.GetStoreDataLayout() != StoreType::Bits32 &&
- instr.suldst.GetStoreDataLayout() != StoreType::Bits64);
-
- auto descriptor = [this, instr] {
- std::optional<Tegra::Engines::SamplerDescriptor> sampler_descriptor;
- if (instr.suldst.is_immediate) {
- sampler_descriptor =
- registry.ObtainBoundSampler(static_cast<u32>(instr.image.index.Value()));
- } else {
- const Node image_register = GetRegister(instr.gpr39);
- const auto result = TrackCbuf(image_register, global_code,
- static_cast<s64>(global_code.size()));
- const auto buffer = std::get<1>(result);
- const auto offset = std::get<2>(result);
- sampler_descriptor = registry.ObtainBindlessSampler(buffer, offset);
- }
- if (!sampler_descriptor) {
- UNREACHABLE_MSG("Failed to obtain image descriptor");
- }
- return *sampler_descriptor;
- }();
-
- const auto comp_mask = GetImageComponentMask(descriptor.format);
-
- switch (instr.suldst.GetStoreDataLayout()) {
- case StoreType::Bits32:
- case StoreType::Bits64: {
- u32 indexer = 0;
- u32 shifted_counter = 0;
- Node value = Immediate(0);
- for (u32 element = 0; element < 4; ++element) {
- if (!IsComponentEnabled(comp_mask, element)) {
- continue;
- }
- const auto component_type = GetComponentType(descriptor, element);
- const auto component_size = GetComponentSize(descriptor.format, element);
- MetaImage meta{image, {}, element};
-
- auto [converted_value, is_signed] = GetComponentValue(
- component_type, component_size,
- Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)));
-
- // shift element to correct position
- const auto shifted = shifted_counter;
- if (shifted > 0) {
- converted_value =
- SignedOperation(OperationCode::ILogicalShiftLeft, is_signed,
- std::move(converted_value), Immediate(shifted));
- }
- shifted_counter += component_size;
-
- // add value into result
- value = Operation(OperationCode::UBitwiseOr, value, std::move(converted_value));
-
-                    // once a full 32-bit word has been packed, save it into a temporary
- if (shifted_counter >= 32) {
- SetTemporary(bb, indexer++, std::move(value));
-                        // reset the counter and value to start packing the next word
- value = Immediate(0);
- shifted_counter = 0;
- }
- }
- for (u32 i = 0; i < indexer; ++i) {
- SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
- }
- break;
- }
- default:
- UNREACHABLE();
- break;
- }
- }
- break;
- }
- case OpCode::Id::SUST: {
- UNIMPLEMENTED_IF(instr.suldst.mode != Tegra::Shader::SurfaceDataMode::P);
- UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store !=
- Tegra::Shader::OutOfBoundsStore::Ignore);
- UNIMPLEMENTED_IF(instr.suldst.component_mask_selector != 0xf); // Ensure we have RGBA
-
- std::vector<Node> values;
- constexpr std::size_t hardcoded_size{4};
- for (std::size_t i = 0; i < hardcoded_size; ++i) {
- values.push_back(GetRegister(instr.gpr0.Value() + i));
- }
-
- const auto type{instr.suldst.image_type};
- auto& image{instr.suldst.is_immediate ? GetImage(instr.image, type)
- : GetBindlessImage(instr.gpr39, type)};
- image.MarkWrite();
-
- MetaImage meta{image, std::move(values)};
- bb.push_back(Operation(OperationCode::ImageStore, meta, GetCoordinates(type)));
- break;
- }
- case OpCode::Id::SUATOM: {
- UNIMPLEMENTED_IF(instr.suatom_d.is_ba != 0);
-
- const OperationCode operation_code = [instr] {
- switch (instr.suatom_d.operation_type) {
- case Tegra::Shader::ImageAtomicOperationType::S32:
- case Tegra::Shader::ImageAtomicOperationType::U32:
- switch (instr.suatom_d.operation) {
- case Tegra::Shader::ImageAtomicOperation::Add:
- return OperationCode::AtomicImageAdd;
- case Tegra::Shader::ImageAtomicOperation::And:
- return OperationCode::AtomicImageAnd;
- case Tegra::Shader::ImageAtomicOperation::Or:
- return OperationCode::AtomicImageOr;
- case Tegra::Shader::ImageAtomicOperation::Xor:
- return OperationCode::AtomicImageXor;
- case Tegra::Shader::ImageAtomicOperation::Exch:
- return OperationCode::AtomicImageExchange;
- default:
- break;
- }
- break;
- default:
- break;
- }
- UNIMPLEMENTED_MSG("Unimplemented operation={}, type={}",
- static_cast<u64>(instr.suatom_d.operation.Value()),
- static_cast<u64>(instr.suatom_d.operation_type.Value()));
- return OperationCode::AtomicImageAdd;
- }();
-
- Node value = GetRegister(instr.gpr0);
-
- const auto type = instr.suatom_d.image_type;
- auto& image = GetImage(instr.image, type);
- image.MarkAtomic();
-
- MetaImage meta{image, {std::move(value)}};
- SetRegister(bb, instr.gpr0, Operation(operation_code, meta, GetCoordinates(type)));
- break;
- }
- default:
- UNIMPLEMENTED_MSG("Unhandled image instruction: {}", opcode->get().GetName());
- }
-
- return pc;
-}
-
-ImageEntry& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) {
- const auto offset = static_cast<u32>(image.index.Value());
-
- const auto it =
- std::find_if(std::begin(used_images), std::end(used_images),
- [offset](const ImageEntry& entry) { return entry.offset == offset; });
- if (it != std::end(used_images)) {
- ASSERT(!it->is_bindless && it->type == type);
- return *it;
- }
-
- const auto next_index = static_cast<u32>(used_images.size());
- return used_images.emplace_back(next_index, offset, type);
-}
-
-ImageEntry& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) {
- const Node image_register = GetRegister(reg);
- const auto result =
- TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()));
-
- const auto buffer = std::get<1>(result);
- const auto offset = std::get<2>(result);
-
- const auto it = std::find_if(std::begin(used_images), std::end(used_images),
- [buffer, offset](const ImageEntry& entry) {
- return entry.buffer == buffer && entry.offset == offset;
- });
- if (it != std::end(used_images)) {
- ASSERT(it->is_bindless && it->type == type);
- return *it;
- }
-
- const auto next_index = static_cast<u32>(used_images.size());
- return used_images.emplace_back(next_index, offset, buffer, type);
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp
deleted file mode 100644
index 59809bcd8..000000000
--- a/src/video_core/shader/decode/integer_set.cpp
+++ /dev/null
@@ -1,49 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-
-u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
-
- const Node op_a = GetRegister(instr.gpr8);
- const Node op_b = [&]() {
- if (instr.is_b_imm) {
- return Immediate(instr.alu.GetSignedImm20_20());
- } else if (instr.is_b_gpr) {
- return GetRegister(instr.gpr20);
- } else {
- return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
- }
- }();
-
- // The iset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the condition
- // is true, and to 0 otherwise.
- const Node second_pred = GetPredicate(instr.iset.pred39, instr.iset.neg_pred != 0);
- const Node first_pred =
- GetPredicateComparisonInteger(instr.iset.cond, instr.iset.is_signed, op_a, op_b);
-
- const OperationCode combiner = GetPredicateCombiner(instr.iset.op);
-
- const Node predicate = Operation(combiner, first_pred, second_pred);
-
- const Node true_value = instr.iset.bf ? Immediate(1.0f) : Immediate(-1);
- const Node false_value = instr.iset.bf ? Immediate(0.0f) : Immediate(0);
- const Node value =
- Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value);
-
- SetRegister(bb, instr.gpr0, value);
-
- return pc;
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp
deleted file mode 100644
index 25e48fef8..000000000
--- a/src/video_core/shader/decode/integer_set_predicate.cpp
+++ /dev/null
@@ -1,53 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-using Tegra::Shader::Pred;
-
-u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
-
- const Node op_a = GetRegister(instr.gpr8);
-
- const Node op_b = [&]() {
- if (instr.is_b_imm) {
- return Immediate(instr.alu.GetSignedImm20_20());
- } else if (instr.is_b_gpr) {
- return GetRegister(instr.gpr20);
- } else {
- return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
- }
- }();
-
- // We can't use the constant predicate as destination.
- ASSERT(instr.isetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
-
- const Node second_pred = GetPredicate(instr.isetp.pred39, instr.isetp.neg_pred != 0);
- const Node predicate =
- GetPredicateComparisonInteger(instr.isetp.cond, instr.isetp.is_signed, op_a, op_b);
-
- // Set the primary predicate to the result of Predicate OP SecondPredicate
- const OperationCode combiner = GetPredicateCombiner(instr.isetp.op);
- const Node value = Operation(combiner, predicate, second_pred);
- SetPredicate(bb, instr.isetp.pred3, value);
-
- if (instr.isetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
- // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled
- const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate);
- SetPredicate(bb, instr.isetp.pred0, Operation(combiner, negated_pred, second_pred));
- }
-
- return pc;
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
deleted file mode 100644
index 7728f600e..000000000
--- a/src/video_core/shader/decode/memory.cpp
+++ /dev/null
@@ -1,493 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <utility>
-#include <vector>
-
-#include <fmt/format.h>
-
-#include "common/alignment.h"
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "common/logging/log.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using std::move;
-using Tegra::Shader::AtomicOp;
-using Tegra::Shader::AtomicType;
-using Tegra::Shader::Attribute;
-using Tegra::Shader::GlobalAtomicType;
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-using Tegra::Shader::Register;
-using Tegra::Shader::StoreType;
-
-namespace {
-
-OperationCode GetAtomOperation(AtomicOp op) {
- switch (op) {
- case AtomicOp::Add:
- return OperationCode::AtomicIAdd;
- case AtomicOp::Min:
- return OperationCode::AtomicIMin;
- case AtomicOp::Max:
- return OperationCode::AtomicIMax;
- case AtomicOp::And:
- return OperationCode::AtomicIAnd;
- case AtomicOp::Or:
- return OperationCode::AtomicIOr;
- case AtomicOp::Xor:
- return OperationCode::AtomicIXor;
- case AtomicOp::Exch:
- return OperationCode::AtomicIExchange;
- default:
- UNIMPLEMENTED_MSG("op={}", op);
- return OperationCode::AtomicIAdd;
- }
-}
-
-bool IsUnaligned(Tegra::Shader::UniformType uniform_type) {
- return uniform_type == Tegra::Shader::UniformType::UnsignedByte ||
- uniform_type == Tegra::Shader::UniformType::UnsignedShort;
-}
-
-u32 GetUnalignedMask(Tegra::Shader::UniformType uniform_type) {
- switch (uniform_type) {
- case Tegra::Shader::UniformType::UnsignedByte:
- return 0b11;
- case Tegra::Shader::UniformType::UnsignedShort:
- return 0b10;
- default:
- UNREACHABLE();
- return 0;
- }
-}
-
-u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) {
- switch (uniform_type) {
- case Tegra::Shader::UniformType::UnsignedByte:
- return 8;
- case Tegra::Shader::UniformType::UnsignedShort:
- return 16;
- case Tegra::Shader::UniformType::Single:
- return 32;
- case Tegra::Shader::UniformType::Double:
- return 64;
- case Tegra::Shader::UniformType::Quad:
- case Tegra::Shader::UniformType::UnsignedQuad:
- return 128;
- default:
- UNIMPLEMENTED_MSG("Unimplemented size={}!", uniform_type);
- return 32;
- }
-}
-
-Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) {
- Node offset = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask));
- offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3));
- return Operation(OperationCode::UBitfieldExtract, move(value), move(offset), Immediate(size));
-}
-
-Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) {
- Node offset = Operation(OperationCode::UBitwiseAnd, move(address), Immediate(mask));
- offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3));
- return Operation(OperationCode::UBitfieldInsert, move(dest), move(value), move(offset),
- Immediate(size));
-}
-
-Node Sign16Extend(Node value) {
- Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15));
- Node is_sign = Operation(OperationCode::LogicalUEqual, move(sign), Immediate(1U << 15));
- Node extend = Operation(OperationCode::Select, is_sign, Immediate(0xFFFF0000), Immediate(0));
- return Operation(OperationCode::UBitwiseOr, move(value), move(extend));
-}
-
-} // Anonymous namespace
-
-u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
-
- switch (opcode->get().GetId()) {
- case OpCode::Id::LD_A: {
-        // Note: Shouldn't this be interp mode flat? That is, with no interpolation performed.
- UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
- "Indirect attribute loads are not supported");
- UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
- "Unaligned attribute loads are not supported");
- UNIMPLEMENTED_IF_MSG(instr.attribute.fmt20.IsPhysical() &&
- instr.attribute.fmt20.size != Tegra::Shader::AttributeSize::Word,
- "Non-32 bits PHYS reads are not implemented");
-
- const Node buffer{GetRegister(instr.gpr39)};
-
- u64 next_element = instr.attribute.fmt20.element;
- auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
-
- const auto LoadNextElement = [&](u32 reg_offset) {
- const Node attribute{instr.attribute.fmt20.IsPhysical()
- ? GetPhysicalInputAttribute(instr.gpr8, buffer)
- : GetInputAttribute(static_cast<Attribute::Index>(next_index),
- next_element, buffer)};
-
- SetRegister(bb, instr.gpr0.Value() + reg_offset, attribute);
-
- // Load the next attribute element into the following register. If the element
- // to load goes beyond the vec4 size, load the first element of the next
- // attribute.
- next_element = (next_element + 1) % 4;
- next_index = next_index + (next_element == 0 ? 1 : 0);
- };
-
- const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
- for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
- LoadNextElement(reg_offset);
- }
- break;
- }
- case OpCode::Id::LD_C: {
- UNIMPLEMENTED_IF(instr.ld_c.unknown != 0);
-
- Node index = GetRegister(instr.gpr8);
-
- const Node op_a =
- GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index);
-
- switch (instr.ld_c.type.Value()) {
- case Tegra::Shader::UniformType::Single:
- SetRegister(bb, instr.gpr0, op_a);
- break;
-
- case Tegra::Shader::UniformType::Double: {
- const Node op_b =
- GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index);
-
- SetTemporary(bb, 0, op_a);
- SetTemporary(bb, 1, op_b);
- SetRegister(bb, instr.gpr0, GetTemporary(0));
- SetRegister(bb, instr.gpr0.Value() + 1, GetTemporary(1));
- break;
- }
- default:
- UNIMPLEMENTED_MSG("Unhandled type: {}", instr.ld_c.type.Value());
- }
- break;
- }
- case OpCode::Id::LD_L:
- LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", instr.ld_l.unknown);
- [[fallthrough]];
- case OpCode::Id::LD_S: {
- const auto GetAddress = [&](s32 offset) {
- ASSERT(offset % 4 == 0);
- const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset);
- return Operation(OperationCode::IAdd, GetRegister(instr.gpr8), immediate_offset);
- };
- const auto GetMemory = [&](s32 offset) {
- return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(GetAddress(offset))
- : GetLocalMemory(GetAddress(offset));
- };
-
- switch (instr.ldst_sl.type.Value()) {
- case StoreType::Signed16:
- SetRegister(bb, instr.gpr0,
- Sign16Extend(ExtractUnaligned(GetMemory(0), GetAddress(0), 0b10, 16)));
- break;
- case StoreType::Bits32:
- case StoreType::Bits64:
- case StoreType::Bits128: {
- const u32 count = [&] {
- switch (instr.ldst_sl.type.Value()) {
- case StoreType::Bits32:
- return 1;
- case StoreType::Bits64:
- return 2;
- case StoreType::Bits128:
- return 4;
- default:
- UNREACHABLE();
- return 0;
- }
- }();
- for (u32 i = 0; i < count; ++i) {
- SetTemporary(bb, i, GetMemory(i * 4));
- }
- for (u32 i = 0; i < count; ++i) {
- SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
- }
- break;
- }
- default:
- UNIMPLEMENTED_MSG("{} Unhandled type: {}", opcode->get().GetName(),
- instr.ldst_sl.type.Value());
- }
- break;
- }
- case OpCode::Id::LD:
- case OpCode::Id::LDG: {
- const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType {
- switch (opcode->get().GetId()) {
- case OpCode::Id::LD:
- UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended LD is not implemented");
- return instr.generic.type;
- case OpCode::Id::LDG:
- return instr.ldg.type;
- default:
- UNREACHABLE();
- return {};
- }
- }();
-
- const auto [real_address_base, base_address, descriptor] =
- TrackGlobalMemory(bb, instr, true, false);
-
- const u32 size = GetMemorySize(type);
- const u32 count = Common::AlignUp(size, 32) / 32;
- if (!real_address_base || !base_address) {
- // Tracking failed, load zeroes.
- for (u32 i = 0; i < count; ++i) {
- SetRegister(bb, instr.gpr0.Value() + i, Immediate(0.0f));
- }
- break;
- }
-
- for (u32 i = 0; i < count; ++i) {
- const Node it_offset = Immediate(i * 4);
- const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
- Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
-
-            // To handle unaligned loads, get the bytes used to dereference global memory and extract
- // those bytes from the loaded u32.
- if (IsUnaligned(type)) {
- gmem = ExtractUnaligned(gmem, real_address, GetUnalignedMask(type), size);
- }
-
- SetTemporary(bb, i, gmem);
- }
-
- for (u32 i = 0; i < count; ++i) {
- SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
- }
- break;
- }
- case OpCode::Id::ST_A: {
- UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
- "Indirect attribute loads are not supported");
- UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
- "Unaligned attribute loads are not supported");
-
- u64 element = instr.attribute.fmt20.element;
- auto index = static_cast<u64>(instr.attribute.fmt20.index.Value());
-
- const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
- for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
- Node dest;
- if (instr.attribute.fmt20.patch) {
- const u32 offset = static_cast<u32>(index) * 4 + static_cast<u32>(element);
- dest = MakeNode<PatchNode>(offset);
- } else {
- dest = GetOutputAttribute(static_cast<Attribute::Index>(index), element,
- GetRegister(instr.gpr39));
- }
- const auto src = GetRegister(instr.gpr0.Value() + reg_offset);
-
- bb.push_back(Operation(OperationCode::Assign, dest, src));
-
- // Load the next attribute element into the following register. If the element to load
- // goes beyond the vec4 size, load the first element of the next attribute.
- element = (element + 1) % 4;
- index = index + (element == 0 ? 1 : 0);
- }
- break;
- }
- case OpCode::Id::ST_L:
- LOG_DEBUG(HW_GPU, "ST_L cache management mode: {}", instr.st_l.cache_management.Value());
- [[fallthrough]];
- case OpCode::Id::ST_S: {
- const auto GetAddress = [&](s32 offset) {
- ASSERT(offset % 4 == 0);
- const Node immediate = Immediate(static_cast<s32>(instr.smem_imm) + offset);
- return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate);
- };
-
- const bool is_local = opcode->get().GetId() == OpCode::Id::ST_L;
- const auto set_memory = is_local ? &ShaderIR::SetLocalMemory : &ShaderIR::SetSharedMemory;
- const auto get_memory = is_local ? &ShaderIR::GetLocalMemory : &ShaderIR::GetSharedMemory;
-
- switch (instr.ldst_sl.type.Value()) {
- case StoreType::Bits128:
- (this->*set_memory)(bb, GetAddress(12), GetRegister(instr.gpr0.Value() + 3));
- (this->*set_memory)(bb, GetAddress(8), GetRegister(instr.gpr0.Value() + 2));
- [[fallthrough]];
- case StoreType::Bits64:
- (this->*set_memory)(bb, GetAddress(4), GetRegister(instr.gpr0.Value() + 1));
- [[fallthrough]];
- case StoreType::Bits32:
- (this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0));
- break;
- case StoreType::Unsigned16:
- case StoreType::Signed16: {
- Node address = GetAddress(0);
- Node memory = (this->*get_memory)(address);
- (this->*set_memory)(
- bb, address, InsertUnaligned(memory, GetRegister(instr.gpr0), address, 0b10, 16));
- break;
- }
- default:
- UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(),
- instr.ldst_sl.type.Value());
- }
- break;
- }
- case OpCode::Id::ST:
- case OpCode::Id::STG: {
- const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType {
- switch (opcode->get().GetId()) {
- case OpCode::Id::ST:
- UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended ST is not implemented");
- return instr.generic.type;
- case OpCode::Id::STG:
- return instr.stg.type;
- default:
- UNREACHABLE();
- return {};
- }
- }();
-
-        // For unaligned stores we also have to read memory (read-modify-write).
- const bool is_read = IsUnaligned(type);
- const auto [real_address_base, base_address, descriptor] =
- TrackGlobalMemory(bb, instr, is_read, true);
- if (!real_address_base || !base_address) {
- // Tracking failed, skip the store.
- break;
- }
-
- const u32 size = GetMemorySize(type);
- const u32 count = Common::AlignUp(size, 32) / 32;
- for (u32 i = 0; i < count; ++i) {
- const Node it_offset = Immediate(i * 4);
- const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
- const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
- Node value = GetRegister(instr.gpr0.Value() + i);
-
- if (IsUnaligned(type)) {
- const u32 mask = GetUnalignedMask(type);
- value = InsertUnaligned(gmem, move(value), real_address, mask, size);
- }
-
- bb.push_back(Operation(OperationCode::Assign, gmem, value));
- }
- break;
- }
- case OpCode::Id::RED: {
- UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32, "type={}",
- instr.red.type.Value());
- const auto [real_address, base_address, descriptor] =
- TrackGlobalMemory(bb, instr, true, true);
- if (!real_address || !base_address) {
- // Tracking failed, skip atomic.
- break;
- }
- Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
- Node value = GetRegister(instr.gpr0);
- bb.push_back(Operation(GetAtomOperation(instr.red.operation), move(gmem), move(value)));
- break;
- }
- case OpCode::Id::ATOM: {
- UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc ||
- instr.atom.operation == AtomicOp::Dec ||
- instr.atom.operation == AtomicOp::SafeAdd,
- "operation={}", instr.atom.operation.Value());
- UNIMPLEMENTED_IF_MSG(instr.atom.type == GlobalAtomicType::S64 ||
- instr.atom.type == GlobalAtomicType::U64 ||
- instr.atom.type == GlobalAtomicType::F16x2_FTZ_RN ||
- instr.atom.type == GlobalAtomicType::F32_FTZ_RN,
- "type={}", instr.atom.type.Value());
-
- const auto [real_address, base_address, descriptor] =
- TrackGlobalMemory(bb, instr, true, true);
- if (!real_address || !base_address) {
- // Tracking failed, skip atomic.
- break;
- }
-
- const bool is_signed =
- instr.atom.type == GlobalAtomicType::S32 || instr.atom.type == GlobalAtomicType::S64;
- Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
- SetRegister(bb, instr.gpr0,
- SignedOperation(GetAtomOperation(instr.atom.operation), is_signed, gmem,
- GetRegister(instr.gpr20)));
- break;
- }
- case OpCode::Id::ATOMS: {
- UNIMPLEMENTED_IF_MSG(instr.atoms.operation == AtomicOp::Inc ||
- instr.atoms.operation == AtomicOp::Dec,
- "operation={}", instr.atoms.operation.Value());
- UNIMPLEMENTED_IF_MSG(instr.atoms.type == AtomicType::S64 ||
- instr.atoms.type == AtomicType::U64,
- "type={}", instr.atoms.type.Value());
- const bool is_signed =
- instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64;
- const s32 offset = instr.atoms.GetImmediateOffset();
- Node address = GetRegister(instr.gpr8);
- address = Operation(OperationCode::IAdd, move(address), Immediate(offset));
- SetRegister(bb, instr.gpr0,
- SignedOperation(GetAtomOperation(instr.atoms.operation), is_signed,
- GetSharedMemory(move(address)), GetRegister(instr.gpr20)));
- break;
- }
- case OpCode::Id::AL2P: {
- // Ignore al2p.direction since we don't care about it.
-
-        // Calculate the fake physical address used for emulation.
- const Node fixed_address{Immediate(static_cast<u32>(instr.al2p.address))};
- const Node reg{GetRegister(instr.gpr8)};
- const Node fake_address{Operation(OperationCode::IAdd, NO_PRECISE, reg, fixed_address)};
-
- // Set the fake address to target register.
- SetRegister(bb, instr.gpr0, fake_address);
-
- // Signal the shader IR to declare all possible attributes and varyings
- uses_physical_attributes = true;
- break;
- }
- default:
- UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
- }
-
- return pc;
-}
-
-std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb,
- Instruction instr,
- bool is_read, bool is_write) {
- const auto addr_register{GetRegister(instr.gmem.gpr)};
- const auto immediate_offset{static_cast<u32>(instr.gmem.offset)};
-
- const auto [base_address, index, offset] =
- TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()));
- ASSERT_OR_EXECUTE_MSG(
- base_address != nullptr, { return std::make_tuple(nullptr, nullptr, GlobalMemoryBase{}); },
- "Global memory tracking failed");
-
- bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset)));
-
- const GlobalMemoryBase descriptor{index, offset};
- const auto& entry = used_global_memory.try_emplace(descriptor).first;
- auto& usage = entry->second;
- usage.is_written |= is_write;
- usage.is_read |= is_read;
-
- const auto real_address =
- Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register);
-
- return {real_address, base_address, descriptor};
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
deleted file mode 100644
index 5f88537bc..000000000
--- a/src/video_core/shader/decode/other.cpp
+++ /dev/null
@@ -1,322 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "common/logging/log.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using std::move;
-using Tegra::Shader::ConditionCode;
-using Tegra::Shader::Instruction;
-using Tegra::Shader::IpaInterpMode;
-using Tegra::Shader::OpCode;
-using Tegra::Shader::PixelImap;
-using Tegra::Shader::Register;
-using Tegra::Shader::SystemVariable;
-
-using Index = Tegra::Shader::Attribute::Index;
-
-u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
-
- switch (opcode->get().GetId()) {
- case OpCode::Id::NOP: {
- UNIMPLEMENTED_IF(instr.nop.cc != Tegra::Shader::ConditionCode::T);
- UNIMPLEMENTED_IF(instr.nop.trigger != 0);
- // With the previous preconditions, this instruction is a no-operation.
- break;
- }
- case OpCode::Id::EXIT: {
- const ConditionCode cc = instr.flow_condition_code;
- UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "EXIT condition code used: {}", cc);
-
- switch (instr.flow.cond) {
- case Tegra::Shader::FlowCondition::Always:
- bb.push_back(Operation(OperationCode::Exit));
- if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) {
- // If this is an unconditional exit then just end processing here,
- // otherwise we have to account for the possibility of the condition
- // not being met, so continue processing the next instruction.
- pc = MAX_PROGRAM_LENGTH - 1;
- }
- break;
-
- case Tegra::Shader::FlowCondition::Fcsm_Tr:
-            // TODO(bunnei): What is this used for? If we assume this condition is not
-            // satisfied, dual vertex shaders in Farming Simulator make more sense
- UNIMPLEMENTED_MSG("Skipping unknown FlowCondition::Fcsm_Tr");
- break;
-
- default:
- UNIMPLEMENTED_MSG("Unhandled flow condition: {}", instr.flow.cond.Value());
- }
- break;
- }
- case OpCode::Id::KIL: {
- UNIMPLEMENTED_IF(instr.flow.cond != Tegra::Shader::FlowCondition::Always);
-
- const ConditionCode cc = instr.flow_condition_code;
- UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "KIL condition code used: {}", cc);
-
- bb.push_back(Operation(OperationCode::Discard));
- break;
- }
- case OpCode::Id::S2R: {
- const Node value = [this, instr] {
- switch (instr.sys20) {
- case SystemVariable::LaneId:
- return Operation(OperationCode::ThreadId);
- case SystemVariable::InvocationId:
- return Operation(OperationCode::InvocationId);
- case SystemVariable::Ydirection:
- uses_y_negate = true;
- return Operation(OperationCode::YNegate);
- case SystemVariable::InvocationInfo:
- LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete");
- return Immediate(0x00ff'0000U);
- case SystemVariable::WscaleFactorXY:
- UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented");
- return Immediate(0U);
- case SystemVariable::WscaleFactorZ:
- UNIMPLEMENTED_MSG("S2R WscaleFactorZ is not implemented");
- return Immediate(0U);
- case SystemVariable::Tid: {
- Node val = Immediate(0);
- val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdX), 0, 9);
- val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdY), 16, 9);
- val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdZ), 26, 5);
- return val;
- }
- case SystemVariable::TidX:
- return Operation(OperationCode::LocalInvocationIdX);
- case SystemVariable::TidY:
- return Operation(OperationCode::LocalInvocationIdY);
- case SystemVariable::TidZ:
- return Operation(OperationCode::LocalInvocationIdZ);
- case SystemVariable::CtaIdX:
- return Operation(OperationCode::WorkGroupIdX);
- case SystemVariable::CtaIdY:
- return Operation(OperationCode::WorkGroupIdY);
- case SystemVariable::CtaIdZ:
- return Operation(OperationCode::WorkGroupIdZ);
- case SystemVariable::EqMask:
- case SystemVariable::LtMask:
- case SystemVariable::LeMask:
- case SystemVariable::GtMask:
- case SystemVariable::GeMask:
- uses_warps = true;
- switch (instr.sys20) {
- case SystemVariable::EqMask:
- return Operation(OperationCode::ThreadEqMask);
- case SystemVariable::LtMask:
- return Operation(OperationCode::ThreadLtMask);
- case SystemVariable::LeMask:
- return Operation(OperationCode::ThreadLeMask);
- case SystemVariable::GtMask:
- return Operation(OperationCode::ThreadGtMask);
- case SystemVariable::GeMask:
- return Operation(OperationCode::ThreadGeMask);
- default:
- UNREACHABLE();
- return Immediate(0u);
- }
- default:
- UNIMPLEMENTED_MSG("Unhandled system move: {}", instr.sys20.Value());
- return Immediate(0u);
- }
- }();
- SetRegister(bb, instr.gpr0, value);
-
- break;
- }
- case OpCode::Id::BRA: {
- Node branch;
- if (instr.bra.constant_buffer == 0) {
- const u32 target = pc + instr.bra.GetBranchTarget();
- branch = Operation(OperationCode::Branch, Immediate(target));
- } else {
- const u32 target = pc + 1;
- const Node op_a = GetConstBuffer(instr.cbuf36.index, instr.cbuf36.GetOffset());
- const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
- PRECISE, op_a, Immediate(3));
- const Node operand =
- Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
- branch = Operation(OperationCode::BranchIndirect, operand);
- }
-
- const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
- if (cc != Tegra::Shader::ConditionCode::T) {
- bb.push_back(Conditional(GetConditionCode(cc), {branch}));
- } else {
- bb.push_back(branch);
- }
- break;
- }
- case OpCode::Id::BRX: {
- Node operand;
- if (instr.brx.constant_buffer != 0) {
- const s32 target = pc + 1;
- const Node index = GetRegister(instr.gpr8);
- const Node op_a =
- GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index);
- const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
- PRECISE, op_a, Immediate(3));
- operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
- } else {
- const s32 target = pc + instr.brx.GetBranchExtend();
- const Node op_a = GetRegister(instr.gpr8);
- const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
- PRECISE, op_a, Immediate(3));
- operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
- }
- const Node branch = Operation(OperationCode::BranchIndirect, operand);
-
- const ConditionCode cc = instr.flow_condition_code;
- if (cc != ConditionCode::T) {
- bb.push_back(Conditional(GetConditionCode(cc), {branch}));
- } else {
- bb.push_back(branch);
- }
- break;
- }
- case OpCode::Id::SSY: {
- UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
- "Constant buffer flow is not supported");
-
- if (disable_flow_stack) {
- break;
- }
-
- // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC.
- const u32 target = pc + instr.bra.GetBranchTarget();
- bb.push_back(
- Operation(OperationCode::PushFlowStack, MetaStackClass::Ssy, Immediate(target)));
- break;
- }
- case OpCode::Id::PBK: {
- UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
- "Constant buffer PBK is not supported");
-
- if (disable_flow_stack) {
- break;
- }
-
- // PBK pushes to a stack the address where BRK will jump to.
- const u32 target = pc + instr.bra.GetBranchTarget();
- bb.push_back(
- Operation(OperationCode::PushFlowStack, MetaStackClass::Pbk, Immediate(target)));
- break;
- }
- case OpCode::Id::SYNC: {
- const ConditionCode cc = instr.flow_condition_code;
- UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "SYNC condition code used: {}", cc);
-
- if (decompiled) {
- break;
- }
-
- // The SYNC opcode jumps to the address previously set by the SSY opcode
- bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy));
- break;
- }
- case OpCode::Id::BRK: {
- const ConditionCode cc = instr.flow_condition_code;
- UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "BRK condition code used: {}", cc);
- if (decompiled) {
- break;
- }
-
- // The BRK opcode jumps to the address previously set by the PBK opcode
- bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk));
- break;
- }
- case OpCode::Id::IPA: {
- const bool is_physical = instr.ipa.idx && instr.gpr8.Value() != 0xff;
- const auto attribute = instr.attribute.fmt28;
- const Index index = attribute.index;
-
- Node value = is_physical ? GetPhysicalInputAttribute(instr.gpr8)
- : GetInputAttribute(index, attribute.element);
-
- // Code taken from Ryujinx.
- if (index >= Index::Attribute_0 && index <= Index::Attribute_31) {
- const u32 location = static_cast<u32>(index) - static_cast<u32>(Index::Attribute_0);
- if (header.ps.GetPixelImap(location) == PixelImap::Perspective) {
- Node position_w = GetInputAttribute(Index::Position, 3);
- value = Operation(OperationCode::FMul, move(value), move(position_w));
- }
- }
-
- if (instr.ipa.interp_mode == IpaInterpMode::Multiply) {
- value = Operation(OperationCode::FMul, move(value), GetRegister(instr.gpr20));
- }
-
- value = GetSaturatedFloat(move(value), instr.ipa.saturate);
-
- SetRegister(bb, instr.gpr0, move(value));
- break;
- }
- case OpCode::Id::OUT_R: {
- UNIMPLEMENTED_IF_MSG(instr.gpr20.Value() != Register::ZeroIndex,
- "Stream buffer is not supported");
-
- if (instr.out.emit) {
- // gpr0 is used to store the next address and gpr8 contains the address to emit.
- // Hardware uses pointers here but we just ignore it
- bb.push_back(Operation(OperationCode::EmitVertex));
- SetRegister(bb, instr.gpr0, Immediate(0));
- }
- if (instr.out.cut) {
- bb.push_back(Operation(OperationCode::EndPrimitive));
- }
- break;
- }
- case OpCode::Id::ISBERD: {
- UNIMPLEMENTED_IF(instr.isberd.o != 0);
- UNIMPLEMENTED_IF(instr.isberd.skew != 0);
- UNIMPLEMENTED_IF(instr.isberd.shift != Tegra::Shader::IsberdShift::None);
- UNIMPLEMENTED_IF(instr.isberd.mode != Tegra::Shader::IsberdMode::None);
- LOG_WARNING(HW_GPU, "ISBERD instruction is incomplete");
- SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8));
- break;
- }
- case OpCode::Id::BAR: {
- UNIMPLEMENTED_IF_MSG(instr.value != 0xF0A81B8000070000ULL, "BAR is not BAR.SYNC 0x0");
- bb.push_back(Operation(OperationCode::Barrier));
- break;
- }
- case OpCode::Id::MEMBAR: {
- UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default);
- const OperationCode type = [instr] {
- switch (instr.membar.type) {
- case Tegra::Shader::MembarType::CTA:
- return OperationCode::MemoryBarrierGroup;
- case Tegra::Shader::MembarType::GL:
- return OperationCode::MemoryBarrierGlobal;
- default:
- UNIMPLEMENTED_MSG("MEMBAR type={}", instr.membar.type.Value());
- return OperationCode::MemoryBarrierGlobal;
- }
- }();
- bb.push_back(Operation(type));
- break;
- }
- case OpCode::Id::DEPBAR: {
- LOG_DEBUG(HW_GPU, "DEPBAR instruction is stubbed");
- break;
- }
- default:
- UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName());
- }
-
- return pc;
-}
-
-} // namespace VideoCommon::Shader
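
Note: the file deleted above handled the control-flow and system-value opcodes (EXIT, BRA/BRX, SSY/SYNC, PBK/BRK, S2R, IPA, OUT). As a reference for the removal, this is a minimal standalone sketch of the SV_TID bit layout the S2R case built with three BitfieldInsert operations, using plain integers instead of ShaderIR nodes; the helper name is illustrative and not part of the removed API.

#include <cstdint>

// Pack the local invocation id the way the deleted S2R handler did:
// x -> bits [0, 9), y -> bits [16, 25), z -> bits [26, 31).
constexpr std::uint32_t PackTid(std::uint32_t x, std::uint32_t y, std::uint32_t z) {
    return (x & 0x1ffu) | ((y & 0x1ffu) << 16) | ((z & 0x1fu) << 26);
}
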
diff --git a/src/video_core/shader/decode/predicate_set_predicate.cpp b/src/video_core/shader/decode/predicate_set_predicate.cpp
deleted file mode 100644
index 9290d22eb..000000000
--- a/src/video_core/shader/decode/predicate_set_predicate.cpp
+++ /dev/null
@@ -1,68 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-using Tegra::Shader::Pred;
-
-u32 ShaderIR::DecodePredicateSetPredicate(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
-
- switch (opcode->get().GetId()) {
- case OpCode::Id::PSETP: {
- const Node op_a = GetPredicate(instr.psetp.pred12, instr.psetp.neg_pred12 != 0);
- const Node op_b = GetPredicate(instr.psetp.pred29, instr.psetp.neg_pred29 != 0);
-
- // We can't use the constant predicate as destination.
- ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
-
- const Node second_pred = GetPredicate(instr.psetp.pred39, instr.psetp.neg_pred39 != 0);
-
- const OperationCode combiner = GetPredicateCombiner(instr.psetp.op);
- const Node predicate = Operation(combiner, op_a, op_b);
-
- // Set the primary predicate to the result of Predicate OP SecondPredicate
- SetPredicate(bb, instr.psetp.pred3, Operation(combiner, predicate, second_pred));
-
- if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
- // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if
- // enabled
- SetPredicate(bb, instr.psetp.pred0,
- Operation(combiner, Operation(OperationCode::LogicalNegate, predicate),
- second_pred));
- }
- break;
- }
- case OpCode::Id::CSETP: {
- const Node pred = GetPredicate(instr.csetp.pred39, instr.csetp.neg_pred39 != 0);
- const Node condition_code = GetConditionCode(instr.csetp.cc);
-
- const OperationCode combiner = GetPredicateCombiner(instr.csetp.op);
-
- if (instr.csetp.pred3 != static_cast<u64>(Pred::UnusedIndex)) {
- SetPredicate(bb, instr.csetp.pred3, Operation(combiner, condition_code, pred));
- }
- if (instr.csetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
- const Node neg_cc = Operation(OperationCode::LogicalNegate, condition_code);
- SetPredicate(bb, instr.csetp.pred0, Operation(combiner, neg_cc, pred));
- }
- break;
- }
- default:
- UNIMPLEMENTED_MSG("Unhandled predicate instruction: {}", opcode->get().GetName());
- }
-
- return pc;
-}
-
-} // namespace VideoCommon::Shader
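
Note: PSETP in the file deleted above combined two source predicates and a third one with the same combiner operation, writing both a primary and an inverted secondary result. A sketch with plain bools instead of IR nodes; 'combiner' stands for the AND/OR/XOR selected by the instruction, and the type and function names are illustrative only.

#include <functional>

struct PsetpOutputs {
    bool pred3; // primary predicate
    bool pred0; // secondary predicate, only written when its index is not the unused one
};

inline PsetpOutputs EvaluatePsetp(bool op_a, bool op_b, bool second_pred,
                                  const std::function<bool(bool, bool)>& combiner) {
    const bool predicate = combiner(op_a, op_b);
    return {combiner(predicate, second_pred), combiner(!predicate, second_pred)};
}
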
diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp
deleted file mode 100644
index 84dbc50fe..000000000
--- a/src/video_core/shader/decode/predicate_set_register.cpp
+++ /dev/null
@@ -1,46 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-
-u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
-
- UNIMPLEMENTED_IF_MSG(instr.generates_cc,
- "Condition codes generation in PSET is not implemented");
-
- const Node op_a = GetPredicate(instr.pset.pred12, instr.pset.neg_pred12 != 0);
- const Node op_b = GetPredicate(instr.pset.pred29, instr.pset.neg_pred29 != 0);
- const Node first_pred = Operation(GetPredicateCombiner(instr.pset.cond), op_a, op_b);
-
- const Node second_pred = GetPredicate(instr.pset.pred39, instr.pset.neg_pred39 != 0);
-
- const OperationCode combiner = GetPredicateCombiner(instr.pset.op);
- const Node predicate = Operation(combiner, first_pred, second_pred);
-
- const Node true_value = instr.pset.bf ? Immediate(1.0f) : Immediate(0xffffffff);
- const Node false_value = instr.pset.bf ? Immediate(0.0f) : Immediate(0);
- const Node value =
- Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value);
-
- if (instr.pset.bf) {
- SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
- } else {
- SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
- }
- SetRegister(bb, instr.gpr0, value);
-
- return pc;
-}
-
-} // namespace VideoCommon::Shader
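
Note: the PSET handler removed above selected between two encodings of true/false depending on the .BF flag. A sketch of that selection with the result expressed as raw register bits; the function name is illustrative only.

#include <cstdint>
#include <cstring>

inline std::uint32_t PsetResultBits(bool predicate, bool bf_flag) {
    if (bf_flag) {
        const float value = predicate ? 1.0f : 0.0f; // .BF: float 1.0 or 0.0
        std::uint32_t bits{};
        std::memcpy(&bits, &value, sizeof(bits));
        return bits;
    }
    return predicate ? 0xffffffffu : 0u; // otherwise an all-ones/zero integer mask
}
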
diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp
deleted file mode 100644
index 6116c31aa..000000000
--- a/src/video_core/shader/decode/register_set_predicate.cpp
+++ /dev/null
@@ -1,86 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <utility>
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using std::move;
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-
-namespace {
-constexpr u64 NUM_CONDITION_CODES = 4;
-constexpr u64 NUM_PREDICATES = 7;
-} // namespace
-
-u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
-
- Node apply_mask = [this, opcode, instr] {
- switch (opcode->get().GetId()) {
- case OpCode::Id::R2P_IMM:
- case OpCode::Id::P2R_IMM:
- return Immediate(static_cast<u32>(instr.p2r_r2p.immediate_mask));
- default:
- UNREACHABLE();
- return Immediate(0);
- }
- }();
-
- const u32 offset = static_cast<u32>(instr.p2r_r2p.byte) * 8;
-
- const bool cc = instr.p2r_r2p.mode == Tegra::Shader::R2pMode::Cc;
- const u64 num_entries = cc ? NUM_CONDITION_CODES : NUM_PREDICATES;
- const auto get_entry = [this, cc](u64 entry) {
- return cc ? GetInternalFlag(static_cast<InternalFlag>(entry)) : GetPredicate(entry);
- };
-
- switch (opcode->get().GetId()) {
- case OpCode::Id::R2P_IMM: {
- Node mask = GetRegister(instr.gpr8);
-
- for (u64 entry = 0; entry < num_entries; ++entry) {
- const u32 shift = static_cast<u32>(entry);
-
- Node apply = BitfieldExtract(apply_mask, shift, 1);
- Node condition = Operation(OperationCode::LogicalUNotEqual, apply, Immediate(0));
-
- Node compare = BitfieldExtract(mask, offset + shift, 1);
- Node value = Operation(OperationCode::LogicalUNotEqual, move(compare), Immediate(0));
-
- Node code = Operation(OperationCode::LogicalAssign, get_entry(entry), move(value));
- bb.push_back(Conditional(condition, {move(code)}));
- }
- break;
- }
- case OpCode::Id::P2R_IMM: {
- Node value = Immediate(0);
- for (u64 entry = 0; entry < num_entries; ++entry) {
- Node bit = Operation(OperationCode::Select, get_entry(entry), Immediate(1U << entry),
- Immediate(0));
- value = Operation(OperationCode::UBitwiseOr, move(value), move(bit));
- }
- value = Operation(OperationCode::UBitwiseAnd, move(value), apply_mask);
- value = BitfieldInsert(GetRegister(instr.gpr8), move(value), offset, 8);
-
- SetRegister(bb, instr.gpr0, move(value));
- break;
- }
- default:
- UNIMPLEMENTED_MSG("Unhandled P2R/R2R instruction: {}", opcode->get().GetName());
- break;
- }
-
- return pc;
-}
-
-} // namespace VideoCommon::Shader
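
Note: P2R_IMM in the file deleted above packed the predicates (or condition codes) into one bit each, masked them with the immediate, and inserted the result into an 8-bit field of the source register. A plain-integer sketch of that packing for the 7-predicate mode; names are illustrative, and the condition-code mode works the same way with 4 flags.

#include <array>
#include <cstddef>
#include <cstdint>

inline std::uint32_t PackPredicatesP2r(std::uint32_t gpr8, const std::array<bool, 7>& preds,
                                       std::uint32_t immediate_mask, std::uint32_t byte) {
    std::uint32_t value = 0;
    for (std::size_t entry = 0; entry < preds.size(); ++entry) {
        value |= (preds[entry] ? 1u : 0u) << entry;
    }
    value &= immediate_mask;
    const std::uint32_t offset = byte * 8;
    const std::uint32_t field = 0xffu << offset;
    // Equivalent of BitfieldInsert(gpr8, value, offset, 8) in the removed code.
    return (gpr8 & ~field) | ((value << offset) & field);
}
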
diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp
deleted file mode 100644
index a53819c15..000000000
--- a/src/video_core/shader/decode/shift.cpp
+++ /dev/null
@@ -1,153 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using std::move;
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-using Tegra::Shader::ShfType;
-using Tegra::Shader::ShfXmode;
-
-namespace {
-
-Node IsFull(Node shift) {
- return Operation(OperationCode::LogicalIEqual, move(shift), Immediate(32));
-}
-
-Node Shift(OperationCode opcode, Node value, Node shift) {
- Node shifted = Operation(opcode, move(value), shift);
- return Operation(OperationCode::Select, IsFull(move(shift)), Immediate(0), move(shifted));
-}
-
-Node ClampShift(Node shift, s32 size = 32) {
- shift = Operation(OperationCode::IMax, move(shift), Immediate(0));
- return Operation(OperationCode::IMin, move(shift), Immediate(size));
-}
-
-Node WrapShift(Node shift, s32 size = 32) {
- return Operation(OperationCode::UBitwiseAnd, move(shift), Immediate(size - 1));
-}
-
-Node ShiftRight(Node low, Node high, Node shift, Node low_shift, ShfType type) {
- // These values are used when the shift value is less than 32
- Node less_low = Shift(OperationCode::ILogicalShiftRight, low, shift);
- Node less_high = Shift(OperationCode::ILogicalShiftLeft, high, low_shift);
- Node less = Operation(OperationCode::IBitwiseOr, move(less_high), move(less_low));
-
- if (type == ShfType::Bits32) {
- // On 32 bit shifts we are either full (shifting 32) or shifting less than 32 bits
- return Operation(OperationCode::Select, IsFull(move(shift)), move(high), move(less));
- }
-
- // And these when it's larger than or 32
- const bool is_signed = type == ShfType::S64;
- const auto opcode = SignedToUnsignedCode(OperationCode::IArithmeticShiftRight, is_signed);
- Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32));
- Node greater = Shift(opcode, high, move(reduced));
-
- Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32));
- Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0));
-
- Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater));
- return Operation(OperationCode::Select, move(is_zero), move(high), move(value));
-}
-
-Node ShiftLeft(Node low, Node high, Node shift, Node low_shift, ShfType type) {
- // These values are used when the shift value is less than 32
- Node less_low = Operation(OperationCode::ILogicalShiftRight, low, low_shift);
- Node less_high = Operation(OperationCode::ILogicalShiftLeft, high, shift);
- Node less = Operation(OperationCode::IBitwiseOr, move(less_low), move(less_high));
-
- if (type == ShfType::Bits32) {
- // On 32 bit shifts we are either full (shifting 32) or shifting less than 32 bits
- return Operation(OperationCode::Select, IsFull(move(shift)), move(low), move(less));
- }
-
- // And these when it's larger than or 32
- Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32));
- Node greater = Shift(OperationCode::ILogicalShiftLeft, move(low), move(reduced));
-
- Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32));
- Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0));
-
- Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater));
- return Operation(OperationCode::Select, move(is_zero), move(high), move(value));
-}
-
-} // Anonymous namespace
-
-u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
-
- Node op_a = GetRegister(instr.gpr8);
- Node op_b = [this, instr] {
- if (instr.is_b_imm) {
- return Immediate(instr.alu.GetSignedImm20_20());
- } else if (instr.is_b_gpr) {
- return GetRegister(instr.gpr20);
- } else {
- return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
- }
- }();
-
- switch (const auto opid = opcode->get().GetId(); opid) {
- case OpCode::Id::SHR_C:
- case OpCode::Id::SHR_R:
- case OpCode::Id::SHR_IMM: {
- op_b = instr.shr.wrap ? WrapShift(move(op_b)) : ClampShift(move(op_b));
-
- Node value = SignedOperation(OperationCode::IArithmeticShiftRight, instr.shift.is_signed,
- move(op_a), move(op_b));
- SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
- SetRegister(bb, instr.gpr0, move(value));
- break;
- }
- case OpCode::Id::SHL_C:
- case OpCode::Id::SHL_R:
- case OpCode::Id::SHL_IMM: {
- Node value = Operation(OperationCode::ILogicalShiftLeft, op_a, op_b);
- SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
- SetRegister(bb, instr.gpr0, move(value));
- break;
- }
- case OpCode::Id::SHF_RIGHT_R:
- case OpCode::Id::SHF_RIGHT_IMM:
- case OpCode::Id::SHF_LEFT_R:
- case OpCode::Id::SHF_LEFT_IMM: {
- UNIMPLEMENTED_IF(instr.generates_cc);
- UNIMPLEMENTED_IF_MSG(instr.shf.xmode != ShfXmode::None, "xmode={}",
- instr.shf.xmode.Value());
-
- if (instr.is_b_imm) {
- op_b = Immediate(static_cast<u32>(instr.shf.immediate));
- }
- const s32 size = instr.shf.type == ShfType::Bits32 ? 32 : 64;
- Node shift = instr.shf.wrap ? WrapShift(move(op_b), size) : ClampShift(move(op_b), size);
-
- Node negated_shift = Operation(OperationCode::INegate, shift);
- Node low_shift = Operation(OperationCode::IAdd, move(negated_shift), Immediate(32));
-
- const bool is_right = opid == OpCode::Id::SHF_RIGHT_R || opid == OpCode::Id::SHF_RIGHT_IMM;
- Node value = (is_right ? ShiftRight : ShiftLeft)(
- move(op_a), GetRegister(instr.gpr39), move(shift), move(low_shift), instr.shf.type);
-
- SetRegister(bb, instr.gpr0, move(value));
- break;
- }
- default:
- UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName());
- }
-
- return pc;
-}
-
-} // namespace VideoCommon::Shader
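
Note: the ShiftRight/ShiftLeft helpers removed above emulate the 64-bit SHF funnel shift with 32-bit IR operations, with special cases for a shift of zero and shifts of 32 or more. A plain-integer sketch of the unsigned right variant that keeps the same edge-case handling; the function name is illustrative, the shift amount is assumed to be already wrapped or clamped, and the signed form would use an arithmetic shift for the upper half.

#include <cstdint>

inline std::uint32_t FunnelShiftRight64(std::uint32_t low, std::uint32_t high,
                                        std::uint32_t shift) {
    if (shift == 0) {
        return high; // mirrors the is_zero select in the removed ShiftRight helper
    }
    if (shift < 32) {
        // Combine bits from both halves when shifting by less than 32.
        return (low >> shift) | (high << (32 - shift));
    }
    const std::uint32_t reduced = shift - 32;
    return reduced >= 32 ? 0u : (high >> reduced); // Shift() yields 0 for a full 32-bit shift
}
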
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
deleted file mode 100644
index c69681e8d..000000000
--- a/src/video_core/shader/decode/texture.cpp
+++ /dev/null
@@ -1,935 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <vector>
-#include <fmt/format.h>
-
-#include "common/assert.h"
-#include "common/bit_field.h"
-#include "common/common_types.h"
-#include "common/logging/log.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/registry.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-using Tegra::Shader::Register;
-using Tegra::Shader::TextureMiscMode;
-using Tegra::Shader::TextureProcessMode;
-using Tegra::Shader::TextureType;
-
-static std::size_t GetCoordCount(TextureType texture_type) {
- switch (texture_type) {
- case TextureType::Texture1D:
- return 1;
- case TextureType::Texture2D:
- return 2;
- case TextureType::Texture3D:
- case TextureType::TextureCube:
- return 3;
- default:
- UNIMPLEMENTED_MSG("Unhandled texture type: {}", texture_type);
- return 0;
- }
-}
-
-u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
- bool is_bindless = false;
- switch (opcode->get().GetId()) {
- case OpCode::Id::TEX: {
- const TextureType texture_type{instr.tex.texture_type};
- const bool is_array = instr.tex.array != 0;
- const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
- const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
- const auto process_mode = instr.tex.GetTextureProcessMode();
- WriteTexInstructionFloat(
- bb, instr,
- GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi, {}));
- break;
- }
- case OpCode::Id::TEX_B: {
- UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
- "AOFFI is not implemented");
-
- const TextureType texture_type{instr.tex_b.texture_type};
- const bool is_array = instr.tex_b.array != 0;
- const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
- const bool depth_compare = instr.tex_b.UsesMiscMode(TextureMiscMode::DC);
- const auto process_mode = instr.tex_b.GetTextureProcessMode();
- WriteTexInstructionFloat(bb, instr,
- GetTexCode(instr, texture_type, process_mode, depth_compare,
- is_array, is_aoffi, {instr.gpr20}));
- break;
- }
- case OpCode::Id::TEXS: {
- const TextureType texture_type{instr.texs.GetTextureType()};
- const bool is_array{instr.texs.IsArrayTexture()};
- const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
- const auto process_mode = instr.texs.GetTextureProcessMode();
-
- const Node4 components =
- GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);
-
- if (instr.texs.fp32_flag) {
- WriteTexsInstructionFloat(bb, instr, components);
- } else {
- WriteTexsInstructionHalfFloat(bb, instr, components);
- }
- break;
- }
- case OpCode::Id::TLD4_B: {
- is_bindless = true;
- [[fallthrough]];
- }
- case OpCode::Id::TLD4: {
- UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
- "NDV is not implemented");
- const auto texture_type = instr.tld4.texture_type.Value();
- const bool depth_compare = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::DC)
- : instr.tld4.UsesMiscMode(TextureMiscMode::DC);
- const bool is_array = instr.tld4.array != 0;
- const bool is_aoffi = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::AOFFI)
- : instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI);
- const bool is_ptp = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::PTP)
- : instr.tld4.UsesMiscMode(TextureMiscMode::PTP);
- WriteTexInstructionFloat(bb, instr,
- GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi,
- is_ptp, is_bindless));
- break;
- }
- case OpCode::Id::TLD4S: {
- constexpr std::size_t num_coords = 2;
- const bool is_aoffi = instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI);
- const bool is_depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
- const Node op_a = GetRegister(instr.gpr8);
- const Node op_b = GetRegister(instr.gpr20);
-
- // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
- std::vector<Node> coords;
- std::vector<Node> aoffi;
- Node depth_compare;
- if (is_depth_compare) {
- // Note: TLD4S coordinate encoding works just like TEXS's
- const Node op_y = GetRegister(instr.gpr8.Value() + 1);
- coords.push_back(op_a);
- coords.push_back(op_y);
- if (is_aoffi) {
- aoffi = GetAoffiCoordinates(op_b, num_coords, true);
- depth_compare = GetRegister(instr.gpr20.Value() + 1);
- } else {
- depth_compare = op_b;
- }
- } else {
- // There's no depth compare
- coords.push_back(op_a);
- if (is_aoffi) {
- coords.push_back(GetRegister(instr.gpr8.Value() + 1));
- aoffi = GetAoffiCoordinates(op_b, num_coords, true);
- } else {
- coords.push_back(op_b);
- }
- }
- const Node component = Immediate(static_cast<u32>(instr.tld4s.component));
-
- SamplerInfo info;
- info.is_shadow = is_depth_compare;
- const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info);
-
- Node4 values;
- for (u32 element = 0; element < values.size(); ++element) {
- MetaTexture meta{*sampler, {}, depth_compare, aoffi, {}, {},
- {}, {}, component, element, {}};
- values[element] = Operation(OperationCode::TextureGather, meta, coords);
- }
-
- if (instr.tld4s.fp16_flag) {
- WriteTexsInstructionHalfFloat(bb, instr, values, true);
- } else {
- WriteTexsInstructionFloat(bb, instr, values, true);
- }
- break;
- }
- case OpCode::Id::TXD_B:
- is_bindless = true;
- [[fallthrough]];
- case OpCode::Id::TXD: {
- UNIMPLEMENTED_IF_MSG(instr.txd.UsesMiscMode(TextureMiscMode::AOFFI),
- "AOFFI is not implemented");
-
- const bool is_array = instr.txd.is_array != 0;
- const auto derivate_reg = instr.gpr20.Value();
- const auto texture_type = instr.txd.texture_type.Value();
- const auto coord_count = GetCoordCount(texture_type);
- u64 base_reg = instr.gpr8.Value();
- Node index_var;
- SamplerInfo info;
- info.type = texture_type;
- info.is_array = is_array;
- const std::optional<SamplerEntry> sampler =
- is_bindless ? GetBindlessSampler(base_reg, info, index_var)
- : GetSampler(instr.sampler, info);
- Node4 values;
- if (!sampler) {
- std::generate(values.begin(), values.end(), [this] { return Immediate(0); });
- WriteTexInstructionFloat(bb, instr, values);
- break;
- }
-
- if (is_bindless) {
- base_reg++;
- }
-
- std::vector<Node> coords;
- std::vector<Node> derivates;
- for (std::size_t i = 0; i < coord_count; ++i) {
- coords.push_back(GetRegister(base_reg + i));
- const std::size_t derivate = i * 2;
- derivates.push_back(GetRegister(derivate_reg + derivate));
- derivates.push_back(GetRegister(derivate_reg + derivate + 1));
- }
-
- Node array_node = {};
- if (is_array) {
- const Node info_reg = GetRegister(base_reg + coord_count);
- array_node = BitfieldExtract(info_reg, 0, 16);
- }
-
- for (u32 element = 0; element < values.size(); ++element) {
- MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates,
- {}, {}, {}, element, index_var};
- values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords);
- }
-
- WriteTexInstructionFloat(bb, instr, values);
-
- break;
- }
- case OpCode::Id::TXQ_B:
- is_bindless = true;
- [[fallthrough]];
- case OpCode::Id::TXQ: {
- Node index_var;
- const std::optional<SamplerEntry> sampler =
- is_bindless ? GetBindlessSampler(instr.gpr8, {}, index_var)
- : GetSampler(instr.sampler, {});
-
- if (!sampler) {
- u32 indexer = 0;
- for (u32 element = 0; element < 4; ++element) {
- if (!instr.txq.IsComponentEnabled(element)) {
- continue;
- }
- const Node value = Immediate(0);
- SetTemporary(bb, indexer++, value);
- }
- for (u32 i = 0; i < indexer; ++i) {
- SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
- }
- break;
- }
-
- u32 indexer = 0;
- switch (instr.txq.query_type) {
- case Tegra::Shader::TextureQueryType::Dimension: {
- for (u32 element = 0; element < 4; ++element) {
- if (!instr.txq.IsComponentEnabled(element)) {
- continue;
- }
- MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var};
- const Node value =
- Operation(OperationCode::TextureQueryDimensions, meta,
- GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0)));
- SetTemporary(bb, indexer++, value);
- }
- for (u32 i = 0; i < indexer; ++i) {
- SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
- }
- break;
- }
- default:
- UNIMPLEMENTED_MSG("Unhandled texture query type: {}", instr.txq.query_type.Value());
- }
- break;
- }
- case OpCode::Id::TMML_B:
- is_bindless = true;
- [[fallthrough]];
- case OpCode::Id::TMML: {
- UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
- "NDV is not implemented");
-
- const auto texture_type = instr.tmml.texture_type.Value();
- const bool is_array = instr.tmml.array != 0;
- SamplerInfo info;
- info.type = texture_type;
- info.is_array = is_array;
- Node index_var;
- const std::optional<SamplerEntry> sampler =
- is_bindless ? GetBindlessSampler(instr.gpr20, info, index_var)
- : GetSampler(instr.sampler, info);
-
- if (!sampler) {
- u32 indexer = 0;
- for (u32 element = 0; element < 2; ++element) {
- if (!instr.tmml.IsComponentEnabled(element)) {
- continue;
- }
- const Node value = Immediate(0);
- SetTemporary(bb, indexer++, value);
- }
- for (u32 i = 0; i < indexer; ++i) {
- SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
- }
- break;
- }
-
- const u64 base_index = is_array ? 1 : 0;
- const u64 num_components = [texture_type] {
- switch (texture_type) {
- case TextureType::Texture1D:
- return 1;
- case TextureType::Texture2D:
- return 2;
- case TextureType::TextureCube:
- return 3;
- default:
- UNIMPLEMENTED_MSG("Unhandled texture type {}", texture_type);
- return 2;
- }
- }();
- // TODO: What's the array component used for?
-
- std::vector<Node> coords;
- coords.reserve(num_components);
- for (u64 component = 0; component < num_components; ++component) {
- coords.push_back(GetRegister(instr.gpr8.Value() + base_index + component));
- }
-
- u32 indexer = 0;
- for (u32 element = 0; element < 2; ++element) {
- if (!instr.tmml.IsComponentEnabled(element)) {
- continue;
- }
- MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var};
- Node value = Operation(OperationCode::TextureQueryLod, meta, coords);
- SetTemporary(bb, indexer++, std::move(value));
- }
- for (u32 i = 0; i < indexer; ++i) {
- SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
- }
- break;
- }
- case OpCode::Id::TLD: {
- UNIMPLEMENTED_IF_MSG(instr.tld.aoffi, "AOFFI is not implemented");
- UNIMPLEMENTED_IF_MSG(instr.tld.ms, "MS is not implemented");
- UNIMPLEMENTED_IF_MSG(instr.tld.cl, "CL is not implemented");
-
- WriteTexInstructionFloat(bb, instr, GetTldCode(instr));
- break;
- }
- case OpCode::Id::TLDS: {
- const TextureType texture_type{instr.tlds.GetTextureType()};
- const bool is_array{instr.tlds.IsArrayTexture()};
-
- UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI),
- "AOFFI is not implemented");
- UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");
-
- const Node4 components = GetTldsCode(instr, texture_type, is_array);
-
- if (instr.tlds.fp32_flag) {
- WriteTexsInstructionFloat(bb, instr, components);
- } else {
- WriteTexsInstructionHalfFloat(bb, instr, components);
- }
- break;
- }
- default:
- UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
- }
-
- return pc;
-}
-
-ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(
- SamplerInfo info, std::optional<Tegra::Engines::SamplerDescriptor> sampler) {
- if (info.IsComplete()) {
- return info;
- }
- if (!sampler) {
- LOG_WARNING(HW_GPU, "Unknown sampler info");
- info.type = info.type.value_or(Tegra::Shader::TextureType::Texture2D);
- info.is_array = info.is_array.value_or(false);
- info.is_shadow = info.is_shadow.value_or(false);
- info.is_buffer = info.is_buffer.value_or(false);
- return info;
- }
- info.type = info.type.value_or(sampler->texture_type);
- info.is_array = info.is_array.value_or(sampler->is_array != 0);
- info.is_shadow = info.is_shadow.value_or(sampler->is_shadow != 0);
- info.is_buffer = info.is_buffer.value_or(sampler->is_buffer != 0);
- return info;
-}
-
-std::optional<SamplerEntry> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler,
- SamplerInfo sampler_info) {
- const u32 offset = static_cast<u32>(sampler.index.Value());
- const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset));
-
- // If this sampler has already been used, return the existing mapping.
- const auto it =
- std::find_if(used_samplers.begin(), used_samplers.end(),
- [offset](const SamplerEntry& entry) { return entry.offset == offset; });
- if (it != used_samplers.end()) {
- ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array &&
- it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer);
- return *it;
- }
-
- // Otherwise create a new mapping for this sampler
- const auto next_index = static_cast<u32>(used_samplers.size());
- return used_samplers.emplace_back(next_index, offset, *info.type, *info.is_array,
- *info.is_shadow, *info.is_buffer, false);
-}
-
-std::optional<SamplerEntry> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
- SamplerInfo info, Node& index_var) {
- const Node sampler_register = GetRegister(reg);
- const auto [base_node, tracked_sampler_info] =
- TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size()));
- if (!base_node) {
- UNREACHABLE();
- return std::nullopt;
- }
-
- if (const auto sampler_info = std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) {
- const u32 buffer = sampler_info->index;
- const u32 offset = sampler_info->offset;
- info = GetSamplerInfo(info, registry.ObtainBindlessSampler(buffer, offset));
-
- // If this sampler has already been used, return the existing mapping.
- const auto it = std::find_if(used_samplers.begin(), used_samplers.end(),
- [buffer, offset](const SamplerEntry& entry) {
- return entry.buffer == buffer && entry.offset == offset;
- });
- if (it != used_samplers.end()) {
- ASSERT(it->is_bindless && it->type == info.type && it->is_array == info.is_array &&
- it->is_shadow == info.is_shadow);
- return *it;
- }
-
- // Otherwise create a new mapping for this sampler
- const auto next_index = static_cast<u32>(used_samplers.size());
- return used_samplers.emplace_back(next_index, offset, buffer, *info.type, *info.is_array,
- *info.is_shadow, *info.is_buffer, false);
- }
- if (const auto sampler_info = std::get_if<SeparateSamplerNode>(&*tracked_sampler_info)) {
- const std::pair indices = sampler_info->indices;
- const std::pair offsets = sampler_info->offsets;
- info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets));
-
- // Try to use an already created sampler if it exists
- const auto it =
- std::find_if(used_samplers.begin(), used_samplers.end(),
- [indices, offsets](const SamplerEntry& entry) {
- return offsets == std::pair{entry.offset, entry.secondary_offset} &&
- indices == std::pair{entry.buffer, entry.secondary_buffer};
- });
- if (it != used_samplers.end()) {
- ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array &&
- it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer);
- return *it;
- }
-
- // Otherwise create a new mapping for this sampler
- const u32 next_index = static_cast<u32>(used_samplers.size());
- return used_samplers.emplace_back(next_index, offsets, indices, *info.type, *info.is_array,
- *info.is_shadow, *info.is_buffer);
- }
- if (const auto sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) {
- const u32 base_offset = sampler_info->base_offset / 4;
- index_var = GetCustomVariable(sampler_info->bindless_var);
- info = GetSamplerInfo(info, registry.ObtainBoundSampler(base_offset));
-
- // If this sampler has already been used, return the existing mapping.
- const auto it = std::find_if(
- used_samplers.begin(), used_samplers.end(),
- [base_offset](const SamplerEntry& entry) { return entry.offset == base_offset; });
- if (it != used_samplers.end()) {
- ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array &&
- it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer &&
- it->is_indexed);
- return *it;
- }
-
- uses_indexed_samplers = true;
- // Otherwise create a new mapping for this sampler
- const auto next_index = static_cast<u32>(used_samplers.size());
- return used_samplers.emplace_back(next_index, base_offset, *info.type, *info.is_array,
- *info.is_shadow, *info.is_buffer, true);
- }
- return std::nullopt;
-}
-
-void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
- u32 dest_elem = 0;
- for (u32 elem = 0; elem < 4; ++elem) {
- if (!instr.tex.IsComponentEnabled(elem)) {
- // Skip disabled components
- continue;
- }
- SetTemporary(bb, dest_elem++, components[elem]);
- }
-    // After writing the values to temporaries, move them to the real registers
- for (u32 i = 0; i < dest_elem; ++i) {
- SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
- }
-}
-
-void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components,
- bool ignore_mask) {
- // TEXS has two destination registers and a swizzle. The first two elements in the swizzle
- // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
-
- u32 dest_elem = 0;
- for (u32 component = 0; component < 4; ++component) {
- if (!instr.texs.IsComponentEnabled(component) && !ignore_mask)
- continue;
- SetTemporary(bb, dest_elem++, components[component]);
- }
-
- for (u32 i = 0; i < dest_elem; ++i) {
- if (i < 2) {
- // Write the first two swizzle components to gpr0 and gpr0+1
- SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporary(i));
- } else {
- ASSERT(instr.texs.HasTwoDestinations());
- // Write the rest of the swizzle components to gpr28 and gpr28+1
- SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporary(i));
- }
- }
-}
-
-void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
- const Node4& components, bool ignore_mask) {
-    // TEXS.F16 destination registers are packed in two registers in pairs (just like any half
- // float instruction).
-
- Node4 values;
- u32 dest_elem = 0;
- for (u32 component = 0; component < 4; ++component) {
- if (!instr.texs.IsComponentEnabled(component) && !ignore_mask)
- continue;
- values[dest_elem++] = components[component];
- }
- if (dest_elem == 0)
- return;
-
- std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); });
-
- const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]);
- if (dest_elem <= 2) {
- SetRegister(bb, instr.gpr0, first_value);
- return;
- }
-
- SetTemporary(bb, 0, first_value);
- SetTemporary(bb, 1, Operation(OperationCode::HPack2, values[2], values[3]));
-
- SetRegister(bb, instr.gpr0, GetTemporary(0));
- SetRegister(bb, instr.gpr28, GetTemporary(1));
-}
-
-Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
- TextureProcessMode process_mode, std::vector<Node> coords,
- Node array, Node depth_compare, u32 bias_offset,
- std::vector<Node> aoffi,
- std::optional<Tegra::Shader::Register> bindless_reg) {
- const bool is_array = array != nullptr;
- const bool is_shadow = depth_compare != nullptr;
- const bool is_bindless = bindless_reg.has_value();
-
- ASSERT_MSG(texture_type != TextureType::Texture3D || !is_array || !is_shadow,
- "Illegal texture type");
-
- SamplerInfo info;
- info.type = texture_type;
- info.is_array = is_array;
- info.is_shadow = is_shadow;
- info.is_buffer = false;
-
- Node index_var;
- const std::optional<SamplerEntry> sampler =
- is_bindless ? GetBindlessSampler(*bindless_reg, info, index_var)
- : GetSampler(instr.sampler, info);
- if (!sampler) {
- return {Immediate(0), Immediate(0), Immediate(0), Immediate(0)};
- }
-
- const bool lod_needed = process_mode == TextureProcessMode::LZ ||
- process_mode == TextureProcessMode::LL ||
- process_mode == TextureProcessMode::LLA;
- const OperationCode opcode = lod_needed ? OperationCode::TextureLod : OperationCode::Texture;
-
- Node bias;
- Node lod;
- switch (process_mode) {
- case TextureProcessMode::None:
- break;
- case TextureProcessMode::LZ:
- lod = Immediate(0.0f);
- break;
- case TextureProcessMode::LB:
- // If present, lod or bias are always stored in the register indexed by the gpr20 field with
- // an offset depending on the usage of the other registers.
- bias = GetRegister(instr.gpr20.Value() + bias_offset);
- break;
- case TextureProcessMode::LL:
- lod = GetRegister(instr.gpr20.Value() + bias_offset);
- break;
- default:
- UNIMPLEMENTED_MSG("Unimplemented process mode={}", process_mode);
- break;
- }
-
- Node4 values;
- for (u32 element = 0; element < values.size(); ++element) {
- MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias,
- lod, {}, element, index_var};
- values[element] = Operation(opcode, meta, coords);
- }
-
- return values;
-}
-
-Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
- TextureProcessMode process_mode, bool depth_compare, bool is_array,
- bool is_aoffi, std::optional<Tegra::Shader::Register> bindless_reg) {
- const bool lod_bias_enabled{
- (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)};
-
- const bool is_bindless = bindless_reg.has_value();
-
- u64 parameter_register = instr.gpr20.Value();
- if (is_bindless) {
- ++parameter_register;
- }
-
- const u32 bias_lod_offset = (is_bindless ? 1 : 0);
- if (lod_bias_enabled) {
- ++parameter_register;
- }
-
- const auto coord_counts = ValidateAndGetCoordinateElement(texture_type, depth_compare, is_array,
- lod_bias_enabled, 4, 5);
- const auto coord_count = std::get<0>(coord_counts);
-    // If enabled, the array index is always stored in the gpr8 field
-    const u64 array_register = instr.gpr8.Value();
-    // The first coordinate index is gpr8, or gpr8 + 1 when arrays are used
- const u64 coord_register = array_register + (is_array ? 1 : 0);
-
- std::vector<Node> coords;
- for (std::size_t i = 0; i < coord_count; ++i) {
- coords.push_back(GetRegister(coord_register + i));
- }
-    // For 1D.DC in OpenGL, the 2nd component is ignored.
- if (depth_compare && !is_array && texture_type == TextureType::Texture1D) {
- coords.push_back(Immediate(0.0f));
- }
-
- const Node array = is_array ? GetRegister(array_register) : nullptr;
-
- std::vector<Node> aoffi;
- if (is_aoffi) {
- aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, false);
- }
-
- Node dc;
- if (depth_compare) {
- // Depth is always stored in the register signaled by gpr20 or in the next register if lod
- // or bias are used
- dc = GetRegister(parameter_register++);
- }
-
- return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_lod_offset,
- aoffi, bindless_reg);
-}
-
-Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
- TextureProcessMode process_mode, bool depth_compare, bool is_array) {
- const bool lod_bias_enabled =
- (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
-
- const auto coord_counts = ValidateAndGetCoordinateElement(texture_type, depth_compare, is_array,
- lod_bias_enabled, 4, 4);
- const auto coord_count = std::get<0>(coord_counts);
-
-    // If enabled, the array index is always stored in the gpr8 field
- const u64 array_register = instr.gpr8.Value();
- // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used
- const u64 coord_register = array_register + (is_array ? 1 : 0);
- const u64 last_coord_register =
- (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2))
- ? static_cast<u64>(instr.gpr20.Value())
- : coord_register + 1;
- const u32 bias_offset = coord_count > 2 ? 1 : 0;
-
- std::vector<Node> coords;
- for (std::size_t i = 0; i < coord_count; ++i) {
- const bool last = (i == (coord_count - 1)) && (coord_count > 1);
- coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
- }
-
- const Node array = is_array ? GetRegister(array_register) : nullptr;
-
- Node dc;
- if (depth_compare) {
- // Depth is always stored in the register signaled by gpr20 or in the next register if lod
- // or bias are used
- const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
- dc = GetRegister(depth_register);
- }
-
- return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {},
- {});
-}
-
-Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
- bool is_array, bool is_aoffi, bool is_ptp, bool is_bindless) {
- ASSERT_MSG(!(is_aoffi && is_ptp), "AOFFI and PTP can't be enabled at the same time");
-
- const std::size_t coord_count = GetCoordCount(texture_type);
-
-    // If enabled, the array index is always stored in the gpr8 field
-    const u64 array_register = instr.gpr8.Value();
-    // The first coordinate index is gpr8, or gpr8 + 1 when arrays are used
- const u64 coord_register = array_register + (is_array ? 1 : 0);
-
- std::vector<Node> coords;
- for (std::size_t i = 0; i < coord_count; ++i) {
- coords.push_back(GetRegister(coord_register + i));
- }
-
- u64 parameter_register = instr.gpr20.Value();
-
- SamplerInfo info;
- info.type = texture_type;
- info.is_array = is_array;
- info.is_shadow = depth_compare;
-
- Node index_var;
- const std::optional<SamplerEntry> sampler =
- is_bindless ? GetBindlessSampler(parameter_register++, info, index_var)
- : GetSampler(instr.sampler, info);
- Node4 values;
- if (!sampler) {
- for (u32 element = 0; element < values.size(); ++element) {
- values[element] = Immediate(0);
- }
- return values;
- }
-
- std::vector<Node> aoffi, ptp;
- if (is_aoffi) {
- aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true);
- } else if (is_ptp) {
- ptp = GetPtpCoordinates(
- {GetRegister(parameter_register++), GetRegister(parameter_register++)});
- }
-
- Node dc;
- if (depth_compare) {
- dc = GetRegister(parameter_register++);
- }
-
- const Node component = is_bindless ? Immediate(static_cast<u32>(instr.tld4_b.component))
- : Immediate(static_cast<u32>(instr.tld4.component));
-
- for (u32 element = 0; element < values.size(); ++element) {
- auto coords_copy = coords;
- MetaTexture meta{
- *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element,
- index_var};
- values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
- }
-
- return values;
-}
-
-Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
- const auto texture_type{instr.tld.texture_type};
- const bool is_array{instr.tld.is_array != 0};
- const bool lod_enabled{instr.tld.GetTextureProcessMode() == TextureProcessMode::LL};
- const std::size_t coord_count{GetCoordCount(texture_type)};
-
- u64 gpr8_cursor{instr.gpr8.Value()};
- const Node array_register{is_array ? GetRegister(gpr8_cursor++) : nullptr};
-
- std::vector<Node> coords;
- coords.reserve(coord_count);
- for (std::size_t i = 0; i < coord_count; ++i) {
- coords.push_back(GetRegister(gpr8_cursor++));
- }
-
- u64 gpr20_cursor{instr.gpr20.Value()};
- // const Node bindless_register{is_bindless ? GetRegister(gpr20_cursor++) : nullptr};
- const Node lod{lod_enabled ? GetRegister(gpr20_cursor++) : Immediate(0u)};
- // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr};
- // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr};
-
- const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, {});
-
- Node4 values;
- for (u32 element = 0; element < values.size(); ++element) {
- auto coords_copy = coords;
- MetaTexture meta{*sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element, {}};
- values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
- }
-
- return values;
-}
-
-Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
- SamplerInfo info;
- info.type = texture_type;
- info.is_array = is_array;
- info.is_shadow = false;
- const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info);
-
- const std::size_t type_coord_count = GetCoordCount(texture_type);
- const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
- const bool aoffi_enabled = instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI);
-
-    // If enabled, the array index is always stored in the gpr8 field
- const u64 array_register = instr.gpr8.Value();
-    // If this is an array texture, the coordinates are in gpr20 instead of gpr8
- const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value();
-
- const u64 last_coord_register =
- ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array
- ? static_cast<u64>(instr.gpr20.Value())
- : coord_register + 1;
-
- std::vector<Node> coords;
- for (std::size_t i = 0; i < type_coord_count; ++i) {
- const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
- coords.push_back(
- GetRegister(last && !aoffi_enabled ? last_coord_register : coord_register + i));
- }
-
- const Node array = is_array ? GetRegister(array_register) : nullptr;
-    // When LOD is used, it is always stored in gpr20
- const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
-
- std::vector<Node> aoffi;
- if (aoffi_enabled) {
- aoffi = GetAoffiCoordinates(GetRegister(instr.gpr20), type_coord_count, false);
- }
-
- Node4 values;
- for (u32 element = 0; element < values.size(); ++element) {
- auto coords_copy = coords;
- MetaTexture meta{*sampler, array, {}, aoffi, {}, {}, {}, lod, {}, element, {}};
- values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
- }
- return values;
-}
-
-std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
- TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled,
- std::size_t max_coords, std::size_t max_inputs) {
- const std::size_t coord_count = GetCoordCount(texture_type);
-
- std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0);
- const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0);
- if (total_coord_count > max_coords || total_reg_count > max_inputs) {
- UNIMPLEMENTED_MSG("Unsupported Texture operation");
- total_coord_count = std::min(total_coord_count, max_coords);
- }
-    // For 1D.DC, OpenGL uses a vec3, but the 2nd component is ignored later.
- total_coord_count +=
- (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0;
-
- return {coord_count, total_coord_count};
-}
-
-std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count,
- bool is_tld4) {
- const std::array coord_offsets = is_tld4 ? std::array{0U, 8U, 16U} : std::array{0U, 4U, 8U};
- const u32 size = is_tld4 ? 6 : 4;
- const s32 wrap_value = is_tld4 ? 32 : 8;
- const s32 diff_value = is_tld4 ? 64 : 16;
- const u32 mask = (1U << size) - 1;
-
- std::vector<Node> aoffi;
- aoffi.reserve(coord_count);
-
- const auto aoffi_immediate{
- TrackImmediate(aoffi_reg, global_code, static_cast<s64>(global_code.size()))};
- if (!aoffi_immediate) {
- // Variable access, not supported on AMD.
- LOG_WARNING(HW_GPU,
- "AOFFI constant folding failed, some hardware might have graphical issues");
- for (std::size_t coord = 0; coord < coord_count; ++coord) {
- const Node value = BitfieldExtract(aoffi_reg, coord_offsets[coord], size);
- const Node condition =
- Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value));
- const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value));
- aoffi.push_back(Operation(OperationCode::Select, condition, negative, value));
- }
- return aoffi;
- }
-
- for (std::size_t coord = 0; coord < coord_count; ++coord) {
- s32 value = (*aoffi_immediate >> coord_offsets[coord]) & mask;
- if (value >= wrap_value) {
- value -= diff_value;
- }
- aoffi.push_back(Immediate(value));
- }
- return aoffi;
-}
-
-std::vector<Node> ShaderIR::GetPtpCoordinates(std::array<Node, 2> ptp_regs) {
- static constexpr u32 num_entries = 8;
-
- std::vector<Node> ptp;
- ptp.reserve(num_entries);
-
- const auto global_size = static_cast<s64>(global_code.size());
- const std::optional low = TrackImmediate(ptp_regs[0], global_code, global_size);
- const std::optional high = TrackImmediate(ptp_regs[1], global_code, global_size);
- if (!low || !high) {
- for (u32 entry = 0; entry < num_entries; ++entry) {
- const u32 reg = entry / 4;
- const u32 offset = entry % 4;
- const Node value = BitfieldExtract(ptp_regs[reg], offset * 8, 6);
- const Node condition =
- Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(32));
- const Node negative = Operation(OperationCode::IAdd, value, Immediate(-64));
- ptp.push_back(Operation(OperationCode::Select, condition, negative, value));
- }
- return ptp;
- }
-
- const u64 immediate = (static_cast<u64>(*high) << 32) | static_cast<u64>(*low);
- for (u32 entry = 0; entry < num_entries; ++entry) {
- s32 value = (immediate >> (entry * 8)) & 0b111111;
- if (value >= 32) {
- value -= 64;
- }
- ptp.push_back(Immediate(value));
- }
-
- return ptp;
-}
-
-} // namespace VideoCommon::Shader
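
Note: GetAoffiCoordinates in the file deleted above decoded the packed AOFFI texture offsets either by constant folding or, failing that, with per-coordinate IR bitfield extraction. A standalone sketch of the constant-folded path (4-bit fields for TEX/TLD, 6-bit fields for TLD4, each interpreted as a signed value); the function name is illustrative only, and coord_count is at most 3 as in the removed code.

#include <cstddef>
#include <cstdint>
#include <vector>

inline std::vector<int> DecodeAoffi(std::uint32_t packed, std::size_t coord_count,
                                    bool is_tld4) {
    const std::uint32_t coord_offsets[3] = {0u, is_tld4 ? 8u : 4u, is_tld4 ? 16u : 8u};
    const std::uint32_t size = is_tld4 ? 6u : 4u;
    const int wrap_value = is_tld4 ? 32 : 8;   // half of the field's range
    const int diff_value = is_tld4 ? 64 : 16;  // full range, subtracted to sign-extend
    const std::uint32_t mask = (1u << size) - 1u;

    std::vector<int> aoffi;
    aoffi.reserve(coord_count);
    for (std::size_t coord = 0; coord < coord_count; ++coord) {
        int value = static_cast<int>((packed >> coord_offsets[coord]) & mask);
        if (value >= wrap_value) {
            value -= diff_value; // e.g. a 4-bit field holding 0xF decodes to -1
        }
        aoffi.push_back(value);
    }
    return aoffi;
}
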
diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp
deleted file mode 100644
index 1c0957277..000000000
--- a/src/video_core/shader/decode/video.cpp
+++ /dev/null
@@ -1,169 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using std::move;
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-using Tegra::Shader::Pred;
-using Tegra::Shader::VideoType;
-using Tegra::Shader::VmadShr;
-using Tegra::Shader::VmnmxOperation;
-using Tegra::Shader::VmnmxType;
-
-u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
-
- if (opcode->get().GetId() == OpCode::Id::VMNMX) {
- DecodeVMNMX(bb, instr);
- return pc;
- }
-
- const Node op_a =
- GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a,
- instr.video.type_a, instr.video.byte_height_a);
- const Node op_b = [this, instr] {
- if (instr.video.use_register_b) {
- return GetVideoOperand(GetRegister(instr.gpr20), instr.video.is_byte_chunk_b,
- instr.video.signed_b, instr.video.type_b,
- instr.video.byte_height_b);
- }
- if (instr.video.signed_b) {
- const auto imm = static_cast<s16>(instr.alu.GetImm20_16());
- return Immediate(static_cast<u32>(imm));
- } else {
- return Immediate(instr.alu.GetImm20_16());
- }
- }();
-
- switch (opcode->get().GetId()) {
- case OpCode::Id::VMAD: {
- const bool result_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1;
- const Node op_c = GetRegister(instr.gpr39);
-
- Node value = SignedOperation(OperationCode::IMul, result_signed, NO_PRECISE, op_a, op_b);
- value = SignedOperation(OperationCode::IAdd, result_signed, NO_PRECISE, value, op_c);
-
- if (instr.vmad.shr == VmadShr::Shr7 || instr.vmad.shr == VmadShr::Shr15) {
- const Node shift = Immediate(instr.vmad.shr == VmadShr::Shr7 ? 7 : 15);
- value =
- SignedOperation(OperationCode::IArithmeticShiftRight, result_signed, value, shift);
- }
-
- SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
- SetRegister(bb, instr.gpr0, value);
- break;
- }
- case OpCode::Id::VSETP: {
- // We can't use the constant predicate as destination.
- ASSERT(instr.vsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
-
- const bool sign = instr.video.signed_a == 1 || instr.video.signed_b == 1;
- const Node first_pred = GetPredicateComparisonInteger(instr.vsetp.cond, sign, op_a, op_b);
- const Node second_pred = GetPredicate(instr.vsetp.pred39, false);
-
- const OperationCode combiner = GetPredicateCombiner(instr.vsetp.op);
-
- // Set the primary predicate to the result of Predicate OP SecondPredicate
- SetPredicate(bb, instr.vsetp.pred3, Operation(combiner, first_pred, second_pred));
-
- if (instr.vsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
- // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
- // if enabled
- const Node negate_pred = Operation(OperationCode::LogicalNegate, first_pred);
- SetPredicate(bb, instr.vsetp.pred0, Operation(combiner, negate_pred, second_pred));
- }
- break;
- }
- default:
- UNIMPLEMENTED_MSG("Unhandled video instruction: {}", opcode->get().GetName());
- }
-
- return pc;
-}
-
-Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed, VideoType type,
- u64 byte_height) {
- if (!is_chunk) {
- return BitfieldExtract(op, static_cast<u32>(byte_height * 8), 8);
- }
-
- switch (type) {
- case VideoType::Size16_Low:
- return BitfieldExtract(op, 0, 16);
- case VideoType::Size16_High:
- return BitfieldExtract(op, 16, 16);
- case VideoType::Size32:
- // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when this type is used
- // (1 * 1 + 0 == 0x5b800000). Until a better explanation is found: abort.
- UNIMPLEMENTED();
- return Immediate(0);
- case VideoType::Invalid:
- UNREACHABLE_MSG("Invalid instruction encoding");
- return Immediate(0);
- default:
- UNREACHABLE();
- return Immediate(0);
- }
-}
-
-void ShaderIR::DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr) {
- UNIMPLEMENTED_IF(!instr.vmnmx.is_op_b_register);
- UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatA() != VmnmxType::Bits32);
- UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatB() != VmnmxType::Bits32);
- UNIMPLEMENTED_IF(instr.vmnmx.is_src_a_signed != instr.vmnmx.is_src_b_signed);
- UNIMPLEMENTED_IF(instr.vmnmx.sat);
- UNIMPLEMENTED_IF(instr.generates_cc);
-
- Node op_a = GetRegister(instr.gpr8);
- Node op_b = GetRegister(instr.gpr20);
- Node op_c = GetRegister(instr.gpr39);
-
- const bool is_oper1_signed = instr.vmnmx.is_src_a_signed; // Stubbed
- const bool is_oper2_signed = instr.vmnmx.is_dest_signed;
-
- const auto operation_a = instr.vmnmx.mx ? OperationCode::IMax : OperationCode::IMin;
- Node value = SignedOperation(operation_a, is_oper1_signed, move(op_a), move(op_b));
-
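-    // The second stage combines the min/max result with the third operand according to the merge mode.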
- switch (instr.vmnmx.operation) {
- case VmnmxOperation::Mrg_16H:
- value = BitfieldInsert(move(op_c), move(value), 16, 16);
- break;
- case VmnmxOperation::Mrg_16L:
- value = BitfieldInsert(move(op_c), move(value), 0, 16);
- break;
- case VmnmxOperation::Mrg_8B0:
- value = BitfieldInsert(move(op_c), move(value), 0, 8);
- break;
- case VmnmxOperation::Mrg_8B2:
- value = BitfieldInsert(move(op_c), move(value), 16, 8);
- break;
- case VmnmxOperation::Acc:
- value = Operation(OperationCode::IAdd, move(value), move(op_c));
- break;
- case VmnmxOperation::Min:
- value = SignedOperation(OperationCode::IMin, is_oper2_signed, move(value), move(op_c));
- break;
- case VmnmxOperation::Max:
- value = SignedOperation(OperationCode::IMax, is_oper2_signed, move(value), move(op_c));
- break;
- case VmnmxOperation::Nop:
- break;
- default:
- UNREACHABLE();
- break;
- }
-
- SetRegister(bb, instr.gpr0, move(value));
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp
deleted file mode 100644
index 37433d783..000000000
--- a/src/video_core/shader/decode/warp.cpp
+++ /dev/null
@@ -1,117 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-using Tegra::Shader::Pred;
-using Tegra::Shader::ShuffleOperation;
-using Tegra::Shader::VoteOperation;
-
-namespace {
-
-OperationCode GetOperationCode(VoteOperation vote_op) {
- switch (vote_op) {
- case VoteOperation::All:
- return OperationCode::VoteAll;
- case VoteOperation::Any:
- return OperationCode::VoteAny;
- case VoteOperation::Eq:
- return OperationCode::VoteEqual;
- default:
- UNREACHABLE_MSG("Invalid vote operation={}", vote_op);
- return OperationCode::VoteAll;
- }
-}
-
-} // Anonymous namespace
-
-u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
-
- // Signal the backend that this shader uses warp instructions.
- uses_warps = true;
-
- switch (opcode->get().GetId()) {
- case OpCode::Id::VOTE: {
- const Node value = GetPredicate(instr.vote.value, instr.vote.negate_value != 0);
- const Node active = Operation(OperationCode::BallotThread, value);
- const Node vote = Operation(GetOperationCode(instr.vote.operation), value);
- SetRegister(bb, instr.gpr0, active);
- SetPredicate(bb, instr.vote.dest_pred, vote);
- break;
- }
- case OpCode::Id::SHFL: {
- Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm))
- : GetRegister(instr.gpr39);
- Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm))
- : GetRegister(instr.gpr20);
-
- Node thread_id = Operation(OperationCode::ThreadId);
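-        // The mask operand packs the lane clamp value in its low five bits and the segment mask starting at bit 8.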
- Node clamp = Operation(OperationCode::IBitwiseAnd, mask, Immediate(0x1FU));
- Node seg_mask = BitfieldExtract(mask, 8, 16);
-
- Node neg_seg_mask = Operation(OperationCode::IBitwiseNot, seg_mask);
- Node min_thread_id = Operation(OperationCode::IBitwiseAnd, thread_id, seg_mask);
- Node max_thread_id = Operation(OperationCode::IBitwiseOr, min_thread_id,
- Operation(OperationCode::IBitwiseAnd, clamp, neg_seg_mask));
-
- Node src_thread_id = [instr, index, neg_seg_mask, min_thread_id, thread_id] {
- switch (instr.shfl.operation) {
- case ShuffleOperation::Idx:
- return Operation(OperationCode::IBitwiseOr,
- Operation(OperationCode::IBitwiseAnd, index, neg_seg_mask),
- min_thread_id);
- case ShuffleOperation::Down:
- return Operation(OperationCode::IAdd, thread_id, index);
- case ShuffleOperation::Up:
- return Operation(OperationCode::IAdd, thread_id,
- Operation(OperationCode::INegate, index));
- case ShuffleOperation::Bfly:
- return Operation(OperationCode::IBitwiseXor, thread_id, index);
- }
- UNREACHABLE();
- return Immediate(0U);
- }();
-
- Node in_bounds = [instr, src_thread_id, min_thread_id, max_thread_id] {
- if (instr.shfl.operation == ShuffleOperation::Up) {
- return Operation(OperationCode::LogicalIGreaterEqual, src_thread_id, min_thread_id);
- } else {
- return Operation(OperationCode::LogicalILessEqual, src_thread_id, max_thread_id);
- }
- }();
-
- SetPredicate(bb, instr.shfl.pred48, in_bounds);
- SetRegister(
- bb, instr.gpr0,
- Operation(OperationCode::ShuffleIndexed, GetRegister(instr.gpr8), src_thread_id));
- break;
- }
- case OpCode::Id::FSWZADD: {
- UNIMPLEMENTED_IF(instr.fswzadd.ndv);
-
- Node op_a = GetRegister(instr.gpr8);
- Node op_b = GetRegister(instr.gpr20);
- Node mask = Immediate(static_cast<u32>(instr.fswzadd.swizzle));
- SetRegister(bb, instr.gpr0, Operation(OperationCode::FSwizzleAdd, op_a, op_b, mask));
- break;
- }
- default:
- UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName());
- break;
- }
-
- return pc;
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp
deleted file mode 100644
index 233b8fa42..000000000
--- a/src/video_core/shader/decode/xmad.cpp
+++ /dev/null
@@ -1,156 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-using Tegra::Shader::PredCondition;
-
-u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
-
- UNIMPLEMENTED_IF(instr.xmad.sign_a);
- UNIMPLEMENTED_IF(instr.xmad.sign_b);
- UNIMPLEMENTED_IF_MSG(instr.generates_cc,
- "Condition codes generation in XMAD is not implemented");
-
- Node op_a = GetRegister(instr.gpr8);
-
- // TODO(bunnei): Needs to be fixed once op_a or op_b is signed
- UNIMPLEMENTED_IF(instr.xmad.sign_a != instr.xmad.sign_b);
- const bool is_signed_a = instr.xmad.sign_a == 1;
- const bool is_signed_b = instr.xmad.sign_b == 1;
- const bool is_signed_c = is_signed_a;
-
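-    // Each XMAD encoding selects different merge/shift flags and different sources for operands B and C.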
- auto [is_merge, is_psl, is_high_b, mode, op_b_binding,
- op_c] = [&]() -> std::tuple<bool, bool, bool, Tegra::Shader::XmadMode, Node, Node> {
- switch (opcode->get().GetId()) {
- case OpCode::Id::XMAD_CR:
- return {instr.xmad.merge_56,
- instr.xmad.product_shift_left_second,
- instr.xmad.high_b,
- instr.xmad.mode_cbf,
- GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
- GetRegister(instr.gpr39)};
- case OpCode::Id::XMAD_RR:
- return {instr.xmad.merge_37, instr.xmad.product_shift_left, instr.xmad.high_b_rr,
- instr.xmad.mode, GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
- case OpCode::Id::XMAD_RC:
- return {false,
- false,
- instr.xmad.high_b,
- instr.xmad.mode_cbf,
- GetRegister(instr.gpr39),
- GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
- case OpCode::Id::XMAD_IMM:
- return {instr.xmad.merge_37,
- instr.xmad.product_shift_left,
- false,
- instr.xmad.mode,
- Immediate(static_cast<u32>(instr.xmad.imm20_16)),
- GetRegister(instr.gpr39)};
- default:
- UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName());
- return {false, false, false, Tegra::Shader::XmadMode::None, Immediate(0), Immediate(0)};
- }
- }();
-
- op_a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(op_a),
- instr.xmad.high_a ? Immediate(16) : Immediate(0), Immediate(16));
-
- const Node original_b = op_b_binding;
- const Node op_b =
- SignedOperation(OperationCode::IBitfieldExtract, is_signed_b, std::move(op_b_binding),
- is_high_b ? Immediate(16) : Immediate(0), Immediate(16));
-
-    // sign_a and sign_b were already checked to be equal above, so either one can be used here.
- Node product = SignedOperation(OperationCode::IMul, is_signed_a, op_a, op_b);
- if (is_psl) {
- product =
- SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_a, product, Immediate(16));
- }
- SetTemporary(bb, 0, product);
- product = GetTemporary(0);
-
- Node original_c = op_c;
-    const Tegra::Shader::XmadMode set_mode = mode; // Workaround for a clang compile error
- op_c = [&] {
- switch (set_mode) {
- case Tegra::Shader::XmadMode::None:
- return original_c;
- case Tegra::Shader::XmadMode::CLo:
- return BitfieldExtract(std::move(original_c), 0, 16);
- case Tegra::Shader::XmadMode::CHi:
- return BitfieldExtract(std::move(original_c), 16, 16);
- case Tegra::Shader::XmadMode::CBcc: {
- Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b,
- original_b, Immediate(16));
- return SignedOperation(OperationCode::IAdd, is_signed_c, std::move(original_c),
- std::move(shifted_b));
- }
- case Tegra::Shader::XmadMode::CSfu: {
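-            // CSfu: subtract 0x10000 from C once per negative 16-bit operand, unless either operand is zero.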
- const Node comp_a =
- GetPredicateComparisonInteger(PredCondition::EQ, is_signed_a, op_a, Immediate(0));
- const Node comp_b =
- GetPredicateComparisonInteger(PredCondition::EQ, is_signed_b, op_b, Immediate(0));
- const Node comp = Operation(OperationCode::LogicalOr, comp_a, comp_b);
-
- const Node comp_minus_a = GetPredicateComparisonInteger(
- PredCondition::NE, is_signed_a,
- SignedOperation(OperationCode::IBitwiseAnd, is_signed_a, op_a,
- Immediate(0x80000000)),
- Immediate(0));
- const Node comp_minus_b = GetPredicateComparisonInteger(
- PredCondition::NE, is_signed_b,
- SignedOperation(OperationCode::IBitwiseAnd, is_signed_b, op_b,
- Immediate(0x80000000)),
- Immediate(0));
-
- Node new_c = Operation(
- OperationCode::Select, comp_minus_a,
- SignedOperation(OperationCode::IAdd, is_signed_c, original_c, Immediate(-65536)),
- original_c);
- new_c = Operation(
- OperationCode::Select, comp_minus_b,
- SignedOperation(OperationCode::IAdd, is_signed_c, new_c, Immediate(-65536)),
- std::move(new_c));
-
- return Operation(OperationCode::Select, comp, original_c, std::move(new_c));
- }
- default:
- UNREACHABLE();
- return Immediate(0);
- }
- }();
-
- SetTemporary(bb, 1, op_c);
- op_c = GetTemporary(1);
-
-    // TODO(Rodrigo): Use an appropriate sign for this operation
- Node sum = SignedOperation(OperationCode::IAdd, is_signed_a, product, std::move(op_c));
- SetTemporary(bb, 2, sum);
- sum = GetTemporary(2);
- if (is_merge) {
- const Node a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(sum),
- Immediate(0), Immediate(16));
- const Node b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, original_b,
- Immediate(16));
- sum = SignedOperation(OperationCode::IBitwiseOr, is_signed_a, a, b);
- }
-
- SetInternalFlagsFromInteger(bb, sum, instr.generates_cc);
- SetRegister(bb, instr.gpr0, std::move(sum));
-
- return pc;
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/expr.cpp b/src/video_core/shader/expr.cpp
deleted file mode 100644
index 2647865d4..000000000
--- a/src/video_core/shader/expr.cpp
+++ /dev/null
@@ -1,93 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <memory>
-#include <variant>
-
-#include "video_core/shader/expr.h"
-
-namespace VideoCommon::Shader {
-namespace {
-bool ExprIsBoolean(const Expr& expr) {
- return std::holds_alternative<ExprBoolean>(*expr);
-}
-
-bool ExprBooleanGet(const Expr& expr) {
- return std::get_if<ExprBoolean>(expr.get())->value;
-}
-} // Anonymous namespace
-
-bool ExprAnd::operator==(const ExprAnd& b) const {
- return (*operand1 == *b.operand1) && (*operand2 == *b.operand2);
-}
-
-bool ExprAnd::operator!=(const ExprAnd& b) const {
- return !operator==(b);
-}
-
-bool ExprOr::operator==(const ExprOr& b) const {
- return (*operand1 == *b.operand1) && (*operand2 == *b.operand2);
-}
-
-bool ExprOr::operator!=(const ExprOr& b) const {
- return !operator==(b);
-}
-
-bool ExprNot::operator==(const ExprNot& b) const {
- return *operand1 == *b.operand1;
-}
-
-bool ExprNot::operator!=(const ExprNot& b) const {
- return !operator==(b);
-}
-
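-// Builds a logical NOT, collapsing a double negation into the original operand.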
-Expr MakeExprNot(Expr first) {
- if (std::holds_alternative<ExprNot>(*first)) {
- return std::get_if<ExprNot>(first.get())->operand1;
- }
- return MakeExpr<ExprNot>(std::move(first));
-}
-
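-// Builds a logical AND, folding constant booleans: (true && x) == x and (false && x) == false.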
-Expr MakeExprAnd(Expr first, Expr second) {
- if (ExprIsBoolean(first)) {
- return ExprBooleanGet(first) ? second : first;
- }
- if (ExprIsBoolean(second)) {
- return ExprBooleanGet(second) ? first : second;
- }
- return MakeExpr<ExprAnd>(std::move(first), std::move(second));
-}
-
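-// Builds a logical OR, folding constant booleans: (true || x) == true and (false || x) == x.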
-Expr MakeExprOr(Expr first, Expr second) {
- if (ExprIsBoolean(first)) {
- return ExprBooleanGet(first) ? first : second;
- }
- if (ExprIsBoolean(second)) {
- return ExprBooleanGet(second) ? second : first;
- }
- return MakeExpr<ExprOr>(std::move(first), std::move(second));
-}
-
-bool ExprAreEqual(const Expr& first, const Expr& second) {
- return (*first) == (*second);
-}
-
-bool ExprAreOpposite(const Expr& first, const Expr& second) {
- if (std::holds_alternative<ExprNot>(*first)) {
- return ExprAreEqual(std::get_if<ExprNot>(first.get())->operand1, second);
- }
- if (std::holds_alternative<ExprNot>(*second)) {
- return ExprAreEqual(std::get_if<ExprNot>(second.get())->operand1, first);
- }
- return false;
-}
-
-bool ExprIsTrue(const Expr& first) {
- if (ExprIsBoolean(first)) {
- return ExprBooleanGet(first);
- }
- return false;
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/expr.h b/src/video_core/shader/expr.h
deleted file mode 100644
index cda284c72..000000000
--- a/src/video_core/shader/expr.h
+++ /dev/null
@@ -1,156 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <memory>
-#include <variant>
-
-#include "video_core/engines/shader_bytecode.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::ConditionCode;
-using Tegra::Shader::Pred;
-
-class ExprAnd;
-class ExprBoolean;
-class ExprCondCode;
-class ExprGprEqual;
-class ExprNot;
-class ExprOr;
-class ExprPredicate;
-class ExprVar;
-
-using ExprData = std::variant<ExprVar, ExprCondCode, ExprPredicate, ExprNot, ExprOr, ExprAnd,
- ExprBoolean, ExprGprEqual>;
-using Expr = std::shared_ptr<ExprData>;
-
-class ExprAnd final {
-public:
- explicit ExprAnd(Expr a, Expr b) : operand1{std::move(a)}, operand2{std::move(b)} {}
-
- bool operator==(const ExprAnd& b) const;
- bool operator!=(const ExprAnd& b) const;
-
- Expr operand1;
- Expr operand2;
-};
-
-class ExprOr final {
-public:
- explicit ExprOr(Expr a, Expr b) : operand1{std::move(a)}, operand2{std::move(b)} {}
-
- bool operator==(const ExprOr& b) const;
- bool operator!=(const ExprOr& b) const;
-
- Expr operand1;
- Expr operand2;
-};
-
-class ExprNot final {
-public:
- explicit ExprNot(Expr a) : operand1{std::move(a)} {}
-
- bool operator==(const ExprNot& b) const;
- bool operator!=(const ExprNot& b) const;
-
- Expr operand1;
-};
-
-class ExprVar final {
-public:
- explicit ExprVar(u32 index) : var_index{index} {}
-
- bool operator==(const ExprVar& b) const {
- return var_index == b.var_index;
- }
-
- bool operator!=(const ExprVar& b) const {
- return !operator==(b);
- }
-
- u32 var_index;
-};
-
-class ExprPredicate final {
-public:
- explicit ExprPredicate(u32 predicate_) : predicate{predicate_} {}
-
- bool operator==(const ExprPredicate& b) const {
- return predicate == b.predicate;
- }
-
- bool operator!=(const ExprPredicate& b) const {
- return !operator==(b);
- }
-
- u32 predicate;
-};
-
-class ExprCondCode final {
-public:
- explicit ExprCondCode(ConditionCode condition_code) : cc{condition_code} {}
-
- bool operator==(const ExprCondCode& b) const {
- return cc == b.cc;
- }
-
- bool operator!=(const ExprCondCode& b) const {
- return !operator==(b);
- }
-
- ConditionCode cc;
-};
-
-class ExprBoolean final {
-public:
- explicit ExprBoolean(bool val) : value{val} {}
-
- bool operator==(const ExprBoolean& b) const {
- return value == b.value;
- }
-
- bool operator!=(const ExprBoolean& b) const {
- return !operator==(b);
- }
-
- bool value;
-};
-
-class ExprGprEqual final {
-public:
- explicit ExprGprEqual(u32 gpr_, u32 value_) : gpr{gpr_}, value{value_} {}
-
- bool operator==(const ExprGprEqual& b) const {
- return gpr == b.gpr && value == b.value;
- }
-
- bool operator!=(const ExprGprEqual& b) const {
- return !operator==(b);
- }
-
- u32 gpr;
- u32 value;
-};
-
-template <typename T, typename... Args>
-Expr MakeExpr(Args&&... args) {
- static_assert(std::is_convertible_v<T, ExprData>);
- return std::make_shared<ExprData>(T(std::forward<Args>(args)...));
-}
-
-bool ExprAreEqual(const Expr& first, const Expr& second);
-
-bool ExprAreOpposite(const Expr& first, const Expr& second);
-
-Expr MakeExprNot(Expr first);
-
-Expr MakeExprAnd(Expr first, Expr second);
-
-Expr MakeExprOr(Expr first, Expr second);
-
-bool ExprIsTrue(const Expr& first);
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/memory_util.cpp b/src/video_core/shader/memory_util.cpp
deleted file mode 100644
index e18ccba8e..000000000
--- a/src/video_core/shader/memory_util.cpp
+++ /dev/null
@@ -1,76 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <cstddef>
-
-#include <boost/container_hash/hash.hpp>
-
-#include "common/common_types.h"
-#include "core/core.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/memory_manager.h"
-#include "video_core/shader/memory_util.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d,
- Tegra::Engines::Maxwell3D::Regs::ShaderProgram program) {
- const auto& shader_config{maxwell3d.regs.shader_config[static_cast<std::size_t>(program)]};
- return maxwell3d.regs.code_address.CodeAddress() + shader_config.offset;
-}
-
-bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
- // Sched instructions appear once every 4 instructions.
- constexpr std::size_t SchedPeriod = 4;
- const std::size_t absolute_offset = offset - main_offset;
- return (absolute_offset % SchedPeriod) == 0;
-}
-
-std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute) {
- // This is the encoded version of BRA that jumps to itself. All Nvidia
- // shaders end with one.
- static constexpr u64 SELF_JUMPING_BRANCH = 0xE2400FFFFF07000FULL;
- static constexpr u64 MASK = 0xFFFFFFFFFF7FFFFFULL;
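-    // MASK clears a single encoding bit before the comparison, so encodings differing only in that bit still match.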
-
- const std::size_t start_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
- std::size_t offset = start_offset;
- while (offset < program.size()) {
- const u64 instruction = program[offset];
- if (!IsSchedInstruction(offset, start_offset)) {
- if ((instruction & MASK) == SELF_JUMPING_BRANCH) {
- // End on Maxwell's "nop" instruction
- break;
- }
- if (instruction == 0) {
- break;
- }
- }
- ++offset;
- }
- // The last instruction is included in the program size
- return std::min(offset + 1, program.size());
-}
-
-ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_addr,
- const u8* host_ptr, bool is_compute) {
- ProgramCode code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
- ASSERT_OR_EXECUTE(host_ptr != nullptr, { return code; });
- memory_manager.ReadBlockUnsafe(gpu_addr, code.data(), code.size() * sizeof(u64));
- code.resize(CalculateProgramSize(code, is_compute));
- return code;
-}
-
-u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code,
- const ProgramCode& code_b) {
- size_t unique_identifier = boost::hash_value(code);
- if (is_a) {
-        // VertexA programs are composed of two program streams, so both are hashed
- boost::hash_combine(unique_identifier, boost::hash_value(code_b));
- }
- return static_cast<u64>(unique_identifier);
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/memory_util.h b/src/video_core/shader/memory_util.h
deleted file mode 100644
index 4624d38e6..000000000
--- a/src/video_core/shader/memory_util.h
+++ /dev/null
@@ -1,43 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <cstddef>
-#include <vector>
-
-#include "common/common_types.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/engines/shader_type.h"
-
-namespace Tegra {
-class MemoryManager;
-}
-
-namespace VideoCommon::Shader {
-
-using ProgramCode = std::vector<u64>;
-
-constexpr u32 STAGE_MAIN_OFFSET = 10;
-constexpr u32 KERNEL_MAIN_OFFSET = 0;
-
-/// Gets the address for the specified shader stage program
-GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d,
- Tegra::Engines::Maxwell3D::Regs::ShaderProgram program);
-
-/// Returns whether the given instruction offset corresponds to a scheduler instruction
-bool IsSchedInstruction(std::size_t offset, std::size_t main_offset);
-
-/// Calculates the size of a program stream
-std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute);
-
-/// Gets the shader program code from memory for the specified address
-ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_addr,
- const u8* host_ptr, bool is_compute);
-
-/// Hashes one (or two) program streams
-u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code,
- const ProgramCode& code_b = {});
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
deleted file mode 100644
index b54d33763..000000000
--- a/src/video_core/shader/node.h
+++ /dev/null
@@ -1,701 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <array>
-#include <cstddef>
-#include <memory>
-#include <optional>
-#include <string>
-#include <tuple>
-#include <utility>
-#include <variant>
-#include <vector>
-
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-
-namespace VideoCommon::Shader {
-
-enum class OperationCode {
- Assign, /// (float& dest, float src) -> void
-
- Select, /// (MetaArithmetic, bool pred, float a, float b) -> float
-
- FAdd, /// (MetaArithmetic, float a, float b) -> float
- FMul, /// (MetaArithmetic, float a, float b) -> float
- FDiv, /// (MetaArithmetic, float a, float b) -> float
- FFma, /// (MetaArithmetic, float a, float b, float c) -> float
- FNegate, /// (MetaArithmetic, float a) -> float
- FAbsolute, /// (MetaArithmetic, float a) -> float
- FClamp, /// (MetaArithmetic, float value, float min, float max) -> float
- FCastHalf0, /// (MetaArithmetic, f16vec2 a) -> float
- FCastHalf1, /// (MetaArithmetic, f16vec2 a) -> float
- FMin, /// (MetaArithmetic, float a, float b) -> float
- FMax, /// (MetaArithmetic, float a, float b) -> float
- FCos, /// (MetaArithmetic, float a) -> float
- FSin, /// (MetaArithmetic, float a) -> float
- FExp2, /// (MetaArithmetic, float a) -> float
- FLog2, /// (MetaArithmetic, float a) -> float
- FInverseSqrt, /// (MetaArithmetic, float a) -> float
- FSqrt, /// (MetaArithmetic, float a) -> float
- FRoundEven, /// (MetaArithmetic, float a) -> float
- FFloor, /// (MetaArithmetic, float a) -> float
- FCeil, /// (MetaArithmetic, float a) -> float
- FTrunc, /// (MetaArithmetic, float a) -> float
- FCastInteger, /// (MetaArithmetic, int a) -> float
- FCastUInteger, /// (MetaArithmetic, uint a) -> float
- FSwizzleAdd, /// (float a, float b, uint mask) -> float
-
- IAdd, /// (MetaArithmetic, int a, int b) -> int
- IMul, /// (MetaArithmetic, int a, int b) -> int
- IDiv, /// (MetaArithmetic, int a, int b) -> int
- INegate, /// (MetaArithmetic, int a) -> int
- IAbsolute, /// (MetaArithmetic, int a) -> int
- IMin, /// (MetaArithmetic, int a, int b) -> int
- IMax, /// (MetaArithmetic, int a, int b) -> int
- ICastFloat, /// (MetaArithmetic, float a) -> int
- ICastUnsigned, /// (MetaArithmetic, uint a) -> int
- ILogicalShiftLeft, /// (MetaArithmetic, int a, uint b) -> int
- ILogicalShiftRight, /// (MetaArithmetic, int a, uint b) -> int
- IArithmeticShiftRight, /// (MetaArithmetic, int a, uint b) -> int
- IBitwiseAnd, /// (MetaArithmetic, int a, int b) -> int
- IBitwiseOr, /// (MetaArithmetic, int a, int b) -> int
- IBitwiseXor, /// (MetaArithmetic, int a, int b) -> int
- IBitwiseNot, /// (MetaArithmetic, int a) -> int
- IBitfieldInsert, /// (MetaArithmetic, int base, int insert, int offset, int bits) -> int
-    IBitfieldExtract,      /// (MetaArithmetic, int value, int offset, int bits) -> int
- IBitCount, /// (MetaArithmetic, int) -> int
- IBitMSB, /// (MetaArithmetic, int) -> int
-
- UAdd, /// (MetaArithmetic, uint a, uint b) -> uint
- UMul, /// (MetaArithmetic, uint a, uint b) -> uint
- UDiv, /// (MetaArithmetic, uint a, uint b) -> uint
- UMin, /// (MetaArithmetic, uint a, uint b) -> uint
- UMax, /// (MetaArithmetic, uint a, uint b) -> uint
- UCastFloat, /// (MetaArithmetic, float a) -> uint
- UCastSigned, /// (MetaArithmetic, int a) -> uint
- ULogicalShiftLeft, /// (MetaArithmetic, uint a, uint b) -> uint
- ULogicalShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint
- UArithmeticShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint
- UBitwiseAnd, /// (MetaArithmetic, uint a, uint b) -> uint
- UBitwiseOr, /// (MetaArithmetic, uint a, uint b) -> uint
- UBitwiseXor, /// (MetaArithmetic, uint a, uint b) -> uint
- UBitwiseNot, /// (MetaArithmetic, uint a) -> uint
- UBitfieldInsert, /// (MetaArithmetic, uint base, uint insert, int offset, int bits) -> uint
-    UBitfieldExtract,      /// (MetaArithmetic, uint value, int offset, int bits) -> uint
- UBitCount, /// (MetaArithmetic, uint) -> uint
- UBitMSB, /// (MetaArithmetic, uint) -> uint
-
- HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
- HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
- HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2
- HAbsolute, /// (f16vec2 a) -> f16vec2
- HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2
- HClamp, /// (f16vec2 src, float min, float max) -> f16vec2
- HCastFloat, /// (MetaArithmetic, float a) -> f16vec2
- HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2
- HMergeF32, /// (f16vec2 src) -> float
- HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2
- HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2
- HPack2, /// (float a, float b) -> f16vec2
-
- LogicalAssign, /// (bool& dst, bool src) -> void
- LogicalAnd, /// (bool a, bool b) -> bool
- LogicalOr, /// (bool a, bool b) -> bool
- LogicalXor, /// (bool a, bool b) -> bool
- LogicalNegate, /// (bool a) -> bool
- LogicalPick2, /// (bool2 pair, uint index) -> bool
- LogicalAnd2, /// (bool2 a) -> bool
-
- LogicalFOrdLessThan, /// (float a, float b) -> bool
- LogicalFOrdEqual, /// (float a, float b) -> bool
- LogicalFOrdLessEqual, /// (float a, float b) -> bool
- LogicalFOrdGreaterThan, /// (float a, float b) -> bool
- LogicalFOrdNotEqual, /// (float a, float b) -> bool
- LogicalFOrdGreaterEqual, /// (float a, float b) -> bool
- LogicalFOrdered, /// (float a, float b) -> bool
- LogicalFUnordered, /// (float a, float b) -> bool
- LogicalFUnordLessThan, /// (float a, float b) -> bool
- LogicalFUnordEqual, /// (float a, float b) -> bool
- LogicalFUnordLessEqual, /// (float a, float b) -> bool
- LogicalFUnordGreaterThan, /// (float a, float b) -> bool
- LogicalFUnordNotEqual, /// (float a, float b) -> bool
- LogicalFUnordGreaterEqual, /// (float a, float b) -> bool
-
- LogicalILessThan, /// (int a, int b) -> bool
- LogicalIEqual, /// (int a, int b) -> bool
- LogicalILessEqual, /// (int a, int b) -> bool
- LogicalIGreaterThan, /// (int a, int b) -> bool
- LogicalINotEqual, /// (int a, int b) -> bool
- LogicalIGreaterEqual, /// (int a, int b) -> bool
-
- LogicalULessThan, /// (uint a, uint b) -> bool
- LogicalUEqual, /// (uint a, uint b) -> bool
- LogicalULessEqual, /// (uint a, uint b) -> bool
- LogicalUGreaterThan, /// (uint a, uint b) -> bool
- LogicalUNotEqual, /// (uint a, uint b) -> bool
- LogicalUGreaterEqual, /// (uint a, uint b) -> bool
-
- LogicalAddCarry, /// (uint a, uint b) -> bool
-
- Logical2HLessThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
- Logical2HEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
- Logical2HLessEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
- Logical2HGreaterThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
- Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
- Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
- Logical2HLessThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
- Logical2HEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
- Logical2HLessEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
- Logical2HGreaterThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
- Logical2HNotEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
- Logical2HGreaterEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
-
- Texture, /// (MetaTexture, float[N] coords) -> float4
- TextureLod, /// (MetaTexture, float[N] coords) -> float4
- TextureGather, /// (MetaTexture, float[N] coords) -> float4
- TextureQueryDimensions, /// (MetaTexture, float a) -> float4
- TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4
- TexelFetch, /// (MetaTexture, int[N], int) -> float4
- TextureGradient, /// (MetaTexture, float[N] coords, float[N*2] derivates) -> float4
-
- ImageLoad, /// (MetaImage, int[N] coords) -> void
- ImageStore, /// (MetaImage, int[N] coords) -> void
-
- AtomicImageAdd, /// (MetaImage, int[N] coords) -> void
- AtomicImageAnd, /// (MetaImage, int[N] coords) -> void
- AtomicImageOr, /// (MetaImage, int[N] coords) -> void
- AtomicImageXor, /// (MetaImage, int[N] coords) -> void
- AtomicImageExchange, /// (MetaImage, int[N] coords) -> void
-
- AtomicUExchange, /// (memory, uint) -> uint
- AtomicUAdd, /// (memory, uint) -> uint
- AtomicUMin, /// (memory, uint) -> uint
- AtomicUMax, /// (memory, uint) -> uint
- AtomicUAnd, /// (memory, uint) -> uint
- AtomicUOr, /// (memory, uint) -> uint
- AtomicUXor, /// (memory, uint) -> uint
-
- AtomicIExchange, /// (memory, int) -> int
- AtomicIAdd, /// (memory, int) -> int
- AtomicIMin, /// (memory, int) -> int
- AtomicIMax, /// (memory, int) -> int
- AtomicIAnd, /// (memory, int) -> int
- AtomicIOr, /// (memory, int) -> int
- AtomicIXor, /// (memory, int) -> int
-
- ReduceUAdd, /// (memory, uint) -> void
- ReduceUMin, /// (memory, uint) -> void
- ReduceUMax, /// (memory, uint) -> void
- ReduceUAnd, /// (memory, uint) -> void
- ReduceUOr, /// (memory, uint) -> void
- ReduceUXor, /// (memory, uint) -> void
-
- ReduceIAdd, /// (memory, int) -> void
- ReduceIMin, /// (memory, int) -> void
- ReduceIMax, /// (memory, int) -> void
- ReduceIAnd, /// (memory, int) -> void
- ReduceIOr, /// (memory, int) -> void
- ReduceIXor, /// (memory, int) -> void
-
- Branch, /// (uint branch_target) -> void
- BranchIndirect, /// (uint branch_target) -> void
- PushFlowStack, /// (uint branch_target) -> void
- PopFlowStack, /// () -> void
- Exit, /// () -> void
- Discard, /// () -> void
-
- EmitVertex, /// () -> void
- EndPrimitive, /// () -> void
-
- InvocationId, /// () -> int
- YNegate, /// () -> float
- LocalInvocationIdX, /// () -> uint
- LocalInvocationIdY, /// () -> uint
- LocalInvocationIdZ, /// () -> uint
- WorkGroupIdX, /// () -> uint
- WorkGroupIdY, /// () -> uint
- WorkGroupIdZ, /// () -> uint
-
- BallotThread, /// (bool) -> uint
- VoteAll, /// (bool) -> bool
- VoteAny, /// (bool) -> bool
- VoteEqual, /// (bool) -> bool
-
- ThreadId, /// () -> uint
- ThreadEqMask, /// () -> uint
- ThreadGeMask, /// () -> uint
- ThreadGtMask, /// () -> uint
- ThreadLeMask, /// () -> uint
- ThreadLtMask, /// () -> uint
- ShuffleIndexed, /// (uint value, uint index) -> uint
-
- Barrier, /// () -> void
- MemoryBarrierGroup, /// () -> void
- MemoryBarrierGlobal, /// () -> void
-
- Amount,
-};
-
-enum class InternalFlag {
- Zero = 0,
- Sign = 1,
- Carry = 2,
- Overflow = 3,
- Amount = 4,
-};
-
-enum class MetaStackClass {
- Ssy,
- Pbk,
-};
-
-class OperationNode;
-class ConditionalNode;
-class GprNode;
-class CustomVarNode;
-class ImmediateNode;
-class InternalFlagNode;
-class PredicateNode;
-class AbufNode;
-class CbufNode;
-class LmemNode;
-class PatchNode;
-class SmemNode;
-class GmemNode;
-class CommentNode;
-
-using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, CustomVarNode, ImmediateNode,
- InternalFlagNode, PredicateNode, AbufNode, PatchNode, CbufNode,
- LmemNode, SmemNode, GmemNode, CommentNode>;
-using Node = std::shared_ptr<NodeData>;
-using Node4 = std::array<Node, 4>;
-using NodeBlock = std::vector<Node>;
-
-struct ArraySamplerNode;
-struct BindlessSamplerNode;
-struct SeparateSamplerNode;
-
-using TrackSamplerData = std::variant<BindlessSamplerNode, SeparateSamplerNode, ArraySamplerNode>;
-using TrackSampler = std::shared_ptr<TrackSamplerData>;
-
-struct SamplerEntry {
- /// Bound samplers constructor
- explicit SamplerEntry(u32 index_, u32 offset_, Tegra::Shader::TextureType type_, bool is_array_,
- bool is_shadow_, bool is_buffer_, bool is_indexed_)
- : index{index_}, offset{offset_}, type{type_}, is_array{is_array_}, is_shadow{is_shadow_},
- is_buffer{is_buffer_}, is_indexed{is_indexed_} {}
-
- /// Separate sampler constructor
- explicit SamplerEntry(u32 index_, std::pair<u32, u32> offsets, std::pair<u32, u32> buffers,
- Tegra::Shader::TextureType type_, bool is_array_, bool is_shadow_,
- bool is_buffer_)
- : index{index_}, offset{offsets.first}, secondary_offset{offsets.second},
- buffer{buffers.first}, secondary_buffer{buffers.second}, type{type_}, is_array{is_array_},
- is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_separated{true} {}
-
- /// Bindless samplers constructor
- explicit SamplerEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::TextureType type_,
- bool is_array_, bool is_shadow_, bool is_buffer_, bool is_indexed_)
- : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_array{is_array_},
- is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_bindless{true}, is_indexed{is_indexed_} {
- }
-
-    u32 index = 0;            ///< Emulated index given to this sampler.
- u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read.
- u32 secondary_offset = 0; ///< Secondary offset in the const buffer.
- u32 buffer = 0; ///< Buffer where the bindless sampler is read.
- u32 secondary_buffer = 0; ///< Secondary buffer where the bindless sampler is read.
- u32 size = 1; ///< Size of the sampler.
-
- Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
- bool is_array = false; ///< Whether the texture is being sampled as an array texture or not.
- bool is_shadow = false; ///< Whether the texture is being sampled as a depth texture or not.
- bool is_buffer = false; ///< Whether the texture is a texture buffer without sampler.
- bool is_bindless = false; ///< Whether this sampler belongs to a bindless texture or not.
- bool is_indexed = false; ///< Whether this sampler is an indexed array of textures.
-    bool is_separated = false; ///< Whether the image and sampler are separated or not.
-};
-
-/// Represents a tracked bindless sampler into a direct const buffer
-struct ArraySamplerNode {
- u32 index;
- u32 base_offset;
- u32 bindless_var;
-};
-
-/// Represents a tracked separate sampler image pair that was folded statically
-struct SeparateSamplerNode {
- std::pair<u32, u32> indices;
- std::pair<u32, u32> offsets;
-};
-
-/// Represents a tracked bindless sampler into a direct const buffer
-struct BindlessSamplerNode {
- u32 index;
- u32 offset;
-};
-
-struct ImageEntry {
-public:
- /// Bound images constructor
- explicit ImageEntry(u32 index_, u32 offset_, Tegra::Shader::ImageType type_)
- : index{index_}, offset{offset_}, type{type_} {}
-
-    /// Bindless images constructor
- explicit ImageEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::ImageType type_)
- : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_bindless{true} {}
-
- void MarkWrite() {
- is_written = true;
- }
-
- void MarkRead() {
- is_read = true;
- }
-
- void MarkAtomic() {
- MarkWrite();
- MarkRead();
- is_atomic = true;
- }
-
- u32 index = 0;
- u32 offset = 0;
- u32 buffer = 0;
-
- Tegra::Shader::ImageType type{};
- bool is_bindless = false;
- bool is_written = false;
- bool is_read = false;
- bool is_atomic = false;
-};
-
-struct GlobalMemoryBase {
- u32 cbuf_index = 0;
- u32 cbuf_offset = 0;
-
- [[nodiscard]] bool operator<(const GlobalMemoryBase& rhs) const {
- return std::tie(cbuf_index, cbuf_offset) < std::tie(rhs.cbuf_index, rhs.cbuf_offset);
- }
-};
-
-/// Parameters describing an arithmetic operation
-struct MetaArithmetic {
-    bool precise{}; ///< Whether the operation must remain precise (cannot be optimized away)
-};
-
-/// Parameters describing a texture sampler
-struct MetaTexture {
- SamplerEntry sampler;
- Node array;
- Node depth_compare;
- std::vector<Node> aoffi;
- std::vector<Node> ptp;
- std::vector<Node> derivates;
- Node bias;
- Node lod;
- Node component;
- u32 element{};
- Node index;
-};
-
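-/// Parameters describing an image operation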
-struct MetaImage {
- const ImageEntry& image;
- std::vector<Node> values;
- u32 element{};
-};
-
-/// Parameters that modify an operation but are not part of any particular operand
-using Meta =
- std::variant<MetaArithmetic, MetaTexture, MetaImage, MetaStackClass, Tegra::Shader::HalfType>;
-
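-/// Lets a node optionally reference an auxiliary "amend" node by index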
-class AmendNode {
-public:
- [[nodiscard]] std::optional<std::size_t> GetAmendIndex() const {
- if (amend_index == amend_null_index) {
- return std::nullopt;
- }
- return {amend_index};
- }
-
- void SetAmendIndex(std::size_t index) {
- amend_index = index;
- }
-
- void ClearAmend() {
- amend_index = amend_null_index;
- }
-
-private:
- static constexpr std::size_t amend_null_index = 0xFFFFFFFFFFFFFFFFULL;
- std::size_t amend_index{amend_null_index};
-};
-
-/// Holds any kind of operation that can be done in the IR
-class OperationNode final : public AmendNode {
-public:
- explicit OperationNode(OperationCode code_) : OperationNode(code_, Meta{}) {}
-
- explicit OperationNode(OperationCode code_, Meta meta_)
- : OperationNode(code_, std::move(meta_), std::vector<Node>{}) {}
-
- explicit OperationNode(OperationCode code_, std::vector<Node> operands_)
- : OperationNode(code_, Meta{}, std::move(operands_)) {}
-
- explicit OperationNode(OperationCode code_, Meta meta_, std::vector<Node> operands_)
- : code{code_}, meta{std::move(meta_)}, operands{std::move(operands_)} {}
-
- template <typename... Args>
- explicit OperationNode(OperationCode code_, Meta meta_, Args&&... operands_)
- : code{code_}, meta{std::move(meta_)}, operands{operands_...} {}
-
- [[nodiscard]] OperationCode GetCode() const {
- return code;
- }
-
- [[nodiscard]] const Meta& GetMeta() const {
- return meta;
- }
-
- [[nodiscard]] std::size_t GetOperandsCount() const {
- return operands.size();
- }
-
- [[nodiscard]] const Node& operator[](std::size_t operand_index) const {
- return operands.at(operand_index);
- }
-
-private:
- OperationCode code{};
- Meta meta{};
- std::vector<Node> operands;
-};
-
-/// Encloses code that is executed conditionally, guarded by a node that returns a boolean
-class ConditionalNode final : public AmendNode {
-public:
- explicit ConditionalNode(Node condition_, std::vector<Node>&& code_)
- : condition{std::move(condition_)}, code{std::move(code_)} {}
-
- [[nodiscard]] const Node& GetCondition() const {
- return condition;
- }
-
- [[nodiscard]] const std::vector<Node>& GetCode() const {
- return code;
- }
-
-private:
- Node condition; ///< Condition to be satisfied
- std::vector<Node> code; ///< Code to execute
-};
-
-/// A general purpose register
-class GprNode final {
-public:
- explicit constexpr GprNode(Tegra::Shader::Register index_) : index{index_} {}
-
- [[nodiscard]] constexpr u32 GetIndex() const {
- return static_cast<u32>(index);
- }
-
-private:
- Tegra::Shader::Register index{};
-};
-
-/// A custom variable
-class CustomVarNode final {
-public:
- explicit constexpr CustomVarNode(u32 index_) : index{index_} {}
-
- [[nodiscard]] constexpr u32 GetIndex() const {
- return index;
- }
-
-private:
- u32 index{};
-};
-
-/// A 32-bit value that represents an immediate value
-class ImmediateNode final {
-public:
- explicit constexpr ImmediateNode(u32 value_) : value{value_} {}
-
- [[nodiscard]] constexpr u32 GetValue() const {
- return value;
- }
-
-private:
- u32 value{};
-};
-
-/// One of Maxwell's internal flags
-class InternalFlagNode final {
-public:
- explicit constexpr InternalFlagNode(InternalFlag flag_) : flag{flag_} {}
-
- [[nodiscard]] constexpr InternalFlag GetFlag() const {
- return flag;
- }
-
-private:
- InternalFlag flag{};
-};
-
-/// A predicate register; it can be negated without additional nodes
-class PredicateNode final {
-public:
- explicit constexpr PredicateNode(Tegra::Shader::Pred index_, bool negated_)
- : index{index_}, negated{negated_} {}
-
- [[nodiscard]] constexpr Tegra::Shader::Pred GetIndex() const {
- return index;
- }
-
- [[nodiscard]] constexpr bool IsNegated() const {
- return negated;
- }
-
-private:
- Tegra::Shader::Pred index{};
- bool negated{};
-};
-
-/// Attribute buffer memory (known as attributes or varyings in GLSL terms)
-class AbufNode final {
-public:
- // Initialize for standard attributes (index is explicit).
- explicit AbufNode(Tegra::Shader::Attribute::Index index_, u32 element_, Node buffer_ = {})
- : buffer{std::move(buffer_)}, index{index_}, element{element_} {}
-
- // Initialize for physical attributes (index is a variable value).
- explicit AbufNode(Node physical_address_, Node buffer_ = {})
- : physical_address{std::move(physical_address_)}, buffer{std::move(buffer_)} {}
-
- [[nodiscard]] Tegra::Shader::Attribute::Index GetIndex() const {
- return index;
- }
-
- [[nodiscard]] u32 GetElement() const {
- return element;
- }
-
- [[nodiscard]] const Node& GetBuffer() const {
- return buffer;
- }
-
- [[nodiscard]] bool IsPhysicalBuffer() const {
- return static_cast<bool>(physical_address);
- }
-
- [[nodiscard]] const Node& GetPhysicalAddress() const {
- return physical_address;
- }
-
-private:
- Node physical_address;
- Node buffer;
- Tegra::Shader::Attribute::Index index{};
- u32 element{};
-};
-
-/// Patch memory (used to communicate between tessellation stages).
-class PatchNode final {
-public:
- explicit constexpr PatchNode(u32 offset_) : offset{offset_} {}
-
- [[nodiscard]] constexpr u32 GetOffset() const {
- return offset;
- }
-
-private:
- u32 offset{};
-};
-
-/// Constant buffer node, usually mapped to uniform buffers in GLSL
-class CbufNode final {
-public:
- explicit CbufNode(u32 index_, Node offset_) : index{index_}, offset{std::move(offset_)} {}
-
- [[nodiscard]] u32 GetIndex() const {
- return index;
- }
-
- [[nodiscard]] const Node& GetOffset() const {
- return offset;
- }
-
-private:
- u32 index{};
- Node offset;
-};
-
-/// Local memory node
-class LmemNode final {
-public:
- explicit LmemNode(Node address_) : address{std::move(address_)} {}
-
- [[nodiscard]] const Node& GetAddress() const {
- return address;
- }
-
-private:
- Node address;
-};
-
-/// Shared memory node
-class SmemNode final {
-public:
- explicit SmemNode(Node address_) : address{std::move(address_)} {}
-
- [[nodiscard]] const Node& GetAddress() const {
- return address;
- }
-
-private:
- Node address;
-};
-
-/// Global memory node
-class GmemNode final {
-public:
- explicit GmemNode(Node real_address_, Node base_address_, const GlobalMemoryBase& descriptor_)
- : real_address{std::move(real_address_)}, base_address{std::move(base_address_)},
- descriptor{descriptor_} {}
-
- [[nodiscard]] const Node& GetRealAddress() const {
- return real_address;
- }
-
- [[nodiscard]] const Node& GetBaseAddress() const {
- return base_address;
- }
-
- [[nodiscard]] const GlobalMemoryBase& GetDescriptor() const {
- return descriptor;
- }
-
-private:
- Node real_address;
- Node base_address;
- GlobalMemoryBase descriptor;
-};
-
-/// Commentary, can be dropped
-class CommentNode final {
-public:
- explicit CommentNode(std::string text_) : text{std::move(text_)} {}
-
- [[nodiscard]] const std::string& GetText() const {
- return text;
- }
-
-private:
- std::string text;
-};
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp
deleted file mode 100644
index 6a5b6940d..000000000
--- a/src/video_core/shader/node_helper.cpp
+++ /dev/null
@@ -1,115 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <cstring>
-#include <vector>
-
-#include "common/common_types.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-Node Conditional(Node condition, std::vector<Node> code) {
- return MakeNode<ConditionalNode>(std::move(condition), std::move(code));
-}
-
-Node Comment(std::string text) {
- return MakeNode<CommentNode>(std::move(text));
-}
-
-Node Immediate(u32 value) {
- return MakeNode<ImmediateNode>(value);
-}
-
-Node Immediate(s32 value) {
- return Immediate(static_cast<u32>(value));
-}
-
-Node Immediate(f32 value) {
- u32 integral;
- std::memcpy(&integral, &value, sizeof(u32));
- return Immediate(integral);
-}
-
-OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed) {
- if (is_signed) {
- return operation_code;
- }
- switch (operation_code) {
- case OperationCode::FCastInteger:
- return OperationCode::FCastUInteger;
- case OperationCode::IAdd:
- return OperationCode::UAdd;
- case OperationCode::IMul:
- return OperationCode::UMul;
- case OperationCode::IDiv:
- return OperationCode::UDiv;
- case OperationCode::IMin:
- return OperationCode::UMin;
- case OperationCode::IMax:
- return OperationCode::UMax;
- case OperationCode::ICastFloat:
- return OperationCode::UCastFloat;
- case OperationCode::ICastUnsigned:
- return OperationCode::UCastSigned;
- case OperationCode::ILogicalShiftLeft:
- return OperationCode::ULogicalShiftLeft;
- case OperationCode::ILogicalShiftRight:
- return OperationCode::ULogicalShiftRight;
- case OperationCode::IArithmeticShiftRight:
- return OperationCode::UArithmeticShiftRight;
- case OperationCode::IBitwiseAnd:
- return OperationCode::UBitwiseAnd;
- case OperationCode::IBitwiseOr:
- return OperationCode::UBitwiseOr;
- case OperationCode::IBitwiseXor:
- return OperationCode::UBitwiseXor;
- case OperationCode::IBitwiseNot:
- return OperationCode::UBitwiseNot;
- case OperationCode::IBitfieldExtract:
- return OperationCode::UBitfieldExtract;
- case OperationCode::IBitfieldInsert:
- return OperationCode::UBitfieldInsert;
- case OperationCode::IBitCount:
- return OperationCode::UBitCount;
- case OperationCode::LogicalILessThan:
- return OperationCode::LogicalULessThan;
- case OperationCode::LogicalIEqual:
- return OperationCode::LogicalUEqual;
- case OperationCode::LogicalILessEqual:
- return OperationCode::LogicalULessEqual;
- case OperationCode::LogicalIGreaterThan:
- return OperationCode::LogicalUGreaterThan;
- case OperationCode::LogicalINotEqual:
- return OperationCode::LogicalUNotEqual;
- case OperationCode::LogicalIGreaterEqual:
- return OperationCode::LogicalUGreaterEqual;
- case OperationCode::AtomicIExchange:
- return OperationCode::AtomicUExchange;
- case OperationCode::AtomicIAdd:
- return OperationCode::AtomicUAdd;
- case OperationCode::AtomicIMin:
- return OperationCode::AtomicUMin;
- case OperationCode::AtomicIMax:
- return OperationCode::AtomicUMax;
- case OperationCode::AtomicIAnd:
- return OperationCode::AtomicUAnd;
- case OperationCode::AtomicIOr:
- return OperationCode::AtomicUOr;
- case OperationCode::AtomicIXor:
- return OperationCode::AtomicUXor;
- case OperationCode::INegate:
- UNREACHABLE_MSG("Can't negate an unsigned integer");
- return {};
- case OperationCode::IAbsolute:
- UNREACHABLE_MSG("Can't apply absolute to an unsigned integer");
- return {};
- default:
- UNREACHABLE_MSG("Unknown signed operation with code={}", operation_code);
- return {};
- }
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/node_helper.h b/src/video_core/shader/node_helper.h
deleted file mode 100644
index 1e0886185..000000000
--- a/src/video_core/shader/node_helper.h
+++ /dev/null
@@ -1,71 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <memory>
-#include <string>
-#include <tuple>
-#include <type_traits>
-#include <utility>
-#include <vector>
-
-#include "common/common_types.h"
-#include "video_core/shader/node.h"
-
-namespace VideoCommon::Shader {
-
-/// This arithmetic operation must remain precise and cannot be optimized away
-inline constexpr MetaArithmetic PRECISE = {true};
-/// This arithmetic operation can be optimized away
-inline constexpr MetaArithmetic NO_PRECISE = {false};
-
-/// Creates a conditional node
-Node Conditional(Node condition, std::vector<Node> code);
-
-/// Creates a commentary node
-Node Comment(std::string text);
-
-/// Creates a u32 immediate
-Node Immediate(u32 value);
-
-/// Creates a s32 immediate
-Node Immediate(s32 value);
-
-/// Creates a f32 immediate
-Node Immediate(f32 value);
-
-/// Converts a signed operation code to an unsigned operation code
-OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed);
-
-template <typename T, typename... Args>
-Node MakeNode(Args&&... args) {
- static_assert(std::is_convertible_v<T, NodeData>);
- return std::make_shared<NodeData>(T(std::forward<Args>(args)...));
-}
-
-template <typename T, typename... Args>
-TrackSampler MakeTrackSampler(Args&&... args) {
- static_assert(std::is_convertible_v<T, TrackSamplerData>);
- return std::make_shared<TrackSamplerData>(T{std::forward<Args>(args)...});
-}
-
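-/// Builds an OperationNode, forwarding an explicit Meta when the first argument provides one and defaulting it otherwise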
-template <typename... Args>
-Node Operation(OperationCode code, Args&&... args) {
- if constexpr (sizeof...(args) == 0) {
- return MakeNode<OperationNode>(code);
- } else if constexpr (std::is_convertible_v<std::tuple_element_t<0, std::tuple<Args...>>,
- Meta>) {
- return MakeNode<OperationNode>(code, std::forward<Args>(args)...);
- } else {
- return MakeNode<OperationNode>(code, Meta{}, std::forward<Args>(args)...);
- }
-}
-
-template <typename... Args>
-Node SignedOperation(OperationCode code, bool is_signed, Args&&... args) {
- return Operation(SignedToUnsignedCode(code, is_signed), std::forward<Args>(args)...);
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp
deleted file mode 100644
index 148d91fcb..000000000
--- a/src/video_core/shader/registry.cpp
+++ /dev/null
@@ -1,181 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <tuple>
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/kepler_compute.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/engines/shader_type.h"
-#include "video_core/shader/registry.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Engines::ConstBufferEngineInterface;
-using Tegra::Engines::SamplerDescriptor;
-using Tegra::Engines::ShaderType;
-
-namespace {
-
-GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) {
- if (shader_stage == ShaderType::Compute) {
- return {};
- }
-
- auto& graphics = dynamic_cast<Tegra::Engines::Maxwell3D&>(engine);
-
- return {
- .tfb_layouts = graphics.regs.tfb_layouts,
- .tfb_varying_locs = graphics.regs.tfb_varying_locs,
- .primitive_topology = graphics.regs.draw.topology,
- .tessellation_primitive = graphics.regs.tess_mode.prim,
- .tessellation_spacing = graphics.regs.tess_mode.spacing,
- .tfb_enabled = graphics.regs.tfb_enabled != 0,
- .tessellation_clockwise = graphics.regs.tess_mode.cw.Value() != 0,
- };
-}
-
-ComputeInfo MakeComputeInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) {
- if (shader_stage != ShaderType::Compute) {
- return {};
- }
-
- auto& compute = dynamic_cast<Tegra::Engines::KeplerCompute&>(engine);
- const auto& launch = compute.launch_description;
-
- return {
- .workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z},
- .shared_memory_size_in_words = launch.shared_alloc,
- .local_memory_size_in_words = launch.local_pos_alloc,
- };
-}
-
-} // Anonymous namespace
-
-Registry::Registry(ShaderType shader_stage, const SerializedRegistryInfo& info)
- : stage{shader_stage}, stored_guest_driver_profile{info.guest_driver_profile},
- bound_buffer{info.bound_buffer}, graphics_info{info.graphics}, compute_info{info.compute} {}
-
-Registry::Registry(ShaderType shader_stage, ConstBufferEngineInterface& engine_)
- : stage{shader_stage}, engine{&engine_}, bound_buffer{engine_.GetBoundBuffer()},
- graphics_info{MakeGraphicsInfo(shader_stage, engine_)}, compute_info{MakeComputeInfo(
- shader_stage, engine_)} {}
-
-Registry::~Registry() = default;
-
-std::optional<u32> Registry::ObtainKey(u32 buffer, u32 offset) {
- const std::pair<u32, u32> key = {buffer, offset};
- const auto iter = keys.find(key);
- if (iter != keys.end()) {
- return iter->second;
- }
- if (!engine) {
- return std::nullopt;
- }
- const u32 value = engine->AccessConstBuffer32(stage, buffer, offset);
- keys.emplace(key, value);
- return value;
-}
-
-std::optional<SamplerDescriptor> Registry::ObtainBoundSampler(u32 offset) {
- const u32 key = offset;
- const auto iter = bound_samplers.find(key);
- if (iter != bound_samplers.end()) {
- return iter->second;
- }
- if (!engine) {
- return std::nullopt;
- }
- const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset);
- bound_samplers.emplace(key, value);
- return value;
-}
-
-std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainSeparateSampler(
- std::pair<u32, u32> buffers, std::pair<u32, u32> offsets) {
- SeparateSamplerKey key;
- key.buffers = buffers;
- key.offsets = offsets;
- const auto iter = separate_samplers.find(key);
- if (iter != separate_samplers.end()) {
- return iter->second;
- }
- if (!engine) {
- return std::nullopt;
- }
-
- const u32 handle_1 = engine->AccessConstBuffer32(stage, key.buffers.first, key.offsets.first);
- const u32 handle_2 = engine->AccessConstBuffer32(stage, key.buffers.second, key.offsets.second);
- const SamplerDescriptor value = engine->AccessSampler(handle_1 | handle_2);
- separate_samplers.emplace(key, value);
- return value;
-}
-
-std::optional<SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer, u32 offset) {
- const std::pair key = {buffer, offset};
- const auto iter = bindless_samplers.find(key);
- if (iter != bindless_samplers.end()) {
- return iter->second;
- }
- if (!engine) {
- return std::nullopt;
- }
- const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset);
- bindless_samplers.emplace(key, value);
- return value;
-}
-
-void Registry::InsertKey(u32 buffer, u32 offset, u32 value) {
- keys.insert_or_assign({buffer, offset}, value);
-}
-
-void Registry::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) {
- bound_samplers.insert_or_assign(offset, sampler);
-}
-
-void Registry::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) {
- bindless_samplers.insert_or_assign({buffer, offset}, sampler);
-}
-
-bool Registry::IsConsistent() const {
- if (!engine) {
- return true;
- }
- return std::all_of(keys.begin(), keys.end(),
- [this](const auto& pair) {
- const auto [cbuf, offset] = pair.first;
- const auto value = pair.second;
- return value == engine->AccessConstBuffer32(stage, cbuf, offset);
- }) &&
- std::all_of(bound_samplers.begin(), bound_samplers.end(),
- [this](const auto& sampler) {
- const auto [key, value] = sampler;
- return value == engine->AccessBoundSampler(stage, key);
- }) &&
- std::all_of(bindless_samplers.begin(), bindless_samplers.end(),
- [this](const auto& sampler) {
- const auto [cbuf, offset] = sampler.first;
- const auto value = sampler.second;
- return value == engine->AccessBindlessSampler(stage, cbuf, offset);
- });
-}
-
-bool Registry::HasEqualKeys(const Registry& rhs) const {
- return std::tie(keys, bound_samplers, bindless_samplers) ==
- std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers);
-}
-
-const GraphicsInfo& Registry::GetGraphicsInfo() const {
- ASSERT(stage != Tegra::Engines::ShaderType::Compute);
- return graphics_info;
-}
-
-const ComputeInfo& Registry::GetComputeInfo() const {
- ASSERT(stage == Tegra::Engines::ShaderType::Compute);
- return compute_info;
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h
deleted file mode 100644
index 4bebefdde..000000000
--- a/src/video_core/shader/registry.h
+++ /dev/null
@@ -1,172 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <array>
-#include <optional>
-#include <type_traits>
-#include <unordered_map>
-#include <utility>
-
-#include "common/common_types.h"
-#include "common/hash.h"
-#include "video_core/engines/const_buffer_engine_interface.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/engines/shader_type.h"
-#include "video_core/guest_driver.h"
-
-namespace VideoCommon::Shader {
-
-struct SeparateSamplerKey {
- std::pair<u32, u32> buffers;
- std::pair<u32, u32> offsets;
-};
-
-} // namespace VideoCommon::Shader
-
-namespace std {
-
-template <>
-struct hash<VideoCommon::Shader::SeparateSamplerKey> {
- std::size_t operator()(const VideoCommon::Shader::SeparateSamplerKey& key) const noexcept {
- return std::hash<u32>{}(key.buffers.first ^ key.buffers.second ^ key.offsets.first ^
- key.offsets.second);
- }
-};
-
-template <>
-struct equal_to<VideoCommon::Shader::SeparateSamplerKey> {
- bool operator()(const VideoCommon::Shader::SeparateSamplerKey& lhs,
- const VideoCommon::Shader::SeparateSamplerKey& rhs) const noexcept {
- return lhs.buffers == rhs.buffers && lhs.offsets == rhs.offsets;
- }
-};
-
-} // namespace std
-
-namespace VideoCommon::Shader {
-
-using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>;
-using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>;
-using SeparateSamplerMap =
- std::unordered_map<SeparateSamplerKey, Tegra::Engines::SamplerDescriptor>;
-using BindlessSamplerMap =
- std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
-
-struct GraphicsInfo {
- using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-
- std::array<Maxwell::TransformFeedbackLayout, Maxwell::NumTransformFeedbackBuffers>
- tfb_layouts{};
- std::array<std::array<u8, 128>, Maxwell::NumTransformFeedbackBuffers> tfb_varying_locs{};
- Maxwell::PrimitiveTopology primitive_topology{};
- Maxwell::TessellationPrimitive tessellation_primitive{};
- Maxwell::TessellationSpacing tessellation_spacing{};
- bool tfb_enabled = false;
- bool tessellation_clockwise = false;
-};
-static_assert(std::is_trivially_copyable_v<GraphicsInfo> &&
- std::is_standard_layout_v<GraphicsInfo>);
-
-struct ComputeInfo {
- std::array<u32, 3> workgroup_size{};
- u32 shared_memory_size_in_words = 0;
- u32 local_memory_size_in_words = 0;
-};
-static_assert(std::is_trivially_copyable_v<ComputeInfo> && std::is_standard_layout_v<ComputeInfo>);
-
-struct SerializedRegistryInfo {
- VideoCore::GuestDriverProfile guest_driver_profile;
- u32 bound_buffer = 0;
- GraphicsInfo graphics;
- ComputeInfo compute;
-};
-
-/**
- * The Registry is a class used to interface the 3D and compute engines with the shader compiler.
- * With it, the shader can obtain required data from GPU state and store it for disk shader
- * compilation.
- */
-class Registry {
-public:
- explicit Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info);
-
- explicit Registry(Tegra::Engines::ShaderType shader_stage,
- Tegra::Engines::ConstBufferEngineInterface& engine_);
-
- ~Registry();
-
- /// Retrieves a key from the registry. If it is already registered, the stored value is returned;
- /// otherwise the value is obtained from maxwell3d, registered, and returned.
- std::optional<u32> ObtainKey(u32 buffer, u32 offset);
-
- std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset);
-
- std::optional<Tegra::Engines::SamplerDescriptor> ObtainSeparateSampler(
- std::pair<u32, u32> buffers, std::pair<u32, u32> offsets);
-
- std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
-
- /// Inserts a key.
- void InsertKey(u32 buffer, u32 offset, u32 value);
-
- /// Inserts a bound sampler key.
- void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler);
-
- /// Inserts a bindless sampler key.
- void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler);
-
- /// Checks keys and samplers against engine's current const buffers.
- /// Returns true if they are the same value, false otherwise.
- bool IsConsistent() const;
-
- /// Returns true if the keys are equal to the other ones in the registry.
- bool HasEqualKeys(const Registry& rhs) const;
-
- /// Returns graphics information from this shader
- const GraphicsInfo& GetGraphicsInfo() const;
-
- /// Returns compute information from this shader
- const ComputeInfo& GetComputeInfo() const;
-
- /// Gets the const buffer keys in the database.
- const KeyMap& GetKeys() const {
- return keys;
- }
-
- /// Gets samplers database.
- const BoundSamplerMap& GetBoundSamplers() const {
- return bound_samplers;
- }
-
- /// Gets bindless samplers database.
- const BindlessSamplerMap& GetBindlessSamplers() const {
- return bindless_samplers;
- }
-
- /// Gets bound buffer used on this shader
- u32 GetBoundBuffer() const {
- return bound_buffer;
- }
-
- /// Obtains access to the guest driver's profile.
- VideoCore::GuestDriverProfile& AccessGuestDriverProfile() {
- return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile;
- }
-
-private:
- const Tegra::Engines::ShaderType stage;
- VideoCore::GuestDriverProfile stored_guest_driver_profile;
- Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
- KeyMap keys;
- BoundSamplerMap bound_samplers;
- SeparateSamplerMap separate_samplers;
- BindlessSamplerMap bindless_samplers;
- u32 bound_buffer;
- GraphicsInfo graphics_info;
- ComputeInfo compute_info;
-};
-
-} // namespace VideoCommon::Shader
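
Registry::ObtainKey and the sampler lookups above share one memoization pattern: check the cache first, fall back to the live engine when one is attached, and remember the result so it can be serialized for disk shader compilation. Below is a minimal standalone sketch of that pattern, using a hypothetical FakeEngine and a packed 64-bit key in place of yuzu's Common::PairHash map:

#include <cstdint>
#include <iostream>
#include <optional>
#include <unordered_map>

// Hypothetical stand-in for the engine interface consulted on a cache miss.
struct FakeEngine {
    uint32_t AccessConstBuffer32(uint32_t buffer, uint32_t offset) const {
        return buffer * 1000 + offset; // placeholder value
    }
};

class KeyCache {
public:
    explicit KeyCache(const FakeEngine* engine_) : engine{engine_} {}

    // Mirrors Registry::ObtainKey: return the memoized value if present,
    // otherwise query the engine (when available) and remember the result.
    std::optional<uint32_t> ObtainKey(uint32_t buffer, uint32_t offset) {
        const uint64_t key = (static_cast<uint64_t>(buffer) << 32) | offset;
        if (const auto it = keys.find(key); it != keys.end()) {
            return it->second;
        }
        if (!engine) {
            return std::nullopt; // deserialized registry with no live engine attached
        }
        const uint32_t value = engine->AccessConstBuffer32(buffer, offset);
        keys.emplace(key, value);
        return value;
    }

private:
    const FakeEngine* engine = nullptr;
    std::unordered_map<uint64_t, uint32_t> keys;
};

int main() {
    FakeEngine engine;
    KeyCache cache{&engine};
    std::cout << *cache.ObtainKey(3, 16) << '\n'; // queried from the engine
    std::cout << *cache.ObtainKey(3, 16) << '\n'; // served from the cache
}
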
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
deleted file mode 100644
index a4987ffc6..000000000
--- a/src/video_core/shader/shader_ir.cpp
+++ /dev/null
@@ -1,464 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <array>
-#include <cmath>
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "common/logging/log.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/registry.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Attribute;
-using Tegra::Shader::Instruction;
-using Tegra::Shader::IpaMode;
-using Tegra::Shader::Pred;
-using Tegra::Shader::PredCondition;
-using Tegra::Shader::PredOperation;
-using Tegra::Shader::Register;
-
-ShaderIR::ShaderIR(const ProgramCode& program_code_, u32 main_offset_, CompilerSettings settings_,
- Registry& registry_)
- : program_code{program_code_}, main_offset{main_offset_}, settings{settings_}, registry{
- registry_} {
- Decode();
- PostDecode();
-}
-
-ShaderIR::~ShaderIR() = default;
-
-Node ShaderIR::GetRegister(Register reg) {
- if (reg != Register::ZeroIndex) {
- used_registers.insert(static_cast<u32>(reg));
- }
- return MakeNode<GprNode>(reg);
-}
-
-Node ShaderIR::GetCustomVariable(u32 id) {
- return MakeNode<CustomVarNode>(id);
-}
-
-Node ShaderIR::GetImmediate19(Instruction instr) {
- return Immediate(instr.alu.GetImm20_19());
-}
-
-Node ShaderIR::GetImmediate32(Instruction instr) {
- return Immediate(instr.alu.GetImm20_32());
-}
-
-Node ShaderIR::GetConstBuffer(u64 index_, u64 offset_) {
- const auto index = static_cast<u32>(index_);
- const auto offset = static_cast<u32>(offset_);
-
- used_cbufs.try_emplace(index).first->second.MarkAsUsed(offset);
-
- return MakeNode<CbufNode>(index, Immediate(offset));
-}
-
-Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) {
- const auto index = static_cast<u32>(index_);
- const auto offset = static_cast<u32>(offset_);
-
- used_cbufs.try_emplace(index).first->second.MarkAsUsedIndirect();
-
- Node final_offset = [&] {
- // Attempt to inline constant buffer without a variable offset. This is done to allow
- // tracking LDC calls.
- if (const auto gpr = std::get_if<GprNode>(&*node)) {
- if (gpr->GetIndex() == Register::ZeroIndex) {
- return Immediate(offset);
- }
- }
- return Operation(OperationCode::UAdd, NO_PRECISE, std::move(node), Immediate(offset));
- }();
- return MakeNode<CbufNode>(index, std::move(final_offset));
-}
-
-Node ShaderIR::GetPredicate(u64 pred_, bool negated) {
- const auto pred = static_cast<Pred>(pred_);
- if (pred != Pred::UnusedIndex && pred != Pred::NeverExecute) {
- used_predicates.insert(pred);
- }
-
- return MakeNode<PredicateNode>(pred, negated);
-}
-
-Node ShaderIR::GetPredicate(bool immediate) {
- return GetPredicate(static_cast<u64>(immediate ? Pred::UnusedIndex : Pred::NeverExecute));
-}
-
-Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) {
- MarkAttributeUsage(index, element);
- used_input_attributes.emplace(index);
- return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer));
-}
-
-Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) {
- uses_physical_attributes = true;
- return MakeNode<AbufNode>(GetRegister(physical_address), buffer);
-}
-
-Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) {
- MarkAttributeUsage(index, element);
- used_output_attributes.insert(index);
- return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer));
-}
-
-Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) const {
- Node node = MakeNode<InternalFlagNode>(flag);
- if (negated) {
- return Operation(OperationCode::LogicalNegate, std::move(node));
- }
- return node;
-}
-
-Node ShaderIR::GetLocalMemory(Node address) {
- return MakeNode<LmemNode>(std::move(address));
-}
-
-Node ShaderIR::GetSharedMemory(Node address) {
- return MakeNode<SmemNode>(std::move(address));
-}
-
-Node ShaderIR::GetTemporary(u32 id) {
- return GetRegister(Register::ZeroIndex + 1 + id);
-}
-
-Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) {
- if (absolute) {
- value = Operation(OperationCode::FAbsolute, NO_PRECISE, std::move(value));
- }
- if (negate) {
- value = Operation(OperationCode::FNegate, NO_PRECISE, std::move(value));
- }
- return value;
-}
-
-Node ShaderIR::GetSaturatedFloat(Node value, bool saturate) {
- if (!saturate) {
- return value;
- }
-
- Node positive_zero = Immediate(std::copysignf(0, 1));
- Node positive_one = Immediate(1.0f);
- return Operation(OperationCode::FClamp, NO_PRECISE, std::move(value), std::move(positive_zero),
- std::move(positive_one));
-}
-
-Node ShaderIR::ConvertIntegerSize(Node value, Register::Size size, bool is_signed) {
- switch (size) {
- case Register::Size::Byte:
- value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE,
- std::move(value), Immediate(24));
- value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE,
- std::move(value), Immediate(24));
- return value;
- case Register::Size::Short:
- value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE,
- std::move(value), Immediate(16));
- value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE,
- std::move(value), Immediate(16));
- return value;
- case Register::Size::Word:
- // Default - do nothing
- return value;
- default:
- UNREACHABLE_MSG("Unimplemented conversion size: {}", size);
- return value;
- }
-}
-
-Node ShaderIR::GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed) {
- if (!is_signed) {
- // Absolute value or negation on an unsigned value is pointless
- return value;
- }
- if (absolute) {
- value = Operation(OperationCode::IAbsolute, NO_PRECISE, std::move(value));
- }
- if (negate) {
- value = Operation(OperationCode::INegate, NO_PRECISE, std::move(value));
- }
- return value;
-}
-
-Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) {
- Node value = Immediate(instr.half_imm.PackImmediates());
- if (!has_negation) {
- return value;
- }
-
- Node first_negate = GetPredicate(instr.half_imm.first_negate != 0);
- Node second_negate = GetPredicate(instr.half_imm.second_negate != 0);
-
- return Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), std::move(first_negate),
- std::move(second_negate));
-}
-
-Node ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) {
- return Operation(OperationCode::HUnpack, type, std::move(value));
-}
-
-Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) {
- switch (merge) {
- case Tegra::Shader::HalfMerge::H0_H1:
- return src;
- case Tegra::Shader::HalfMerge::F32:
- return Operation(OperationCode::HMergeF32, std::move(src));
- case Tegra::Shader::HalfMerge::Mrg_H0:
- return Operation(OperationCode::HMergeH0, std::move(dest), std::move(src));
- case Tegra::Shader::HalfMerge::Mrg_H1:
- return Operation(OperationCode::HMergeH1, std::move(dest), std::move(src));
- }
- UNREACHABLE();
- return src;
-}
-
-Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) {
- if (absolute) {
- value = Operation(OperationCode::HAbsolute, NO_PRECISE, std::move(value));
- }
- if (negate) {
- value = Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), GetPredicate(true),
- GetPredicate(true));
- }
- return value;
-}
-
-Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) {
- if (!saturate) {
- return value;
- }
-
- Node positive_zero = Immediate(std::copysignf(0, 1));
- Node positive_one = Immediate(1.0f);
- return Operation(OperationCode::HClamp, NO_PRECISE, std::move(value), std::move(positive_zero),
- std::move(positive_one));
-}
-
-Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) {
- if (condition == PredCondition::T) {
- return GetPredicate(true);
- } else if (condition == PredCondition::F) {
- return GetPredicate(false);
- }
-
- static constexpr std::array comparison_table{
- OperationCode(0),
- OperationCode::LogicalFOrdLessThan, // LT
- OperationCode::LogicalFOrdEqual, // EQ
- OperationCode::LogicalFOrdLessEqual, // LE
- OperationCode::LogicalFOrdGreaterThan, // GT
- OperationCode::LogicalFOrdNotEqual, // NE
- OperationCode::LogicalFOrdGreaterEqual, // GE
- OperationCode::LogicalFOrdered, // NUM
- OperationCode::LogicalFUnordered, // NAN
- OperationCode::LogicalFUnordLessThan, // LTU
- OperationCode::LogicalFUnordEqual, // EQU
- OperationCode::LogicalFUnordLessEqual, // LEU
- OperationCode::LogicalFUnordGreaterThan, // GTU
- OperationCode::LogicalFUnordNotEqual, // NEU
- OperationCode::LogicalFUnordGreaterEqual, // GEU
- };
- const std::size_t index = static_cast<std::size_t>(condition);
- ASSERT_MSG(index < std::size(comparison_table), "Invalid condition={}", index);
-
- return Operation(comparison_table[index], op_a, op_b);
-}
-
-Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a,
- Node op_b) {
- static constexpr std::array comparison_table{
- std::pair{PredCondition::LT, OperationCode::LogicalILessThan},
- std::pair{PredCondition::EQ, OperationCode::LogicalIEqual},
- std::pair{PredCondition::LE, OperationCode::LogicalILessEqual},
- std::pair{PredCondition::GT, OperationCode::LogicalIGreaterThan},
- std::pair{PredCondition::NE, OperationCode::LogicalINotEqual},
- std::pair{PredCondition::GE, OperationCode::LogicalIGreaterEqual},
- };
-
- const auto comparison =
- std::find_if(comparison_table.cbegin(), comparison_table.cend(),
- [condition](const auto entry) { return condition == entry.first; });
- UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(),
- "Unknown predicate comparison operation");
-
- return SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a),
- std::move(op_b));
-}
-
-Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a,
- Node op_b) {
- static constexpr std::array comparison_table{
- std::pair{PredCondition::LT, OperationCode::Logical2HLessThan},
- std::pair{PredCondition::EQ, OperationCode::Logical2HEqual},
- std::pair{PredCondition::LE, OperationCode::Logical2HLessEqual},
- std::pair{PredCondition::GT, OperationCode::Logical2HGreaterThan},
- std::pair{PredCondition::NE, OperationCode::Logical2HNotEqual},
- std::pair{PredCondition::GE, OperationCode::Logical2HGreaterEqual},
- std::pair{PredCondition::LTU, OperationCode::Logical2HLessThanWithNan},
- std::pair{PredCondition::LEU, OperationCode::Logical2HLessEqualWithNan},
- std::pair{PredCondition::GTU, OperationCode::Logical2HGreaterThanWithNan},
- std::pair{PredCondition::NEU, OperationCode::Logical2HNotEqualWithNan},
- std::pair{PredCondition::GEU, OperationCode::Logical2HGreaterEqualWithNan},
- };
-
- const auto comparison =
- std::find_if(comparison_table.cbegin(), comparison_table.cend(),
- [condition](const auto entry) { return condition == entry.first; });
- UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(),
- "Unknown predicate comparison operation");
-
- return Operation(comparison->second, NO_PRECISE, std::move(op_a), std::move(op_b));
-}
-
-OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) {
- static constexpr std::array operation_table{
- OperationCode::LogicalAnd,
- OperationCode::LogicalOr,
- OperationCode::LogicalXor,
- };
-
- const auto index = static_cast<std::size_t>(operation);
- if (index >= operation_table.size()) {
- UNIMPLEMENTED_MSG("Unknown predicate operation.");
- return {};
- }
-
- return operation_table[index];
-}
-
-Node ShaderIR::GetConditionCode(ConditionCode cc) const {
- switch (cc) {
- case ConditionCode::NEU:
- return GetInternalFlag(InternalFlag::Zero, true);
- case ConditionCode::FCSM_TR:
- UNIMPLEMENTED_MSG("EXIT.FCSM_TR is not implemented");
- return MakeNode<PredicateNode>(Pred::NeverExecute, false);
- default:
- UNIMPLEMENTED_MSG("Unimplemented condition code: {}", cc);
- return MakeNode<PredicateNode>(Pred::NeverExecute, false);
- }
-}
-
-void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) {
- bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), std::move(src)));
-}
-
-void ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) {
- bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), std::move(src)));
-}
-
-void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) {
- bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), std::move(value)));
-}
-
-void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) {
- bb.push_back(
- Operation(OperationCode::Assign, GetLocalMemory(std::move(address)), std::move(value)));
-}
-
-void ShaderIR::SetSharedMemory(NodeBlock& bb, Node address, Node value) {
- bb.push_back(
- Operation(OperationCode::Assign, GetSharedMemory(std::move(address)), std::move(value)));
-}
-
-void ShaderIR::SetTemporary(NodeBlock& bb, u32 id, Node value) {
- SetRegister(bb, Register::ZeroIndex + 1 + id, std::move(value));
-}
-
-void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) {
- if (!sets_cc) {
- return;
- }
- Node zerop = Operation(OperationCode::LogicalFOrdEqual, std::move(value), Immediate(0.0f));
- SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop));
- LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
-}
-
-void ShaderIR::SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc) {
- if (!sets_cc) {
- return;
- }
- Node zerop = Operation(OperationCode::LogicalIEqual, std::move(value), Immediate(0));
- SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop));
- LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
-}
-
-Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) {
- return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, std::move(value),
- Immediate(offset), Immediate(bits));
-}
-
-Node ShaderIR::BitfieldInsert(Node base, Node insert, u32 offset, u32 bits) {
- return Operation(OperationCode::UBitfieldInsert, NO_PRECISE, base, insert, Immediate(offset),
- Immediate(bits));
-}
-
-void ShaderIR::MarkAttributeUsage(Attribute::Index index, u64 element) {
- switch (index) {
- case Attribute::Index::LayerViewportPointSize:
- switch (element) {
- case 0:
- UNIMPLEMENTED();
- break;
- case 1:
- uses_layer = true;
- break;
- case 2:
- uses_viewport_index = true;
- break;
- case 3:
- uses_point_size = true;
- break;
- }
- break;
- case Attribute::Index::TessCoordInstanceIDVertexID:
- switch (element) {
- case 2:
- uses_instance_id = true;
- break;
- case 3:
- uses_vertex_id = true;
- break;
- }
- break;
- case Attribute::Index::ClipDistances0123:
- case Attribute::Index::ClipDistances4567: {
- const u64 clip_index = (index == Attribute::Index::ClipDistances4567 ? 4 : 0) + element;
- used_clip_distances.at(clip_index) = true;
- break;
- }
- case Attribute::Index::FrontColor:
- case Attribute::Index::FrontSecondaryColor:
- case Attribute::Index::BackColor:
- case Attribute::Index::BackSecondaryColor:
- uses_legacy_varyings = true;
- break;
- default:
- if (index >= Attribute::Index::TexCoord_0 && index <= Attribute::Index::TexCoord_7) {
- uses_legacy_varyings = true;
- }
- break;
- }
-}
-
-std::size_t ShaderIR::DeclareAmend(Node new_amend) {
- const auto id = amend_code.size();
- amend_code.push_back(std::move(new_amend));
- return id;
-}
-
-u32 ShaderIR::NewCustomVariable() {
- return num_custom_variables++;
-}
-
-} // namespace VideoCommon::Shader
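
ConvertIntegerSize in the removed shader_ir.cpp widens byte and short operands by pairing a logical shift left with an arithmetic shift right, which sign-extends signed values and zero-fills unsigned ones. A standalone sketch of the same trick on host integers (it assumes two's-complement arithmetic right shift on int32_t, which C++20 guarantees and which the IR's IArithmeticShiftRight models):

#include <cstdint>
#include <iostream>

// Extend the low `bits` of `value` to 32 bits: arithmetic shift for signed
// operands (sign extension), logical shift for unsigned ones (zero fill).
uint32_t ConvertIntegerSize(uint32_t value, int bits, bool is_signed) {
    const int shift = 32 - bits;
    if (is_signed) {
        return static_cast<uint32_t>(static_cast<int32_t>(value << shift) >> shift);
    }
    return (value << shift) >> shift;
}

int main() {
    std::cout << std::hex;
    std::cout << ConvertIntegerSize(0x80, 8, true) << '\n';  // ffffff80 (sign-extended byte)
    std::cout << ConvertIntegerSize(0x80, 8, false) << '\n'; // 80 (zero-extended byte)
}
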
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
deleted file mode 100644
index 1cd7c14d7..000000000
--- a/src/video_core/shader/shader_ir.h
+++ /dev/null
@@ -1,479 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <array>
-#include <list>
-#include <map>
-#include <optional>
-#include <set>
-#include <tuple>
-#include <vector>
-
-#include "common/common_types.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/engines/shader_header.h"
-#include "video_core/shader/ast.h"
-#include "video_core/shader/compiler_settings.h"
-#include "video_core/shader/memory_util.h"
-#include "video_core/shader/node.h"
-#include "video_core/shader/registry.h"
-
-namespace VideoCommon::Shader {
-
-struct ShaderBlock;
-
-constexpr u32 MAX_PROGRAM_LENGTH = 0x1000;
-
-struct ConstBuffer {
- constexpr explicit ConstBuffer(u32 max_offset_, bool is_indirect_)
- : max_offset{max_offset_}, is_indirect{is_indirect_} {}
-
- constexpr ConstBuffer() = default;
-
- void MarkAsUsed(u64 offset) {
- max_offset = std::max(max_offset, static_cast<u32>(offset));
- }
-
- void MarkAsUsedIndirect() {
- is_indirect = true;
- }
-
- bool IsIndirect() const {
- return is_indirect;
- }
-
- u32 GetSize() const {
- return max_offset + static_cast<u32>(sizeof(float));
- }
-
- u32 GetMaxOffset() const {
- return max_offset;
- }
-
-private:
- u32 max_offset = 0;
- bool is_indirect = false;
-};
-
-struct GlobalMemoryUsage {
- bool is_read{};
- bool is_written{};
-};
-
-class ShaderIR final {
-public:
- explicit ShaderIR(const ProgramCode& program_code_, u32 main_offset_,
- CompilerSettings settings_, Registry& registry_);
- ~ShaderIR();
-
- const std::map<u32, NodeBlock>& GetBasicBlocks() const {
- return basic_blocks;
- }
-
- const std::set<u32>& GetRegisters() const {
- return used_registers;
- }
-
- const std::set<Tegra::Shader::Pred>& GetPredicates() const {
- return used_predicates;
- }
-
- const std::set<Tegra::Shader::Attribute::Index>& GetInputAttributes() const {
- return used_input_attributes;
- }
-
- const std::set<Tegra::Shader::Attribute::Index>& GetOutputAttributes() const {
- return used_output_attributes;
- }
-
- const std::map<u32, ConstBuffer>& GetConstantBuffers() const {
- return used_cbufs;
- }
-
- const std::list<SamplerEntry>& GetSamplers() const {
- return used_samplers;
- }
-
- const std::list<ImageEntry>& GetImages() const {
- return used_images;
- }
-
- const std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances>& GetClipDistances()
- const {
- return used_clip_distances;
- }
-
- const std::map<GlobalMemoryBase, GlobalMemoryUsage>& GetGlobalMemory() const {
- return used_global_memory;
- }
-
- std::size_t GetLength() const {
- return static_cast<std::size_t>(coverage_end * sizeof(u64));
- }
-
- bool UsesLayer() const {
- return uses_layer;
- }
-
- bool UsesViewportIndex() const {
- return uses_viewport_index;
- }
-
- bool UsesPointSize() const {
- return uses_point_size;
- }
-
- bool UsesInstanceId() const {
- return uses_instance_id;
- }
-
- bool UsesVertexId() const {
- return uses_vertex_id;
- }
-
- bool UsesLegacyVaryings() const {
- return uses_legacy_varyings;
- }
-
- bool UsesYNegate() const {
- return uses_y_negate;
- }
-
- bool UsesWarps() const {
- return uses_warps;
- }
-
- bool HasPhysicalAttributes() const {
- return uses_physical_attributes;
- }
-
- const Tegra::Shader::Header& GetHeader() const {
- return header;
- }
-
- bool IsFlowStackDisabled() const {
- return disable_flow_stack;
- }
-
- bool IsDecompiled() const {
- return decompiled;
- }
-
- const ASTManager& GetASTManager() const {
- return program_manager;
- }
-
- ASTNode GetASTProgram() const {
- return program_manager.GetProgram();
- }
-
- u32 GetASTNumVariables() const {
- return program_manager.GetVariables();
- }
-
- u32 ConvertAddressToNvidiaSpace(u32 address) const {
- return (address - main_offset) * static_cast<u32>(sizeof(Tegra::Shader::Instruction));
- }
-
- /// Returns a condition code evaluated from internal flags
- Node GetConditionCode(Tegra::Shader::ConditionCode cc) const;
-
- const Node& GetAmendNode(std::size_t index) const {
- return amend_code[index];
- }
-
- u32 GetNumCustomVariables() const {
- return num_custom_variables;
- }
-
-private:
- friend class ASTDecoder;
-
- struct SamplerInfo {
- std::optional<Tegra::Shader::TextureType> type;
- std::optional<bool> is_array;
- std::optional<bool> is_shadow;
- std::optional<bool> is_buffer;
-
- constexpr bool IsComplete() const noexcept {
- return type && is_array && is_shadow && is_buffer;
- }
- };
-
- void Decode();
- void PostDecode();
-
- NodeBlock DecodeRange(u32 begin, u32 end);
- void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end);
- void InsertControlFlow(NodeBlock& bb, const ShaderBlock& block);
-
- /**
- * Decodes a single instruction from Tegra to IR.
- * @param bb Basic block where the nodes will be written to.
- * @param pc Program counter. Offset to decode.
- * @return Next address to decode.
- */
- u32 DecodeInstr(NodeBlock& bb, u32 pc);
-
- u32 DecodeArithmetic(NodeBlock& bb, u32 pc);
- u32 DecodeArithmeticImmediate(NodeBlock& bb, u32 pc);
- u32 DecodeBfe(NodeBlock& bb, u32 pc);
- u32 DecodeBfi(NodeBlock& bb, u32 pc);
- u32 DecodeShift(NodeBlock& bb, u32 pc);
- u32 DecodeArithmeticInteger(NodeBlock& bb, u32 pc);
- u32 DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc);
- u32 DecodeArithmeticHalf(NodeBlock& bb, u32 pc);
- u32 DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc);
- u32 DecodeFfma(NodeBlock& bb, u32 pc);
- u32 DecodeHfma2(NodeBlock& bb, u32 pc);
- u32 DecodeConversion(NodeBlock& bb, u32 pc);
- u32 DecodeWarp(NodeBlock& bb, u32 pc);
- u32 DecodeMemory(NodeBlock& bb, u32 pc);
- u32 DecodeTexture(NodeBlock& bb, u32 pc);
- u32 DecodeImage(NodeBlock& bb, u32 pc);
- u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc);
- u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc);
- u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc);
- u32 DecodePredicateSetRegister(NodeBlock& bb, u32 pc);
- u32 DecodePredicateSetPredicate(NodeBlock& bb, u32 pc);
- u32 DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc);
- u32 DecodeFloatSet(NodeBlock& bb, u32 pc);
- u32 DecodeIntegerSet(NodeBlock& bb, u32 pc);
- u32 DecodeHalfSet(NodeBlock& bb, u32 pc);
- u32 DecodeVideo(NodeBlock& bb, u32 pc);
- u32 DecodeXmad(NodeBlock& bb, u32 pc);
- u32 DecodeOther(NodeBlock& bb, u32 pc);
-
- /// Generates a node for a passed register.
- Node GetRegister(Tegra::Shader::Register reg);
- /// Generates a node for a custom variable
- Node GetCustomVariable(u32 id);
- /// Generates a node representing a 19-bit immediate value
- Node GetImmediate19(Tegra::Shader::Instruction instr);
- /// Generates a node representing a 32-bit immediate value
- Node GetImmediate32(Tegra::Shader::Instruction instr);
- /// Generates a node representing a constant buffer
- Node GetConstBuffer(u64 index, u64 offset);
- /// Generates a node representing a constant buffer with a variable offset
- Node GetConstBufferIndirect(u64 index, u64 offset, Node node);
- /// Generates a node for a passed predicate. It can be optionally negated
- Node GetPredicate(u64 pred, bool negated = false);
- /// Generates a predicate node for an immediate true or false value
- Node GetPredicate(bool immediate);
- /// Generates a node representing an input attribute. Keeps track of used attributes.
- Node GetInputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer = {});
- /// Generates a node representing a physical input attribute.
- Node GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer = {});
- /// Generates a node representing an output attribute. Keeps track of used attributes.
- Node GetOutputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer);
- /// Generates a node representing an internal flag
- Node GetInternalFlag(InternalFlag flag, bool negated = false) const;
- /// Generates a node representing a local memory address
- Node GetLocalMemory(Node address);
- /// Generates a node representing a shared memory address
- Node GetSharedMemory(Node address);
- /// Generates a temporary. Internally it uses a post-RZ register
- Node GetTemporary(u32 id);
-
- /// Sets a register. src value must be a number-evaluated node.
- void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src);
- /// Sets a predicate. src value must be a bool-evaluated node
- void SetPredicate(NodeBlock& bb, u64 dest, Node src);
- /// Sets an internal flag. src value must be a bool-evaluated node
- void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value);
- /// Sets a local memory address with a value.
- void SetLocalMemory(NodeBlock& bb, Node address, Node value);
- /// Sets a shared memory address with a value.
- void SetSharedMemory(NodeBlock& bb, Node address, Node value);
- /// Sets a temporary. Internally it uses a post-RZ register
- void SetTemporary(NodeBlock& bb, u32 id, Node value);
-
- /// Sets internal flags from a float
- void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true);
- /// Sets internal flags from an integer
- void SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc = true);
-
- /// Conditionally absolute/negated float. Absolute is applied first
- Node GetOperandAbsNegFloat(Node value, bool absolute, bool negate);
- /// Conditionally saturates a float
- Node GetSaturatedFloat(Node value, bool saturate = true);
-
- /// Converts an integer to different sizes.
- Node ConvertIntegerSize(Node value, Tegra::Shader::Register::Size size, bool is_signed);
- /// Conditionally absolute/negated integer. Absolute is applied first
- Node GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed);
-
- /// Unpacks a half immediate from an instruction
- Node UnpackHalfImmediate(Tegra::Shader::Instruction instr, bool has_negation);
- /// Unpacks a binary value into a half float pair with a type format
- Node UnpackHalfFloat(Node value, Tegra::Shader::HalfType type);
- /// Merges a half pair into another value
- Node HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge);
- /// Conditionally absolute/negated half float pair. Absolute is applied first
- Node GetOperandAbsNegHalf(Node value, bool absolute, bool negate);
- /// Conditionally saturates a half float pair
- Node GetSaturatedHalfFloat(Node value, bool saturate = true);
-
- /// Get image component value by type and size
- std::pair<Node, bool> GetComponentValue(Tegra::Texture::ComponentType component_type,
- u32 component_size, Node original_value);
-
- /// Returns a predicate comparing two floats
- Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b);
- /// Returns a predicate comparing two integers
- Node GetPredicateComparisonInteger(Tegra::Shader::PredCondition condition, bool is_signed,
- Node op_a, Node op_b);
- /// Returns a predicate comparing two half floats. meta consumes how both pairs will be compared
- Node GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, Node op_b);
-
- /// Returns a predicate combiner operation
- OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation);
-
- /// Queries the missing sampler info from the execution context.
- SamplerInfo GetSamplerInfo(SamplerInfo info,
- std::optional<Tegra::Engines::SamplerDescriptor> sampler);
-
- /// Accesses a texture sampler.
- std::optional<SamplerEntry> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info);
-
- /// Accesses a texture sampler for a bindless texture.
- std::optional<SamplerEntry> GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info,
- Node& index_var);
-
- /// Accesses an image.
- ImageEntry& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type);
-
- /// Access a bindless image sampler.
- ImageEntry& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type);
-
- /// Extracts a sequence of bits from a node
- Node BitfieldExtract(Node value, u32 offset, u32 bits);
-
- /// Inserts a sequence of bits from a node
- Node BitfieldInsert(Node base, Node insert, u32 offset, u32 bits);
-
- /// Marks the usage of an input or output attribute.
- void MarkAttributeUsage(Tegra::Shader::Attribute::Index index, u64 element);
-
- /// Decodes VMNMX instruction and inserts its code into the passed basic block.
- void DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr);
-
- void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
- const Node4& components);
-
- void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
- const Node4& components, bool ignore_mask = false);
- void WriteTexsInstructionHalfFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
- const Node4& components, bool ignore_mask = false);
-
- Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
- Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
- bool is_array, bool is_aoffi,
- std::optional<Tegra::Shader::Register> bindless_reg);
-
- Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
- Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
- bool is_array);
-
- Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
- bool depth_compare, bool is_array, bool is_aoffi, bool is_ptp,
- bool is_bindless);
-
- Node4 GetTldCode(Tegra::Shader::Instruction instr);
-
- Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
- bool is_array);
-
- std::tuple<std::size_t, std::size_t> ValidateAndGetCoordinateElement(
- Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array,
- bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs);
-
- std::vector<Node> GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4);
-
- std::vector<Node> GetPtpCoordinates(std::array<Node, 2> ptp_regs);
-
- Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
- Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords,
- Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi,
- std::optional<Tegra::Shader::Register> bindless_reg);
-
- Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type,
- u64 byte_height);
-
- void WriteLogicOperation(NodeBlock& bb, Tegra::Shader::Register dest,
- Tegra::Shader::LogicOperation logic_op, Node op_a, Node op_b,
- Tegra::Shader::PredicateResultMode predicate_mode,
- Tegra::Shader::Pred predicate, bool sets_cc);
- void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b,
- Node op_c, Node imm_lut, bool sets_cc);
-
- std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const;
-
- std::pair<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code,
- s64 cursor);
-
- std::pair<Node, TrackSampler> HandleBindlessIndirectRead(const CbufNode& cbuf,
- const OperationNode& operation,
- Node gpr, Node base_offset,
- Node tracked, const NodeBlock& code,
- s64 cursor);
-
- std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const;
-
- std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code,
- s64 cursor) const;
-
- std::tuple<Node, Node, GlobalMemoryBase> TrackGlobalMemory(NodeBlock& bb,
- Tegra::Shader::Instruction instr,
- bool is_read, bool is_write);
-
- /// Register new amending code and obtain the reference id.
- std::size_t DeclareAmend(Node new_amend);
-
- u32 NewCustomVariable();
-
- const ProgramCode& program_code;
- const u32 main_offset;
- const CompilerSettings settings;
- Registry& registry;
-
- bool decompiled{};
- bool disable_flow_stack{};
-
- u32 coverage_begin{};
- u32 coverage_end{};
-
- std::map<u32, NodeBlock> basic_blocks;
- NodeBlock global_code;
- ASTManager program_manager{true, true};
- std::vector<Node> amend_code;
- u32 num_custom_variables{};
-
- std::set<u32> used_registers;
- std::set<Tegra::Shader::Pred> used_predicates;
- std::set<Tegra::Shader::Attribute::Index> used_input_attributes;
- std::set<Tegra::Shader::Attribute::Index> used_output_attributes;
- std::map<u32, ConstBuffer> used_cbufs;
- std::list<SamplerEntry> used_samplers;
- std::list<ImageEntry> used_images;
- std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};
- std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory;
- bool uses_layer{};
- bool uses_viewport_index{};
- bool uses_point_size{};
- bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes
- bool uses_instance_id{};
- bool uses_vertex_id{};
- bool uses_legacy_varyings{};
- bool uses_y_negate{};
- bool uses_warps{};
- bool uses_indexed_samplers{};
-
- Tegra::Shader::Header header;
-};
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
deleted file mode 100644
index 6be3ea92b..000000000
--- a/src/video_core/shader/track.cpp
+++ /dev/null
@@ -1,236 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <utility>
-#include <variant>
-
-#include "common/common_types.h"
-#include "video_core/shader/node.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-namespace {
-
-std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
- OperationCode operation_code) {
- for (; cursor >= 0; --cursor) {
- Node node = code.at(cursor);
-
- if (const auto operation = std::get_if<OperationNode>(&*node)) {
- if (operation->GetCode() == operation_code) {
- return {std::move(node), cursor};
- }
- }
-
- if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
- const auto& conditional_code = conditional->GetCode();
- auto result = FindOperation(
- conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code);
- auto& found = result.first;
- if (found) {
- return {std::move(found), cursor};
- }
- }
- }
- return {};
-}
-
-std::optional<std::pair<Node, Node>> DecoupleIndirectRead(const OperationNode& operation) {
- if (operation.GetCode() != OperationCode::UAdd) {
- return std::nullopt;
- }
- Node gpr;
- Node offset;
- ASSERT(operation.GetOperandsCount() == 2);
- for (std::size_t i = 0; i < operation.GetOperandsCount(); i++) {
- Node operand = operation[i];
- if (std::holds_alternative<ImmediateNode>(*operand)) {
- offset = operation[i];
- } else if (std::holds_alternative<GprNode>(*operand)) {
- gpr = operation[i];
- }
- }
- if (offset && gpr) {
- return std::make_pair(gpr, offset);
- }
- return std::nullopt;
-}
-
-bool AmendNodeCv(std::size_t amend_index, Node node) {
- if (const auto operation = std::get_if<OperationNode>(&*node)) {
- operation->SetAmendIndex(amend_index);
- return true;
- }
- if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
- conditional->SetAmendIndex(amend_index);
- return true;
- }
- return false;
-}
-
-} // Anonymous namespace
-
-std::pair<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code,
- s64 cursor) {
- if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
- const u32 cbuf_index = cbuf->GetIndex();
-
- // Constant buffer found, test if it's an immediate
- const auto& offset = cbuf->GetOffset();
- if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
- auto track = MakeTrackSampler<BindlessSamplerNode>(cbuf_index, immediate->GetValue());
- return {tracked, track};
- }
- if (const auto operation = std::get_if<OperationNode>(&*offset)) {
- const u32 bound_buffer = registry.GetBoundBuffer();
- if (bound_buffer != cbuf_index) {
- return {};
- }
- if (const std::optional pair = DecoupleIndirectRead(*operation)) {
- auto [gpr, base_offset] = *pair;
- return HandleBindlessIndirectRead(*cbuf, *operation, gpr, base_offset, tracked,
- code, cursor);
- }
- }
- return {};
- }
- if (const auto gpr = std::get_if<GprNode>(&*tracked)) {
- if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) {
- return {};
- }
- // Reduce the cursor by one to avoid infinite loops when the instruction sets the same
- // register that it uses as an operand
- const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1);
- if (!source) {
- return {};
- }
- return TrackBindlessSampler(source, code, new_cursor);
- }
- if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
- const OperationNode& op = *operation;
-
- const OperationCode opcode = operation->GetCode();
- if (opcode == OperationCode::IBitwiseOr || opcode == OperationCode::UBitwiseOr) {
- ASSERT(op.GetOperandsCount() == 2);
- auto [node_a, index_a, offset_a] = TrackCbuf(op[0], code, cursor);
- auto [node_b, index_b, offset_b] = TrackCbuf(op[1], code, cursor);
- if (node_a && node_b) {
- auto track = MakeTrackSampler<SeparateSamplerNode>(std::pair{index_a, index_b},
- std::pair{offset_a, offset_b});
- return {tracked, std::move(track)};
- }
- }
- std::size_t i = op.GetOperandsCount();
- while (i--) {
- if (auto found = TrackBindlessSampler(op[i], code, cursor); std::get<0>(found)) {
- // Constant buffer found in operand.
- return found;
- }
- }
- return {};
- }
- if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) {
- const auto& conditional_code = conditional->GetCode();
- return TrackBindlessSampler(tracked, conditional_code,
- static_cast<s64>(conditional_code.size()));
- }
- return {};
-}
-
-std::pair<Node, TrackSampler> ShaderIR::HandleBindlessIndirectRead(
- const CbufNode& cbuf, const OperationNode& operation, Node gpr, Node base_offset, Node tracked,
- const NodeBlock& code, s64 cursor) {
- const auto offset_imm = std::get<ImmediateNode>(*base_offset);
- const auto& gpu_driver = registry.AccessGuestDriverProfile();
- const u32 bindless_cv = NewCustomVariable();
- const u32 texture_handler_size = gpu_driver.GetTextureHandlerSize();
- Node op = Operation(OperationCode::UDiv, gpr, Immediate(texture_handler_size));
-
- Node cv_node = GetCustomVariable(bindless_cv);
- Node amend_op = Operation(OperationCode::Assign, std::move(cv_node), std::move(op));
- const std::size_t amend_index = DeclareAmend(std::move(amend_op));
- AmendNodeCv(amend_index, code[cursor]);
-
- // TODO: Implement bindless index custom variable
- auto track =
- MakeTrackSampler<ArraySamplerNode>(cbuf.GetIndex(), offset_imm.GetValue(), bindless_cv);
- return {tracked, track};
-}
-
-std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code,
- s64 cursor) const {
- if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
- // Constant buffer found, test if it's an immediate
- const auto& offset = cbuf->GetOffset();
- if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
- return {tracked, cbuf->GetIndex(), immediate->GetValue()};
- }
- return {};
- }
- if (const auto gpr = std::get_if<GprNode>(&*tracked)) {
- if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) {
- return {};
- }
- // Reduce the cursor by one to avoid infinite loops when the instruction sets the same
- // register that it uses as an operand
- const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1);
- if (!source) {
- return {};
- }
- return TrackCbuf(source, code, new_cursor);
- }
- if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
- for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) {
- if (auto found = TrackCbuf((*operation)[i - 1], code, cursor); std::get<0>(found)) {
- // Cbuf found in operand.
- return found;
- }
- }
- return {};
- }
- if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) {
- const auto& conditional_code = conditional->GetCode();
- return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size()));
- }
- return {};
-}
-
-std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const {
- // Reduce the cursor by one to avoid infinite loops when the instruction sets the same register
- // that it uses as an operand
- const auto result = TrackRegister(&std::get<GprNode>(*tracked), code, cursor - 1);
- const auto& found = result.first;
- if (!found) {
- return std::nullopt;
- }
- if (const auto immediate = std::get_if<ImmediateNode>(&*found)) {
- return immediate->GetValue();
- }
- return std::nullopt;
-}
-
-std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code,
- s64 cursor) const {
- for (; cursor >= 0; --cursor) {
- const auto [found_node, new_cursor] = FindOperation(code, cursor, OperationCode::Assign);
- if (!found_node) {
- return {};
- }
- const auto operation = std::get_if<OperationNode>(&*found_node);
- ASSERT(operation);
-
- const auto& target = (*operation)[0];
- if (const auto gpr_target = std::get_if<GprNode>(&*target)) {
- if (gpr_target->GetIndex() == tracked->GetIndex()) {
- return {(*operation)[1], new_cursor};
- }
- }
- }
- return {};
-}
-
-} // namespace VideoCommon::Shader
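
TrackRegister above scans a node block backwards from the cursor for the most recent Assign that wrote the tracked register, and its callers start one instruction earlier so that an instruction which both reads and writes the same register does not match itself. A simplified standalone sketch of that backward scan over hypothetical assignment records:

#include <cstdint>
#include <iostream>
#include <optional>
#include <vector>

// Minimal record of "register dest was assigned value src".
struct Assignment {
    uint32_t dest;
    int32_t src;
};

// Walk the block backwards from `cursor` and return the most recent value
// written to `reg`, mirroring the idea behind TrackRegister/FindOperation.
std::optional<int32_t> TrackRegister(const std::vector<Assignment>& code, int64_t cursor,
                                     uint32_t reg) {
    for (; cursor >= 0; --cursor) {
        const Assignment& node = code[static_cast<size_t>(cursor)];
        if (node.dest == reg) {
            return node.src;
        }
    }
    return std::nullopt;
}

int main() {
    const std::vector<Assignment> code{{1, 10}, {2, 20}, {1, 30}};
    // Start one instruction before the use, as the tracker's callers do,
    // so the instruction being analyzed cannot match itself.
    std::cout << *TrackRegister(code, 2 - 1, 2) << '\n'; // 20
    std::cout << *TrackRegister(code, 2, 1) << '\n';     // 30
}
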
diff --git a/src/video_core/shader/transform_feedback.cpp b/src/video_core/shader/transform_feedback.cpp
deleted file mode 100644
index 22a933761..000000000
--- a/src/video_core/shader/transform_feedback.cpp
+++ /dev/null
@@ -1,115 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <array>
-#include <unordered_map>
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/shader/registry.h"
-#include "video_core/shader/transform_feedback.h"
-
-namespace VideoCommon::Shader {
-
-namespace {
-
-using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-
-// TODO(Rodrigo): Change this to constexpr std::unordered_set in C++20
-
-/// Attribute offsets that describe a vector
-constexpr std::array VECTORS = {
- 28, // gl_Position
- 32, // Generic 0
- 36, // Generic 1
- 40, // Generic 2
- 44, // Generic 3
- 48, // Generic 4
- 52, // Generic 5
- 56, // Generic 6
- 60, // Generic 7
- 64, // Generic 8
- 68, // Generic 9
- 72, // Generic 10
- 76, // Generic 11
- 80, // Generic 12
- 84, // Generic 13
- 88, // Generic 14
- 92, // Generic 15
- 96, // Generic 16
- 100, // Generic 17
- 104, // Generic 18
- 108, // Generic 19
- 112, // Generic 20
- 116, // Generic 21
- 120, // Generic 22
- 124, // Generic 23
- 128, // Generic 24
- 132, // Generic 25
- 136, // Generic 26
- 140, // Generic 27
- 144, // Generic 28
- 148, // Generic 29
- 152, // Generic 30
- 156, // Generic 31
- 160, // gl_FrontColor
- 164, // gl_FrontSecondaryColor
- 160, // gl_BackColor
- 164, // gl_BackSecondaryColor
- 192, // gl_TexCoord[0]
- 196, // gl_TexCoord[1]
- 200, // gl_TexCoord[2]
- 204, // gl_TexCoord[3]
- 208, // gl_TexCoord[4]
- 212, // gl_TexCoord[5]
- 216, // gl_TexCoord[6]
- 220, // gl_TexCoord[7]
-};
-} // namespace
-
-std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info) {
-
- std::unordered_map<u8, VaryingTFB> tfb;
-
- for (std::size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) {
- const auto& locations = info.tfb_varying_locs[buffer];
- const auto& layout = info.tfb_layouts[buffer];
- const std::size_t varying_count = layout.varying_count;
-
- std::size_t highest = 0;
-
- for (std::size_t offset = 0; offset < varying_count; ++offset) {
- const std::size_t base_offset = offset;
- const u8 location = locations[offset];
-
- VaryingTFB varying;
- varying.buffer = layout.stream;
- varying.stride = layout.stride;
- varying.offset = offset * sizeof(u32);
- varying.components = 1;
-
- if (std::find(VECTORS.begin(), VECTORS.end(), location / 4 * 4) != VECTORS.end()) {
- UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB");
-
- const u8 base_index = location / 4;
- while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) {
- ++offset;
- ++varying.components;
- }
- }
-
- [[maybe_unused]] const bool inserted = tfb.emplace(location, varying).second;
- UNIMPLEMENTED_IF_MSG(!inserted, "Varying already stored");
-
- highest = std::max(highest, (base_offset + varying.components) * sizeof(u32));
- }
-
- UNIMPLEMENTED_IF(highest != layout.stride);
- }
- return tfb;
-}
-
-} // namespace VideoCommon::Shader
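
BuildTransformFeedback folds consecutive varying locations that fall in the same vec4 slot (location / 4) into a single entry with a growing component count. A simplified standalone sketch of that grouping step (it omits the VECTORS whitelist and the alignment check the real code performs):

#include <cstdint>
#include <iostream>
#include <vector>

struct Varying {
    uint8_t location;
    uint32_t components;
};

// Fold consecutive locations sharing the same vec4 slot (location / 4)
// into one varying with a larger component count.
std::vector<Varying> GroupVaryings(const std::vector<uint8_t>& locations) {
    std::vector<Varying> result;
    for (size_t offset = 0; offset < locations.size(); ++offset) {
        Varying varying{locations[offset], 1};
        const uint8_t base_index = locations[offset] / 4;
        while (offset + 1 < locations.size() && base_index == locations[offset + 1] / 4) {
            ++offset;
            ++varying.components;
        }
        result.push_back(varying);
    }
    return result;
}

int main() {
    // gl_Position.xyzw followed by a single scalar generic attribute.
    for (const Varying& v : GroupVaryings({28, 29, 30, 31, 32})) {
        std::cout << "location " << int(v.location) << " x" << v.components << '\n';
    }
    // prints: location 28 x4, then location 32 x1
}
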
diff --git a/src/video_core/shader/transform_feedback.h b/src/video_core/shader/transform_feedback.h
deleted file mode 100644
index 77d05f64c..000000000
--- a/src/video_core/shader/transform_feedback.h
+++ /dev/null
@@ -1,23 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <unordered_map>
-
-#include "common/common_types.h"
-#include "video_core/shader/registry.h"
-
-namespace VideoCommon::Shader {
-
-struct VaryingTFB {
- std::size_t buffer;
- std::size_t stride;
- std::size_t offset;
- std::size_t components;
-};
-
-std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info);
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp
new file mode 100644
index 000000000..78bf90c48
--- /dev/null
+++ b/src/video_core/shader_cache.cpp
@@ -0,0 +1,250 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <vector>
+
+#include "common/assert.h"
+#include "shader_recompiler/frontend/maxwell/control_flow.h"
+#include "shader_recompiler/object_pool.h"
+#include "video_core/dirty_flags.h"
+#include "video_core/engines/kepler_compute.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
+#include "video_core/shader_cache.h"
+#include "video_core/shader_environment.h"
+
+namespace VideoCommon {
+
+void ShaderCache::InvalidateRegion(VAddr addr, size_t size) {
+ std::scoped_lock lock{invalidation_mutex};
+ InvalidatePagesInRegion(addr, size);
+ RemovePendingShaders();
+}
+
+void ShaderCache::OnCPUWrite(VAddr addr, size_t size) {
+ std::lock_guard lock{invalidation_mutex};
+ InvalidatePagesInRegion(addr, size);
+}
+
+void ShaderCache::SyncGuestHost() {
+ std::scoped_lock lock{invalidation_mutex};
+ RemovePendingShaders();
+}
+
+ShaderCache::ShaderCache(VideoCore::RasterizerInterface& rasterizer_,
+ Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_,
+ Tegra::Engines::KeplerCompute& kepler_compute_)
+ : gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_},
+ rasterizer{rasterizer_} {}
+
+bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) {
+ auto& dirty{maxwell3d.dirty.flags};
+ if (!dirty[VideoCommon::Dirty::Shaders]) {
+ return last_shaders_valid;
+ }
+ dirty[VideoCommon::Dirty::Shaders] = false;
+
+ const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
+ for (size_t index = 0; index < Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; ++index) {
+ if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
+ unique_hashes[index] = 0;
+ continue;
+ }
+ const auto& shader_config{maxwell3d.regs.shader_config[index]};
+ const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderProgram>(index)};
+ const GPUVAddr shader_addr{base_addr + shader_config.offset};
+ const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)};
+ if (!cpu_shader_addr) {
+ LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr);
+ last_shaders_valid = false;
+ return false;
+ }
+ const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)};
+ if (!shader_info) {
+ const u32 start_address{shader_config.offset};
+ GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address};
+ shader_info = MakeShaderInfo(env, *cpu_shader_addr);
+ }
+ shader_infos[index] = shader_info;
+ unique_hashes[index] = shader_info->unique_hash;
+ }
+ last_shaders_valid = true;
+ return true;
+}
+
+const ShaderInfo* ShaderCache::ComputeShader() {
+ const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
+ const auto& qmd{kepler_compute.launch_description};
+ const GPUVAddr shader_addr{program_base + qmd.program_start};
+ const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)};
+ if (!cpu_shader_addr) {
+ LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr);
+ return nullptr;
+ }
+ if (const ShaderInfo* const shader = TryGet(*cpu_shader_addr)) {
+ return shader;
+ }
+ ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start};
+ return MakeShaderInfo(env, *cpu_shader_addr);
+}
+
+void ShaderCache::GetGraphicsEnvironments(GraphicsEnvironments& result,
+ const std::array<u64, NUM_PROGRAMS>& unique_hashes) {
+ size_t env_index{};
+ const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
+ for (size_t index = 0; index < NUM_PROGRAMS; ++index) {
+ if (unique_hashes[index] == 0) {
+ continue;
+ }
+ const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderProgram>(index)};
+ auto& env{result.envs[index]};
+ const u32 start_address{maxwell3d.regs.shader_config[index].offset};
+ env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address};
+ env.SetCachedSize(shader_infos[index]->size_bytes);
+ result.env_ptrs[env_index++] = &env;
+ }
+}
+
+ShaderInfo* ShaderCache::TryGet(VAddr addr) const {
+ std::scoped_lock lock{lookup_mutex};
+
+ const auto it = lookup_cache.find(addr);
+ if (it == lookup_cache.end()) {
+ return nullptr;
+ }
+ return it->second->data;
+}
+
+void ShaderCache::Register(std::unique_ptr<ShaderInfo> data, VAddr addr, size_t size) {
+ std::scoped_lock lock{invalidation_mutex, lookup_mutex};
+
+ const VAddr addr_end = addr + size;
+ Entry* const entry = NewEntry(addr, addr_end, data.get());
+
+ const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
+ for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) {
+ invalidation_cache[page].push_back(entry);
+ }
+
+ storage.push_back(std::move(data));
+
+ rasterizer.UpdatePagesCachedCount(addr, size, 1);
+}
+
+void ShaderCache::InvalidatePagesInRegion(VAddr addr, size_t size) {
+ const VAddr addr_end = addr + size;
+ const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
+ for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) {
+ auto it = invalidation_cache.find(page);
+ if (it == invalidation_cache.end()) {
+ continue;
+ }
+ InvalidatePageEntries(it->second, addr, addr_end);
+ }
+}
+
+void ShaderCache::RemovePendingShaders() {
+ if (marked_for_removal.empty()) {
+ return;
+ }
+ // Remove duplicates
+ std::ranges::sort(marked_for_removal);
+ marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()),
+ marked_for_removal.end());
+
+ std::vector<ShaderInfo*> removed_shaders;
+ removed_shaders.reserve(marked_for_removal.size());
+
+ std::scoped_lock lock{lookup_mutex};
+
+ for (Entry* const entry : marked_for_removal) {
+ removed_shaders.push_back(entry->data);
+
+ const auto it = lookup_cache.find(entry->addr_start);
+ ASSERT(it != lookup_cache.end());
+ lookup_cache.erase(it);
+ }
+ marked_for_removal.clear();
+
+ if (!removed_shaders.empty()) {
+ RemoveShadersFromStorage(std::move(removed_shaders));
+ }
+}
+
+void ShaderCache::InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end) {
+ size_t index = 0;
+ while (index < entries.size()) {
+ Entry* const entry = entries[index];
+ if (!entry->Overlaps(addr, addr_end)) {
+ ++index;
+ continue;
+ }
+
+ UnmarkMemory(entry);
+ RemoveEntryFromInvalidationCache(entry);
+ marked_for_removal.push_back(entry);
+ }
+}
+
+void ShaderCache::RemoveEntryFromInvalidationCache(const Entry* entry) {
+ const u64 page_end = (entry->addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
+ for (u64 page = entry->addr_start >> PAGE_BITS; page < page_end; ++page) {
+ const auto entries_it = invalidation_cache.find(page);
+ ASSERT(entries_it != invalidation_cache.end());
+ std::vector<Entry*>& entries = entries_it->second;
+
+ const auto entry_it = std::ranges::find(entries, entry);
+ ASSERT(entry_it != entries.end());
+ entries.erase(entry_it);
+ }
+}
+
+void ShaderCache::UnmarkMemory(Entry* entry) {
+ if (!entry->is_memory_marked) {
+ return;
+ }
+ entry->is_memory_marked = false;
+
+ const VAddr addr = entry->addr_start;
+ const size_t size = entry->addr_end - addr;
+ rasterizer.UpdatePagesCachedCount(addr, size, -1);
+}
+
+void ShaderCache::RemoveShadersFromStorage(std::vector<ShaderInfo*> removed_shaders) {
+ // Remove them from the cache
+ std::erase_if(storage, [&removed_shaders](const std::unique_ptr<ShaderInfo>& shader) {
+ return std::ranges::find(removed_shaders, shader.get()) != removed_shaders.end();
+ });
+}
+
+ShaderCache::Entry* ShaderCache::NewEntry(VAddr addr, VAddr addr_end, ShaderInfo* data) {
+ auto entry = std::make_unique<Entry>(Entry{addr, addr_end, data});
+ Entry* const entry_pointer = entry.get();
+
+ lookup_cache.emplace(addr, std::move(entry));
+ return entry_pointer;
+}
+
+const ShaderInfo* ShaderCache::MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr) {
+ auto info = std::make_unique<ShaderInfo>();
+ if (const std::optional<u64> cached_hash{env.Analyze()}) {
+ info->unique_hash = *cached_hash;
+ info->size_bytes = env.CachedSize();
+ } else {
+ // Slow path, rarely hit in commercial games
+ // Build a control flow graph to get the real shader size
+ Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block;
+ Shader::Maxwell::Flow::CFG cfg{env, flow_block, env.StartAddress()};
+ info->unique_hash = env.CalculateHash();
+ info->size_bytes = env.ReadSize();
+ }
+ const size_t size_bytes{info->size_bytes};
+ const ShaderInfo* const result{info.get()};
+ Register(std::move(info), cpu_addr, size_bytes);
+ return result;
+}
+
+} // namespace VideoCommon
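
The Register()/InvalidatePagesInRegion() pair above tracks shaders at 16 KiB page granularity, so an invalidation only visits the pages it actually touches. A minimal standalone sketch of that bookkeeping, using illustrative names rather than anything from the patch:

    #include <cstdint>
    #include <unordered_map>
    #include <vector>

    struct Range {
        std::uint64_t begin; // inclusive start address
        std::uint64_t end;   // exclusive end address
    };

    constexpr std::uint64_t PAGE_BITS = 14; // 16 KiB pages, as in ShaderCache

    std::unordered_map<std::uint64_t, std::vector<Range>> pages;

    void Track(Range r) {
        const std::uint64_t page_end = (r.end + (1ULL << PAGE_BITS) - 1) >> PAGE_BITS;
        for (std::uint64_t page = r.begin >> PAGE_BITS; page < page_end; ++page) {
            pages[page].push_back(r); // every page the range touches gets a reference
        }
    }

    std::vector<Range> Overlapping(Range r) {
        std::vector<Range> hits;
        const std::uint64_t page_end = (r.end + (1ULL << PAGE_BITS) - 1) >> PAGE_BITS;
        for (std::uint64_t page = r.begin >> PAGE_BITS; page < page_end; ++page) {
            const auto it = pages.find(page);
            if (it == pages.end()) {
                continue;
            }
            for (const Range& e : it->second) {
                if (r.begin < e.end && e.begin < r.end) { // same test as Entry::Overlaps
                    hits.push_back(e);
                }
            }
        }
        return hits;
    }

A real invalidation also has to deduplicate hits, since a range spanning several pages is stored once per page; RemovePendingShaders() above does exactly that with a sort followed by std::unique.
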
diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h
index 015a789d6..136fe294c 100644
--- a/src/video_core/shader_cache.h
+++ b/src/video_core/shader_cache.h
@@ -5,226 +5,147 @@
#pragma once
#include <algorithm>
+#include <array>
#include <memory>
#include <mutex>
+#include <span>
#include <unordered_map>
#include <utility>
#include <vector>
-#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/rasterizer_interface.h"
+#include "video_core/shader_environment.h"
+
+namespace Tegra {
+class MemoryManager;
+}
namespace VideoCommon {
-template <class T>
+class GenericEnvironment;
+
+struct ShaderInfo {
+ u64 unique_hash{};
+ size_t size_bytes{};
+};
+
class ShaderCache {
static constexpr u64 PAGE_BITS = 14;
static constexpr u64 PAGE_SIZE = u64(1) << PAGE_BITS;
+ static constexpr size_t NUM_PROGRAMS = 6;
+
struct Entry {
VAddr addr_start;
VAddr addr_end;
- T* data;
+ ShaderInfo* data;
bool is_memory_marked = true;
- constexpr bool Overlaps(VAddr start, VAddr end) const noexcept {
+ bool Overlaps(VAddr start, VAddr end) const noexcept {
return start < addr_end && addr_start < end;
}
};
public:
- virtual ~ShaderCache() = default;
-
/// @brief Removes shaders inside a given region
/// @note Checks for ranges
/// @param addr Start address of the invalidation
/// @param size Number of bytes of the invalidation
- void InvalidateRegion(VAddr addr, std::size_t size) {
- std::scoped_lock lock{invalidation_mutex};
- InvalidatePagesInRegion(addr, size);
- RemovePendingShaders();
- }
+ void InvalidateRegion(VAddr addr, size_t size);
/// @brief Unmarks a memory region as cached and marks it for removal
/// @param addr Start address of the CPU write operation
/// @param size Number of bytes of the CPU write operation
- void OnCPUWrite(VAddr addr, std::size_t size) {
- std::lock_guard lock{invalidation_mutex};
- InvalidatePagesInRegion(addr, size);
- }
+ void OnCPUWrite(VAddr addr, size_t size);
/// @brief Flushes delayed removal operations
- void SyncGuestHost() {
- std::scoped_lock lock{invalidation_mutex};
- RemovePendingShaders();
- }
+ void SyncGuestHost();
- /// @brief Tries to obtain a cached shader starting in a given address
- /// @note Doesn't check for ranges, the given address has to be the start of the shader
- /// @param addr Start address of the shader, this doesn't cache for region
- /// @return Pointer to a valid shader, nullptr when nothing is found
- T* TryGet(VAddr addr) const {
- std::scoped_lock lock{lookup_mutex};
+protected:
+ struct GraphicsEnvironments {
+ std::array<GraphicsEnvironment, NUM_PROGRAMS> envs;
+ std::array<Shader::Environment*, NUM_PROGRAMS> env_ptrs;
- const auto it = lookup_cache.find(addr);
- if (it == lookup_cache.end()) {
- return nullptr;
+ std::span<Shader::Environment* const> Span() const noexcept {
+ return std::span(env_ptrs.begin(), std::ranges::find(env_ptrs, nullptr));
}
- return it->second->data;
- }
-
-protected:
- explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_) : rasterizer{rasterizer_} {}
+ };
- /// @brief Register in the cache a given entry
- /// @param data Shader to store in the cache
- /// @param addr Start address of the shader that will be registered
- /// @param size Size in bytes of the shader
- void Register(std::unique_ptr<T> data, VAddr addr, std::size_t size) {
- std::scoped_lock lock{invalidation_mutex, lookup_mutex};
+ explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_,
+ Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_,
+ Tegra::Engines::KeplerCompute& kepler_compute_);
- const VAddr addr_end = addr + size;
- Entry* const entry = NewEntry(addr, addr_end, data.get());
+ /// @brief Update the hashes and information of shader stages
+ /// @param unique_hashes Shader hashes to store into when a stage is enabled
+ /// @return True on success, false on error
+ bool RefreshStages(std::array<u64, NUM_PROGRAMS>& unique_hashes);
- const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
- for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) {
- invalidation_cache[page].push_back(entry);
- }
+ /// @brief Returns information about the current compute shader
+ /// @return Pointer to a valid shader, nullptr on error
+ const ShaderInfo* ComputeShader();
- storage.push_back(std::move(data));
+ /// @brief Collect the current graphics environments
+ void GetGraphicsEnvironments(GraphicsEnvironments& result,
+ const std::array<u64, NUM_PROGRAMS>& unique_hashes);
- rasterizer.UpdatePagesCachedCount(addr, size, 1);
- }
+ Tegra::MemoryManager& gpu_memory;
+ Tegra::Engines::Maxwell3D& maxwell3d;
+ Tegra::Engines::KeplerCompute& kepler_compute;
- /// @brief Called when a shader is going to be removed
- /// @param shader Shader that will be removed
- /// @pre invalidation_cache is locked
- /// @pre lookup_mutex is locked
- virtual void OnShaderRemoval([[maybe_unused]] T* shader) {}
+ std::array<const ShaderInfo*, NUM_PROGRAMS> shader_infos{};
+ bool last_shaders_valid = false;
private:
+ /// @brief Tries to obtain a cached shader starting in a given address
+ /// @note Doesn't check for ranges, the given address has to be the start of the shader
+ /// @param addr Start address of the shader, this doesn't cache for region
+ /// @return Pointer to a valid shader, nullptr when nothing is found
+ ShaderInfo* TryGet(VAddr addr) const;
+
+ /// @brief Register in the cache a given entry
+ /// @param data Shader to store in the cache
+ /// @param addr Start address of the shader that will be registered
+ /// @param size Size in bytes of the shader
+ void Register(std::unique_ptr<ShaderInfo> data, VAddr addr, size_t size);
+
/// @brief Invalidate pages in a given region
/// @pre invalidation_mutex is locked
- void InvalidatePagesInRegion(VAddr addr, std::size_t size) {
- const VAddr addr_end = addr + size;
- const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
- for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) {
- auto it = invalidation_cache.find(page);
- if (it == invalidation_cache.end()) {
- continue;
- }
- InvalidatePageEntries(it->second, addr, addr_end);
- }
- }
+ void InvalidatePagesInRegion(VAddr addr, size_t size);
/// @brief Remove shaders marked for deletion
/// @pre invalidation_mutex is locked
- void RemovePendingShaders() {
- if (marked_for_removal.empty()) {
- return;
- }
- // Remove duplicates
- std::sort(marked_for_removal.begin(), marked_for_removal.end());
- marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()),
- marked_for_removal.end());
-
- std::vector<T*> removed_shaders;
- removed_shaders.reserve(marked_for_removal.size());
-
- std::scoped_lock lock{lookup_mutex};
-
- for (Entry* const entry : marked_for_removal) {
- removed_shaders.push_back(entry->data);
-
- const auto it = lookup_cache.find(entry->addr_start);
- ASSERT(it != lookup_cache.end());
- lookup_cache.erase(it);
- }
- marked_for_removal.clear();
-
- if (!removed_shaders.empty()) {
- RemoveShadersFromStorage(std::move(removed_shaders));
- }
- }
+ void RemovePendingShaders();
/// @brief Invalidates entries in a given range for the passed page
/// @param entries Vector of entries in the page, it will be modified on overlaps
/// @param addr Start address of the invalidation
/// @param addr_end Non-inclusive end address of the invalidation
/// @pre invalidation_mutex is locked
- void InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end) {
- std::size_t index = 0;
- while (index < entries.size()) {
- Entry* const entry = entries[index];
- if (!entry->Overlaps(addr, addr_end)) {
- ++index;
- continue;
- }
-
- UnmarkMemory(entry);
- RemoveEntryFromInvalidationCache(entry);
- marked_for_removal.push_back(entry);
- }
- }
+ void InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end);
/// @brief Removes all references to an entry in the invalidation cache
/// @param entry Entry to remove from the invalidation cache
/// @pre invalidation_mutex is locked
- void RemoveEntryFromInvalidationCache(const Entry* entry) {
- const u64 page_end = (entry->addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
- for (u64 page = entry->addr_start >> PAGE_BITS; page < page_end; ++page) {
- const auto entries_it = invalidation_cache.find(page);
- ASSERT(entries_it != invalidation_cache.end());
- std::vector<Entry*>& entries = entries_it->second;
-
- const auto entry_it = std::find(entries.begin(), entries.end(), entry);
- ASSERT(entry_it != entries.end());
- entries.erase(entry_it);
- }
- }
+ void RemoveEntryFromInvalidationCache(const Entry* entry);
/// @brief Unmarks an entry from the rasterizer cache
/// @param entry Entry to unmark from memory
- void UnmarkMemory(Entry* entry) {
- if (!entry->is_memory_marked) {
- return;
- }
- entry->is_memory_marked = false;
-
- const VAddr addr = entry->addr_start;
- const std::size_t size = entry->addr_end - addr;
- rasterizer.UpdatePagesCachedCount(addr, size, -1);
- }
+ void UnmarkMemory(Entry* entry);
/// @brief Removes a vector of shaders from a list
/// @param removed_shaders Shaders to be removed from the storage
/// @pre invalidation_mutex is locked
/// @pre lookup_mutex is locked
- void RemoveShadersFromStorage(std::vector<T*> removed_shaders) {
- // Notify removals
- for (T* const shader : removed_shaders) {
- OnShaderRemoval(shader);
- }
-
- // Remove them from the cache
- const auto is_removed = [&removed_shaders](const std::unique_ptr<T>& shader) {
- return std::find(removed_shaders.begin(), removed_shaders.end(), shader.get()) !=
- removed_shaders.end();
- };
- std::erase_if(storage, is_removed);
- }
+ void RemoveShadersFromStorage(std::vector<ShaderInfo*> removed_shaders);
/// @brief Creates a new entry in the lookup cache and returns its pointer
/// @pre lookup_mutex is locked
- Entry* NewEntry(VAddr addr, VAddr addr_end, T* data) {
- auto entry = std::make_unique<Entry>(Entry{addr, addr_end, data});
- Entry* const entry_pointer = entry.get();
+ Entry* NewEntry(VAddr addr, VAddr addr_end, ShaderInfo* data);
- lookup_cache.emplace(addr, std::move(entry));
- return entry_pointer;
- }
+ /// @brief Create a new shader entry and register it
+ const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr);
VideoCore::RasterizerInterface& rasterizer;
@@ -233,7 +154,7 @@ private:
std::unordered_map<u64, std::unique_ptr<Entry>> lookup_cache;
std::unordered_map<u64, std::vector<Entry*>> invalidation_cache;
- std::vector<std::unique_ptr<T>> storage;
+ std::vector<std::unique_ptr<ShaderInfo>> storage;
std::vector<Entry*> marked_for_removal;
};
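
GraphicsEnvironments::Span() above relies on the unused env_ptrs slots staying null, so the populated prefix can be handed out without a separate count. A small illustration of the same pattern with plain ints (nothing here is from the patch):

    #include <algorithm>
    #include <array>
    #include <cstddef>
    #include <span>

    std::size_t CountUsed(const std::array<int*, 6>& env_ptrs) {
        // The span covers the populated prefix and stops at the first nullptr slot.
        const std::span<int* const> used(env_ptrs.begin(),
                                         std::ranges::find(env_ptrs, nullptr));
        return used.size();
    }
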
diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp
new file mode 100644
index 000000000..8a4581c19
--- /dev/null
+++ b/src/video_core/shader_environment.cpp
@@ -0,0 +1,460 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <filesystem>
+#include <fstream>
+#include <memory>
+#include <optional>
+#include <utility>
+
+#include "common/assert.h"
+#include "common/cityhash.h"
+#include "common/common_types.h"
+#include "common/div_ceil.h"
+#include "common/fs/fs.h"
+#include "common/logging/log.h"
+#include "shader_recompiler/environment.h"
+#include "video_core/memory_manager.h"
+#include "video_core/shader_environment.h"
+#include "video_core/textures/texture.h"
+
+namespace VideoCommon {
+
+constexpr std::array<char, 8> MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'};
+
+constexpr size_t INST_SIZE = sizeof(u64);
+
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+
+static u64 MakeCbufKey(u32 index, u32 offset) {
+ return (static_cast<u64>(index) << 32) | offset;
+}
+
+static Shader::TextureType ConvertType(const Tegra::Texture::TICEntry& entry) {
+ switch (entry.texture_type) {
+ case Tegra::Texture::TextureType::Texture1D:
+ return Shader::TextureType::Color1D;
+ case Tegra::Texture::TextureType::Texture2D:
+ case Tegra::Texture::TextureType::Texture2DNoMipmap:
+ return Shader::TextureType::Color2D;
+ case Tegra::Texture::TextureType::Texture3D:
+ return Shader::TextureType::Color3D;
+ case Tegra::Texture::TextureType::TextureCubemap:
+ return Shader::TextureType::ColorCube;
+ case Tegra::Texture::TextureType::Texture1DArray:
+ return Shader::TextureType::ColorArray1D;
+ case Tegra::Texture::TextureType::Texture2DArray:
+ return Shader::TextureType::ColorArray2D;
+ case Tegra::Texture::TextureType::Texture1DBuffer:
+ return Shader::TextureType::Buffer;
+ case Tegra::Texture::TextureType::TextureCubeArray:
+ return Shader::TextureType::ColorArrayCube;
+ default:
+ throw Shader::NotImplementedException("Unknown texture type");
+ }
+}
+
+GenericEnvironment::GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
+ u32 start_address_)
+ : gpu_memory{&gpu_memory_}, program_base{program_base_} {
+ start_address = start_address_;
+}
+
+GenericEnvironment::~GenericEnvironment() = default;
+
+u32 GenericEnvironment::TextureBoundBuffer() const {
+ return texture_bound;
+}
+
+u32 GenericEnvironment::LocalMemorySize() const {
+ return local_memory_size;
+}
+
+u32 GenericEnvironment::SharedMemorySize() const {
+ return shared_memory_size;
+}
+
+std::array<u32, 3> GenericEnvironment::WorkgroupSize() const {
+ return workgroup_size;
+}
+
+u64 GenericEnvironment::ReadInstruction(u32 address) {
+ read_lowest = std::min(read_lowest, address);
+ read_highest = std::max(read_highest, address);
+
+ if (address >= cached_lowest && address < cached_highest) {
+ return code[(address - cached_lowest) / INST_SIZE];
+ }
+ has_unbound_instructions = true;
+ return gpu_memory->Read<u64>(program_base + address);
+}
+
+std::optional<u64> GenericEnvironment::Analyze() {
+ const std::optional<u64> size{TryFindSize()};
+ if (!size) {
+ return std::nullopt;
+ }
+ cached_lowest = start_address;
+ cached_highest = start_address + static_cast<u32>(*size);
+ return Common::CityHash64(reinterpret_cast<const char*>(code.data()), *size);
+}
+
+void GenericEnvironment::SetCachedSize(size_t size_bytes) {
+ cached_lowest = start_address;
+ cached_highest = start_address + static_cast<u32>(size_bytes);
+ code.resize(CachedSize());
+ gpu_memory->ReadBlock(program_base + cached_lowest, code.data(), code.size() * sizeof(u64));
+}
+
+size_t GenericEnvironment::CachedSize() const noexcept {
+ return cached_highest - cached_lowest + INST_SIZE;
+}
+
+size_t GenericEnvironment::ReadSize() const noexcept {
+ return read_highest - read_lowest + INST_SIZE;
+}
+
+bool GenericEnvironment::CanBeSerialized() const noexcept {
+ return !has_unbound_instructions;
+}
+
+u64 GenericEnvironment::CalculateHash() const {
+ const size_t size{ReadSize()};
+ const auto data{std::make_unique<char[]>(size)};
+ gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size);
+ return Common::CityHash64(data.get(), size);
+}
+
+void GenericEnvironment::Serialize(std::ofstream& file) const {
+ const u64 code_size{static_cast<u64>(CachedSize())};
+ const u64 num_texture_types{static_cast<u64>(texture_types.size())};
+ const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())};
+
+ file.write(reinterpret_cast<const char*>(&code_size), sizeof(code_size))
+ .write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types))
+ .write(reinterpret_cast<const char*>(&num_cbuf_values), sizeof(num_cbuf_values))
+ .write(reinterpret_cast<const char*>(&local_memory_size), sizeof(local_memory_size))
+ .write(reinterpret_cast<const char*>(&texture_bound), sizeof(texture_bound))
+ .write(reinterpret_cast<const char*>(&start_address), sizeof(start_address))
+ .write(reinterpret_cast<const char*>(&cached_lowest), sizeof(cached_lowest))
+ .write(reinterpret_cast<const char*>(&cached_highest), sizeof(cached_highest))
+ .write(reinterpret_cast<const char*>(&stage), sizeof(stage))
+ .write(reinterpret_cast<const char*>(code.data()), code_size);
+ for (const auto [key, type] : texture_types) {
+ file.write(reinterpret_cast<const char*>(&key), sizeof(key))
+ .write(reinterpret_cast<const char*>(&type), sizeof(type));
+ }
+ for (const auto [key, type] : cbuf_values) {
+ file.write(reinterpret_cast<const char*>(&key), sizeof(key))
+ .write(reinterpret_cast<const char*>(&type), sizeof(type));
+ }
+ if (stage == Shader::Stage::Compute) {
+ file.write(reinterpret_cast<const char*>(&workgroup_size), sizeof(workgroup_size))
+ .write(reinterpret_cast<const char*>(&shared_memory_size), sizeof(shared_memory_size));
+ } else {
+ file.write(reinterpret_cast<const char*>(&sph), sizeof(sph));
+ if (stage == Shader::Stage::Geometry) {
+ file.write(reinterpret_cast<const char*>(&gp_passthrough_mask),
+ sizeof(gp_passthrough_mask));
+ }
+ }
+}
+
+std::optional<u64> GenericEnvironment::TryFindSize() {
+ static constexpr size_t BLOCK_SIZE = 0x1000;
+ static constexpr size_t MAXIMUM_SIZE = 0x100000;
+
+ static constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL;
+ static constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL;
+
+ GPUVAddr guest_addr{program_base + start_address};
+ size_t offset{0};
+ size_t size{BLOCK_SIZE};
+ while (size <= MAXIMUM_SIZE) {
+ code.resize(size / INST_SIZE);
+ u64* const data = code.data() + offset / INST_SIZE;
+ gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE);
+ for (size_t index = 0; index < BLOCK_SIZE; index += INST_SIZE) {
+ const u64 inst = data[index / INST_SIZE];
+ if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) {
+ return offset + index;
+ }
+ }
+ guest_addr += BLOCK_SIZE;
+ size += BLOCK_SIZE;
+ offset += BLOCK_SIZE;
+ }
+ return std::nullopt;
+}
+
+Shader::TextureType GenericEnvironment::ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit,
+ bool via_header_index, u32 raw) {
+ const auto handle{Tegra::Texture::TexturePair(raw, via_header_index)};
+ const GPUVAddr descriptor_addr{tic_addr + handle.first * sizeof(Tegra::Texture::TICEntry)};
+ Tegra::Texture::TICEntry entry;
+ gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry));
+ const Shader::TextureType result{ConvertType(entry)};
+ texture_types.emplace(raw, result);
+ return result;
+}
+
+GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_,
+ Tegra::MemoryManager& gpu_memory_,
+ Maxwell::ShaderProgram program, GPUVAddr program_base_,
+ u32 start_address_)
+ : GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} {
+ gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph));
+ gp_passthrough_mask = maxwell3d->regs.gp_passthrough_mask;
+ switch (program) {
+ case Maxwell::ShaderProgram::VertexA:
+ stage = Shader::Stage::VertexA;
+ stage_index = 0;
+ break;
+ case Maxwell::ShaderProgram::VertexB:
+ stage = Shader::Stage::VertexB;
+ stage_index = 0;
+ break;
+ case Maxwell::ShaderProgram::TesselationControl:
+ stage = Shader::Stage::TessellationControl;
+ stage_index = 1;
+ break;
+ case Maxwell::ShaderProgram::TesselationEval:
+ stage = Shader::Stage::TessellationEval;
+ stage_index = 2;
+ break;
+ case Maxwell::ShaderProgram::Geometry:
+ stage = Shader::Stage::Geometry;
+ stage_index = 3;
+ break;
+ case Maxwell::ShaderProgram::Fragment:
+ stage = Shader::Stage::Fragment;
+ stage_index = 4;
+ break;
+ default:
+ UNREACHABLE_MSG("Invalid program={}", program);
+ break;
+ }
+ const u64 local_size{sph.LocalMemorySize()};
+ ASSERT(local_size <= std::numeric_limits<u32>::max());
+ local_memory_size = static_cast<u32>(local_size) + sph.common3.shader_local_memory_crs_size;
+ texture_bound = maxwell3d->regs.tex_cb_index;
+}
+
+u32 GraphicsEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) {
+ const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]};
+ ASSERT(cbuf.enabled);
+ u32 value{};
+ if (cbuf_offset < cbuf.size) {
+ value = gpu_memory->Read<u32>(cbuf.address + cbuf_offset);
+ }
+ cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value);
+ return value;
+}
+
+Shader::TextureType GraphicsEnvironment::ReadTextureType(u32 handle) {
+ const auto& regs{maxwell3d->regs};
+ const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex};
+ return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, via_header_index, handle);
+}
+
+ComputeEnvironment::ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_,
+ Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
+ u32 start_address_)
+ : GenericEnvironment{gpu_memory_, program_base_, start_address_}, kepler_compute{
+ &kepler_compute_} {
+ const auto& qmd{kepler_compute->launch_description};
+ stage = Shader::Stage::Compute;
+ local_memory_size = qmd.local_pos_alloc + qmd.local_crs_alloc;
+ texture_bound = kepler_compute->regs.tex_cb_index;
+ shared_memory_size = qmd.shared_alloc;
+ workgroup_size = {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z};
+}
+
+u32 ComputeEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) {
+ const auto& qmd{kepler_compute->launch_description};
+ ASSERT(((qmd.const_buffer_enable_mask.Value() >> cbuf_index) & 1) != 0);
+ const auto& cbuf{qmd.const_buffer_config[cbuf_index]};
+ u32 value{};
+ if (cbuf_offset < cbuf.size) {
+ value = gpu_memory->Read<u32>(cbuf.Address() + cbuf_offset);
+ }
+ cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value);
+ return value;
+}
+
+Shader::TextureType ComputeEnvironment::ReadTextureType(u32 handle) {
+ const auto& regs{kepler_compute->regs};
+ const auto& qmd{kepler_compute->launch_description};
+ return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle);
+}
+
+void FileEnvironment::Deserialize(std::ifstream& file) {
+ u64 code_size{};
+ u64 num_texture_types{};
+ u64 num_cbuf_values{};
+ file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size))
+ .read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types))
+ .read(reinterpret_cast<char*>(&num_cbuf_values), sizeof(num_cbuf_values))
+ .read(reinterpret_cast<char*>(&local_memory_size), sizeof(local_memory_size))
+ .read(reinterpret_cast<char*>(&texture_bound), sizeof(texture_bound))
+ .read(reinterpret_cast<char*>(&start_address), sizeof(start_address))
+ .read(reinterpret_cast<char*>(&read_lowest), sizeof(read_lowest))
+ .read(reinterpret_cast<char*>(&read_highest), sizeof(read_highest))
+ .read(reinterpret_cast<char*>(&stage), sizeof(stage));
+ code = std::make_unique<u64[]>(Common::DivCeil(code_size, sizeof(u64)));
+ file.read(reinterpret_cast<char*>(code.get()), code_size);
+ for (size_t i = 0; i < num_texture_types; ++i) {
+ u32 key;
+ Shader::TextureType type;
+ file.read(reinterpret_cast<char*>(&key), sizeof(key))
+ .read(reinterpret_cast<char*>(&type), sizeof(type));
+ texture_types.emplace(key, type);
+ }
+ for (size_t i = 0; i < num_cbuf_values; ++i) {
+ u64 key;
+ u32 value;
+ file.read(reinterpret_cast<char*>(&key), sizeof(key))
+ .read(reinterpret_cast<char*>(&value), sizeof(value));
+ cbuf_values.emplace(key, value);
+ }
+ if (stage == Shader::Stage::Compute) {
+ file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size))
+ .read(reinterpret_cast<char*>(&shared_memory_size), sizeof(shared_memory_size));
+ } else {
+ file.read(reinterpret_cast<char*>(&sph), sizeof(sph));
+ if (stage == Shader::Stage::Geometry) {
+ file.read(reinterpret_cast<char*>(&gp_passthrough_mask), sizeof(gp_passthrough_mask));
+ }
+ }
+}
+
+u64 FileEnvironment::ReadInstruction(u32 address) {
+ if (address < read_lowest || address > read_highest) {
+ throw Shader::LogicError("Out of bounds address {}", address);
+ }
+ return code[(address - read_lowest) / sizeof(u64)];
+}
+
+u32 FileEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) {
+ const auto it{cbuf_values.find(MakeCbufKey(cbuf_index, cbuf_offset))};
+ if (it == cbuf_values.end()) {
+ throw Shader::LogicError("Uncached read texture type");
+ }
+ return it->second;
+}
+
+Shader::TextureType FileEnvironment::ReadTextureType(u32 handle) {
+ const auto it{texture_types.find(handle)};
+ if (it == texture_types.end()) {
+ throw Shader::LogicError("Uncached read texture type");
+ }
+ return it->second;
+}
+
+u32 FileEnvironment::LocalMemorySize() const {
+ return local_memory_size;
+}
+
+u32 FileEnvironment::SharedMemorySize() const {
+ return shared_memory_size;
+}
+
+u32 FileEnvironment::TextureBoundBuffer() const {
+ return texture_bound;
+}
+
+std::array<u32, 3> FileEnvironment::WorkgroupSize() const {
+ return workgroup_size;
+}
+
+void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs,
+ const std::filesystem::path& filename, u32 cache_version) try {
+ std::ofstream file(filename, std::ios::binary | std::ios::ate | std::ios::app);
+ file.exceptions(std::ifstream::failbit);
+ if (!file.is_open()) {
+ LOG_ERROR(Common_Filesystem, "Failed to open pipeline cache file {}",
+ Common::FS::PathToUTF8String(filename));
+ return;
+ }
+ if (file.tellp() == 0) {
+ // Write header
+ file.write(MAGIC_NUMBER.data(), MAGIC_NUMBER.size())
+ .write(reinterpret_cast<const char*>(&cache_version), sizeof(cache_version));
+ }
+ if (!std::ranges::all_of(envs, &GenericEnvironment::CanBeSerialized)) {
+ return;
+ }
+ const u32 num_envs{static_cast<u32>(envs.size())};
+ file.write(reinterpret_cast<const char*>(&num_envs), sizeof(num_envs));
+ for (const GenericEnvironment* const env : envs) {
+ env->Serialize(file);
+ }
+ file.write(key.data(), key.size_bytes());
+
+} catch (const std::ios_base::failure& e) {
+ LOG_ERROR(Common_Filesystem, "{}", e.what());
+ if (!Common::FS::RemoveFile(filename)) {
+ LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}",
+ Common::FS::PathToUTF8String(filename));
+ }
+}
+
+void LoadPipelines(
+ std::stop_token stop_loading, const std::filesystem::path& filename, u32 expected_cache_version,
+ Common::UniqueFunction<void, std::ifstream&, FileEnvironment> load_compute,
+ Common::UniqueFunction<void, std::ifstream&, std::vector<FileEnvironment>> load_graphics) try {
+ std::ifstream file(filename, std::ios::binary | std::ios::ate);
+ if (!file.is_open()) {
+ return;
+ }
+ file.exceptions(std::ifstream::failbit);
+ const auto end{file.tellg()};
+ file.seekg(0, std::ios::beg);
+
+ std::array<char, 8> magic_number;
+ u32 cache_version;
+ file.read(magic_number.data(), magic_number.size())
+ .read(reinterpret_cast<char*>(&cache_version), sizeof(cache_version));
+ if (magic_number != MAGIC_NUMBER || cache_version != expected_cache_version) {
+ file.close();
+ if (Common::FS::RemoveFile(filename)) {
+ if (magic_number != MAGIC_NUMBER) {
+ LOG_ERROR(Common_Filesystem, "Invalid pipeline cache file");
+ }
+ if (cache_version != expected_cache_version) {
+ LOG_INFO(Common_Filesystem, "Deleting old pipeline cache");
+ }
+ } else {
+ LOG_ERROR(Common_Filesystem,
+ "Invalid pipeline cache file and failed to delete it in \"{}\"",
+ Common::FS::PathToUTF8String(filename));
+ }
+ return;
+ }
+ while (file.tellg() != end) {
+ if (stop_loading.stop_requested()) {
+ return;
+ }
+ u32 num_envs{};
+ file.read(reinterpret_cast<char*>(&num_envs), sizeof(num_envs));
+ std::vector<FileEnvironment> envs(num_envs);
+ for (FileEnvironment& env : envs) {
+ env.Deserialize(file);
+ }
+ if (envs.front().ShaderStage() == Shader::Stage::Compute) {
+ load_compute(file, std::move(envs.front()));
+ } else {
+ load_graphics(file, std::move(envs));
+ }
+ }
+
+} catch (const std::ios_base::failure& e) {
+ LOG_ERROR(Common_Filesystem, "{}", e.what());
+ if (!Common::FS::RemoveFile(filename)) {
+ LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}",
+ Common::FS::PathToUTF8String(filename));
+ }
+}
+
+} // namespace VideoCommon
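
TryFindSize() above works because Maxwell programs are terminated (and padded) with a branch-to-self instruction; the function streams guest memory in 4 KiB blocks until one of the two known encodings appears. A condensed sketch of the same scan over an already-resident buffer, with hypothetical names:

    #include <cstdint>
    #include <optional>
    #include <span>

    std::optional<std::size_t> FindProgramSize(std::span<const std::uint64_t> code) {
        constexpr std::uint64_t SELF_BRANCH_A = 0xE2400FFFFF87000FULL;
        constexpr std::uint64_t SELF_BRANCH_B = 0xE2400FFFFF07000FULL;
        for (std::size_t i = 0; i < code.size(); ++i) {
            if (code[i] == SELF_BRANCH_A || code[i] == SELF_BRANCH_B) {
                return i * sizeof(std::uint64_t); // byte offset of the terminator
            }
        }
        return std::nullopt; // caller falls back to hashing whatever was actually read
    }
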
diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h
new file mode 100644
index 000000000..2079979db
--- /dev/null
+++ b/src/video_core/shader_environment.h
@@ -0,0 +1,183 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <atomic>
+#include <filesystem>
+#include <iosfwd>
+#include <limits>
+#include <memory>
+#include <optional>
+#include <span>
+#include <type_traits>
+#include <unordered_map>
+#include <vector>
+
+#include "common/common_types.h"
+#include "common/unique_function.h"
+#include "shader_recompiler/environment.h"
+#include "video_core/engines/kepler_compute.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/textures/texture.h"
+
+namespace Tegra {
+class MemoryManager;
+}
+
+namespace VideoCommon {
+
+class GenericEnvironment : public Shader::Environment {
+public:
+ explicit GenericEnvironment() = default;
+ explicit GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
+ u32 start_address_);
+
+ ~GenericEnvironment() override;
+
+ [[nodiscard]] u32 TextureBoundBuffer() const final;
+
+ [[nodiscard]] u32 LocalMemorySize() const final;
+
+ [[nodiscard]] u32 SharedMemorySize() const final;
+
+ [[nodiscard]] std::array<u32, 3> WorkgroupSize() const final;
+
+ [[nodiscard]] u64 ReadInstruction(u32 address) final;
+
+ [[nodiscard]] std::optional<u64> Analyze();
+
+ void SetCachedSize(size_t size_bytes);
+
+ [[nodiscard]] size_t CachedSize() const noexcept;
+
+ [[nodiscard]] size_t ReadSize() const noexcept;
+
+ [[nodiscard]] bool CanBeSerialized() const noexcept;
+
+ [[nodiscard]] u64 CalculateHash() const;
+
+ void Serialize(std::ofstream& file) const;
+
+protected:
+ std::optional<u64> TryFindSize();
+
+ Shader::TextureType ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index,
+ u32 raw);
+
+ Tegra::MemoryManager* gpu_memory{};
+ GPUVAddr program_base{};
+
+ std::vector<u64> code;
+ std::unordered_map<u32, Shader::TextureType> texture_types;
+ std::unordered_map<u64, u32> cbuf_values;
+
+ u32 local_memory_size{};
+ u32 texture_bound{};
+ u32 shared_memory_size{};
+ std::array<u32, 3> workgroup_size{};
+
+ u32 read_lowest = std::numeric_limits<u32>::max();
+ u32 read_highest = 0;
+
+ u32 cached_lowest = std::numeric_limits<u32>::max();
+ u32 cached_highest = 0;
+
+ bool has_unbound_instructions = false;
+};
+
+class GraphicsEnvironment final : public GenericEnvironment {
+public:
+ explicit GraphicsEnvironment() = default;
+ explicit GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_,
+ Tegra::MemoryManager& gpu_memory_,
+ Tegra::Engines::Maxwell3D::Regs::ShaderProgram program,
+ GPUVAddr program_base_, u32 start_address_);
+
+ ~GraphicsEnvironment() override = default;
+
+ u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override;
+
+ Shader::TextureType ReadTextureType(u32 handle) override;
+
+private:
+ Tegra::Engines::Maxwell3D* maxwell3d{};
+ size_t stage_index{};
+};
+
+class ComputeEnvironment final : public GenericEnvironment {
+public:
+ explicit ComputeEnvironment() = default;
+ explicit ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_,
+ Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
+ u32 start_address_);
+
+ ~ComputeEnvironment() override = default;
+
+ u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override;
+
+ Shader::TextureType ReadTextureType(u32 handle) override;
+
+private:
+ Tegra::Engines::KeplerCompute* kepler_compute{};
+};
+
+class FileEnvironment final : public Shader::Environment {
+public:
+ FileEnvironment() = default;
+ ~FileEnvironment() override = default;
+
+ FileEnvironment& operator=(FileEnvironment&&) noexcept = default;
+ FileEnvironment(FileEnvironment&&) noexcept = default;
+
+ FileEnvironment& operator=(const FileEnvironment&) = delete;
+ FileEnvironment(const FileEnvironment&) = delete;
+
+ void Deserialize(std::ifstream& file);
+
+ [[nodiscard]] u64 ReadInstruction(u32 address) override;
+
+ [[nodiscard]] u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override;
+
+ [[nodiscard]] Shader::TextureType ReadTextureType(u32 handle) override;
+
+ [[nodiscard]] u32 LocalMemorySize() const override;
+
+ [[nodiscard]] u32 SharedMemorySize() const override;
+
+ [[nodiscard]] u32 TextureBoundBuffer() const override;
+
+ [[nodiscard]] std::array<u32, 3> WorkgroupSize() const override;
+
+private:
+ std::unique_ptr<u64[]> code;
+ std::unordered_map<u32, Shader::TextureType> texture_types;
+ std::unordered_map<u64, u32> cbuf_values;
+ std::array<u32, 3> workgroup_size{};
+ u32 local_memory_size{};
+ u32 shared_memory_size{};
+ u32 texture_bound{};
+ u32 read_lowest{};
+ u32 read_highest{};
+};
+
+void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs,
+ const std::filesystem::path& filename, u32 cache_version);
+
+template <typename Key, typename Envs>
+void SerializePipeline(const Key& key, const Envs& envs, const std::filesystem::path& filename,
+ u32 cache_version) {
+ static_assert(std::is_trivially_copyable_v<Key>);
+ static_assert(std::has_unique_object_representations_v<Key>);
+ SerializePipeline(std::span(reinterpret_cast<const char*>(&key), sizeof(key)),
+ std::span(envs.data(), envs.size()), filename, cache_version);
+}
+
+void LoadPipelines(
+ std::stop_token stop_loading, const std::filesystem::path& filename, u32 expected_cache_version,
+ Common::UniqueFunction<void, std::ifstream&, FileEnvironment> load_compute,
+ Common::UniqueFunction<void, std::ifstream&, std::vector<FileEnvironment>> load_graphics);
+
+} // namespace VideoCommon
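
The templated SerializePipeline() overload above insists that the key be trivially copyable and free of padding, so its raw bytes can be written to and later compared against the cache file. A hypothetical key that satisfies both static_asserts (FixedPipelineKey is only an example, not a type from the patch):

    #include <cstdint>
    #include <type_traits>

    struct FixedPipelineKey {
        std::uint64_t unique_hashes[6]; // one hash per shader program slot
        std::uint32_t state_a;
        std::uint32_t state_b; // paired 4-byte fields keep the struct free of padding
    };
    static_assert(std::is_trivially_copyable_v<FixedPipelineKey>);
    static_assert(std::has_unique_object_representations_v<FixedPipelineKey>);

A key with, say, a lone bool next to a u32 would fail the second assertion, because the padding bytes would let equal keys have different byte patterns.
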
diff --git a/src/video_core/shader_notify.cpp b/src/video_core/shader_notify.cpp
index 693e47158..dc6995b46 100644
--- a/src/video_core/shader_notify.cpp
+++ b/src/video_core/shader_notify.cpp
@@ -2,42 +2,35 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include <mutex>
+#include <atomic>
+#include <chrono>
+#include <optional>
+
#include "video_core/shader_notify.h"
using namespace std::chrono_literals;
namespace VideoCore {
-namespace {
-constexpr auto UPDATE_TICK = 32ms;
-}
-
-ShaderNotify::ShaderNotify() = default;
-ShaderNotify::~ShaderNotify() = default;
-std::size_t ShaderNotify::GetShadersBuilding() {
- const auto now = std::chrono::high_resolution_clock::now();
- const auto diff = now - last_update;
- if (diff > UPDATE_TICK) {
- std::shared_lock lock(mutex);
- last_updated_count = accurate_count;
+const auto TIME_TO_STOP_REPORTING = 2s;
+
+int ShaderNotify::ShadersBuilding() noexcept {
+ const int now_complete = num_complete.load(std::memory_order::relaxed);
+ const int now_building = num_building.load(std::memory_order::relaxed);
+ if (now_complete == now_building) {
+ const auto now = std::chrono::high_resolution_clock::now();
+ if (completed && num_complete == num_when_completed) {
+ if (now - complete_time > TIME_TO_STOP_REPORTING) {
+ report_base = now_complete;
+ completed = false;
+ }
+ } else {
+ completed = true;
+ num_when_completed = num_complete;
+ complete_time = now;
+ }
}
- return last_updated_count;
-}
-
-std::size_t ShaderNotify::GetShadersBuildingAccurate() {
- std::shared_lock lock{mutex};
- return accurate_count;
-}
-
-void ShaderNotify::MarkShaderComplete() {
- std::unique_lock lock{mutex};
- accurate_count--;
-}
-
-void ShaderNotify::MarkSharderBuilding() {
- std::unique_lock lock{mutex};
- accurate_count++;
+ return now_building - report_base;
}
} // namespace VideoCore
diff --git a/src/video_core/shader_notify.h b/src/video_core/shader_notify.h
index a9c92d179..ad363bfb5 100644
--- a/src/video_core/shader_notify.h
+++ b/src/video_core/shader_notify.h
@@ -4,26 +4,30 @@
#pragma once
+#include <atomic>
#include <chrono>
-#include <shared_mutex>
-#include "common/common_types.h"
+#include <optional>
namespace VideoCore {
class ShaderNotify {
public:
- ShaderNotify();
- ~ShaderNotify();
+ [[nodiscard]] int ShadersBuilding() noexcept;
- std::size_t GetShadersBuilding();
- std::size_t GetShadersBuildingAccurate();
+ void MarkShaderComplete() noexcept {
+ ++num_complete;
+ }
- void MarkShaderComplete();
- void MarkSharderBuilding();
+ void MarkShaderBuilding() noexcept {
+ ++num_building;
+ }
private:
- std::size_t last_updated_count{};
- std::size_t accurate_count{};
- std::shared_mutex mutex;
- std::chrono::high_resolution_clock::time_point last_update{};
+ std::atomic_int num_building{};
+ std::atomic_int num_complete{};
+ int report_base{};
+
+ bool completed{};
+ int num_when_completed{};
+ std::chrono::high_resolution_clock::time_point complete_time;
};
} // namespace VideoCore
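
The rewritten ShaderNotify drops the shared_mutex-guarded counter in favour of two relaxed atomics: builders bump num_building when they start and num_complete when they finish, and the reader only ever takes the difference. A stripped-down sketch of that scheme (it omits the report_base/grace-period logic that ShadersBuilding() layers on top):

    #include <atomic>

    std::atomic_int num_building{0};
    std::atomic_int num_complete{0};

    void OnBuildStart() {
        num_building.fetch_add(1, std::memory_order_relaxed);
    }

    void OnBuildDone() {
        num_complete.fetch_add(1, std::memory_order_relaxed);
    }

    int ShadersInFlight() { // what a UI thread would poll every frame
        return num_building.load(std::memory_order_relaxed) -
               num_complete.load(std::memory_order_relaxed);
    }
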
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index 6308aef94..eb1746265 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -283,4 +283,11 @@ std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) {
return {DefaultBlockWidth(format), DefaultBlockHeight(format)};
}
+u64 EstimatedDecompressedSize(u64 base_size, PixelFormat format) {
+ constexpr u64 RGBA8_PIXEL_SIZE = 4;
+ const u64 base_block_size = static_cast<u64>(DefaultBlockWidth(format)) *
+ static_cast<u64>(DefaultBlockHeight(format)) * RGBA8_PIXEL_SIZE;
+ return (base_size * base_block_size) / BytesPerBlock(format);
+}
+
} // namespace VideoCore::Surface
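
EstimatedDecompressedSize() assumes the compressed image decodes to RGBA8, so every block of BytesPerBlock(format) bytes becomes block_width x block_height x 4 bytes. Worked example for ASTC 8x8, whose blocks are 16 bytes:

    #include <cstdint>

    constexpr std::uint64_t Estimate(std::uint64_t base_size, std::uint64_t block_w,
                                     std::uint64_t block_h, std::uint64_t bytes_per_block) {
        return base_size * block_w * block_h * 4 / bytes_per_block;
    }
    // An 8x8 block (16 bytes compressed) expands to 8 * 8 * 4 = 256 bytes,
    // i.e. a 16x growth in footprint.
    static_assert(Estimate(1024, 8, 8, 16) == 16 * 1024);
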
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index c40ab89d0..1503db81f 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -462,4 +462,6 @@ bool IsPixelFormatSRGB(PixelFormat format);
std::pair<u32, u32> GetASTCBlockSize(PixelFormat format);
+u64 EstimatedDecompressedSize(u64 base_size, PixelFormat format);
+
} // namespace VideoCore::Surface
diff --git a/src/video_core/texture_cache/formatter.cpp b/src/video_core/texture_cache/formatter.cpp
index d10ba4ccd..249cc4d0f 100644
--- a/src/video_core/texture_cache/formatter.cpp
+++ b/src/video_core/texture_cache/formatter.cpp
@@ -43,7 +43,7 @@ std::string Name(const ImageBase& image) {
return "Invalid";
}
-std::string Name(const ImageViewBase& image_view, std::optional<ImageViewType> type) {
+std::string Name(const ImageViewBase& image_view) {
const u32 width = image_view.size.width;
const u32 height = image_view.size.height;
const u32 depth = image_view.size.depth;
@@ -51,7 +51,7 @@ std::string Name(const ImageViewBase& image_view, std::optional<ImageViewType> t
const u32 num_layers = image_view.range.extent.layers;
const std::string level = num_levels > 1 ? fmt::format(":{}", num_levels) : "";
- switch (type.value_or(image_view.type)) {
+ switch (image_view.type) {
case ImageViewType::e1D:
return fmt::format("ImageView 1D {}{}", width, level);
case ImageViewType::e2D:
diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h
index a48413983..c6cf0583f 100644
--- a/src/video_core/texture_cache/formatter.h
+++ b/src/video_core/texture_cache/formatter.h
@@ -255,8 +255,7 @@ struct RenderTargets;
[[nodiscard]] std::string Name(const ImageBase& image);
-[[nodiscard]] std::string Name(const ImageViewBase& image_view,
- std::optional<ImageViewType> type = std::nullopt);
+[[nodiscard]] std::string Name(const ImageViewBase& image_view);
[[nodiscard]] std::string Name(const RenderTargets& render_targets);
diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp
index 9914926b3..6052d148a 100644
--- a/src/video_core/texture_cache/image_base.cpp
+++ b/src/video_core/texture_cache/image_base.cpp
@@ -69,6 +69,9 @@ ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_
}
}
+ImageMapView::ImageMapView(GPUVAddr gpu_addr_, VAddr cpu_addr_, size_t size_, ImageId image_id_)
+ : gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_}, size{size_}, image_id{image_id_} {}
+
std::optional<SubresourceBase> ImageBase::TryFindBase(GPUVAddr other_addr) const noexcept {
if (other_addr < gpu_addr) {
// Subresource address can't be lower than the base
@@ -82,7 +85,7 @@ std::optional<SubresourceBase> ImageBase::TryFindBase(GPUVAddr other_addr) const
if (info.type != ImageType::e3D) {
const auto [layer, mip_offset] = LayerMipOffset(diff, info.layer_stride);
const auto end = mip_level_offsets.begin() + info.resources.levels;
- const auto it = std::find(mip_level_offsets.begin(), end, mip_offset);
+ const auto it = std::find(mip_level_offsets.begin(), end, static_cast<u32>(mip_offset));
if (layer > info.resources.layers || it == end) {
return std::nullopt;
}
@@ -113,6 +116,43 @@ void ImageBase::InsertView(const ImageViewInfo& view_info, ImageViewId image_vie
image_view_ids.push_back(image_view_id);
}
+bool ImageBase::IsSafeDownload() const noexcept {
+ // Skip images that were not modified from the GPU
+ if (False(flags & ImageFlagBits::GpuModified)) {
+ return false;
+ }
+ // Skip images that *are* modified from the CPU
+ // We don't want to write sensitive data from the guest
+ if (True(flags & ImageFlagBits::CpuModified)) {
+ return false;
+ }
+ if (info.num_samples > 1) {
+ LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented");
+ return false;
+ }
+ return true;
+}
+
+void ImageBase::CheckBadOverlapState() {
+ if (False(flags & ImageFlagBits::BadOverlap)) {
+ return;
+ }
+ if (!overlapping_images.empty()) {
+ return;
+ }
+ flags &= ~ImageFlagBits::BadOverlap;
+}
+
+void ImageBase::CheckAliasState() {
+ if (False(flags & ImageFlagBits::Alias)) {
+ return;
+ }
+ if (!aliased_images.empty()) {
+ return;
+ }
+ flags &= ~ImageFlagBits::Alias;
+}
+
void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) {
static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format;
ASSERT(lhs.info.type == rhs.info.type);
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
index b7f3b7e43..ff1feda9b 100644
--- a/src/video_core/texture_cache/image_base.h
+++ b/src/video_core/texture_cache/image_base.h
@@ -25,6 +25,14 @@ enum class ImageFlagBits : u32 {
Strong = 1 << 5, ///< Exists in the image table, the dimensions are can be trusted
Registered = 1 << 6, ///< True when the image is registered
Picked = 1 << 7, ///< Temporary flag to mark the image as picked
+ Remapped = 1 << 8, ///< Image has been remapped.
+ Sparse = 1 << 9, ///< Image has non continous submemory.
+
+ // Garbage Collection Flags
+ BadOverlap = 1 << 10, ///< This image overlaps other but doesn't fit, has higher
+ ///< garbage collection priority
+ Alias = 1 << 11, ///< This image has aliases and has priority on garbage
+ ///< collection
};
DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
@@ -44,11 +52,22 @@ struct ImageBase {
void InsertView(const ImageViewInfo& view_info, ImageViewId image_view_id);
+ [[nodiscard]] bool IsSafeDownload() const noexcept;
+
[[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept {
const VAddr overlap_end = overlap_cpu_addr + overlap_size;
return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end;
}
+ [[nodiscard]] bool OverlapsGPU(GPUVAddr overlap_gpu_addr, size_t overlap_size) const noexcept {
+ const GPUVAddr overlap_end = overlap_gpu_addr + overlap_size;
+ const GPUVAddr gpu_addr_end = gpu_addr + guest_size_bytes;
+ return gpu_addr < overlap_end && overlap_gpu_addr < gpu_addr_end;
+ }
+
+ void CheckBadOverlapState();
+ void CheckAliasState();
+
ImageInfo info;
u32 guest_size_bytes = 0;
@@ -72,6 +91,30 @@ struct ImageBase {
std::vector<SubresourceBase> slice_subresources;
std::vector<AliasedImage> aliased_images;
+ std::vector<ImageId> overlapping_images;
+ ImageMapId map_view_id{};
+};
+
+struct ImageMapView {
+ explicit ImageMapView(GPUVAddr gpu_addr, VAddr cpu_addr, size_t size, ImageId image_id);
+
+ [[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept {
+ const VAddr overlap_end = overlap_cpu_addr + overlap_size;
+ const VAddr cpu_addr_end = cpu_addr + size;
+ return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end;
+ }
+
+ [[nodiscard]] bool OverlapsGPU(GPUVAddr overlap_gpu_addr, size_t overlap_size) const noexcept {
+ const GPUVAddr overlap_end = overlap_gpu_addr + overlap_size;
+ const GPUVAddr gpu_addr_end = gpu_addr + size;
+ return gpu_addr < overlap_end && overlap_gpu_addr < gpu_addr_end;
+ }
+
+ GPUVAddr gpu_addr;
+ VAddr cpu_addr;
+ size_t size;
+ ImageId image_id;
+ bool picked{};
};
struct ImageAllocBase {
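
Both Overlaps() and the new OverlapsGPU() above use the standard half-open interval test: two ranges [a_begin, a_end) and [b_begin, b_end) intersect exactly when each starts before the other ends. In isolation:

    #include <cstdint>

    constexpr bool Overlaps(std::uint64_t a_begin, std::uint64_t a_end,
                            std::uint64_t b_begin, std::uint64_t b_end) {
        return a_begin < b_end && b_begin < a_end;
    }
    static_assert(Overlaps(0x1000, 0x2000, 0x1FFF, 0x3000));  // one byte of overlap
    static_assert(!Overlaps(0x1000, 0x2000, 0x2000, 0x3000)); // merely touching ends
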
diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp
index e8d632f9e..450becbeb 100644
--- a/src/video_core/texture_cache/image_view_base.cpp
+++ b/src/video_core/texture_cache/image_view_base.cpp
@@ -36,6 +36,15 @@ ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_i
}
}
+ImageViewBase::ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info)
+ : format{info.format}, type{ImageViewType::Buffer}, size{
+ .width = info.size.width,
+ .height = 1,
+ .depth = 1,
+ } {
+ ASSERT_MSG(view_info.type == ImageViewType::Buffer, "Expected texture buffer");
+}
+
ImageViewBase::ImageViewBase(const NullImageParams&) {}
} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/image_view_base.h b/src/video_core/texture_cache/image_view_base.h
index 73954167e..903f715c5 100644
--- a/src/video_core/texture_cache/image_view_base.h
+++ b/src/video_core/texture_cache/image_view_base.h
@@ -27,6 +27,7 @@ DECLARE_ENUM_FLAG_OPERATORS(ImageViewFlagBits)
struct ImageViewBase {
explicit ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info,
ImageId image_id);
+ explicit ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info);
explicit ImageViewBase(const NullImageParams&);
[[nodiscard]] bool IsBuffer() const noexcept {
diff --git a/src/video_core/texture_cache/render_targets.h b/src/video_core/texture_cache/render_targets.h
index 9b9544b07..0cb227d69 100644
--- a/src/video_core/texture_cache/render_targets.h
+++ b/src/video_core/texture_cache/render_targets.h
@@ -24,10 +24,10 @@ struct RenderTargets {
return std::ranges::any_of(color_buffer_ids, contains) || contains(depth_buffer_id);
}
- std::array<ImageViewId, NUM_RT> color_buffer_ids;
- ImageViewId depth_buffer_id;
+ std::array<ImageViewId, NUM_RT> color_buffer_ids{};
+ ImageViewId depth_buffer_id{};
std::array<u8, NUM_RT> draw_buffers{};
- Extent2D size;
+ Extent2D size{};
};
} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h
index eae3be6ea..6180b8c0e 100644
--- a/src/video_core/texture_cache/slot_vector.h
+++ b/src/video_core/texture_cache/slot_vector.h
@@ -5,6 +5,7 @@
#pragma once
#include <array>
+#include <bit>
#include <concepts>
#include <numeric>
#include <type_traits>
@@ -32,6 +33,60 @@ template <class T>
requires std::is_nothrow_move_assignable_v<T>&&
std::is_nothrow_move_constructible_v<T> class SlotVector {
public:
+ class Iterator {
+ friend SlotVector<T>;
+
+ public:
+ constexpr Iterator() = default;
+
+ Iterator& operator++() noexcept {
+ const u64* const bitset = slot_vector->stored_bitset.data();
+ const u32 size = static_cast<u32>(slot_vector->stored_bitset.size()) * 64;
+ if (id.index < size) {
+ do {
+ ++id.index;
+ } while (id.index < size && !IsValid(bitset));
+ if (id.index == size) {
+ id.index = SlotId::INVALID_INDEX;
+ }
+ }
+ return *this;
+ }
+
+ Iterator operator++(int) noexcept {
+ const Iterator copy{*this};
+ ++*this;
+ return copy;
+ }
+
+ bool operator==(const Iterator& other) const noexcept {
+ return id.index == other.id.index;
+ }
+
+ bool operator!=(const Iterator& other) const noexcept {
+ return id.index != other.id.index;
+ }
+
+ std::pair<SlotId, T*> operator*() const noexcept {
+ return {id, std::addressof((*slot_vector)[id])};
+ }
+
+ T* operator->() const noexcept {
+ return std::addressof((*slot_vector)[id]);
+ }
+
+ private:
+ Iterator(SlotVector<T>* slot_vector_, SlotId id_) noexcept
+ : slot_vector{slot_vector_}, id{id_} {}
+
+ bool IsValid(const u64* bitset) const noexcept {
+ return ((bitset[id.index / 64] >> (id.index % 64)) & 1) != 0;
+ }
+
+ SlotVector<T>* slot_vector;
+ SlotId id;
+ };
+
~SlotVector() noexcept {
size_t index = 0;
for (u64 bits : stored_bitset) {
@@ -70,6 +125,20 @@ public:
ResetStorageBit(id.index);
}
+ [[nodiscard]] Iterator begin() noexcept {
+ const auto it = std::ranges::find_if(stored_bitset, [](u64 value) { return value != 0; });
+ if (it == stored_bitset.end()) {
+ return end();
+ }
+ const u32 word_index = static_cast<u32>(std::distance(stored_bitset.begin(), it));
+ const SlotId first_id{word_index * 64 + static_cast<u32>(std::countr_zero(*it))};
+ return Iterator(this, first_id);
+ }
+
+ [[nodiscard]] Iterator end() noexcept {
+ return Iterator(this, SlotId{SlotId::INVALID_INDEX});
+ }
+
private:
struct NonTrivialDummy {
NonTrivialDummy() noexcept {}
@@ -140,7 +209,6 @@ private:
Entry* values = nullptr;
size_t values_capacity = 0;
- size_t values_size = 0;
std::vector<u64> stored_bitset;
std::vector<u32> free_list;
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 59b7c678b..f34c9d9ca 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -13,15 +13,17 @@
#include <span>
#include <type_traits>
#include <unordered_map>
+#include <unordered_set>
#include <utility>
#include <vector>
#include <boost/container/small_vector.hpp>
#include "common/alignment.h"
-#include "common/common_funcs.h"
#include "common/common_types.h"
+#include "common/literals.h"
#include "common/logging/log.h"
+#include "common/settings.h"
#include "video_core/compatible_formats.h"
#include "video_core/delayed_destruction_ring.h"
#include "video_core/dirty_flags.h"
@@ -57,6 +59,7 @@ using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::PixelFormatFromDepthFormat;
using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
using VideoCore::Surface::SurfaceType;
+using namespace Common::Literals;
template <class P>
class TextureCache {
@@ -69,12 +72,17 @@ class TextureCache {
static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS;
/// True when some copies have to be emulated
static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES;
+ /// True when the API can provide info about the memory of the device.
+ static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO;
/// Image view ID for null descriptors
static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0};
/// Sampler ID for bugged sampler ids
static constexpr SamplerId NULL_SAMPLER_ID{0};
+ static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB;
+ static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB;
+
using Runtime = typename P::Runtime;
using Image = typename P::Image;
using ImageAlloc = typename P::ImageAlloc;
@@ -109,6 +117,9 @@ public:
/// Return a reference to the given image view id
[[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept;
+ /// Mark an image as modified from the GPU
+ void MarkModification(ImageId id) noexcept;
+
/// Fill image_view_ids with the graphics images in indices
void FillGraphicsImageViews(std::span<const u32> indices,
std::span<ImageViewId> image_view_ids);
@@ -145,12 +156,13 @@ public:
/// Remove images in a region
void UnmapMemory(VAddr cpu_addr, size_t size);
+    /// Remove images in a GPU virtual address region
+ void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size);
+
/// Blit an image with the given parameters
void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Surface& src,
- const Tegra::Engines::Fermi2D::Config& copy,
- std::optional<Region2D> src_region_override = {},
- std::optional<Region2D> dst_region_override = {});
+ const Tegra::Engines::Fermi2D::Config& copy);
/// Invalidate the contents of the color buffer index
/// These contents become unspecified, the cache can assume aggressive optimizations.
@@ -183,7 +195,22 @@ public:
private:
/// Iterate over all page indices in a range
template <typename Func>
- static void ForEachPage(VAddr addr, size_t size, Func&& func) {
+ static void ForEachCPUPage(VAddr addr, size_t size, Func&& func) {
+        static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result_t<Func, u64>, bool>;
+ const u64 page_end = (addr + size - 1) >> PAGE_BITS;
+ for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
+ if constexpr (RETURNS_BOOL) {
+ if (func(page)) {
+ break;
+ }
+ } else {
+ func(page);
+ }
+ }
+ }
+
+ template <typename Func>
+ static void ForEachGPUPage(GPUVAddr addr, size_t size, Func&& func) {
static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
const u64 page_end = (addr + size - 1) >> PAGE_BITS;
for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
@@ -197,6 +224,9 @@ private:
}
}
+ /// Runs the Garbage Collector.
+ void RunGarbageCollector();
+
/// Fills image_view_ids in the image views in indices
void FillImageViews(DescriptorTable<TICEntry>& table,
std::span<ImageViewId> cached_image_view_ids, std::span<const u32> indices,
@@ -210,7 +240,7 @@ private:
FramebufferId GetFramebufferId(const RenderTargets& key);
/// Refresh the contents (pixel data) of an image
- void RefreshContents(Image& image);
+ void RefreshContents(Image& image, ImageId image_id);
/// Upload data from guest to an image
template <typename StagingBuffer>
@@ -259,6 +289,16 @@ private:
template <typename Func>
void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func);
+ template <typename Func>
+ void ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func);
+
+ template <typename Func>
+ void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func);
+
+    /// Iterates over all the mapped segments of a sparse image calling func
+ template <typename Func>
+ void ForEachSparseSegment(ImageBase& image, Func&& func);
+
/// Find or create an image view in the given image with the passed parameters
[[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info);
@@ -269,10 +309,10 @@ private:
void UnregisterImage(ImageId image);
/// Track CPU reads and writes for image
- void TrackImage(ImageBase& image);
+ void TrackImage(ImageBase& image, ImageId image_id);
/// Stop tracking CPU reads and writes for image
- void UntrackImage(ImageBase& image);
+ void UntrackImage(ImageBase& image, ImageId image_id);
/// Delete image from the cache
void DeleteImage(ImageId image);
@@ -330,11 +370,22 @@ private:
std::unordered_map<TSCEntry, SamplerId> samplers;
std::unordered_map<RenderTargets, FramebufferId> framebuffers;
- std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> page_table;
+ std::unordered_map<u64, std::vector<ImageMapId>, IdentityHash<u64>> page_table;
+ std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table;
+ std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> sparse_page_table;
+
+ std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views;
+
+ VAddr virtual_invalid_space{};
bool has_deleted_images = false;
+ u64 total_used_memory = 0;
+ u64 minimum_memory;
+ u64 expected_memory;
+ u64 critical_memory;
SlotVector<Image> slot_images;
+ SlotVector<ImageMapView> slot_map_views;
SlotVector<ImageView> slot_image_views;
SlotVector<ImageAlloc> slot_image_allocs;
SlotVector<Sampler> slot_samplers;
@@ -353,6 +404,7 @@ private:
u64 modification_tick = 0;
u64 frame_tick = 0;
+ typename SlotVector<Image>::Iterator deletion_iterator;
};
template <class P>
@@ -373,11 +425,94 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&
// This way the null resource becomes a compile time constant
void(slot_image_views.insert(runtime, NullImageParams{}));
void(slot_samplers.insert(runtime, sampler_descriptor));
+
+ deletion_iterator = slot_images.begin();
+
+ if constexpr (HAS_DEVICE_MEMORY_INFO) {
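+        // Enter high-priority collection at roughly 30% of device-local memory and aggressive
+        // collection at 60%, never below the 1 GiB / 2 GiB defaults.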
+ const auto device_memory = runtime.GetDeviceLocalMemory();
+ const u64 possible_expected_memory = (device_memory * 3) / 10;
+ const u64 possible_critical_memory = (device_memory * 6) / 10;
+ expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY);
+ critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY);
+ minimum_memory = 0;
+ } else {
+        // On OpenGL we can be more conservative, as the driver takes care of memory management.
+ expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB;
+ critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB;
+ minimum_memory = expected_memory;
+ }
+}
+
+template <class P>
+void TextureCache<P>::RunGarbageCollector() {
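+    // Scans a bounded number of image slots per call (64, 128 or 256 depending on memory
+    // pressure) starting at deletion_iterator. Aliases, bad overlaps and, under pressure,
+    // images that need no download are deleted once they have gone unused for enough frames;
+    // GPU-modified images that are not bad overlaps are flushed back to guest memory first.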
+ const bool high_priority_mode = total_used_memory >= expected_memory;
+ const bool aggressive_mode = total_used_memory >= critical_memory;
+ const u64 ticks_to_destroy = high_priority_mode ? 60 : 100;
+ int num_iterations = aggressive_mode ? 256 : (high_priority_mode ? 128 : 64);
+ for (; num_iterations > 0; --num_iterations) {
+ if (deletion_iterator == slot_images.end()) {
+ deletion_iterator = slot_images.begin();
+ if (deletion_iterator == slot_images.end()) {
+ break;
+ }
+ }
+ auto [image_id, image_tmp] = *deletion_iterator;
+        Image* image = image_tmp; // Copy out of the structured binding to work around a clang lambda-capture error.
+ const bool is_alias = True(image->flags & ImageFlagBits::Alias);
+ const bool is_bad_overlap = True(image->flags & ImageFlagBits::BadOverlap);
+ const bool must_download = image->IsSafeDownload();
+ bool should_care = is_bad_overlap || is_alias || (high_priority_mode && !must_download);
+ const u64 ticks_needed =
+ is_bad_overlap
+ ? ticks_to_destroy >> 4
+ : ((should_care && aggressive_mode) ? ticks_to_destroy >> 1 : ticks_to_destroy);
+ should_care |= aggressive_mode;
+ if (should_care && image->frame_tick + ticks_needed < frame_tick) {
+ if (is_bad_overlap) {
+ const bool overlap_check = std::ranges::all_of(
+ image->overlapping_images, [&, image](const ImageId& overlap_id) {
+ auto& overlap = slot_images[overlap_id];
+ return overlap.frame_tick >= image->frame_tick;
+ });
+ if (!overlap_check) {
+ ++deletion_iterator;
+ continue;
+ }
+ }
+ if (!is_bad_overlap && must_download) {
+ const bool alias_check = std::ranges::none_of(
+ image->aliased_images, [&, image](const AliasedImage& alias) {
+ auto& alias_image = slot_images[alias.id];
+ return (alias_image.frame_tick < image->frame_tick) ||
+ (alias_image.modification_tick < image->modification_tick);
+ });
+
+ if (alias_check) {
+ auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes);
+ const auto copies = FullDownloadCopies(image->info);
+ image->DownloadMemory(map, copies);
+ runtime.Finish();
+ SwizzleImage(gpu_memory, image->gpu_addr, image->info, copies, map.mapped_span);
+ }
+ }
+ if (True(image->flags & ImageFlagBits::Tracked)) {
+ UntrackImage(*image, image_id);
+ }
+ UnregisterImage(image_id);
+ DeleteImage(image_id);
+ if (is_bad_overlap) {
+ ++num_iterations;
+ }
+ }
+ ++deletion_iterator;
+ }
}
template <class P>
void TextureCache<P>::TickFrame() {
- // Tick sentenced resources in this order to ensure they are destroyed in the right order
+ if (Settings::values.use_caches_gc.GetValue() && total_used_memory > minimum_memory) {
+ RunGarbageCollector();
+ }
sentenced_images.Tick();
sentenced_framebuffers.Tick();
sentenced_image_view.Tick();
@@ -395,6 +530,11 @@ typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) noexcept {
}
template <class P>
+void TextureCache<P>::MarkModification(ImageId id) noexcept {
+ MarkModification(slot_images[id]);
+}
+
+template <class P>
void TextureCache<P>::FillGraphicsImageViews(std::span<const u32> indices,
std::span<ImageViewId> image_view_ids) {
FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids);
@@ -408,13 +548,13 @@ void TextureCache<P>::FillComputeImageViews(std::span<const u32> indices,
template <class P>
typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
- [[unlikely]] if (index > graphics_sampler_table.Limit()) {
- LOG_ERROR(HW_GPU, "Invalid sampler index={}", index);
+ if (index > graphics_sampler_table.Limit()) {
+ LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
return &slot_samplers[NULL_SAMPLER_ID];
}
const auto [descriptor, is_new] = graphics_sampler_table.Read(index);
SamplerId& id = graphics_sampler_ids[index];
- [[unlikely]] if (is_new) {
+ if (is_new) {
id = FindSampler(descriptor);
}
return &slot_samplers[id];
@@ -422,13 +562,13 @@ typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
template <class P>
typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) {
- [[unlikely]] if (index > compute_sampler_table.Limit()) {
- LOG_ERROR(HW_GPU, "Invalid sampler index={}", index);
+ if (index > compute_sampler_table.Limit()) {
+ LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
return &slot_samplers[NULL_SAMPLER_ID];
}
const auto [descriptor, is_new] = compute_sampler_table.Read(index);
SamplerId& id = compute_sampler_ids[index];
- [[unlikely]] if (is_new) {
+ if (is_new) {
id = FindSampler(descriptor);
}
return &slot_samplers[id];
@@ -467,6 +607,12 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
using namespace VideoCommon::Dirty;
auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::RenderTargets]) {
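+        // The render targets did not change, but still prepare every bound color and depth
+        // view so their contents are refreshed (or discarded on a full clear) before use.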
+ for (size_t index = 0; index < NUM_RT; ++index) {
+ ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
+ PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id));
+ }
+ const ImageViewId depth_buffer_id = render_targets.depth_buffer_id;
+ PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id));
return;
}
flags[Dirty::RenderTargets] = false;
@@ -523,7 +669,7 @@ ImageViewId TextureCache<P>::VisitImageView(DescriptorTable<TICEntry>& table,
std::span<ImageViewId> cached_image_view_ids,
u32 index) {
if (index > table.Limit()) {
- LOG_ERROR(HW_GPU, "Invalid image view index={}", index);
+ LOG_DEBUG(HW_GPU, "Invalid image view index={}", index);
return NULL_IMAGE_VIEW_ID;
}
const auto [descriptor, is_new] = table.Read(index);
@@ -560,7 +706,9 @@ void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) {
return;
}
image.flags |= ImageFlagBits::CpuModified;
- UntrackImage(image);
+ if (True(image.flags & ImageFlagBits::Tracked)) {
+ UntrackImage(image, image_id);
+ }
});
}
@@ -568,17 +716,7 @@ template <class P>
void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
std::vector<ImageId> images;
ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) {
- // Skip images that were not modified from the GPU
- if (False(image.flags & ImageFlagBits::GpuModified)) {
- return;
- }
- // Skip images that .are. modified from the CPU
- // We don't want to write sensitive data from the guest
- if (True(image.flags & ImageFlagBits::CpuModified)) {
- return;
- }
- if (image.info.num_samples > 1) {
- LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented");
+ if (!image.IsSafeDownload()) {
return;
}
image.flags &= ~ImageFlagBits::GpuModified;
@@ -607,7 +745,7 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
for (const ImageId id : deleted_images) {
Image& image = slot_images[id];
if (True(image.flags & ImageFlagBits::Tracked)) {
- UntrackImage(image);
+ UntrackImage(image, id);
}
UnregisterImage(id);
DeleteImage(id);
@@ -615,11 +753,26 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
}
template <class P>
+void TextureCache<P>::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) {
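+    // Images are not deleted here; they are flagged as Remapped and untracked so they can be
+    // replaced or reclaimed later.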
+ std::vector<ImageId> deleted_images;
+ ForEachImageInRegionGPU(gpu_addr, size,
+ [&](ImageId id, Image&) { deleted_images.push_back(id); });
+ for (const ImageId id : deleted_images) {
+ Image& image = slot_images[id];
+ if (True(image.flags & ImageFlagBits::Remapped)) {
+ continue;
+ }
+ image.flags |= ImageFlagBits::Remapped;
+ if (True(image.flags & ImageFlagBits::Tracked)) {
+ UntrackImage(image, id);
+ }
+ }
+}
+
+template <class P>
void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Surface& src,
- const Tegra::Engines::Fermi2D::Config& copy,
- std::optional<Region2D> src_override,
- std::optional<Region2D> dst_override) {
+ const Tegra::Engines::Fermi2D::Config& copy) {
const BlitImages images = GetBlitImages(dst, src);
const ImageId dst_id = images.dst_id;
const ImageId src_id = images.src_id;
@@ -630,47 +783,25 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
const ImageBase& src_image = slot_images[src_id];
// TODO: Deduplicate
- const std::optional dst_base = dst_image.TryFindBase(dst.Address());
- const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}};
- const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range);
- const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info);
- const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples);
-
- // out of bounds texture blit checking
- const bool use_override = src_override.has_value();
- const s32 src_x0 = copy.src_x0 >> src_samples_x;
- s32 src_x1 = use_override ? src_override->end.x : copy.src_x1 >> src_samples_x;
- const s32 src_y0 = copy.src_y0 >> src_samples_y;
- const s32 src_y1 = copy.src_y1 >> src_samples_y;
-
- const auto src_width = static_cast<s32>(src_image.info.size.width);
- const bool width_oob = src_x1 > src_width;
- const auto width_diff = width_oob ? src_x1 - src_width : 0;
- if (width_oob) {
- src_x1 = src_width;
- }
-
- const Region2D src_dimensions{
- Offset2D{.x = src_x0, .y = src_y0},
- Offset2D{.x = src_x1, .y = src_y1},
- };
- const auto src_region = use_override ? *src_override : src_dimensions;
-
const std::optional src_base = src_image.TryFindBase(src.Address());
const SubresourceRange src_range{.base = src_base.value(), .extent = {1, 1}};
const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range);
const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info);
- const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples);
+ const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples);
+ const Region2D src_region{
+ Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y},
+ Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y},
+ };
- const s32 dst_x0 = copy.dst_x0 >> dst_samples_x;
- const s32 dst_x1 = copy.dst_x1 >> dst_samples_x;
- const s32 dst_y0 = copy.dst_y0 >> dst_samples_y;
- const s32 dst_y1 = copy.dst_y1 >> dst_samples_y;
- const Region2D dst_dimensions{
- Offset2D{.x = dst_x0, .y = dst_y0},
- Offset2D{.x = dst_x1 - width_diff, .y = dst_y1},
+ const std::optional dst_base = dst_image.TryFindBase(dst.Address());
+ const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}};
+ const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range);
+ const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info);
+ const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples);
+ const Region2D dst_region{
+ Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y},
+ Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y},
};
- const auto dst_region = use_override ? *dst_override : dst_dimensions;
// Always call this after src_framebuffer_id was queried, as the address might be invalidated.
Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id];
@@ -687,21 +818,6 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter,
copy.operation);
}
-
- if (width_oob) {
- // Continue copy of the oob region of the texture on the next row
- auto oob_src = src;
- oob_src.height++;
- const Region2D src_region_override{
- Offset2D{.x = 0, .y = src_y0 + 1},
- Offset2D{.x = width_diff, .y = src_y1 + 1},
- };
- const Region2D dst_region_override{
- Offset2D{.x = dst_x1 - width_diff, .y = dst_y0},
- Offset2D{.x = dst_x1, .y = dst_y1},
- };
- BlitImage(dst, oob_src, copy, src_region_override, dst_region_override);
- }
}
template <class P>
@@ -745,9 +861,10 @@ typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView(VAddr cpu_ad
if (it == page_table.end()) {
return nullptr;
}
- const auto& image_ids = it->second;
- for (const ImageId image_id : image_ids) {
- const ImageBase& image = slot_images[image_id];
+ const auto& image_map_ids = it->second;
+ for (const ImageMapId map_id : image_map_ids) {
+ const ImageMapView& map = slot_map_views[map_id];
+ const ImageBase& image = slot_images[map.image_id];
if (image.cpu_addr != cpu_addr) {
continue;
}
@@ -827,13 +944,13 @@ bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
}
template <class P>
-void TextureCache<P>::RefreshContents(Image& image) {
+void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) {
if (False(image.flags & ImageFlagBits::CpuModified)) {
// Only upload modified images
return;
}
image.flags &= ~ImageFlagBits::CpuModified;
- TrackImage(image);
+ TrackImage(image, image_id);
if (image.info.num_samples > 1) {
LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented");
@@ -859,9 +976,6 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging)
auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data);
ConvertImage(unswizzled_data, image.info, mapped_span, copies);
image.UploadMemory(staging, copies);
- } else if (image.info.type == ImageType::Buffer) {
- const std::array copies{UploadBufferCopy(gpu_memory, gpu_addr, image, mapped_span)};
- image.UploadMemory(staging, copies);
} else {
const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span);
image.UploadMemory(staging, copies);
@@ -870,7 +984,7 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging)
template <class P>
ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) {
- if (!IsValidAddress(gpu_memory, config)) {
+ if (!IsValidEntry(gpu_memory, config)) {
return NULL_IMAGE_VIEW_ID;
}
const auto [pair, is_new] = image_views.try_emplace(config);
@@ -884,7 +998,12 @@ ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) {
template <class P>
ImageViewId TextureCache<P>::CreateImageView(const TICEntry& config) {
const ImageInfo info(config);
- const GPUVAddr image_gpu_addr = config.Address() - config.BaseLayer() * info.layer_stride;
+ if (info.type == ImageType::Buffer) {
+ const ImageViewInfo view_info(config, 0);
+ return slot_image_views.insert(runtime, info, view_info, config.Address());
+ }
+ const u32 layer_offset = config.BaseLayer() * info.layer_stride;
+ const GPUVAddr image_gpu_addr = config.Address() - layer_offset;
const ImageId image_id = FindOrInsertImage(info, image_gpu_addr);
if (!image_id) {
return NULL_IMAGE_VIEW_ID;
@@ -912,14 +1031,20 @@ ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_a
template <class P>
ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
RelaxedOptions options) {
- const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
if (!cpu_addr) {
- return ImageId{};
+ cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info));
+ if (!cpu_addr) {
+ return ImageId{};
+ }
}
const bool broken_views = runtime.HasBrokenTextureViewFormats();
const bool native_bgr = runtime.HasNativeBgr();
ImageId image_id;
const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
+ if (True(existing_image.flags & ImageFlagBits::Remapped)) {
+ return false;
+ }
if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) {
const bool strict_size = False(options & RelaxedOptions::Size) &&
True(existing_image.flags & ImageFlagBits::Strong);
@@ -945,7 +1070,16 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
template <class P>
ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
RelaxedOptions options) {
- const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ if (!cpu_addr) {
+ const auto size = CalculateGuestSizeInBytes(info);
+ cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, size);
+ if (!cpu_addr) {
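+            // No guest CPU mapping exists for this image; assign a synthetic address outside
+            // the mappable range so it can still be registered and found again.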
+ const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space;
+ virtual_invalid_space += Common::AlignUp(size, 32);
+ cpu_addr = std::optional<VAddr>(fake_addr);
+ }
+ }
ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr);
const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr);
const Image& image = slot_images[image_id];
@@ -965,10 +1099,14 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
const bool broken_views = runtime.HasBrokenTextureViewFormats();
const bool native_bgr = runtime.HasNativeBgr();
std::vector<ImageId> overlap_ids;
+ std::unordered_set<ImageId> overlaps_found;
std::vector<ImageId> left_aliased_ids;
std::vector<ImageId> right_aliased_ids;
- ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) {
- if (info.type != overlap.info.type) {
+ std::unordered_set<ImageId> ignore_textures;
+ std::vector<ImageId> bad_overlap_ids;
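+    // Classify overlaps as resolvable (absorbed into the new image), aliased, or bad
+    // (unmergeable); remapped overlaps go into ignore_textures and are destroyed below.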
+ const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) {
+ if (True(overlap.flags & ImageFlagBits::Remapped)) {
+ ignore_textures.insert(overlap_id);
return;
}
if (info.type == ImageType::Linear) {
@@ -978,6 +1116,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
}
return;
}
+ overlaps_found.insert(overlap_id);
static constexpr bool strict_size = true;
const std::optional<OverlapResult> solution = ResolveOverlap(
new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr);
@@ -992,16 +1131,49 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
const ImageBase new_image_base(new_info, gpu_addr, cpu_addr);
if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) {
left_aliased_ids.push_back(overlap_id);
+ overlap.flags |= ImageFlagBits::Alias;
} else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options,
broken_views, native_bgr)) {
right_aliased_ids.push_back(overlap_id);
+ overlap.flags |= ImageFlagBits::Alias;
+ } else {
+ bad_overlap_ids.push_back(overlap_id);
+ overlap.flags |= ImageFlagBits::BadOverlap;
}
- });
+ };
+ ForEachImageInRegion(cpu_addr, size_bytes, region_check);
+ const auto region_check_gpu = [&](ImageId overlap_id, ImageBase& overlap) {
+ if (!overlaps_found.contains(overlap_id)) {
+ if (True(overlap.flags & ImageFlagBits::Remapped)) {
+ ignore_textures.insert(overlap_id);
+ }
+ if (overlap.gpu_addr == gpu_addr && overlap.guest_size_bytes == size_bytes) {
+ ignore_textures.insert(overlap_id);
+ }
+ }
+ };
+ ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu);
const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
Image& new_image = slot_images[new_image_id];
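+    // Images whose guest GPU range is not contiguously mapped to CPU memory are tracked as
+    // sparse and registered per mapped segment.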
+ if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) {
+ new_image.flags |= ImageFlagBits::Sparse;
+ }
+
+ for (const ImageId overlap_id : ignore_textures) {
+ Image& overlap = slot_images[overlap_id];
+ if (True(overlap.flags & ImageFlagBits::GpuModified)) {
+ UNIMPLEMENTED();
+ }
+ if (True(overlap.flags & ImageFlagBits::Tracked)) {
+ UntrackImage(overlap, overlap_id);
+ }
+ UnregisterImage(overlap_id);
+ DeleteImage(overlap_id);
+ }
+
// TODO: Only upload what we need
- RefreshContents(new_image);
+ RefreshContents(new_image, new_image_id);
for (const ImageId overlap_id : overlap_ids) {
Image& overlap = slot_images[overlap_id];
@@ -1013,7 +1185,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
runtime.CopyImage(new_image, overlap, copies);
}
if (True(overlap.flags & ImageFlagBits::Tracked)) {
- UntrackImage(overlap);
+ UntrackImage(overlap, overlap_id);
}
UnregisterImage(overlap_id);
DeleteImage(overlap_id);
@@ -1022,10 +1194,18 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
for (const ImageId aliased_id : right_aliased_ids) {
ImageBase& aliased = slot_images[aliased_id];
AddImageAlias(new_image_base, aliased, new_image_id, aliased_id);
+ new_image.flags |= ImageFlagBits::Alias;
}
for (const ImageId aliased_id : left_aliased_ids) {
ImageBase& aliased = slot_images[aliased_id];
AddImageAlias(aliased, new_image_base, aliased_id, new_image_id);
+ new_image.flags |= ImageFlagBits::Alias;
+ }
+ for (const ImageId aliased_id : bad_overlap_ids) {
+ ImageBase& aliased = slot_images[aliased_id];
+ aliased.overlapping_images.push_back(new_image_id);
+ new_image.overlapping_images.push_back(aliased_id);
+ new_image.flags |= ImageFlagBits::BadOverlap;
}
RegisterImage(new_image_id);
return new_image_id;
@@ -1140,7 +1320,8 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f
using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
boost::container::small_vector<ImageId, 32> images;
- ForEachPage(cpu_addr, size, [this, &images, cpu_addr, size, func](u64 page) {
+ boost::container::small_vector<ImageMapId, 32> maps;
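+    // CPU lookups go through ImageMapView entries; both the map and its backing image are
+    // flagged as picked so an image with several overlapping maps is only visited once.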
+ ForEachCPUPage(cpu_addr, size, [this, &images, &maps, cpu_addr, size, func](u64 page) {
const auto it = page_table.find(page);
if (it == page_table.end()) {
if constexpr (BOOL_BREAK) {
@@ -1149,12 +1330,63 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f
return;
}
}
+ for (const ImageMapId map_id : it->second) {
+ ImageMapView& map = slot_map_views[map_id];
+ if (map.picked) {
+ continue;
+ }
+ if (!map.Overlaps(cpu_addr, size)) {
+ continue;
+ }
+ map.picked = true;
+ maps.push_back(map_id);
+ Image& image = slot_images[map.image_id];
+ if (True(image.flags & ImageFlagBits::Picked)) {
+ continue;
+ }
+ image.flags |= ImageFlagBits::Picked;
+ images.push_back(map.image_id);
+ if constexpr (BOOL_BREAK) {
+ if (func(map.image_id, image)) {
+ return true;
+ }
+ } else {
+ func(map.image_id, image);
+ }
+ }
+ if constexpr (BOOL_BREAK) {
+ return false;
+ }
+ });
+ for (const ImageId image_id : images) {
+ slot_images[image_id].flags &= ~ImageFlagBits::Picked;
+ }
+ for (const ImageMapId map_id : maps) {
+ slot_map_views[map_id].picked = false;
+ }
+}
+
+template <class P>
+template <typename Func>
+void TextureCache<P>::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func) {
+ using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
+ static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
+ boost::container::small_vector<ImageId, 8> images;
+ ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) {
+ const auto it = gpu_page_table.find(page);
+ if (it == gpu_page_table.end()) {
+ if constexpr (BOOL_BREAK) {
+ return false;
+ } else {
+ return;
+ }
+ }
for (const ImageId image_id : it->second) {
Image& image = slot_images[image_id];
if (True(image.flags & ImageFlagBits::Picked)) {
continue;
}
- if (!image.Overlaps(cpu_addr, size)) {
+ if (!image.OverlapsGPU(gpu_addr, size)) {
continue;
}
image.flags |= ImageFlagBits::Picked;
@@ -1177,6 +1409,69 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f
}
template <class P>
+template <typename Func>
+void TextureCache<P>::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func) {
+ using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
+ static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
+ boost::container::small_vector<ImageId, 8> images;
+ ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) {
+ const auto it = sparse_page_table.find(page);
+ if (it == sparse_page_table.end()) {
+ if constexpr (BOOL_BREAK) {
+ return false;
+ } else {
+ return;
+ }
+ }
+ for (const ImageId image_id : it->second) {
+ Image& image = slot_images[image_id];
+ if (True(image.flags & ImageFlagBits::Picked)) {
+ continue;
+ }
+ if (!image.OverlapsGPU(gpu_addr, size)) {
+ continue;
+ }
+ image.flags |= ImageFlagBits::Picked;
+ images.push_back(image_id);
+ if constexpr (BOOL_BREAK) {
+ if (func(image_id, image)) {
+ return true;
+ }
+ } else {
+ func(image_id, image);
+ }
+ }
+ if constexpr (BOOL_BREAK) {
+ return false;
+ }
+ });
+ for (const ImageId image_id : images) {
+ slot_images[image_id].flags &= ~ImageFlagBits::Picked;
+ }
+}
+
+template <class P>
+template <typename Func>
+void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) {
+ using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type;
+ static constexpr bool RETURNS_BOOL = std::is_same_v<FuncReturn, bool>;
+ const auto segments = gpu_memory.GetSubmappedRange(image.gpu_addr, image.guest_size_bytes);
+ for (auto& segment : segments) {
+ const auto gpu_addr = segment.first;
+ const auto size = segment.second;
+ std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ ASSERT(cpu_addr);
+ if constexpr (RETURNS_BOOL) {
+ if (func(gpu_addr, *cpu_addr, size)) {
+ break;
+ }
+ } else {
+ func(gpu_addr, *cpu_addr, size);
+ }
+ }
+}
+
+template <class P>
ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) {
Image& image = slot_images[image_id];
if (const ImageViewId image_view_id = image.FindView(info); image_view_id) {
@@ -1193,8 +1488,34 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
ASSERT_MSG(False(image.flags & ImageFlagBits::Registered),
"Trying to register an already registered image");
image.flags |= ImageFlagBits::Registered;
- ForEachPage(image.cpu_addr, image.guest_size_bytes,
- [this, image_id](u64 page) { page_table[page].push_back(image_id); });
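+    // Track an approximate host memory footprint; ASTC and converted images are charged their
+    // estimated decompressed size rather than the guest size.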
+ u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
+ if ((IsPixelFormatASTC(image.info.format) &&
+ True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
+ True(image.flags & ImageFlagBits::Converted)) {
+ tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
+ }
+ total_used_memory += Common::AlignUp(tentative_size, 1024);
+ ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
+ [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); });
+ if (False(image.flags & ImageFlagBits::Sparse)) {
+ auto map_id =
+ slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id);
+ ForEachCPUPage(image.cpu_addr, image.guest_size_bytes,
+ [this, map_id](u64 page) { page_table[page].push_back(map_id); });
+ image.map_view_id = map_id;
+ return;
+ }
+ std::vector<ImageViewId> sparse_maps{};
+ ForEachSparseSegment(
+ image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
+ auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id);
+ ForEachCPUPage(cpu_addr, size,
+ [this, map_id](u64 page) { page_table[page].push_back(map_id); });
+ sparse_maps.push_back(map_id);
+ });
+ sparse_views.emplace(image_id, std::move(sparse_maps));
+ ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
+ [this, image_id](u64 page) { sparse_page_table[page].push_back(image_id); });
}
template <class P>
@@ -1203,34 +1524,133 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
ASSERT_MSG(True(image.flags & ImageFlagBits::Registered),
"Trying to unregister an already registered image");
image.flags &= ~ImageFlagBits::Registered;
- ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) {
- const auto page_it = page_table.find(page);
- if (page_it == page_table.end()) {
- UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
- return;
- }
- std::vector<ImageId>& image_ids = page_it->second;
- const auto vector_it = std::ranges::find(image_ids, image_id);
- if (vector_it == image_ids.end()) {
- UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", page << PAGE_BITS);
- return;
- }
- image_ids.erase(vector_it);
+ image.flags &= ~ImageFlagBits::BadOverlap;
+ u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
+ if ((IsPixelFormatASTC(image.info.format) &&
+ True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
+ True(image.flags & ImageFlagBits::Converted)) {
+ tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
+ }
+ total_used_memory -= Common::AlignUp(tentative_size, 1024);
+ const auto& clear_page_table =
+ [this, image_id](
+ u64 page,
+ std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>>& selected_page_table) {
+ const auto page_it = selected_page_table.find(page);
+ if (page_it == selected_page_table.end()) {
+ UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
+ return;
+ }
+ std::vector<ImageId>& image_ids = page_it->second;
+ const auto vector_it = std::ranges::find(image_ids, image_id);
+ if (vector_it == image_ids.end()) {
+ UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}",
+ page << PAGE_BITS);
+ return;
+ }
+ image_ids.erase(vector_it);
+ };
+ ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
+ [this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); });
+ if (False(image.flags & ImageFlagBits::Sparse)) {
+ const auto map_id = image.map_view_id;
+ ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) {
+ const auto page_it = page_table.find(page);
+ if (page_it == page_table.end()) {
+ UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
+ return;
+ }
+ std::vector<ImageMapId>& image_map_ids = page_it->second;
+ const auto vector_it = std::ranges::find(image_map_ids, map_id);
+ if (vector_it == image_map_ids.end()) {
+ UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}",
+ page << PAGE_BITS);
+ return;
+ }
+ image_map_ids.erase(vector_it);
+ });
+ slot_map_views.erase(map_id);
+ return;
+ }
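+    // Sparse image: remove it from the sparse GPU page table and drop every per-segment map
+    // view from the CPU page table.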
+ ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) {
+ clear_page_table(page, sparse_page_table);
});
+ auto it = sparse_views.find(image_id);
+ ASSERT(it != sparse_views.end());
+ auto& sparse_maps = it->second;
+ for (auto& map_view_id : sparse_maps) {
+ const auto& map_range = slot_map_views[map_view_id];
+ const VAddr cpu_addr = map_range.cpu_addr;
+ const std::size_t size = map_range.size;
+ ForEachCPUPage(cpu_addr, size, [this, image_id](u64 page) {
+ const auto page_it = page_table.find(page);
+ if (page_it == page_table.end()) {
+ UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
+ return;
+ }
+ std::vector<ImageMapId>& image_map_ids = page_it->second;
+ auto vector_it = image_map_ids.begin();
+ while (vector_it != image_map_ids.end()) {
+ ImageMapView& map = slot_map_views[*vector_it];
+ if (map.image_id != image_id) {
+ vector_it++;
+ continue;
+ }
+ if (!map.picked) {
+ map.picked = true;
+ }
+ vector_it = image_map_ids.erase(vector_it);
+ }
+ });
+ slot_map_views.erase(map_view_id);
+ }
+ sparse_views.erase(it);
}
template <class P>
-void TextureCache<P>::TrackImage(ImageBase& image) {
+void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) {
ASSERT(False(image.flags & ImageFlagBits::Tracked));
image.flags |= ImageFlagBits::Tracked;
- rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
+ if (False(image.flags & ImageFlagBits::Sparse)) {
+ rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
+ return;
+ }
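+    // Sparse images are tracked per mapped segment; registered images reuse the segments
+    // cached in sparse_views, otherwise they are queried from the GPU memory manager.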
+ if (True(image.flags & ImageFlagBits::Registered)) {
+ auto it = sparse_views.find(image_id);
+ ASSERT(it != sparse_views.end());
+ auto& sparse_maps = it->second;
+ for (auto& map_view_id : sparse_maps) {
+ const auto& map = slot_map_views[map_view_id];
+ const VAddr cpu_addr = map.cpu_addr;
+ const std::size_t size = map.size;
+ rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
+ }
+ return;
+ }
+ ForEachSparseSegment(image,
+ [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
+ rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
+ });
}
template <class P>
-void TextureCache<P>::UntrackImage(ImageBase& image) {
+void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) {
ASSERT(True(image.flags & ImageFlagBits::Tracked));
image.flags &= ~ImageFlagBits::Tracked;
- rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1);
+ if (False(image.flags & ImageFlagBits::Sparse)) {
+ rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1);
+ return;
+ }
+ ASSERT(True(image.flags & ImageFlagBits::Registered));
+ auto it = sparse_views.find(image_id);
+ ASSERT(it != sparse_views.end());
+ auto& sparse_maps = it->second;
+ for (auto& map_view_id : sparse_maps) {
+ const auto& map = slot_map_views[map_view_id];
+ const VAddr cpu_addr = map.cpu_addr;
+ const std::size_t size = map.size;
+ rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1);
+ }
}
template <class P>
@@ -1276,9 +1696,19 @@ void TextureCache<P>::DeleteImage(ImageId image_id) {
std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) {
return other_alias.id == image_id;
});
+ other_image.CheckAliasState();
ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}",
num_removed_aliases);
}
+ for (const ImageId overlap_id : image.overlapping_images) {
+ ImageBase& other_image = slot_images[overlap_id];
+ [[maybe_unused]] const size_t num_removed_overlaps = std::erase_if(
+ other_image.overlapping_images,
+ [image_id](const ImageId other_overlap_id) { return other_overlap_id == image_id; });
+ other_image.CheckBadOverlapState();
+        ASSERT_MSG(num_removed_overlaps == 1, "Invalid number of removed overlaps: {}",
+ num_removed_overlaps);
+ }
for (const ImageViewId image_view_id : image_view_ids) {
sentenced_image_view.Push(std::move(slot_image_views[image_view_id]));
slot_image_views.erase(image_view_id);
@@ -1362,10 +1792,10 @@ void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool
if (invalidate) {
image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified);
if (False(image.flags & ImageFlagBits::Tracked)) {
- TrackImage(image);
+ TrackImage(image, image_id);
}
} else {
- RefreshContents(image);
+ RefreshContents(image, image_id);
SynchronizeAliases(image_id);
}
if (is_modification) {
@@ -1381,6 +1811,9 @@ void TextureCache<P>::PrepareImageView(ImageViewId image_view_id, bool is_modifi
return;
}
const ImageViewBase& image_view = slot_image_views[image_view_id];
+ if (image_view.IsBuffer()) {
+ return;
+ }
PrepareImage(image_view.image_id, is_modification, invalidate);
}
diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h
index c9571f7e4..47a11cb2f 100644
--- a/src/video_core/texture_cache/types.h
+++ b/src/video_core/texture_cache/types.h
@@ -16,6 +16,7 @@ constexpr size_t MAX_MIP_LEVELS = 14;
constexpr SlotId CORRUPT_ID{0xfffffffe};
using ImageId = SlotId;
+using ImageMapId = SlotId;
using ImageViewId = SlotId;
using ImageAllocId = SlotId;
using SamplerId = SlotId;
@@ -132,8 +133,8 @@ struct BufferImageCopy {
};
struct BufferCopy {
- size_t src_offset;
- size_t dst_offset;
+ u64 src_offset;
+ u64 dst_offset;
size_t size;
};
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index 6835fd747..59cf2f561 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -169,23 +169,6 @@ template <u32 GOB_EXTENT>
return Common::DivCeil(AdjustMipSize(size, level), block_size);
}
-[[nodiscard]] constexpr std::pair<int, int> Samples(int num_samples) {
- switch (num_samples) {
- case 1:
- return {1, 1};
- case 2:
- return {2, 1};
- case 4:
- return {2, 2};
- case 8:
- return {4, 2};
- case 16:
- return {4, 4};
- }
- UNREACHABLE_MSG("Invalid number of samples={}", num_samples);
- return {1, 1};
-}
-
[[nodiscard]] constexpr Extent2D DefaultBlockSize(PixelFormat format) {
return {DefaultBlockWidth(format), DefaultBlockHeight(format)};
}
@@ -283,14 +266,13 @@ template <u32 GOB_EXTENT>
}
[[nodiscard]] constexpr LevelInfo MakeLevelInfo(PixelFormat format, Extent3D size, Extent3D block,
- u32 num_samples, u32 tile_width_spacing) {
- const auto [samples_x, samples_y] = Samples(num_samples);
+ u32 tile_width_spacing) {
const u32 bytes_per_block = BytesPerBlock(format);
return {
.size =
{
- .width = size.width * samples_x,
- .height = size.height * samples_y,
+ .width = size.width,
+ .height = size.height,
.depth = size.depth,
},
.block = block,
@@ -301,14 +283,12 @@ template <u32 GOB_EXTENT>
}
[[nodiscard]] constexpr LevelInfo MakeLevelInfo(const ImageInfo& info) {
- return MakeLevelInfo(info.format, info.size, info.block, info.num_samples,
- info.tile_width_spacing);
+ return MakeLevelInfo(info.format, info.size, info.block, info.tile_width_spacing);
}
[[nodiscard]] constexpr u32 CalculateLevelOffset(PixelFormat format, Extent3D size, Extent3D block,
- u32 num_samples, u32 tile_width_spacing,
- u32 level) {
- const LevelInfo info = MakeLevelInfo(format, size, block, num_samples, tile_width_spacing);
+ u32 tile_width_spacing, u32 level) {
+ const LevelInfo info = MakeLevelInfo(format, size, block, tile_width_spacing);
u32 offset = 0;
for (u32 current_level = 0; current_level < level; ++current_level) {
offset += CalculateLevelSize(info, current_level);
@@ -394,7 +374,7 @@ template <u32 GOB_EXTENT>
const s32 mip_offset = diff % layer_stride;
const std::array offsets = CalculateMipLevelOffsets(new_info);
const auto end = offsets.begin() + new_info.resources.levels;
- const auto it = std::find(offsets.begin(), end, mip_offset);
+ const auto it = std::find(offsets.begin(), end, static_cast<u32>(mip_offset));
if (it == end) {
// Mipmap is not aligned to any valid size
return std::nullopt;
@@ -581,6 +561,8 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr
for (s32 layer = 0; layer < info.resources.layers; ++layer) {
const std::span<const u8> src = input.subspan(host_offset);
+ gpu_memory.ReadBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes());
+
SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height,
num_tiles.depth, block.height, block.depth);
@@ -643,8 +625,8 @@ u32 CalculateLayerStride(const ImageInfo& info) noexcept {
u32 CalculateLayerSize(const ImageInfo& info) noexcept {
ASSERT(info.type != ImageType::Linear);
- return CalculateLevelOffset(info.format, info.size, info.block, info.num_samples,
- info.tile_width_spacing, info.resources.levels);
+ return CalculateLevelOffset(info.format, info.size, info.block, info.tile_width_spacing,
+ info.resources.levels);
}
LevelArray CalculateMipLevelOffsets(const ImageInfo& info) noexcept {
@@ -662,6 +644,16 @@ LevelArray CalculateMipLevelOffsets(const ImageInfo& info) noexcept {
return offsets;
}
+LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept {
+ const u32 num_levels = info.resources.levels;
+ const LevelInfo level_info = MakeLevelInfo(info);
+ LevelArray sizes{};
+ for (u32 level = 0; level < num_levels; ++level) {
+ sizes[level] = CalculateLevelSize(level_info, level);
+ }
+ return sizes;
+}
+
std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) {
ASSERT(info.type == ImageType::e3D);
std::vector<u32> offsets;
@@ -774,14 +766,20 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn
return copies;
}
-bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) {
- if (config.Address() == 0) {
+bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) {
+ const GPUVAddr address = config.Address();
+ if (address == 0) {
return false;
}
- if (config.Address() > (u64(1) << 48)) {
+ if (address > (1ULL << 48)) {
return false;
}
- return gpu_memory.GpuToCpuAddress(config.Address()).has_value();
+ if (gpu_memory.GpuToCpuAddress(address).has_value()) {
+ return true;
+ }
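+    // The base address of a sparsely mapped texture may itself be unmapped; fall back to a
+    // ranged lookup over the computed guest size.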
+ const ImageInfo info{config};
+ const size_t guest_size_bytes = CalculateGuestSizeInBytes(info);
+ return gpu_memory.GpuToCpuAddress(address, guest_size_bytes).has_value();
}
std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
@@ -1177,37 +1175,37 @@ static_assert(CalculateLevelSize(LevelInfo{{1920, 1080, 1}, {0, 2, 0}, {1, 1}, 2
0x7f8000);
static_assert(CalculateLevelSize(LevelInfo{{32, 32, 1}, {0, 0, 4}, {1, 1}, 4, 0}, 0) == 0x4000);
-static_assert(CalculateLevelOffset(PixelFormat::R8_SINT, {1920, 1080, 1}, {0, 2, 0}, 1, 0, 7) ==
+static_assert(CalculateLevelOffset(PixelFormat::R8_SINT, {1920, 1080, 1}, {0, 2, 0}, 0, 7) ==
0x2afc00);
-static_assert(CalculateLevelOffset(PixelFormat::ASTC_2D_12X12_UNORM, {8192, 4096, 1}, {0, 2, 0}, 1,
- 0, 12) == 0x50d200);
-
-static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 1, 0,
- 0) == 0);
-static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 1, 0,
- 1) == 0x400000);
-static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 1, 0,
- 2) == 0x500000);
-static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 1, 0,
- 3) == 0x540000);
-static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 1, 0,
- 4) == 0x550000);
-static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 1, 0,
- 5) == 0x554000);
-static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 1, 0,
- 6) == 0x555000);
-static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 1, 0,
- 7) == 0x555400);
-static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 1, 0,
- 8) == 0x555600);
-static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 1, 0,
- 9) == 0x555800);
+static_assert(CalculateLevelOffset(PixelFormat::ASTC_2D_12X12_UNORM, {8192, 4096, 1}, {0, 2, 0}, 0,
+ 12) == 0x50d200);
+
+static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 0, 0) ==
+ 0);
+static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 0, 1) ==
+ 0x400000);
+static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 0, 2) ==
+ 0x500000);
+static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 0, 3) ==
+ 0x540000);
+static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 0, 4) ==
+ 0x550000);
+static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 0, 5) ==
+ 0x554000);
+static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 0, 6) ==
+ 0x555000);
+static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 0, 7) ==
+ 0x555400);
+static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 0, 8) ==
+ 0x555600);
+static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 0, 9) ==
+ 0x555800);
constexpr u32 ValidateLayerSize(PixelFormat format, u32 width, u32 height, u32 block_height,
u32 tile_width_spacing, u32 level) {
const Extent3D size{width, height, 1};
const Extent3D block{0, block_height, 0};
- const u32 offset = CalculateLevelOffset(format, size, block, 1, tile_width_spacing, level);
+ const u32 offset = CalculateLevelOffset(format, size, block, tile_width_spacing, level);
return AlignLayerSize(offset, size, block, DefaultBlockHeight(format), tile_width_spacing);
}
diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h
index cdc5cbc75..766502908 100644
--- a/src/video_core/texture_cache/util.h
+++ b/src/video_core/texture_cache/util.h
@@ -40,6 +40,8 @@ struct OverlapResult {
[[nodiscard]] LevelArray CalculateMipLevelOffsets(const ImageInfo& info) noexcept;
+[[nodiscard]] LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept;
+
[[nodiscard]] std::vector<u32> CalculateSliceOffsets(const ImageInfo& info);
[[nodiscard]] std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info);
@@ -55,7 +57,7 @@ struct OverlapResult {
const ImageInfo& src,
SubresourceBase base);
-[[nodiscard]] bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config);
+[[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config);
[[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory,
GPUVAddr gpu_addr, const ImageInfo& info,
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index 9b2177ebd..3ab500760 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -269,7 +269,7 @@ static void DecodeQuintBlock(InputBitStream& bits, IntegerEncodedVector& result,
static void DecodeIntegerSequence(IntegerEncodedVector& result, InputBitStream& bits, u32 maxRange,
u32 nValues) {
// Determine encoding parameters
- IntegerEncodedValue val = EncodingsValues[maxRange];
+ IntegerEncodedValue val = ASTC_ENCODINGS_VALUES[maxRange];
// Start decoding
u32 nValsDecoded = 0;
@@ -310,7 +310,7 @@ struct TexelWeightParams {
nIdxs *= 2;
}
- return EncodingsValues[m_MaxWeight].GetBitLength(nIdxs);
+ return ASTC_ENCODINGS_VALUES[m_MaxWeight].GetBitLength(nIdxs);
}
u32 GetNumWeightValues() const {
@@ -551,6 +551,8 @@ static void FillError(std::span<u32> outBuf, u32 blockWidth, u32 blockHeight) {
}
}
}
+
+static constexpr auto REPLICATE_BYTE_TO_16_TABLE = MakeReplicateTable<u32, 8, 16>();
static constexpr u32 ReplicateByteTo16(std::size_t value) {
return REPLICATE_BYTE_TO_16_TABLE[value];
}
@@ -753,12 +755,12 @@ static void DecodeColorValues(u32* out, std::span<u8> data, const u32* modes, co
// figure out the max value for each of them...
u32 range = 256;
while (--range > 0) {
- IntegerEncodedValue val = EncodingsValues[range];
+ IntegerEncodedValue val = ASTC_ENCODINGS_VALUES[range];
u32 bitLength = val.GetBitLength(nValues);
if (bitLength <= nBitsForColorData) {
// Find the smallest possible range that matches the given encoding
while (--range > 0) {
- IntegerEncodedValue newval = EncodingsValues[range];
+ IntegerEncodedValue newval = ASTC_ENCODINGS_VALUES[range];
if (!newval.MatchesEncoding(val)) {
break;
}
@@ -1363,8 +1365,8 @@ static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth,
// each partition.
// Determine partitions, partition index, and color endpoint modes
- s32 planeIdx = -1;
- u32 partitionIndex;
+ u32 planeIdx{UINT32_MAX};
+ u32 partitionIndex{};
u32 colorEndpointMode[4] = {0, 0, 0, 0};
// Define color data.
diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h
index c1c37dfe7..0229ae122 100644
--- a/src/video_core/textures/astc.h
+++ b/src/video_core/textures/astc.h
@@ -77,7 +77,7 @@ constexpr std::array<IntegerEncodedValue, 256> MakeEncodedValues() {
return encodings;
}
-constexpr std::array<IntegerEncodedValue, 256> EncodingsValues = MakeEncodedValues();
+constexpr std::array<IntegerEncodedValue, 256> ASTC_ENCODINGS_VALUES = MakeEncodedValues();
// Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)]
// is the same as [(num_bits - 1):0] and repeats all the way down.
@@ -116,19 +116,10 @@ constexpr auto MakeReplicateTable() {
return table;
}
-constexpr auto REPLICATE_BYTE_TO_16_TABLE = MakeReplicateTable<u32, 8, 16>();
constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable<u32, 6, 8>();
constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable<u32, 7, 8>();
constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable<u32, 8, 8>();
-struct AstcBufferData {
- decltype(EncodingsValues) encoding_values = EncodingsValues;
- decltype(REPLICATE_6_BIT_TO_8_TABLE) replicate_6_to_8 = REPLICATE_6_BIT_TO_8_TABLE;
- decltype(REPLICATE_7_BIT_TO_8_TABLE) replicate_7_to_8 = REPLICATE_7_BIT_TO_8_TABLE;
- decltype(REPLICATE_8_BIT_TO_8_TABLE) replicate_8_to_8 = REPLICATE_8_BIT_TO_8_TABLE;
- decltype(REPLICATE_BYTE_TO_16_TABLE) replicate_byte_to_16 = REPLICATE_BYTE_TO_16_TABLE;
-} constexpr ASTC_BUFFER_DATA;
-
void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
uint32_t block_width, uint32_t block_height, std::span<uint8_t> output);
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index c1d14335e..1a9399455 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -154,6 +154,15 @@ union TextureHandle {
};
static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size");
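+/// Splits a raw texture handle into a (texture, sampler) index pair; when descriptors are
+/// addressed via the header index, the raw value is used for both.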
+[[nodiscard]] inline std::pair<u32, u32> TexturePair(u32 raw, bool via_header_index) {
+    if (via_header_index) {
+        return {raw, raw};
+    }
+    const Tegra::Texture::TextureHandle handle{raw};
+    return {handle.tic_id, handle.tsc_id};
+}
+
struct TICEntry {
union {
struct {
diff --git a/src/video_core/transform_feedback.cpp b/src/video_core/transform_feedback.cpp
new file mode 100644
index 000000000..ba26ac3f1
--- /dev/null
+++ b/src/video_core/transform_feedback.cpp
@@ -0,0 +1,99 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <vector>
+
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "shader_recompiler/shader_info.h"
+#include "video_core/transform_feedback.h"
+
+namespace VideoCommon {
+
+std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings(
+ const TransformFeedbackState& state) {
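+    // Varying locations that begin a four-component output vector (position, generic
+    // attributes, colors and texture coordinates).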
+ static constexpr std::array VECTORS{
+ 28, // gl_Position
+ 32, // Generic 0
+ 36, // Generic 1
+ 40, // Generic 2
+ 44, // Generic 3
+ 48, // Generic 4
+ 52, // Generic 5
+ 56, // Generic 6
+ 60, // Generic 7
+ 64, // Generic 8
+ 68, // Generic 9
+ 72, // Generic 10
+ 76, // Generic 11
+ 80, // Generic 12
+ 84, // Generic 13
+ 88, // Generic 14
+ 92, // Generic 15
+ 96, // Generic 16
+ 100, // Generic 17
+ 104, // Generic 18
+ 108, // Generic 19
+ 112, // Generic 20
+ 116, // Generic 21
+ 120, // Generic 22
+ 124, // Generic 23
+ 128, // Generic 24
+ 132, // Generic 25
+ 136, // Generic 26
+ 140, // Generic 27
+ 144, // Generic 28
+ 148, // Generic 29
+ 152, // Generic 30
+ 156, // Generic 31
+ 160, // gl_FrontColor
+ 164, // gl_FrontSecondaryColor
+ 160, // gl_BackColor
+ 164, // gl_BackSecondaryColor
+ 192, // gl_TexCoord[0]
+ 196, // gl_TexCoord[1]
+ 200, // gl_TexCoord[2]
+ 204, // gl_TexCoord[3]
+ 208, // gl_TexCoord[4]
+ 212, // gl_TexCoord[5]
+ 216, // gl_TexCoord[6]
+ 220, // gl_TexCoord[7]
+ };
+ std::vector<Shader::TransformFeedbackVarying> xfb(256);
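+    // Walk each buffer layout; consecutive varyings that fall into the same output vector
+    // (location / 4) are merged into a single multi-component varying.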
+ for (size_t buffer = 0; buffer < state.layouts.size(); ++buffer) {
+ const auto& locations = state.varyings[buffer];
+ const auto& layout = state.layouts[buffer];
+ const u32 varying_count = layout.varying_count;
+ u32 highest = 0;
+ for (u32 offset = 0; offset < varying_count; ++offset) {
+ const u32 base_offset = offset;
+ const u8 location = locations[offset];
+
+ UNIMPLEMENTED_IF_MSG(layout.stream != 0, "Stream is not zero: {}", layout.stream);
+ Shader::TransformFeedbackVarying varying{
+ .buffer = static_cast<u32>(buffer),
+ .stride = layout.stride,
+ .offset = offset * 4,
+ .components = 1,
+ };
+ if (std::ranges::find(VECTORS, Common::AlignDown(location, 4)) != VECTORS.end()) {
+ UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB");
+
+ const u8 base_index = location / 4;
+ while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) {
+ ++offset;
+ ++varying.components;
+ }
+ }
+ xfb[location] = varying;
+ highest = std::max(highest, (base_offset + varying.components) * 4);
+ }
+ UNIMPLEMENTED_IF(highest != layout.stride);
+ }
+ return xfb;
+}
+
+} // namespace VideoCommon
diff --git a/src/video_core/transform_feedback.h b/src/video_core/transform_feedback.h
new file mode 100644
index 000000000..8f6946d65
--- /dev/null
+++ b/src/video_core/transform_feedback.h
@@ -0,0 +1,30 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <vector>
+
+#include "common/common_types.h"
+#include "shader_recompiler/runtime_info.h"
+#include "video_core/engines/maxwell_3d.h"
+
+namespace VideoCommon {
+
+struct TransformFeedbackState {
+ struct Layout {
+ u32 stream;
+ u32 varying_count;
+ u32 stride;
+ };
+ std::array<Layout, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> layouts;
+ std::array<std::array<u8, 128>, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
+ varyings;
+};
+
+std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings(
+ const TransformFeedbackState& state);
+
+} // namespace VideoCommon
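
For context, a hedged sketch of how a backend might populate TransformFeedbackState before calling the helper; the register fields on the right are assumptions standing in for the real Maxwell3D readers, which are not part of this patch.

```cpp
// Sketch only: fill the shared state from captured 3D-engine registers.
VideoCommon::TransformFeedbackState state{};
for (size_t i = 0; i < state.layouts.size(); ++i) {
    state.layouts[i] = {
        .stream = regs.tfb_layouts[i].stream,               // assumed register field
        .varying_count = regs.tfb_layouts[i].varying_count, // assumed register field
        .stride = regs.tfb_layouts[i].stride,               // assumed register field
    };
    state.varyings[i] = regs.tfb_varying_locs[i];           // assumed 128-entry table
}
const std::vector<Shader::TransformFeedbackVarying> varyings =
    VideoCommon::MakeTransformFeedbackVaryings(state);
```
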
diff --git a/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp
index f0ee76519..fdd1a5081 100644
--- a/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp
+++ b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp
@@ -50,7 +50,7 @@ NsightAftermathTracker::NsightAftermathTracker() {
}
dump_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::LogDir) / "gpucrash";
- void(Common::FS::RemoveDirRecursively(dump_dir));
+ Common::FS::RemoveDirRecursively(dump_dir);
if (!Common::FS::CreateDir(dump_dir)) {
LOG_ERROR(Render_Vulkan, "Failed to create Nsight Aftermath dump directory");
return;
@@ -73,12 +73,11 @@ NsightAftermathTracker::~NsightAftermathTracker() {
}
}
-void NsightAftermathTracker::SaveShader(const std::vector<u32>& spirv) const {
+void NsightAftermathTracker::SaveShader(std::span<const u32> spirv) const {
if (!initialized) {
return;
}
-
- std::vector<u32> spirv_copy = spirv;
+ std::vector<u32> spirv_copy(spirv.begin(), spirv.end());
GFSDK_Aftermath_SpirvCode shader;
shader.pData = spirv_copy.data();
shader.size = static_cast<u32>(spirv_copy.size() * 4);
@@ -100,7 +99,7 @@ void NsightAftermathTracker::SaveShader(const std::vector<u32>& spirv) const {
LOG_ERROR(Render_Vulkan, "Failed to dump SPIR-V module with hash={:016x}", hash.hash);
return;
}
- if (file.Write(spirv) != spirv.size()) {
+ if (file.WriteSpan(spirv) != spirv.size()) {
LOG_ERROR(Render_Vulkan, "Failed to write SPIR-V module with hash={:016x}", hash.hash);
return;
}
diff --git a/src/video_core/vulkan_common/nsight_aftermath_tracker.h b/src/video_core/vulkan_common/nsight_aftermath_tracker.h
index 4fe2b14d9..eae1891dd 100644
--- a/src/video_core/vulkan_common/nsight_aftermath_tracker.h
+++ b/src/video_core/vulkan_common/nsight_aftermath_tracker.h
@@ -6,6 +6,7 @@
#include <filesystem>
#include <mutex>
+#include <span>
#include <string>
#include <vector>
@@ -33,7 +34,7 @@ public:
NsightAftermathTracker(NsightAftermathTracker&&) = delete;
NsightAftermathTracker& operator=(NsightAftermathTracker&&) = delete;
- void SaveShader(const std::vector<u32>& spirv) const;
+ void SaveShader(std::span<const u32> spirv) const;
private:
#ifdef HAS_NSIGHT_AFTERMATH
@@ -61,21 +62,21 @@ private:
bool initialized = false;
Common::DynamicLibrary dl;
- PFN_GFSDK_Aftermath_DisableGpuCrashDumps GFSDK_Aftermath_DisableGpuCrashDumps;
- PFN_GFSDK_Aftermath_EnableGpuCrashDumps GFSDK_Aftermath_EnableGpuCrashDumps;
- PFN_GFSDK_Aftermath_GetShaderDebugInfoIdentifier GFSDK_Aftermath_GetShaderDebugInfoIdentifier;
- PFN_GFSDK_Aftermath_GetShaderHashSpirv GFSDK_Aftermath_GetShaderHashSpirv;
- PFN_GFSDK_Aftermath_GpuCrashDump_CreateDecoder GFSDK_Aftermath_GpuCrashDump_CreateDecoder;
- PFN_GFSDK_Aftermath_GpuCrashDump_DestroyDecoder GFSDK_Aftermath_GpuCrashDump_DestroyDecoder;
- PFN_GFSDK_Aftermath_GpuCrashDump_GenerateJSON GFSDK_Aftermath_GpuCrashDump_GenerateJSON;
- PFN_GFSDK_Aftermath_GpuCrashDump_GetJSON GFSDK_Aftermath_GpuCrashDump_GetJSON;
+ PFN_GFSDK_Aftermath_DisableGpuCrashDumps GFSDK_Aftermath_DisableGpuCrashDumps{};
+ PFN_GFSDK_Aftermath_EnableGpuCrashDumps GFSDK_Aftermath_EnableGpuCrashDumps{};
+ PFN_GFSDK_Aftermath_GetShaderDebugInfoIdentifier GFSDK_Aftermath_GetShaderDebugInfoIdentifier{};
+ PFN_GFSDK_Aftermath_GetShaderHashSpirv GFSDK_Aftermath_GetShaderHashSpirv{};
+ PFN_GFSDK_Aftermath_GpuCrashDump_CreateDecoder GFSDK_Aftermath_GpuCrashDump_CreateDecoder{};
+ PFN_GFSDK_Aftermath_GpuCrashDump_DestroyDecoder GFSDK_Aftermath_GpuCrashDump_DestroyDecoder{};
+ PFN_GFSDK_Aftermath_GpuCrashDump_GenerateJSON GFSDK_Aftermath_GpuCrashDump_GenerateJSON{};
+ PFN_GFSDK_Aftermath_GpuCrashDump_GetJSON GFSDK_Aftermath_GpuCrashDump_GetJSON{};
#endif
};
#ifndef HAS_NSIGHT_AFTERMATH
inline NsightAftermathTracker::NsightAftermathTracker() = default;
inline NsightAftermathTracker::~NsightAftermathTracker() = default;
-inline void NsightAftermathTracker::SaveShader(const std::vector<u32>&) const {}
+inline void NsightAftermathTracker::SaveShader(std::span<const u32>) const {}
#endif
} // namespace Vulkan
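
Switching the parameter to std::span<const u32> widens what callers can pass without copying; any contiguous range of u32 binds. A small sketch (the SPIR-V producer is hypothetical):

```cpp
// Sketch: both forms compile against the span-based SaveShader.
std::vector<u32> spirv_words = CompileToSpirv(program); // hypothetical producer
tracker.SaveShader(spirv_words);                        // vector converts to span

std::array<u32, 4> stub{0x07230203u, 0x00010000u, 0u, 0u}; // SPIR-V magic + version
tracker.SaveShader(stub);                               // fixed arrays bind as well
```
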
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 64206b3d2..44afdc1cd 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <algorithm>
#include <bitset>
#include <chrono>
#include <optional>
@@ -33,6 +34,12 @@ constexpr std::array DEPTH16_UNORM_STENCIL8_UINT{
};
} // namespace Alternatives
+enum class NvidiaArchitecture {
+ AmpereOrNewer,
+ Turing,
+ VoltaOrOlder,
+};
+
constexpr std::array REQUIRED_EXTENSIONS{
VK_KHR_MAINTENANCE1_EXTENSION_NAME,
VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME,
@@ -43,11 +50,14 @@ constexpr std::array REQUIRED_EXTENSIONS{
VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME,
VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME,
VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
+ VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME,
+ VK_KHR_VARIABLE_POINTERS_EXTENSION_NAME,
VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME,
VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME,
VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
VK_EXT_ROBUSTNESS_2_EXTENSION_NAME,
VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME,
+ VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME,
#ifdef _WIN32
VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME,
#endif
@@ -112,6 +122,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica
VK_FORMAT_R16G16_SFLOAT,
VK_FORMAT_R16G16_SINT,
VK_FORMAT_R16_UNORM,
+ VK_FORMAT_R16_SNORM,
VK_FORMAT_R16_UINT,
VK_FORMAT_R8G8B8A8_SRGB,
VK_FORMAT_R8G8_UNORM,
@@ -191,15 +202,47 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica
return format_properties;
}
+std::vector<std::string> GetSupportedExtensions(vk::PhysicalDevice physical) {
+ const std::vector extensions = physical.EnumerateDeviceExtensionProperties();
+ std::vector<std::string> supported_extensions;
+ supported_extensions.reserve(extensions.size());
+ for (const auto& extension : extensions) {
+ supported_extensions.emplace_back(extension.extensionName);
+ }
+ return supported_extensions;
+}
+
+NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical,
+ std::span<const std::string> exts) {
+ if (std::ranges::find(exts, VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME) != exts.end()) {
+ VkPhysicalDeviceFragmentShadingRatePropertiesKHR shading_rate_props{};
+ shading_rate_props.sType =
+ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR;
+ VkPhysicalDeviceProperties2KHR physical_properties{};
+ physical_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR;
+ physical_properties.pNext = &shading_rate_props;
+ physical.GetProperties2KHR(physical_properties);
+ if (shading_rate_props.primitiveFragmentShadingRateWithMultipleViewports) {
+ // Only Ampere and newer support this feature
+ return NvidiaArchitecture::AmpereOrNewer;
+ }
+ }
+ if (std::ranges::find(exts, VK_NV_SHADING_RATE_IMAGE_EXTENSION_NAME) != exts.end()) {
+ return NvidiaArchitecture::Turing;
+ }
+ return NvidiaArchitecture::VoltaOrOlder;
+}
} // Anonymous namespace
Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR surface,
const vk::InstanceDispatch& dld_)
: instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()},
- format_properties{GetFormatProperties(physical)} {
+ supported_extensions{GetSupportedExtensions(physical)},
+ format_properties(GetFormatProperties(physical)) {
CheckSuitability(surface != nullptr);
SetupFamilies(surface);
SetupFeatures();
+ SetupProperties();
const auto queue_cis = GetDeviceQueueCreateInfos();
const std::vector extensions = LoadExtensions(surface != nullptr);
@@ -214,16 +257,16 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
.independentBlend = true,
.geometryShader = true,
.tessellationShader = true,
- .sampleRateShading = false,
- .dualSrcBlend = false,
+ .sampleRateShading = true,
+ .dualSrcBlend = true,
.logicOp = false,
.multiDrawIndirect = false,
.drawIndirectFirstInstance = false,
.depthClamp = true,
.depthBiasClamp = true,
- .fillModeNonSolid = false,
- .depthBounds = false,
- .wideLines = false,
+ .fillModeNonSolid = true,
+ .depthBounds = is_depth_bounds_supported,
+ .wideLines = true,
.largePoints = true,
.alphaToOne = false,
.multiViewport = true,
@@ -245,11 +288,11 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
.shaderSampledImageArrayDynamicIndexing = false,
.shaderStorageBufferArrayDynamicIndexing = false,
.shaderStorageImageArrayDynamicIndexing = false,
- .shaderClipDistance = false,
- .shaderCullDistance = false,
- .shaderFloat64 = false,
- .shaderInt64 = false,
- .shaderInt16 = false,
+ .shaderClipDistance = true,
+ .shaderCullDistance = true,
+ .shaderFloat64 = is_shader_float64_supported,
+ .shaderInt64 = is_shader_int64_supported,
+ .shaderInt16 = is_shader_int16_supported,
.shaderResourceResidency = false,
.shaderResourceMinLod = false,
.sparseBinding = false,
@@ -278,7 +321,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
VkPhysicalDevice16BitStorageFeaturesKHR bit16_storage{
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR,
.pNext = nullptr,
- .storageBuffer16BitAccess = false,
+ .storageBuffer16BitAccess = true,
.uniformAndStorageBuffer16BitAccess = true,
.storagePushConstant16 = false,
.storageInputOutput16 = false,
@@ -310,6 +353,21 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
};
SetNext(next, host_query_reset);
+ VkPhysicalDeviceVariablePointerFeaturesKHR variable_pointers{
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES_KHR,
+ .pNext = nullptr,
+ .variablePointersStorageBuffer = VK_TRUE,
+ .variablePointers = VK_TRUE,
+ };
+ SetNext(next, variable_pointers);
+
+ VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote{
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT,
+ .pNext = nullptr,
+ .shaderDemoteToHelperInvocation = true,
+ };
+ SetNext(next, demote);
+
VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8;
if (is_float16_supported) {
float16_int8 = {
@@ -327,6 +385,14 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
LOG_INFO(Render_Vulkan, "Device doesn't support viewport swizzles");
}
+ if (!nv_viewport_array2) {
+ LOG_INFO(Render_Vulkan, "Device doesn't support viewport masks");
+ }
+
+ if (!nv_geometry_shader_passthrough) {
+ LOG_INFO(Render_Vulkan, "Device doesn't support passthrough geometry shaders");
+ }
+
VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR std430_layout;
if (khr_uniform_buffer_standard_layout) {
std430_layout = {
@@ -389,12 +455,83 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state");
}
+ VkPhysicalDeviceLineRasterizationFeaturesEXT line_raster;
+ if (ext_line_rasterization) {
+ line_raster = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT,
+ .pNext = nullptr,
+ .rectangularLines = VK_TRUE,
+ .bresenhamLines = VK_FALSE,
+ .smoothLines = VK_TRUE,
+ .stippledRectangularLines = VK_FALSE,
+ .stippledBresenhamLines = VK_FALSE,
+ .stippledSmoothLines = VK_FALSE,
+ };
+ SetNext(next, line_raster);
+ } else {
+ LOG_INFO(Render_Vulkan, "Device doesn't support smooth lines");
+ }
+
+ if (!ext_conservative_rasterization) {
+ LOG_INFO(Render_Vulkan, "Device doesn't support conservative rasterization");
+ }
+
+ VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex;
+ if (ext_provoking_vertex) {
+ provoking_vertex = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT,
+ .pNext = nullptr,
+ .provokingVertexLast = VK_TRUE,
+ .transformFeedbackPreservesProvokingVertex = VK_TRUE,
+ };
+ SetNext(next, provoking_vertex);
+ } else {
+ LOG_INFO(Render_Vulkan, "Device doesn't support provoking vertex last");
+ }
+
+ VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT vertex_input_dynamic;
+ if (ext_vertex_input_dynamic_state) {
+ vertex_input_dynamic = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_INPUT_DYNAMIC_STATE_FEATURES_EXT,
+ .pNext = nullptr,
+ .vertexInputDynamicState = VK_TRUE,
+ };
+ SetNext(next, vertex_input_dynamic);
+ } else {
+ LOG_INFO(Render_Vulkan, "Device doesn't support vertex input dynamic state");
+ }
+
+ VkPhysicalDeviceShaderAtomicInt64FeaturesKHR atomic_int64;
+ if (ext_shader_atomic_int64) {
+ atomic_int64 = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR,
+ .pNext = nullptr,
+ .shaderBufferInt64Atomics = VK_TRUE,
+ .shaderSharedInt64Atomics = VK_TRUE,
+ };
+ SetNext(next, atomic_int64);
+ }
+
+ VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR workgroup_layout;
+ if (khr_workgroup_memory_explicit_layout) {
+ workgroup_layout = {
+ .sType =
+ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR,
+ .pNext = nullptr,
+ .workgroupMemoryExplicitLayout = VK_TRUE,
+ .workgroupMemoryExplicitLayoutScalarBlockLayout = VK_TRUE,
+ .workgroupMemoryExplicitLayout8BitAccess = VK_TRUE,
+ .workgroupMemoryExplicitLayout16BitAccess = VK_TRUE,
+ };
+ SetNext(next, workgroup_layout);
+ }
+
if (!ext_depth_range_unrestricted) {
LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");
}
VkDeviceDiagnosticsConfigCreateInfoNV diagnostics_nv;
- if (nv_device_diagnostics_config) {
+ if (Settings::values.enable_nsight_aftermath && nv_device_diagnostics_config) {
nsight_aftermath_tracker = std::make_unique<NsightAftermathTracker>();
diagnostics_nv = {
@@ -408,14 +545,37 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
}
logical = vk::Device::Create(physical, queue_cis, extensions, first_next, dld);
+ CollectPhysicalMemoryInfo();
CollectTelemetryParameters();
CollectToolingInfo();
+ if (driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR) {
+ const auto arch = GetNvidiaArchitecture(physical, supported_extensions);
+ switch (arch) {
+ case NvidiaArchitecture::AmpereOrNewer:
+ LOG_WARNING(Render_Vulkan, "Blacklisting Ampere devices from float16 math");
+ is_float16_supported = false;
+ break;
+ case NvidiaArchitecture::Turing:
+ break;
+ case NvidiaArchitecture::VoltaOrOlder:
+ LOG_WARNING(Render_Vulkan, "Blacklisting Volta and older from VK_KHR_push_descriptor");
+ khr_push_descriptor = false;
+ break;
+ }
+ }
if (ext_extended_dynamic_state && driver_id == VK_DRIVER_ID_MESA_RADV) {
- LOG_WARNING(
- Render_Vulkan,
- "Blacklisting RADV for VK_EXT_extended_dynamic state, likely due to a bug in yuzu");
- ext_extended_dynamic_state = false;
+ // Mask driver version variant
+ const u32 version = (properties.driverVersion << 3) >> 3;
+ if (version < VK_MAKE_API_VERSION(0, 21, 2, 0)) {
+ LOG_WARNING(Render_Vulkan,
+ "RADV versions older than 21.2 have broken VK_EXT_extended_dynamic_state");
+ ext_extended_dynamic_state = false;
+ }
+ }
+ if (ext_vertex_input_dynamic_state && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) {
+ LOG_WARNING(Render_Vulkan, "Blacklisting Intel for VK_EXT_vertex_input_dynamic_state");
+ ext_vertex_input_dynamic_state = false;
}
if (is_float16_supported && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) {
// Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being.
@@ -425,8 +585,6 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
graphics_queue = logical.GetQueue(graphics_family);
present_queue = logical.GetQueue(present_family);
-
- use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue();
}
Device::~Device() = default;
@@ -470,7 +628,7 @@ void Device::ReportLoss() const {
std::this_thread::sleep_for(std::chrono::seconds{15});
}
-void Device::SaveShader(const std::vector<u32>& spirv) const {
+void Device::SaveShader(std::span<const u32> spirv) const {
if (nsight_aftermath_tracker) {
nsight_aftermath_tracker->SaveShader(spirv);
}
@@ -531,6 +689,27 @@ bool Device::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags want
return (supported_usage & wanted_usage) == wanted_usage;
}
+std::string Device::GetDriverName() const {
+ switch (driver_id) {
+ case VK_DRIVER_ID_AMD_PROPRIETARY:
+ return "AMD";
+ case VK_DRIVER_ID_AMD_OPEN_SOURCE:
+ return "AMDVLK";
+ case VK_DRIVER_ID_MESA_RADV:
+ return "RADV";
+ case VK_DRIVER_ID_NVIDIA_PROPRIETARY:
+ return "NVIDIA";
+ case VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS:
+ return "INTEL";
+ case VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA:
+ return "ANV";
+ case VK_DRIVER_ID_MESA_LLVMPIPE:
+ return "LAVAPIPE";
+ default:
+ return vendor_name;
+ }
+}
+
void Device::CheckSuitability(bool requires_swapchain) const {
std::bitset<REQUIRED_EXTENSIONS.size()> available_extensions;
bool has_swapchain = false;
@@ -575,10 +754,20 @@ void Device::CheckSuitability(bool requires_swapchain) const {
throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT);
}
}
+ VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote{};
+ demote.sType =
+ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT;
+ demote.pNext = nullptr;
+
+ VkPhysicalDeviceVariablePointerFeaturesKHR variable_pointers{};
+ variable_pointers.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES_KHR;
+ variable_pointers.pNext = &demote;
+
VkPhysicalDeviceRobustness2FeaturesEXT robustness2{};
robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT;
+ robustness2.pNext = &variable_pointers;
- VkPhysicalDeviceFeatures2 features2{};
+ VkPhysicalDeviceFeatures2KHR features2{};
features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
features2.pNext = &robustness2;
@@ -588,7 +777,6 @@ void Device::CheckSuitability(bool requires_swapchain) const {
const std::array feature_report{
std::make_pair(features.robustBufferAccess, "robustBufferAccess"),
std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"),
- std::make_pair(features.robustBufferAccess, "robustBufferAccess"),
std::make_pair(features.imageCubeArray, "imageCubeArray"),
std::make_pair(features.independentBlend, "independentBlend"),
std::make_pair(features.depthClamp, "depthClamp"),
@@ -596,13 +784,23 @@ void Device::CheckSuitability(bool requires_swapchain) const {
std::make_pair(features.largePoints, "largePoints"),
std::make_pair(features.multiViewport, "multiViewport"),
std::make_pair(features.depthBiasClamp, "depthBiasClamp"),
+ std::make_pair(features.fillModeNonSolid, "fillModeNonSolid"),
+ std::make_pair(features.wideLines, "wideLines"),
std::make_pair(features.geometryShader, "geometryShader"),
std::make_pair(features.tessellationShader, "tessellationShader"),
+ std::make_pair(features.sampleRateShading, "sampleRateShading"),
+ std::make_pair(features.dualSrcBlend, "dualSrcBlend"),
std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"),
std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"),
std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"),
std::make_pair(features.shaderStorageImageWriteWithoutFormat,
"shaderStorageImageWriteWithoutFormat"),
+ std::make_pair(features.shaderClipDistance, "shaderClipDistance"),
+ std::make_pair(features.shaderCullDistance, "shaderCullDistance"),
+ std::make_pair(demote.shaderDemoteToHelperInvocation, "shaderDemoteToHelperInvocation"),
+ std::make_pair(variable_pointers.variablePointers, "variablePointers"),
+ std::make_pair(variable_pointers.variablePointersStorageBuffer,
+ "variablePointersStorageBuffer"),
std::make_pair(robustness2.robustBufferAccess2, "robustBufferAccess2"),
std::make_pair(robustness2.robustImageAccess2, "robustImageAccess2"),
std::make_pair(robustness2.nullDescriptor, "nullDescriptor"),
@@ -625,14 +823,19 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
}
bool has_khr_shader_float16_int8{};
+ bool has_khr_workgroup_memory_explicit_layout{};
bool has_ext_subgroup_size_control{};
bool has_ext_transform_feedback{};
bool has_ext_custom_border_color{};
bool has_ext_extended_dynamic_state{};
- for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) {
+ bool has_ext_shader_atomic_int64{};
+ bool has_ext_provoking_vertex{};
+ bool has_ext_vertex_input_dynamic_state{};
+ bool has_ext_line_rasterization{};
+ for (const std::string& extension : supported_extensions) {
const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name,
bool push) {
- if (extension.extensionName != std::string_view(name)) {
+ if (extension != name) {
return;
}
if (push) {
@@ -643,8 +846,13 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
}
};
test(nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true);
+ test(nv_viewport_array2, VK_NV_VIEWPORT_ARRAY2_EXTENSION_NAME, true);
+ test(nv_geometry_shader_passthrough, VK_NV_GEOMETRY_SHADER_PASSTHROUGH_EXTENSION_NAME,
+ true);
test(khr_uniform_buffer_standard_layout,
VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true);
+ test(khr_spirv_1_4, VK_KHR_SPIRV_1_4_EXTENSION_NAME, true);
+ test(khr_push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, true);
test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false);
test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true);
test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
@@ -653,16 +861,25 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
true);
test(ext_tooling_info, VK_EXT_TOOLING_INFO_EXTENSION_NAME, true);
test(ext_shader_stencil_export, VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME, true);
+ test(ext_conservative_rasterization, VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME,
+ true);
test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false);
test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false);
test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false);
test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false);
- if (Settings::values.renderer_debug) {
+ test(has_ext_provoking_vertex, VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME, false);
+ test(has_ext_vertex_input_dynamic_state, VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME,
+ false);
+ test(has_ext_shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME, false);
+ test(has_khr_workgroup_memory_explicit_layout,
+ VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false);
+ test(has_ext_line_rasterization, VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME, false);
+ if (Settings::values.enable_nsight_aftermath) {
test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME,
true);
}
}
- VkPhysicalDeviceFeatures2KHR features;
+ VkPhysicalDeviceFeatures2KHR features{};
features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR;
VkPhysicalDeviceProperties2KHR physical_properties;
@@ -700,10 +917,49 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
subgroup_properties.maxSubgroupSize >= GuestWarpSize) {
extensions.push_back(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME);
guest_warp_stages = subgroup_properties.requiredSubgroupSizeStages;
+ ext_subgroup_size_control = true;
}
} else {
is_warp_potentially_bigger = true;
}
+ if (has_ext_provoking_vertex) {
+ VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex;
+ provoking_vertex.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT;
+ provoking_vertex.pNext = nullptr;
+ features.pNext = &provoking_vertex;
+ physical.GetFeatures2KHR(features);
+
+ if (provoking_vertex.provokingVertexLast &&
+ provoking_vertex.transformFeedbackPreservesProvokingVertex) {
+ extensions.push_back(VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME);
+ ext_provoking_vertex = true;
+ }
+ }
+ if (has_ext_vertex_input_dynamic_state) {
+ VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT vertex_input;
+ vertex_input.sType =
+ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_INPUT_DYNAMIC_STATE_FEATURES_EXT;
+ vertex_input.pNext = nullptr;
+ features.pNext = &vertex_input;
+ physical.GetFeatures2KHR(features);
+
+ if (vertex_input.vertexInputDynamicState) {
+ extensions.push_back(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
+ ext_vertex_input_dynamic_state = true;
+ }
+ }
+ if (has_ext_shader_atomic_int64) {
+ VkPhysicalDeviceShaderAtomicInt64Features atomic_int64;
+ atomic_int64.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR;
+ atomic_int64.pNext = nullptr;
+ features.pNext = &atomic_int64;
+ physical.GetFeatures2KHR(features);
+
+ if (atomic_int64.shaderBufferInt64Atomics && atomic_int64.shaderSharedInt64Atomics) {
+ extensions.push_back(VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME);
+ ext_shader_atomic_int64 = true;
+ }
+ }
if (has_ext_transform_feedback) {
VkPhysicalDeviceTransformFeedbackFeaturesEXT tfb_features;
tfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT;
@@ -738,17 +994,55 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
}
}
if (has_ext_extended_dynamic_state) {
- VkPhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state;
- dynamic_state.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT;
- dynamic_state.pNext = nullptr;
- features.pNext = &dynamic_state;
+ VkPhysicalDeviceExtendedDynamicStateFeaturesEXT extended_dynamic_state;
+ extended_dynamic_state.sType =
+ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT;
+ extended_dynamic_state.pNext = nullptr;
+ features.pNext = &extended_dynamic_state;
physical.GetFeatures2KHR(features);
- if (dynamic_state.extendedDynamicState) {
+ if (extended_dynamic_state.extendedDynamicState) {
extensions.push_back(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);
ext_extended_dynamic_state = true;
}
}
+ if (has_ext_line_rasterization) {
+ VkPhysicalDeviceLineRasterizationFeaturesEXT line_raster;
+ line_raster.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT;
+ line_raster.pNext = nullptr;
+ features.pNext = &line_raster;
+ physical.GetFeatures2KHR(features);
+ if (line_raster.rectangularLines && line_raster.smoothLines) {
+ extensions.push_back(VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME);
+ ext_line_rasterization = true;
+ }
+ }
+ if (has_khr_workgroup_memory_explicit_layout) {
+ VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR layout;
+ layout.sType =
+ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR;
+ layout.pNext = nullptr;
+ features.pNext = &layout;
+ physical.GetFeatures2KHR(features);
+
+ if (layout.workgroupMemoryExplicitLayout &&
+ layout.workgroupMemoryExplicitLayout8BitAccess &&
+ layout.workgroupMemoryExplicitLayout16BitAccess &&
+ layout.workgroupMemoryExplicitLayoutScalarBlockLayout) {
+ extensions.push_back(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME);
+ khr_workgroup_memory_explicit_layout = true;
+ }
+ }
+ if (khr_push_descriptor) {
+ VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor;
+ push_descriptor.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR;
+ push_descriptor.pNext = nullptr;
+
+ physical_properties.pNext = &push_descriptor;
+ physical.GetProperties2KHR(physical_properties);
+
+ max_push_descriptors = push_descriptor.maxPushDescriptors;
+ }
return extensions;
}
@@ -784,11 +1078,25 @@ void Device::SetupFamilies(VkSurfaceKHR surface) {
}
void Device::SetupFeatures() {
- const auto supported_features{physical.GetFeatures()};
- is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat;
- is_shader_storage_image_multisample = supported_features.shaderStorageImageMultisample;
+ const VkPhysicalDeviceFeatures features{physical.GetFeatures()};
+ is_depth_bounds_supported = features.depthBounds;
+ is_formatless_image_load_supported = features.shaderStorageImageReadWithoutFormat;
+ is_shader_float64_supported = features.shaderFloat64;
+ is_shader_int64_supported = features.shaderInt64;
+ is_shader_int16_supported = features.shaderInt16;
+ is_shader_storage_image_multisample = features.shaderStorageImageMultisample;
is_blit_depth_stencil_supported = TestDepthStencilBlits();
- is_optimal_astc_supported = IsOptimalAstcSupported(supported_features);
+ is_optimal_astc_supported = IsOptimalAstcSupported(features);
+}
+
+void Device::SetupProperties() {
+ float_controls.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES_KHR;
+
+ VkPhysicalDeviceProperties2KHR properties2{};
+ properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR;
+ properties2.pNext = &float_controls;
+
+ physical.GetProperties2KHR(properties2);
}
void Device::CollectTelemetryParameters() {
@@ -810,11 +1118,16 @@ void Device::CollectTelemetryParameters() {
driver_id = driver.driverID;
vendor_name = driver.driverName;
+}
- const std::vector extensions = physical.EnumerateDeviceExtensionProperties();
- reported_extensions.reserve(std::size(extensions));
- for (const auto& extension : extensions) {
- reported_extensions.emplace_back(extension.extensionName);
+void Device::CollectPhysicalMemoryInfo() {
+ const auto mem_properties = physical.GetMemoryProperties();
+ const size_t num_properties = mem_properties.memoryHeapCount;
+ device_access_memory = 0;
+ for (size_t element = 0; element < num_properties; ++element) {
+ if ((mem_properties.memoryHeaps[element].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) != 0) {
+ device_access_memory += mem_properties.memoryHeaps[element].size;
+ }
}
}
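
CollectPhysicalMemoryInfo above simply sums the device-local heaps. For reference, the same query expressed against the raw Vulkan API (a sketch, independent of the wrapper types used here):

```cpp
VkPhysicalDeviceMemoryProperties mem_properties{};
vkGetPhysicalDeviceMemoryProperties(physical_device, &mem_properties);

VkDeviceSize device_local_bytes = 0;
for (uint32_t i = 0; i < mem_properties.memoryHeapCount; ++i) {
    if ((mem_properties.memoryHeaps[i].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) != 0) {
        device_local_bytes += mem_properties.memoryHeaps[i].size;
    }
}
```
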
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 67d70cd22..df394e384 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -4,6 +4,7 @@
#pragma once
+#include <span>
#include <string>
#include <string_view>
#include <unordered_map>
@@ -43,7 +44,10 @@ public:
void ReportLoss() const;
/// Reports a shader to Nsight Aftermath.
- void SaveShader(const std::vector<u32>& spirv) const;
+ void SaveShader(std::span<const u32> spirv) const;
+
+ /// Returns a human-readable name for the VkDriverId reported by the driver.
+ std::string GetDriverName() const;
/// Returns the dispatch loader with direct function pointers of the device.
const vk::DeviceDispatch& GetDispatchLoader() const {
@@ -125,6 +129,11 @@ public:
return properties.limits.maxComputeSharedMemorySize;
}
+ /// Returns float control properties of the device.
+ const VkPhysicalDeviceFloatControlsPropertiesKHR& FloatControlProperties() const {
+ return float_controls;
+ }
+
/// Returns true if ASTC is natively supported.
bool IsOptimalAstcSupported() const {
return is_optimal_astc_supported;
@@ -145,11 +154,31 @@ public:
return guest_warp_stages & stage;
}
+ /// Returns the maximum number of push descriptors.
+ u32 MaxPushDescriptors() const {
+ return max_push_descriptors;
+ }
+
/// Returns true if formatless image load is supported.
bool IsFormatlessImageLoadSupported() const {
return is_formatless_image_load_supported;
}
+ /// Returns true if shader int64 is supported.
+ bool IsShaderInt64Supported() const {
+ return is_shader_int64_supported;
+ }
+
+ /// Returns true if shader int16 is supported.
+ bool IsShaderInt16Supported() const {
+ return is_shader_int16_supported;
+ }
+
+ /// Returns true if depth bounds testing is supported.
+ bool IsDepthBoundsSupported() const {
+ return is_depth_bounds_supported;
+ }
+
/// Returns true when blitting from and to depth stencil images is supported.
bool IsBlitDepthStencilSupported() const {
return is_blit_depth_stencil_supported;
@@ -160,11 +189,36 @@ public:
return nv_viewport_swizzle;
}
- /// Returns true if the device supports VK_EXT_scalar_block_layout.
+ /// Returns true if the device supports VK_NV_viewport_array2.
+ bool IsNvViewportArray2Supported() const {
+ return nv_viewport_array2;
+ }
+
+ /// Returns true if the device supports VK_NV_geometry_shader_passthrough.
+ bool IsNvGeometryShaderPassthroughSupported() const {
+ return nv_geometry_shader_passthrough;
+ }
+
+ /// Returns true if the device supports VK_KHR_uniform_buffer_standard_layout.
bool IsKhrUniformBufferStandardLayoutSupported() const {
return khr_uniform_buffer_standard_layout;
}
+ /// Returns true if the device supports VK_KHR_spirv_1_4.
+ bool IsKhrSpirv1_4Supported() const {
+ return khr_spirv_1_4;
+ }
+
+ /// Returns true if the device supports VK_KHR_push_descriptor.
+ bool IsKhrPushDescriptorSupported() const {
+ return khr_push_descriptor;
+ }
+
+ /// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout.
+ bool IsKhrWorkgroupMemoryExplicitLayoutSupported() const {
+ return khr_workgroup_memory_explicit_layout;
+ }
+
/// Returns true if the device supports VK_EXT_index_type_uint8.
bool IsExtIndexTypeUint8Supported() const {
return ext_index_type_uint8;
@@ -185,6 +239,11 @@ public:
return ext_shader_viewport_index_layer;
}
+ /// Returns true if the device supports VK_EXT_subgroup_size_control.
+ bool IsExtSubgroupSizeControlSupported() const {
+ return ext_subgroup_size_control;
+ }
+
/// Returns true if the device supports VK_EXT_transform_feedback.
bool IsExtTransformFeedbackSupported() const {
return ext_transform_feedback;
@@ -200,11 +259,36 @@ public:
return ext_extended_dynamic_state;
}
+ /// Returns true if the device supports VK_EXT_line_rasterization.
+ bool IsExtLineRasterizationSupported() const {
+ return ext_line_rasterization;
+ }
+
+ /// Returns true if the device supports VK_EXT_vertex_input_dynamic_state.
+ bool IsExtVertexInputDynamicStateSupported() const {
+ return ext_vertex_input_dynamic_state;
+ }
+
/// Returns true if the device supports VK_EXT_shader_stencil_export.
bool IsExtShaderStencilExportSupported() const {
return ext_shader_stencil_export;
}
+ /// Returns true if the device supports VK_EXT_conservative_rasterization.
+ bool IsExtConservativeRasterizationSupported() const {
+ return ext_conservative_rasterization;
+ }
+
+ /// Returns true if the device supports VK_EXT_provoking_vertex.
+ bool IsExtProvokingVertexSupported() const {
+ return ext_provoking_vertex;
+ }
+
+ /// Returns true if the device supports VK_KHR_shader_atomic_int64.
+ bool IsExtShaderAtomicInt64Supported() const {
+ return ext_shader_atomic_int64;
+ }
+
/// Returns true when a known debugging tool is attached.
bool HasDebuggingToolAttached() const {
return has_renderdoc || has_nsight_graphics;
@@ -217,12 +301,11 @@ public:
/// Returns the list of available extensions.
const std::vector<std::string>& GetAvailableExtensions() const {
- return reported_extensions;
+ return supported_extensions;
}
- /// Returns true if the setting for async shader compilation is enabled.
- bool UseAsynchronousShaders() const {
- return use_asynchronous_shaders;
+ u64 GetDeviceLocalMemory() const {
+ return device_access_memory;
}
private:
@@ -238,12 +321,18 @@ private:
/// Sets up device features.
void SetupFeatures();
+ /// Sets up device properties.
+ void SetupProperties();
+
/// Collects telemetry information from the device.
void CollectTelemetryParameters();
/// Collects information about attached tools.
void CollectToolingInfo();
+ /// Collects information about the device's local memory.
+ void CollectPhysicalMemoryInfo();
+
/// Returns a list of queue initialization descriptors.
std::vector<VkDeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const;
@@ -257,45 +346,60 @@ private:
bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
FormatType format_type) const;
- VkInstance instance; ///< Vulkan instance.
- vk::DeviceDispatch dld; ///< Device function pointers.
- vk::PhysicalDevice physical; ///< Physical device.
- VkPhysicalDeviceProperties properties; ///< Device properties.
- vk::Device logical; ///< Logical device.
- vk::Queue graphics_queue; ///< Main graphics queue.
- vk::Queue present_queue; ///< Main present queue.
- u32 instance_version{}; ///< Vulkan onstance version.
- u32 graphics_family{}; ///< Main graphics queue family index.
- u32 present_family{}; ///< Main present queue family index.
- VkDriverIdKHR driver_id{}; ///< Driver ID.
- VkShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.ed
- bool is_optimal_astc_supported{}; ///< Support for native ASTC.
- bool is_float16_supported{}; ///< Support for float16 arithmetics.
- bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
+ VkInstance instance; ///< Vulkan instance.
+ vk::DeviceDispatch dld; ///< Device function pointers.
+ vk::PhysicalDevice physical; ///< Physical device.
+ VkPhysicalDeviceProperties properties; ///< Device properties.
+ VkPhysicalDeviceFloatControlsPropertiesKHR float_controls{}; ///< Float control properties.
+ vk::Device logical; ///< Logical device.
+ vk::Queue graphics_queue; ///< Main graphics queue.
+ vk::Queue present_queue; ///< Main present queue.
+ u32 instance_version{}; ///< Vulkan instance version.
+ u32 graphics_family{}; ///< Main graphics queue family index.
+ u32 present_family{}; ///< Main present queue family index.
+ VkDriverIdKHR driver_id{}; ///< Driver ID.
+ VkShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.
+ u64 device_access_memory{}; ///< Total size of device local memory in bytes.
+ u32 max_push_descriptors{}; ///< Maximum number of push descriptors
+ bool is_optimal_astc_supported{}; ///< Support for native ASTC.
+ bool is_float16_supported{}; ///< Support for float16 arithmetics.
+ bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
bool is_formatless_image_load_supported{}; ///< Support for shader image read without format.
+ bool is_depth_bounds_supported{}; ///< Support for depth bounds.
+ bool is_shader_float64_supported{}; ///< Support for float64.
+ bool is_shader_int64_supported{}; ///< Support for int64.
+ bool is_shader_int16_supported{}; ///< Support for int16.
bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images.
bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil.
bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle.
- bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs.
- bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8.
- bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax.
- bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
- bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
- bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info.
- bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback.
- bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color.
- bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state.
- bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export.
- bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config.
- bool has_renderdoc{}; ///< Has RenderDoc attached
- bool has_nsight_graphics{}; ///< Has Nsight Graphics attached
-
- // Asynchronous Graphics Pipeline setting
- bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline
+ bool nv_viewport_array2{}; ///< Support for VK_NV_viewport_array2.
+ bool nv_geometry_shader_passthrough{}; ///< Support for VK_NV_geometry_shader_passthrough.
+ bool khr_uniform_buffer_standard_layout{}; ///< Support for scalar uniform buffer layouts.
+ bool khr_spirv_1_4{}; ///< Support for VK_KHR_spirv_1_4.
+ bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts.
+ bool khr_push_descriptor{}; ///< Support for VK_KHR_push_descriptor.
+ bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8.
+ bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax.
+ bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
+ bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
+ bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info.
+ bool ext_subgroup_size_control{}; ///< Support for VK_EXT_subgroup_size_control.
+ bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback.
+ bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color.
+ bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state.
+ bool ext_line_rasterization{}; ///< Support for VK_EXT_line_rasterization.
+ bool ext_vertex_input_dynamic_state{}; ///< Support for VK_EXT_vertex_input_dynamic_state.
+ bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export.
+ bool ext_shader_atomic_int64{}; ///< Support for VK_KHR_shader_atomic_int64.
+ bool ext_conservative_rasterization{}; ///< Support for VK_EXT_conservative_rasterization.
+ bool ext_provoking_vertex{}; ///< Support for VK_EXT_provoking_vertex.
+ bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config.
+ bool has_renderdoc{}; ///< Has RenderDoc attached
+ bool has_nsight_graphics{}; ///< Has Nsight Graphics attached
// Telemetry parameters
- std::string vendor_name; ///< Device's driver name.
- std::vector<std::string> reported_extensions; ///< Reported Vulkan extensions.
+ std::string vendor_name; ///< Device's driver name.
+ std::vector<std::string> supported_extensions; ///< Reported Vulkan extensions.
/// Format properties dictionary.
std::unordered_map<VkFormat, VkFormatProperties> format_properties;
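
A sketch of how a renderer might consume the new capability and property queries; the thresholds and branches are illustrative, not taken from yuzu's pipeline code.

```cpp
if (device.IsKhrPushDescriptorSupported() && device.MaxPushDescriptors() >= 32) {
    // Prefer push descriptors for per-draw bindings.
}
if (!device.IsExtVertexInputDynamicStateSupported()) {
    // Bake vertex input state into the pipeline key instead.
}
LOG_INFO(Render_Vulkan, "Driver: {}, device-local memory: {} MiB", device.GetDriverName(),
         device.GetDeviceLocalMemory() / (1024 * 1024));
```
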
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
index 5edd06ebc..aa173d19e 100644
--- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
+++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
@@ -69,10 +69,10 @@ constexpr VkExportMemoryAllocateInfo EXPORT_ALLOCATE_INFO{
class MemoryAllocation {
public:
- explicit MemoryAllocation(vk::DeviceMemory memory_, VkMemoryPropertyFlags properties,
- u64 allocation_size_, u32 type)
- : memory{std::move(memory_)}, allocation_size{allocation_size_}, property_flags{properties},
- shifted_memory_type{1U << type} {}
+ explicit MemoryAllocation(MemoryAllocator* const allocator_, vk::DeviceMemory memory_,
+ VkMemoryPropertyFlags properties, u64 allocation_size_, u32 type)
+ : allocator{allocator_}, memory{std::move(memory_)}, allocation_size{allocation_size_},
+ property_flags{properties}, shifted_memory_type{1U << type} {}
#if defined(_WIN32) || defined(__unix__)
~MemoryAllocation() {
@@ -106,6 +106,10 @@ public:
const auto it = std::ranges::find(commits, begin, &Range::begin);
ASSERT_MSG(it != commits.end(), "Invalid commit");
commits.erase(it);
+ if (commits.empty()) {
+ // Do not call any code involving 'this' after this call, the object will be destroyed
+ allocator->ReleaseMemory(this);
+ }
}
[[nodiscard]] std::span<u8> Map() {
@@ -171,6 +175,7 @@ private:
return candidate;
}
+ MemoryAllocator* const allocator; ///< Parent memory allocator.
const vk::DeviceMemory memory; ///< Vulkan memory allocation handler.
const u64 allocation_size; ///< Size of this allocation.
const VkMemoryPropertyFlags property_flags; ///< Vulkan memory property flags.
@@ -275,10 +280,17 @@ bool MemoryAllocator::TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask,
return false;
}
}
- allocations.push_back(std::make_unique<MemoryAllocation>(std::move(memory), flags, size, type));
+ allocations.push_back(
+ std::make_unique<MemoryAllocation>(this, std::move(memory), flags, size, type));
return true;
}
+void MemoryAllocator::ReleaseMemory(MemoryAllocation* alloc) {
+ const auto it = std::ranges::find(allocations, alloc, &std::unique_ptr<MemoryAllocation>::get);
+ ASSERT(it != allocations.end());
+ allocations.erase(it);
+}
+
std::optional<MemoryCommit> MemoryAllocator::TryCommit(const VkMemoryRequirements& requirements,
VkMemoryPropertyFlags flags) {
for (auto& allocation : allocations) {
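
The Free/ReleaseMemory interplay means a MemoryAllocation now destroys itself when its last commit is returned, so nothing may touch the object after that call. A self-contained reduction of the pattern (names are placeholders, not from the patch):

```cpp
#include <algorithm>
#include <cassert>
#include <memory>
#include <vector>

struct Pool;
struct Chunk {
    explicit Chunk(Pool* parent_) : parent{parent_} {}
    void AddCommit() { ++live_commits; }
    void FreeCommit(); // may destroy *this
    Pool* parent;
    int live_commits = 0;
};

struct Pool {
    Chunk* NewChunk() {
        chunks.push_back(std::make_unique<Chunk>(this));
        return chunks.back().get();
    }
    void Release(Chunk* chunk) {
        const auto it = std::ranges::find(chunks, chunk, &std::unique_ptr<Chunk>::get);
        assert(it != chunks.end());
        chunks.erase(it);
    }
    std::vector<std::unique_ptr<Chunk>> chunks;
};

void Chunk::FreeCommit() {
    if (--live_commits == 0) {
        parent->Release(this); // 'this' is destroyed here; do not use it afterwards
    }
}
```
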
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.h b/src/video_core/vulkan_common/vulkan_memory_allocator.h
index db12d02f4..b61e931e0 100644
--- a/src/video_core/vulkan_common/vulkan_memory_allocator.h
+++ b/src/video_core/vulkan_common/vulkan_memory_allocator.h
@@ -69,6 +69,8 @@ private:
/// Memory allocator container.
/// Allocates and releases memory allocations on demand.
class MemoryAllocator {
+ friend MemoryAllocation;
+
public:
/**
* Construct memory allocator
@@ -104,6 +106,9 @@ private:
/// Tries to allocate a chunk of memory.
bool TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size);
+ /// Releases a chunk of memory.
+ void ReleaseMemory(MemoryAllocation* alloc);
+
/// Tries to allocate a memory commit.
std::optional<MemoryCommit> TryCommit(const VkMemoryRequirements& requirements,
VkMemoryPropertyFlags flags);
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp
index 2aa0ffbe6..70898004a 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.cpp
+++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp
@@ -103,6 +103,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
X(vkCmdFillBuffer);
X(vkCmdPipelineBarrier);
X(vkCmdPushConstants);
+ X(vkCmdPushDescriptorSetWithTemplateKHR);
X(vkCmdSetBlendConstants);
X(vkCmdSetDepthBias);
X(vkCmdSetDepthBounds);
@@ -120,9 +121,11 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
X(vkCmdSetDepthTestEnableEXT);
X(vkCmdSetDepthWriteEnableEXT);
X(vkCmdSetFrontFaceEXT);
+ X(vkCmdSetLineWidth);
X(vkCmdSetPrimitiveTopologyEXT);
X(vkCmdSetStencilOpEXT);
X(vkCmdSetStencilTestEnableEXT);
+ X(vkCmdSetVertexInputEXT);
X(vkCmdResolveImage);
X(vkCreateBuffer);
X(vkCreateBufferView);
@@ -199,7 +202,7 @@ void SetObjectName(const DeviceDispatch* dld, VkDevice device, T handle, VkObjec
const VkDebugUtilsObjectNameInfoEXT name_info{
.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT,
.pNext = nullptr,
- .objectType = VK_OBJECT_TYPE_IMAGE,
+ .objectType = type,
.objectHandle = reinterpret_cast<u64>(handle),
.pObjectName = name,
};
@@ -311,8 +314,6 @@ const char* ToString(VkResult result) noexcept {
return "VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT";
case VkResult::VK_ERROR_UNKNOWN:
return "VK_ERROR_UNKNOWN";
- case VkResult::VK_ERROR_INCOMPATIBLE_VERSION_KHR:
- return "VK_ERROR_INCOMPATIBLE_VERSION_KHR";
case VkResult::VK_THREAD_IDLE_KHR:
return "VK_THREAD_IDLE_KHR";
case VkResult::VK_THREAD_DONE_KHR:
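
The objectType fix above matters for debug tooling: before it, every named handle was reported as an image. In plain Vulkan the call looks like this (sketch; the buffer handle and name are placeholders, and vkSetDebugUtilsObjectNameEXT must be loaded through the instance):

```cpp
const VkDebugUtilsObjectNameInfoEXT name_info{
    .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT,
    .pNext = nullptr,
    .objectType = VK_OBJECT_TYPE_BUFFER,                // matches the handle type
    .objectHandle = reinterpret_cast<uint64_t>(buffer), // VkBuffer being named
    .pObjectName = "staging buffer",
};
vkSetDebugUtilsObjectNameEXT(device, &name_info);
```
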
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h
index 3e36d356a..d76bb4324 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.h
+++ b/src/video_core/vulkan_common/vulkan_wrapper.h
@@ -193,15 +193,16 @@ struct DeviceDispatch : InstanceDispatch {
PFN_vkBeginCommandBuffer vkBeginCommandBuffer{};
PFN_vkBindBufferMemory vkBindBufferMemory{};
PFN_vkBindImageMemory vkBindImageMemory{};
+ PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT{};
PFN_vkCmdBeginQuery vkCmdBeginQuery{};
PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass{};
PFN_vkCmdBeginTransformFeedbackEXT vkCmdBeginTransformFeedbackEXT{};
- PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT{};
PFN_vkCmdBindDescriptorSets vkCmdBindDescriptorSets{};
PFN_vkCmdBindIndexBuffer vkCmdBindIndexBuffer{};
PFN_vkCmdBindPipeline vkCmdBindPipeline{};
PFN_vkCmdBindTransformFeedbackBuffersEXT vkCmdBindTransformFeedbackBuffersEXT{};
PFN_vkCmdBindVertexBuffers vkCmdBindVertexBuffers{};
+ PFN_vkCmdBindVertexBuffers2EXT vkCmdBindVertexBuffers2EXT{};
PFN_vkCmdBlitImage vkCmdBlitImage{};
PFN_vkCmdClearAttachments vkCmdClearAttachments{};
PFN_vkCmdCopyBuffer vkCmdCopyBuffer{};
@@ -211,34 +212,36 @@ struct DeviceDispatch : InstanceDispatch {
PFN_vkCmdDispatch vkCmdDispatch{};
PFN_vkCmdDraw vkCmdDraw{};
PFN_vkCmdDrawIndexed vkCmdDrawIndexed{};
+ PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{};
PFN_vkCmdEndQuery vkCmdEndQuery{};
PFN_vkCmdEndRenderPass vkCmdEndRenderPass{};
PFN_vkCmdEndTransformFeedbackEXT vkCmdEndTransformFeedbackEXT{};
- PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{};
PFN_vkCmdFillBuffer vkCmdFillBuffer{};
PFN_vkCmdPipelineBarrier vkCmdPipelineBarrier{};
PFN_vkCmdPushConstants vkCmdPushConstants{};
+ PFN_vkCmdPushDescriptorSetWithTemplateKHR vkCmdPushDescriptorSetWithTemplateKHR{};
+ PFN_vkCmdResolveImage vkCmdResolveImage{};
PFN_vkCmdSetBlendConstants vkCmdSetBlendConstants{};
+ PFN_vkCmdSetCullModeEXT vkCmdSetCullModeEXT{};
PFN_vkCmdSetDepthBias vkCmdSetDepthBias{};
PFN_vkCmdSetDepthBounds vkCmdSetDepthBounds{};
- PFN_vkCmdSetEvent vkCmdSetEvent{};
- PFN_vkCmdSetScissor vkCmdSetScissor{};
- PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask{};
- PFN_vkCmdSetStencilReference vkCmdSetStencilReference{};
- PFN_vkCmdSetStencilWriteMask vkCmdSetStencilWriteMask{};
- PFN_vkCmdSetViewport vkCmdSetViewport{};
- PFN_vkCmdWaitEvents vkCmdWaitEvents{};
- PFN_vkCmdBindVertexBuffers2EXT vkCmdBindVertexBuffers2EXT{};
- PFN_vkCmdSetCullModeEXT vkCmdSetCullModeEXT{};
PFN_vkCmdSetDepthBoundsTestEnableEXT vkCmdSetDepthBoundsTestEnableEXT{};
PFN_vkCmdSetDepthCompareOpEXT vkCmdSetDepthCompareOpEXT{};
PFN_vkCmdSetDepthTestEnableEXT vkCmdSetDepthTestEnableEXT{};
PFN_vkCmdSetDepthWriteEnableEXT vkCmdSetDepthWriteEnableEXT{};
+ PFN_vkCmdSetEvent vkCmdSetEvent{};
PFN_vkCmdSetFrontFaceEXT vkCmdSetFrontFaceEXT{};
+ PFN_vkCmdSetLineWidth vkCmdSetLineWidth{};
PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT{};
+ PFN_vkCmdSetScissor vkCmdSetScissor{};
+ PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask{};
PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT{};
+ PFN_vkCmdSetStencilReference vkCmdSetStencilReference{};
PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT{};
- PFN_vkCmdResolveImage vkCmdResolveImage{};
+ PFN_vkCmdSetStencilWriteMask vkCmdSetStencilWriteMask{};
+ PFN_vkCmdSetVertexInputEXT vkCmdSetVertexInputEXT{};
+ PFN_vkCmdSetViewport vkCmdSetViewport{};
+ PFN_vkCmdWaitEvents vkCmdWaitEvents{};
PFN_vkCreateBuffer vkCreateBuffer{};
PFN_vkCreateBufferView vkCreateBufferView{};
PFN_vkCreateCommandPool vkCreateCommandPool{};
@@ -989,6 +992,12 @@ public:
dynamic_offsets.size(), dynamic_offsets.data());
}
+ void PushDescriptorSetWithTemplateKHR(VkDescriptorUpdateTemplateKHR update_template,
+ VkPipelineLayout layout, u32 set,
+ const void* data) const noexcept {
+ dld->vkCmdPushDescriptorSetWithTemplateKHR(handle, update_template, layout, set, data);
+ }
+
void BindPipeline(VkPipelineBindPoint bind_point, VkPipeline pipeline) const noexcept {
dld->vkCmdBindPipeline(handle, bind_point, pipeline);
}
@@ -1190,6 +1199,10 @@ public:
dld->vkCmdSetFrontFaceEXT(handle, front_face);
}
+ void SetLineWidth(float line_width) const noexcept {
+ dld->vkCmdSetLineWidth(handle, line_width);
+ }
+
void SetPrimitiveTopologyEXT(VkPrimitiveTopology primitive_topology) const noexcept {
dld->vkCmdSetPrimitiveTopologyEXT(handle, primitive_topology);
}
@@ -1203,6 +1216,13 @@ public:
dld->vkCmdSetStencilTestEnableEXT(handle, enable ? VK_TRUE : VK_FALSE);
}
+ void SetVertexInputEXT(
+ vk::Span<VkVertexInputBindingDescription2EXT> bindings,
+ vk::Span<VkVertexInputAttributeDescription2EXT> attributes) const noexcept {
+ dld->vkCmdSetVertexInputEXT(handle, bindings.size(), bindings.data(), attributes.size(),
+ attributes.data());
+ }
+
void BindTransformFeedbackBuffersEXT(u32 first, u32 count, const VkBuffer* buffers,
const VkDeviceSize* offsets,
const VkDeviceSize* sizes) const noexcept {
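
A recording sketch using the wrappers added above; the descriptor template, pipeline layout, and vertex description spans are placeholders for whatever the pipeline cache provides.

```cpp
cmdbuf.SetVertexInputEXT(bindings, attributes); // spans of VkVertexInput*Description2EXT
cmdbuf.SetLineWidth(1.0f);                      // honors wideLines / line rasterization
cmdbuf.PushDescriptorSetWithTemplateKHR(update_template, pipeline_layout, /*set=*/0,
                                        descriptor_data.data());
```
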
diff --git a/src/web_service/web_backend.cpp b/src/web_service/web_backend.cpp
index 67183e64c..b1e02c57a 100644
--- a/src/web_service/web_backend.cpp
+++ b/src/web_service/web_backend.cpp
@@ -8,7 +8,17 @@
#include <string>
#include <fmt/format.h>
+
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#ifndef __clang__
+#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
+#endif
+#endif
#include <httplib.h>
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
#include "common/logging/log.h"
#include "web_service/web_backend.h"
@@ -100,8 +110,9 @@ struct Client::Impl {
request.body = data;
httplib::Response response;
+ httplib::Error error;
- if (!cli->send(request, response)) {
+ if (!cli->send(request, response, error)) {
LOG_ERROR(WebService, "{} to {} returned null", method, host + path);
return WebResult{WebResult::Code::LibError, "Null response", ""};
}
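
cpp-httplib's three-argument send overload reports why a request failed rather than only returning false; a sketch of surfacing that error (the numeric value is logged because string-conversion helpers vary across httplib versions):

```cpp
httplib::Response response;
httplib::Error error{};
if (!cli->send(request, response, error)) {
    LOG_ERROR(WebService, "{} to {} failed with httplib error {}", method, host + path,
              static_cast<int>(error));
    return WebResult{WebResult::Code::LibError, "Null response", ""};
}
```
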
diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt
index 634fe66a5..cb4bdcc7e 100644
--- a/src/yuzu/CMakeLists.txt
+++ b/src/yuzu/CMakeLists.txt
@@ -15,18 +15,19 @@ add_executable(yuzu
about_dialog.cpp
about_dialog.h
aboutdialog.ui
- applets/controller.cpp
- applets/controller.h
- applets/controller.ui
- applets/error.cpp
- applets/error.h
- applets/profile_select.cpp
- applets/profile_select.h
- applets/software_keyboard.cpp
- applets/software_keyboard.h
- applets/software_keyboard.ui
- applets/web_browser.cpp
- applets/web_browser.h
+ applets/qt_controller.cpp
+ applets/qt_controller.h
+ applets/qt_controller.ui
+ applets/qt_error.cpp
+ applets/qt_error.h
+ applets/qt_profile_select.cpp
+ applets/qt_profile_select.h
+ applets/qt_software_keyboard.cpp
+ applets/qt_software_keyboard.h
+ applets/qt_software_keyboard.ui
+ applets/qt_web_browser.cpp
+ applets/qt_web_browser.h
+ applets/qt_web_browser_scripts.h
bootmanager.cpp
bootmanager.h
compatdb.ui
@@ -52,6 +53,9 @@ add_executable(yuzu
configuration/configure_debug_controller.cpp
configuration/configure_debug_controller.h
configuration/configure_debug_controller.ui
+ configuration/configure_debug_tab.cpp
+ configuration/configure_debug_tab.h
+ configuration/configure_debug_tab.ui
configuration/configure_dialog.cpp
configuration/configure_dialog.h
configuration/configure_filesystem.cpp
diff --git a/src/yuzu/about_dialog.cpp b/src/yuzu/about_dialog.cpp
index a2e0e6962..6b0155a78 100644
--- a/src/yuzu/about_dialog.cpp
+++ b/src/yuzu/about_dialog.cpp
@@ -14,7 +14,8 @@ AboutDialog::AboutDialog(QWidget* parent) : QDialog(parent), ui(new Ui::AboutDia
const auto build_id = std::string(Common::g_build_id);
const auto yuzu_build = fmt::format("yuzu Development Build | {}-{}", branch_name, description);
- const auto override_build = fmt::format(std::string(Common::g_title_bar_format_idle), build_id);
+ const auto override_build =
+ fmt::format(fmt::runtime(std::string(Common::g_title_bar_format_idle)), build_id);
const auto yuzu_build_version = override_build.empty() ? yuzu_build : override_build;
ui->setupUi(this);
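
The fmt::runtime wrapper is needed because fmt 8 validates format strings at compile time; strings only known at run time must be marked explicitly. A minimal standalone example:

```cpp
#include <string>
#include <fmt/format.h>

int main() {
    const std::string user_format = "yuzu {}"; // not a compile-time constant
    // Without fmt::runtime, fmt 8+ rejects this call because the format string
    // cannot be checked statically.
    fmt::print("{}\n", fmt::format(fmt::runtime(user_format), "build-1234"));
}
```
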
diff --git a/src/yuzu/applets/controller.cpp b/src/yuzu/applets/controller.cpp
deleted file mode 100644
index 836d90fda..000000000
--- a/src/yuzu/applets/controller.cpp
+++ /dev/null
@@ -1,695 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <thread>
-
-#include "common/assert.h"
-#include "common/string_util.h"
-#include "core/core.h"
-#include "core/hle/lock.h"
-#include "core/hle/service/hid/controllers/npad.h"
-#include "core/hle/service/hid/hid.h"
-#include "core/hle/service/sm/sm.h"
-#include "ui_controller.h"
-#include "yuzu/applets/controller.h"
-#include "yuzu/configuration/configure_input.h"
-#include "yuzu/configuration/configure_input_profile_dialog.h"
-#include "yuzu/configuration/configure_motion_touch.h"
-#include "yuzu/configuration/configure_vibration.h"
-#include "yuzu/configuration/input_profiles.h"
-#include "yuzu/main.h"
-
-namespace {
-
-constexpr std::size_t HANDHELD_INDEX = 8;
-
-constexpr std::array<std::array<bool, 4>, 8> led_patterns{{
- {true, false, false, false},
- {true, true, false, false},
- {true, true, true, false},
- {true, true, true, true},
- {true, false, false, true},
- {true, false, true, false},
- {true, false, true, true},
- {false, true, true, false},
-}};
-
-void UpdateController(Settings::ControllerType controller_type, std::size_t npad_index,
- bool connected) {
- Core::System& system{Core::System::GetInstance()};
-
- if (!system.IsPoweredOn()) {
- return;
- }
-
- Service::SM::ServiceManager& sm = system.ServiceManager();
-
- auto& npad =
- sm.GetService<Service::HID::Hid>("hid")
- ->GetAppletResource()
- ->GetController<Service::HID::Controller_NPad>(Service::HID::HidController::NPad);
-
- npad.UpdateControllerAt(npad.MapSettingsTypeToNPad(controller_type), npad_index, connected);
-}
-
-// Returns true if the given controller type is compatible with the given parameters.
-bool IsControllerCompatible(Settings::ControllerType controller_type,
- Core::Frontend::ControllerParameters parameters) {
- switch (controller_type) {
- case Settings::ControllerType::ProController:
- return parameters.allow_pro_controller;
- case Settings::ControllerType::DualJoyconDetached:
- return parameters.allow_dual_joycons;
- case Settings::ControllerType::LeftJoycon:
- return parameters.allow_left_joycon;
- case Settings::ControllerType::RightJoycon:
- return parameters.allow_right_joycon;
- case Settings::ControllerType::Handheld:
- return parameters.enable_single_mode && parameters.allow_handheld;
- case Settings::ControllerType::GameCube:
- return parameters.allow_gamecube_controller;
- default:
- return false;
- }
-}
-
-} // namespace
-
-QtControllerSelectorDialog::QtControllerSelectorDialog(
- QWidget* parent, Core::Frontend::ControllerParameters parameters_,
- InputCommon::InputSubsystem* input_subsystem_)
- : QDialog(parent), ui(std::make_unique<Ui::QtControllerSelectorDialog>()),
- parameters(std::move(parameters_)), input_subsystem{input_subsystem_},
- input_profiles(std::make_unique<InputProfiles>()) {
- ui->setupUi(this);
-
- player_widgets = {
- ui->widgetPlayer1, ui->widgetPlayer2, ui->widgetPlayer3, ui->widgetPlayer4,
- ui->widgetPlayer5, ui->widgetPlayer6, ui->widgetPlayer7, ui->widgetPlayer8,
- };
-
- player_groupboxes = {
- ui->groupPlayer1Connected, ui->groupPlayer2Connected, ui->groupPlayer3Connected,
- ui->groupPlayer4Connected, ui->groupPlayer5Connected, ui->groupPlayer6Connected,
- ui->groupPlayer7Connected, ui->groupPlayer8Connected,
- };
-
- connected_controller_icons = {
- ui->controllerPlayer1, ui->controllerPlayer2, ui->controllerPlayer3, ui->controllerPlayer4,
- ui->controllerPlayer5, ui->controllerPlayer6, ui->controllerPlayer7, ui->controllerPlayer8,
- };
-
- led_patterns_boxes = {{
- {ui->checkboxPlayer1LED1, ui->checkboxPlayer1LED2, ui->checkboxPlayer1LED3,
- ui->checkboxPlayer1LED4},
- {ui->checkboxPlayer2LED1, ui->checkboxPlayer2LED2, ui->checkboxPlayer2LED3,
- ui->checkboxPlayer2LED4},
- {ui->checkboxPlayer3LED1, ui->checkboxPlayer3LED2, ui->checkboxPlayer3LED3,
- ui->checkboxPlayer3LED4},
- {ui->checkboxPlayer4LED1, ui->checkboxPlayer4LED2, ui->checkboxPlayer4LED3,
- ui->checkboxPlayer4LED4},
- {ui->checkboxPlayer5LED1, ui->checkboxPlayer5LED2, ui->checkboxPlayer5LED3,
- ui->checkboxPlayer5LED4},
- {ui->checkboxPlayer6LED1, ui->checkboxPlayer6LED2, ui->checkboxPlayer6LED3,
- ui->checkboxPlayer6LED4},
- {ui->checkboxPlayer7LED1, ui->checkboxPlayer7LED2, ui->checkboxPlayer7LED3,
- ui->checkboxPlayer7LED4},
- {ui->checkboxPlayer8LED1, ui->checkboxPlayer8LED2, ui->checkboxPlayer8LED3,
- ui->checkboxPlayer8LED4},
- }};
-
- explain_text_labels = {
- ui->labelPlayer1Explain, ui->labelPlayer2Explain, ui->labelPlayer3Explain,
- ui->labelPlayer4Explain, ui->labelPlayer5Explain, ui->labelPlayer6Explain,
- ui->labelPlayer7Explain, ui->labelPlayer8Explain,
- };
-
- emulated_controllers = {
- ui->comboPlayer1Emulated, ui->comboPlayer2Emulated, ui->comboPlayer3Emulated,
- ui->comboPlayer4Emulated, ui->comboPlayer5Emulated, ui->comboPlayer6Emulated,
- ui->comboPlayer7Emulated, ui->comboPlayer8Emulated,
- };
-
- player_labels = {
- ui->labelPlayer1, ui->labelPlayer2, ui->labelPlayer3, ui->labelPlayer4,
- ui->labelPlayer5, ui->labelPlayer6, ui->labelPlayer7, ui->labelPlayer8,
- };
-
- connected_controller_labels = {
- ui->labelConnectedPlayer1, ui->labelConnectedPlayer2, ui->labelConnectedPlayer3,
- ui->labelConnectedPlayer4, ui->labelConnectedPlayer5, ui->labelConnectedPlayer6,
- ui->labelConnectedPlayer7, ui->labelConnectedPlayer8,
- };
-
- connected_controller_checkboxes = {
- ui->checkboxPlayer1Connected, ui->checkboxPlayer2Connected, ui->checkboxPlayer3Connected,
- ui->checkboxPlayer4Connected, ui->checkboxPlayer5Connected, ui->checkboxPlayer6Connected,
- ui->checkboxPlayer7Connected, ui->checkboxPlayer8Connected,
- };
-
- // Setup/load everything prior to setting up connections.
- // This avoids unintentionally changing the states of elements while loading them in.
- SetSupportedControllers();
- DisableUnsupportedPlayers();
-
- for (std::size_t player_index = 0; player_index < NUM_PLAYERS; ++player_index) {
- SetEmulatedControllers(player_index);
- }
-
- LoadConfiguration();
-
- for (std::size_t i = 0; i < NUM_PLAYERS; ++i) {
- SetExplainText(i);
- UpdateControllerIcon(i);
- UpdateLEDPattern(i);
- UpdateBorderColor(i);
-
- connect(player_groupboxes[i], &QGroupBox::toggled, [this, i](bool checked) {
- if (checked) {
- for (std::size_t index = 0; index <= i; ++index) {
- connected_controller_checkboxes[index]->setChecked(checked);
- }
- } else {
- for (std::size_t index = i; index < NUM_PLAYERS; ++index) {
- connected_controller_checkboxes[index]->setChecked(checked);
- }
- }
- });
-
- connect(emulated_controllers[i], qOverload<int>(&QComboBox::currentIndexChanged),
- [this, i](int) {
- UpdateControllerIcon(i);
- UpdateControllerState(i);
- UpdateLEDPattern(i);
- CheckIfParametersMet();
- });
-
- connect(connected_controller_checkboxes[i], &QCheckBox::stateChanged, [this, i](int state) {
- player_groupboxes[i]->setChecked(state == Qt::Checked);
- UpdateControllerIcon(i);
- UpdateControllerState(i);
- UpdateLEDPattern(i);
- UpdateBorderColor(i);
- CheckIfParametersMet();
- });
-
- if (i == 0) {
- connect(emulated_controllers[i], qOverload<int>(&QComboBox::currentIndexChanged),
- [this, i](int index) {
- UpdateDockedState(GetControllerTypeFromIndex(index, i) ==
- Settings::ControllerType::Handheld);
- });
- }
- }
-
- connect(ui->vibrationButton, &QPushButton::clicked, this,
- &QtControllerSelectorDialog::CallConfigureVibrationDialog);
-
- connect(ui->motionButton, &QPushButton::clicked, this,
- &QtControllerSelectorDialog::CallConfigureMotionTouchDialog);
-
- connect(ui->inputConfigButton, &QPushButton::clicked, this,
- &QtControllerSelectorDialog::CallConfigureInputProfileDialog);
-
- connect(ui->buttonBox, &QDialogButtonBox::accepted, this,
- &QtControllerSelectorDialog::ApplyConfiguration);
-
- // Enhancement: Check if the parameters have already been met before disconnecting controllers.
- // If all the parameters are met AND only allows a single player,
- // stop the constructor here as we do not need to continue.
- if (CheckIfParametersMet() && parameters.enable_single_mode) {
- return;
- }
-
- // If keep_controllers_connected is false, forcefully disconnect all controllers
- if (!parameters.keep_controllers_connected) {
- for (auto player : player_groupboxes) {
- player->setChecked(false);
- }
- }
-
- resize(0, 0);
-}
-
-QtControllerSelectorDialog::~QtControllerSelectorDialog() = default;
-
-int QtControllerSelectorDialog::exec() {
- if (parameters_met && parameters.enable_single_mode) {
- return QDialog::Accepted;
- }
- return QDialog::exec();
-}
-
-void QtControllerSelectorDialog::ApplyConfiguration() {
- const bool pre_docked_mode = Settings::values.use_docked_mode.GetValue();
- Settings::values.use_docked_mode.SetValue(ui->radioDocked->isChecked());
- OnDockedModeChanged(pre_docked_mode, Settings::values.use_docked_mode.GetValue());
-
- Settings::values.vibration_enabled.SetValue(ui->vibrationGroup->isChecked());
- Settings::values.motion_enabled.SetValue(ui->motionGroup->isChecked());
-}
-
-void QtControllerSelectorDialog::LoadConfiguration() {
- for (std::size_t index = 0; index < NUM_PLAYERS; ++index) {
- const auto connected =
- Settings::values.players.GetValue()[index].connected ||
- (index == 0 && Settings::values.players.GetValue()[HANDHELD_INDEX].connected);
- player_groupboxes[index]->setChecked(connected);
- connected_controller_checkboxes[index]->setChecked(connected);
- emulated_controllers[index]->setCurrentIndex(GetIndexFromControllerType(
- Settings::values.players.GetValue()[index].controller_type, index));
- }
-
- UpdateDockedState(Settings::values.players.GetValue()[HANDHELD_INDEX].connected);
-
- ui->vibrationGroup->setChecked(Settings::values.vibration_enabled.GetValue());
- ui->motionGroup->setChecked(Settings::values.motion_enabled.GetValue());
-}
-
-void QtControllerSelectorDialog::CallConfigureVibrationDialog() {
- ConfigureVibration dialog(this);
-
- dialog.setWindowFlags(Qt::Dialog | Qt::CustomizeWindowHint | Qt::WindowTitleHint |
- Qt::WindowSystemMenuHint);
- dialog.setWindowModality(Qt::WindowModal);
-
- if (dialog.exec() == QDialog::Accepted) {
- dialog.ApplyConfiguration();
- }
-}
-
-void QtControllerSelectorDialog::CallConfigureMotionTouchDialog() {
- ConfigureMotionTouch dialog(this, input_subsystem);
-
- dialog.setWindowFlags(Qt::Dialog | Qt::CustomizeWindowHint | Qt::WindowTitleHint |
- Qt::WindowSystemMenuHint);
- dialog.setWindowModality(Qt::WindowModal);
-
- if (dialog.exec() == QDialog::Accepted) {
- dialog.ApplyConfiguration();
- }
-}
-
-void QtControllerSelectorDialog::CallConfigureInputProfileDialog() {
- ConfigureInputProfileDialog dialog(this, input_subsystem, input_profiles.get());
-
- dialog.setWindowFlags(Qt::Dialog | Qt::CustomizeWindowHint | Qt::WindowTitleHint |
- Qt::WindowSystemMenuHint);
- dialog.setWindowModality(Qt::WindowModal);
- dialog.exec();
-}
-
-bool QtControllerSelectorDialog::CheckIfParametersMet() {
- // Here, we check and validate the current configuration against all applicable parameters.
- const auto num_connected_players = static_cast<int>(
- std::count_if(player_groupboxes.begin(), player_groupboxes.end(),
- [this](const QGroupBox* player) { return player->isChecked(); }));
-
- const auto min_supported_players = parameters.enable_single_mode ? 1 : parameters.min_players;
- const auto max_supported_players = parameters.enable_single_mode ? 1 : parameters.max_players;
-
- // First, check against the number of connected players.
- if (num_connected_players < min_supported_players ||
- num_connected_players > max_supported_players) {
- parameters_met = false;
- ui->buttonBox->setEnabled(parameters_met);
- return parameters_met;
- }
-
- // Next, check against all connected controllers.
- const auto all_controllers_compatible = [this] {
- for (std::size_t index = 0; index < NUM_PLAYERS; ++index) {
- // Skip controllers that are not used, we only care about the currently connected ones.
- if (!player_groupboxes[index]->isChecked() || !player_groupboxes[index]->isEnabled()) {
- continue;
- }
-
- const auto compatible = IsControllerCompatible(
- GetControllerTypeFromIndex(emulated_controllers[index]->currentIndex(), index),
- parameters);
-
- // If any controller is found to be incompatible, return false early.
- if (!compatible) {
- return false;
- }
- }
-
- // Reaching here means all currently connected controllers are compatible.
- return true;
- }();
-
- parameters_met = all_controllers_compatible;
- ui->buttonBox->setEnabled(parameters_met);
- return parameters_met;
-}
-
-void QtControllerSelectorDialog::SetSupportedControllers() {
- const QString theme = [] {
- if (QIcon::themeName().contains(QStringLiteral("dark"))) {
- return QStringLiteral("_dark");
- } else if (QIcon::themeName().contains(QStringLiteral("midnight"))) {
- return QStringLiteral("_midnight");
- } else {
- return QString{};
- }
- }();
-
- if (parameters.enable_single_mode && parameters.allow_handheld) {
- ui->controllerSupported1->setStyleSheet(
- QStringLiteral("image: url(:/controller/applet_handheld%0); ").arg(theme));
- } else {
- ui->controllerSupported1->setStyleSheet(
- QStringLiteral("image: url(:/controller/applet_handheld%0_disabled); ").arg(theme));
- }
-
- if (parameters.allow_dual_joycons) {
- ui->controllerSupported2->setStyleSheet(
- QStringLiteral("image: url(:/controller/applet_dual_joycon%0); ").arg(theme));
- } else {
- ui->controllerSupported2->setStyleSheet(
- QStringLiteral("image: url(:/controller/applet_dual_joycon%0_disabled); ").arg(theme));
- }
-
- if (parameters.allow_left_joycon) {
- ui->controllerSupported3->setStyleSheet(
- QStringLiteral("image: url(:/controller/applet_joycon_left%0); ").arg(theme));
- } else {
- ui->controllerSupported3->setStyleSheet(
- QStringLiteral("image: url(:/controller/applet_joycon_left%0_disabled); ").arg(theme));
- }
-
- if (parameters.allow_right_joycon) {
- ui->controllerSupported4->setStyleSheet(
- QStringLiteral("image: url(:/controller/applet_joycon_right%0); ").arg(theme));
- } else {
- ui->controllerSupported4->setStyleSheet(
- QStringLiteral("image: url(:/controller/applet_joycon_right%0_disabled); ").arg(theme));
- }
-
- if (parameters.allow_pro_controller || parameters.allow_gamecube_controller) {
- ui->controllerSupported5->setStyleSheet(
- QStringLiteral("image: url(:/controller/applet_pro_controller%0); ").arg(theme));
- } else {
- ui->controllerSupported5->setStyleSheet(
- QStringLiteral("image: url(:/controller/applet_pro_controller%0_disabled); ")
- .arg(theme));
- }
-
- // enable_single_mode overrides min_players and max_players.
- if (parameters.enable_single_mode) {
- ui->numberSupportedLabel->setText(QStringLiteral("1"));
- return;
- }
-
- if (parameters.min_players == parameters.max_players) {
- ui->numberSupportedLabel->setText(QStringLiteral("%1").arg(parameters.max_players));
- } else {
- ui->numberSupportedLabel->setText(
- QStringLiteral("%1 - %2").arg(parameters.min_players).arg(parameters.max_players));
- }
-}
-
-void QtControllerSelectorDialog::SetEmulatedControllers(std::size_t player_index) {
- auto& pairs = index_controller_type_pairs[player_index];
-
- pairs.clear();
- emulated_controllers[player_index]->clear();
-
- pairs.emplace_back(emulated_controllers[player_index]->count(),
- Settings::ControllerType::ProController);
- emulated_controllers[player_index]->addItem(tr("Pro Controller"));
-
- pairs.emplace_back(emulated_controllers[player_index]->count(),
- Settings::ControllerType::DualJoyconDetached);
- emulated_controllers[player_index]->addItem(tr("Dual Joycons"));
-
- pairs.emplace_back(emulated_controllers[player_index]->count(),
- Settings::ControllerType::LeftJoycon);
- emulated_controllers[player_index]->addItem(tr("Left Joycon"));
-
- pairs.emplace_back(emulated_controllers[player_index]->count(),
- Settings::ControllerType::RightJoycon);
- emulated_controllers[player_index]->addItem(tr("Right Joycon"));
-
- if (player_index == 0) {
- pairs.emplace_back(emulated_controllers[player_index]->count(),
- Settings::ControllerType::Handheld);
- emulated_controllers[player_index]->addItem(tr("Handheld"));
- }
-
- pairs.emplace_back(emulated_controllers[player_index]->count(),
- Settings::ControllerType::GameCube);
- emulated_controllers[player_index]->addItem(tr("GameCube Controller"));
-}
-
-Settings::ControllerType QtControllerSelectorDialog::GetControllerTypeFromIndex(
- int index, std::size_t player_index) const {
- const auto& pairs = index_controller_type_pairs[player_index];
-
- const auto it = std::find_if(pairs.begin(), pairs.end(),
- [index](const auto& pair) { return pair.first == index; });
-
- if (it == pairs.end()) {
- return Settings::ControllerType::ProController;
- }
-
- return it->second;
-}
-
-int QtControllerSelectorDialog::GetIndexFromControllerType(Settings::ControllerType type,
- std::size_t player_index) const {
- const auto& pairs = index_controller_type_pairs[player_index];
-
- const auto it = std::find_if(pairs.begin(), pairs.end(),
- [type](const auto& pair) { return pair.second == type; });
-
- if (it == pairs.end()) {
- return 0;
- }
-
- return it->first;
-}
-
-void QtControllerSelectorDialog::UpdateControllerIcon(std::size_t player_index) {
- if (!player_groupboxes[player_index]->isChecked()) {
- connected_controller_icons[player_index]->setStyleSheet(QString{});
- player_labels[player_index]->show();
- return;
- }
-
- const QString stylesheet = [this, player_index] {
- switch (GetControllerTypeFromIndex(emulated_controllers[player_index]->currentIndex(),
- player_index)) {
- case Settings::ControllerType::ProController:
- case Settings::ControllerType::GameCube:
- return QStringLiteral("image: url(:/controller/applet_pro_controller%0); ");
- case Settings::ControllerType::DualJoyconDetached:
- return QStringLiteral("image: url(:/controller/applet_dual_joycon%0); ");
- case Settings::ControllerType::LeftJoycon:
- return QStringLiteral("image: url(:/controller/applet_joycon_left%0); ");
- case Settings::ControllerType::RightJoycon:
- return QStringLiteral("image: url(:/controller/applet_joycon_right%0); ");
- case Settings::ControllerType::Handheld:
- return QStringLiteral("image: url(:/controller/applet_handheld%0); ");
- default:
- return QString{};
- }
- }();
-
- if (stylesheet.isEmpty()) {
- connected_controller_icons[player_index]->setStyleSheet(QString{});
- player_labels[player_index]->show();
- return;
- }
-
- const QString theme = [] {
- if (QIcon::themeName().contains(QStringLiteral("dark"))) {
- return QStringLiteral("_dark");
- } else if (QIcon::themeName().contains(QStringLiteral("midnight"))) {
- return QStringLiteral("_midnight");
- } else {
- return QString{};
- }
- }();
-
- connected_controller_icons[player_index]->setStyleSheet(stylesheet.arg(theme));
- player_labels[player_index]->hide();
-}
-
-void QtControllerSelectorDialog::UpdateControllerState(std::size_t player_index) {
- auto& player = Settings::values.players.GetValue()[player_index];
-
- const auto controller_type = GetControllerTypeFromIndex(
- emulated_controllers[player_index]->currentIndex(), player_index);
- const auto player_connected = player_groupboxes[player_index]->isChecked() &&
- controller_type != Settings::ControllerType::Handheld;
-
- if (player.controller_type == controller_type && player.connected == player_connected) {
- // Set vibration devices in the event that the input device has changed.
- ConfigureVibration::SetVibrationDevices(player_index);
- return;
- }
-
- // Disconnect the controller first.
- UpdateController(controller_type, player_index, false);
-
- player.controller_type = controller_type;
- player.connected = player_connected;
-
- ConfigureVibration::SetVibrationDevices(player_index);
-
- // Handheld
- if (player_index == 0) {
- auto& handheld = Settings::values.players.GetValue()[HANDHELD_INDEX];
- if (controller_type == Settings::ControllerType::Handheld) {
- handheld = player;
- }
- handheld.connected = player_groupboxes[player_index]->isChecked() &&
- controller_type == Settings::ControllerType::Handheld;
- UpdateController(Settings::ControllerType::Handheld, 8, handheld.connected);
- }
-
- if (!player.connected) {
- return;
- }
-
- // This emulates a delay between disconnecting and reconnecting controllers as some games
- // do not respond to a change in controller type if it was instantaneous.
- using namespace std::chrono_literals;
- std::this_thread::sleep_for(60ms);
-
- UpdateController(controller_type, player_index, player_connected);
-}
-
-void QtControllerSelectorDialog::UpdateLEDPattern(std::size_t player_index) {
- if (!player_groupboxes[player_index]->isChecked() ||
- GetControllerTypeFromIndex(emulated_controllers[player_index]->currentIndex(),
- player_index) == Settings::ControllerType::Handheld) {
- led_patterns_boxes[player_index][0]->setChecked(false);
- led_patterns_boxes[player_index][1]->setChecked(false);
- led_patterns_boxes[player_index][2]->setChecked(false);
- led_patterns_boxes[player_index][3]->setChecked(false);
- return;
- }
-
- led_patterns_boxes[player_index][0]->setChecked(led_patterns[player_index][0]);
- led_patterns_boxes[player_index][1]->setChecked(led_patterns[player_index][1]);
- led_patterns_boxes[player_index][2]->setChecked(led_patterns[player_index][2]);
- led_patterns_boxes[player_index][3]->setChecked(led_patterns[player_index][3]);
-}
-
-void QtControllerSelectorDialog::UpdateBorderColor(std::size_t player_index) {
- if (!parameters.enable_border_color ||
- player_index >= static_cast<std::size_t>(parameters.max_players) ||
- player_groupboxes[player_index]->styleSheet().contains(QStringLiteral("QGroupBox"))) {
- return;
- }
-
- player_groupboxes[player_index]->setStyleSheet(
- player_groupboxes[player_index]->styleSheet().append(
- QStringLiteral("QGroupBox#groupPlayer%1Connected:checked "
- "{ border: 1px solid rgba(%2, %3, %4, %5); }")
- .arg(player_index + 1)
- .arg(parameters.border_colors[player_index][0])
- .arg(parameters.border_colors[player_index][1])
- .arg(parameters.border_colors[player_index][2])
- .arg(parameters.border_colors[player_index][3])));
-}
-
-void QtControllerSelectorDialog::SetExplainText(std::size_t player_index) {
- if (!parameters.enable_explain_text ||
- player_index >= static_cast<std::size_t>(parameters.max_players)) {
- return;
- }
-
- explain_text_labels[player_index]->setText(QString::fromStdString(
- Common::StringFromFixedZeroTerminatedBuffer(parameters.explain_text[player_index].data(),
- parameters.explain_text[player_index].size())));
-}
-
-void QtControllerSelectorDialog::UpdateDockedState(bool is_handheld) {
- // Disallow changing the console mode if the controller type is handheld.
- ui->radioDocked->setEnabled(!is_handheld);
- ui->radioUndocked->setEnabled(!is_handheld);
-
- ui->radioDocked->setChecked(Settings::values.use_docked_mode.GetValue());
- ui->radioUndocked->setChecked(!Settings::values.use_docked_mode.GetValue());
-
- // Also force into undocked mode if the controller type is handheld.
- if (is_handheld) {
- ui->radioUndocked->setChecked(true);
- }
-}
-
-void QtControllerSelectorDialog::DisableUnsupportedPlayers() {
- const auto max_supported_players = parameters.enable_single_mode ? 1 : parameters.max_players;
-
- switch (max_supported_players) {
- case 0:
- default:
- UNREACHABLE();
- return;
- case 1:
- ui->widgetSpacer->hide();
- ui->widgetSpacer2->hide();
- ui->widgetSpacer3->hide();
- ui->widgetSpacer4->hide();
- break;
- case 2:
- ui->widgetSpacer->hide();
- ui->widgetSpacer2->hide();
- ui->widgetSpacer3->hide();
- break;
- case 3:
- ui->widgetSpacer->hide();
- ui->widgetSpacer2->hide();
- break;
- case 4:
- ui->widgetSpacer->hide();
- break;
- case 5:
- case 6:
- case 7:
- case 8:
- break;
- }
-
- for (std::size_t index = max_supported_players; index < NUM_PLAYERS; ++index) {
- // Disconnect any unsupported players here and disable or hide them if applicable.
- Settings::values.players.GetValue()[index].connected = false;
- UpdateController(Settings::values.players.GetValue()[index].controller_type, index, false);
- // Hide the player widgets when max_supported_controllers is less than or equal to 4.
- if (max_supported_players <= 4) {
- player_widgets[index]->hide();
- }
-
- // Disable and hide the following to prevent these from interaction.
- player_widgets[index]->setDisabled(true);
- connected_controller_checkboxes[index]->setDisabled(true);
- connected_controller_labels[index]->hide();
- connected_controller_checkboxes[index]->hide();
- }
-}
-
-QtControllerSelector::QtControllerSelector(GMainWindow& parent) {
- connect(this, &QtControllerSelector::MainWindowReconfigureControllers, &parent,
- &GMainWindow::ControllerSelectorReconfigureControllers, Qt::QueuedConnection);
- connect(&parent, &GMainWindow::ControllerSelectorReconfigureFinished, this,
- &QtControllerSelector::MainWindowReconfigureFinished, Qt::QueuedConnection);
-}
-
-QtControllerSelector::~QtControllerSelector() = default;
-
-void QtControllerSelector::ReconfigureControllers(
- std::function<void()> callback_, const Core::Frontend::ControllerParameters& parameters) const {
- callback = std::move(callback_);
- emit MainWindowReconfigureControllers(parameters);
-}
-
-void QtControllerSelector::MainWindowReconfigureFinished() {
- // Acquire the HLE mutex
- std::lock_guard lock(HLE::g_hle_lock);
- callback();
-}
diff --git a/src/yuzu/applets/error.cpp b/src/yuzu/applets/error.cpp
deleted file mode 100644
index 085688cd4..000000000
--- a/src/yuzu/applets/error.cpp
+++ /dev/null
@@ -1,63 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <QDateTime>
-#include "core/hle/lock.h"
-#include "yuzu/applets/error.h"
-#include "yuzu/main.h"
-
-QtErrorDisplay::QtErrorDisplay(GMainWindow& parent) {
- connect(this, &QtErrorDisplay::MainWindowDisplayError, &parent,
- &GMainWindow::ErrorDisplayDisplayError, Qt::QueuedConnection);
- connect(&parent, &GMainWindow::ErrorDisplayFinished, this,
- &QtErrorDisplay::MainWindowFinishedError, Qt::DirectConnection);
-}
-
-QtErrorDisplay::~QtErrorDisplay() = default;
-
-void QtErrorDisplay::ShowError(ResultCode error, std::function<void()> finished) const {
- callback = std::move(finished);
- emit MainWindowDisplayError(
- tr("Error Code: %1-%2 (0x%3)")
- .arg(static_cast<u32>(error.module.Value()) + 2000, 4, 10, QChar::fromLatin1('0'))
- .arg(error.description, 4, 10, QChar::fromLatin1('0'))
- .arg(error.raw, 8, 16, QChar::fromLatin1('0')),
- tr("An error has occurred.\nPlease try again or contact the developer of the software."));
-}
-
-void QtErrorDisplay::ShowErrorWithTimestamp(ResultCode error, std::chrono::seconds time,
- std::function<void()> finished) const {
- callback = std::move(finished);
-
- const QDateTime date_time = QDateTime::fromSecsSinceEpoch(time.count());
- emit MainWindowDisplayError(
- tr("Error Code: %1-%2 (0x%3)")
- .arg(static_cast<u32>(error.module.Value()) + 2000, 4, 10, QChar::fromLatin1('0'))
- .arg(error.description, 4, 10, QChar::fromLatin1('0'))
- .arg(error.raw, 8, 16, QChar::fromLatin1('0')),
- tr("An error occurred on %1 at %2.\nPlease try again or contact the developer of the "
- "software.")
- .arg(date_time.toString(QStringLiteral("dddd, MMMM d, yyyy")))
- .arg(date_time.toString(QStringLiteral("h:mm:ss A"))));
-}
-
-void QtErrorDisplay::ShowCustomErrorText(ResultCode error, std::string dialog_text,
- std::string fullscreen_text,
- std::function<void()> finished) const {
- callback = std::move(finished);
- emit MainWindowDisplayError(
- tr("Error Code: %1-%2 (0x%3)")
- .arg(static_cast<u32>(error.module.Value()) + 2000, 4, 10, QChar::fromLatin1('0'))
- .arg(error.description, 4, 10, QChar::fromLatin1('0'))
- .arg(error.raw, 8, 16, QChar::fromLatin1('0')),
- tr("An error has occurred.\n\n%1\n\n%2")
- .arg(QString::fromStdString(dialog_text))
- .arg(QString::fromStdString(fullscreen_text)));
-}
-
-void QtErrorDisplay::MainWindowFinishedError() {
- // Acquire the HLE mutex
- std::lock_guard lock{HLE::g_hle_lock};
- callback();
-}
diff --git a/src/yuzu/applets/profile_select.cpp b/src/yuzu/applets/profile_select.cpp
deleted file mode 100644
index 62fd1141c..000000000
--- a/src/yuzu/applets/profile_select.cpp
+++ /dev/null
@@ -1,163 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <mutex>
-#include <QDialogButtonBox>
-#include <QHeaderView>
-#include <QLabel>
-#include <QLineEdit>
-#include <QScrollArea>
-#include <QStandardItemModel>
-#include <QVBoxLayout>
-#include "common/fs/path_util.h"
-#include "common/string_util.h"
-#include "core/constants.h"
-#include "core/hle/lock.h"
-#include "yuzu/applets/profile_select.h"
-#include "yuzu/main.h"
-
-namespace {
-QString FormatUserEntryText(const QString& username, Common::UUID uuid) {
- return QtProfileSelectionDialog::tr(
- "%1\n%2", "%1 is the profile username, %2 is the formatted UUID (e.g. "
- "00112233-4455-6677-8899-AABBCCDDEEFF))")
- .arg(username, QString::fromStdString(uuid.FormatSwitch()));
-}
-
-QString GetImagePath(Common::UUID uuid) {
- const auto path =
- Common::FS::GetYuzuPath(Common::FS::YuzuPath::NANDDir) /
- fmt::format("system/save/8000000000000010/su/avators/{}.jpg", uuid.FormatSwitch());
- return QString::fromStdString(Common::FS::PathToUTF8String(path));
-}
-
-QPixmap GetIcon(Common::UUID uuid) {
- QPixmap icon{GetImagePath(uuid)};
-
- if (!icon) {
- icon.fill(Qt::black);
- icon.loadFromData(Core::Constants::ACCOUNT_BACKUP_JPEG.data(),
- static_cast<u32>(Core::Constants::ACCOUNT_BACKUP_JPEG.size()));
- }
-
- return icon.scaled(64, 64, Qt::IgnoreAspectRatio, Qt::SmoothTransformation);
-}
-} // Anonymous namespace
-
-QtProfileSelectionDialog::QtProfileSelectionDialog(QWidget* parent)
- : QDialog(parent), profile_manager(std::make_unique<Service::Account::ProfileManager>()) {
- outer_layout = new QVBoxLayout;
-
- instruction_label = new QLabel(tr("Select a user:"));
-
- scroll_area = new QScrollArea;
-
- buttons = new QDialogButtonBox(QDialogButtonBox::Cancel | QDialogButtonBox::Ok);
- connect(buttons, &QDialogButtonBox::accepted, this, &QtProfileSelectionDialog::accept);
- connect(buttons, &QDialogButtonBox::rejected, this, &QtProfileSelectionDialog::reject);
-
- outer_layout->addWidget(instruction_label);
- outer_layout->addWidget(scroll_area);
- outer_layout->addWidget(buttons);
-
- layout = new QVBoxLayout;
- tree_view = new QTreeView;
- item_model = new QStandardItemModel(tree_view);
- tree_view->setModel(item_model);
-
- tree_view->setAlternatingRowColors(true);
- tree_view->setSelectionMode(QHeaderView::SingleSelection);
- tree_view->setSelectionBehavior(QHeaderView::SelectRows);
- tree_view->setVerticalScrollMode(QHeaderView::ScrollPerPixel);
- tree_view->setHorizontalScrollMode(QHeaderView::ScrollPerPixel);
- tree_view->setSortingEnabled(true);
- tree_view->setEditTriggers(QHeaderView::NoEditTriggers);
- tree_view->setUniformRowHeights(true);
- tree_view->setIconSize({64, 64});
- tree_view->setContextMenuPolicy(Qt::NoContextMenu);
-
- item_model->insertColumns(0, 1);
- item_model->setHeaderData(0, Qt::Horizontal, tr("Users"));
-
- // We must register all custom types with the Qt Automoc system so that we are able to use it
- // with signals/slots. In this case, QList falls under the umbrella of custom types.
- qRegisterMetaType<QList<QStandardItem*>>("QList<QStandardItem*>");
-
- layout->setContentsMargins(0, 0, 0, 0);
- layout->setSpacing(0);
- layout->addWidget(tree_view);
-
- scroll_area->setLayout(layout);
-
- connect(tree_view, &QTreeView::clicked, this, &QtProfileSelectionDialog::SelectUser);
-
- const auto& profiles = profile_manager->GetAllUsers();
- for (const auto& user : profiles) {
- Service::Account::ProfileBase profile{};
- if (!profile_manager->GetProfileBase(user, profile))
- continue;
-
- const auto username = Common::StringFromFixedZeroTerminatedBuffer(
- reinterpret_cast<const char*>(profile.username.data()), profile.username.size());
-
- list_items.push_back(QList<QStandardItem*>{new QStandardItem{
- GetIcon(user), FormatUserEntryText(QString::fromStdString(username), user)}});
- }
-
- for (const auto& item : list_items)
- item_model->appendRow(item);
-
- setLayout(outer_layout);
- setWindowTitle(tr("Profile Selector"));
- resize(550, 400);
-}
-
-QtProfileSelectionDialog::~QtProfileSelectionDialog() = default;
-
-int QtProfileSelectionDialog::exec() {
- // Skip profile selection when there's only one.
- if (profile_manager->GetUserCount() == 1) {
- user_index = 0;
- return QDialog::Accepted;
- }
- return QDialog::exec();
-}
-
-void QtProfileSelectionDialog::accept() {
- QDialog::accept();
-}
-
-void QtProfileSelectionDialog::reject() {
- user_index = 0;
- QDialog::reject();
-}
-
-int QtProfileSelectionDialog::GetIndex() const {
- return user_index;
-}
-
-void QtProfileSelectionDialog::SelectUser(const QModelIndex& index) {
- user_index = index.row();
-}
-
-QtProfileSelector::QtProfileSelector(GMainWindow& parent) {
- connect(this, &QtProfileSelector::MainWindowSelectProfile, &parent,
- &GMainWindow::ProfileSelectorSelectProfile, Qt::QueuedConnection);
- connect(&parent, &GMainWindow::ProfileSelectorFinishedSelection, this,
- &QtProfileSelector::MainWindowFinishedSelection, Qt::DirectConnection);
-}
-
-QtProfileSelector::~QtProfileSelector() = default;
-
-void QtProfileSelector::SelectProfile(
- std::function<void(std::optional<Common::UUID>)> callback_) const {
- callback = std::move(callback_);
- emit MainWindowSelectProfile();
-}
-
-void QtProfileSelector::MainWindowFinishedSelection(std::optional<Common::UUID> uuid) {
- // Acquire the HLE mutex
- std::lock_guard lock{HLE::g_hle_lock};
- callback(uuid);
-}
diff --git a/src/yuzu/applets/qt_controller.cpp b/src/yuzu/applets/qt_controller.cpp
new file mode 100644
index 000000000..97106d2cc
--- /dev/null
+++ b/src/yuzu/applets/qt_controller.cpp
@@ -0,0 +1,695 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <thread>
+
+#include "common/assert.h"
+#include "common/string_util.h"
+#include "core/core.h"
+#include "core/hle/lock.h"
+#include "core/hle/service/hid/controllers/npad.h"
+#include "core/hle/service/hid/hid.h"
+#include "core/hle/service/sm/sm.h"
+#include "ui_qt_controller.h"
+#include "yuzu/applets/qt_controller.h"
+#include "yuzu/configuration/configure_input.h"
+#include "yuzu/configuration/configure_input_profile_dialog.h"
+#include "yuzu/configuration/configure_motion_touch.h"
+#include "yuzu/configuration/configure_vibration.h"
+#include "yuzu/configuration/input_profiles.h"
+#include "yuzu/main.h"
+
+namespace {
+
+constexpr std::size_t HANDHELD_INDEX = 8;
+
+constexpr std::array<std::array<bool, 4>, 8> led_patterns{{
+ {true, false, false, false},
+ {true, true, false, false},
+ {true, true, true, false},
+ {true, true, true, true},
+ {true, false, false, true},
+ {true, false, true, false},
+ {true, false, true, true},
+ {false, true, true, false},
+}};
+
+void UpdateController(Settings::ControllerType controller_type, std::size_t npad_index,
+ bool connected) {
+ Core::System& system{Core::System::GetInstance()};
+
+ if (!system.IsPoweredOn()) {
+ return;
+ }
+
+ Service::SM::ServiceManager& sm = system.ServiceManager();
+
+ auto& npad =
+ sm.GetService<Service::HID::Hid>("hid")
+ ->GetAppletResource()
+ ->GetController<Service::HID::Controller_NPad>(Service::HID::HidController::NPad);
+
+ npad.UpdateControllerAt(npad.MapSettingsTypeToNPad(controller_type), npad_index, connected);
+}
+
+// Returns true if the given controller type is compatible with the given parameters.
+bool IsControllerCompatible(Settings::ControllerType controller_type,
+ Core::Frontend::ControllerParameters parameters) {
+ switch (controller_type) {
+ case Settings::ControllerType::ProController:
+ return parameters.allow_pro_controller;
+ case Settings::ControllerType::DualJoyconDetached:
+ return parameters.allow_dual_joycons;
+ case Settings::ControllerType::LeftJoycon:
+ return parameters.allow_left_joycon;
+ case Settings::ControllerType::RightJoycon:
+ return parameters.allow_right_joycon;
+ case Settings::ControllerType::Handheld:
+ return parameters.enable_single_mode && parameters.allow_handheld;
+ case Settings::ControllerType::GameCube:
+ return parameters.allow_gamecube_controller;
+ default:
+ return false;
+ }
+}
+
+} // namespace
+
+QtControllerSelectorDialog::QtControllerSelectorDialog(
+ QWidget* parent, Core::Frontend::ControllerParameters parameters_,
+ InputCommon::InputSubsystem* input_subsystem_)
+ : QDialog(parent), ui(std::make_unique<Ui::QtControllerSelectorDialog>()),
+ parameters(std::move(parameters_)), input_subsystem{input_subsystem_},
+ input_profiles(std::make_unique<InputProfiles>()) {
+ ui->setupUi(this);
+
+ player_widgets = {
+ ui->widgetPlayer1, ui->widgetPlayer2, ui->widgetPlayer3, ui->widgetPlayer4,
+ ui->widgetPlayer5, ui->widgetPlayer6, ui->widgetPlayer7, ui->widgetPlayer8,
+ };
+
+ player_groupboxes = {
+ ui->groupPlayer1Connected, ui->groupPlayer2Connected, ui->groupPlayer3Connected,
+ ui->groupPlayer4Connected, ui->groupPlayer5Connected, ui->groupPlayer6Connected,
+ ui->groupPlayer7Connected, ui->groupPlayer8Connected,
+ };
+
+ connected_controller_icons = {
+ ui->controllerPlayer1, ui->controllerPlayer2, ui->controllerPlayer3, ui->controllerPlayer4,
+ ui->controllerPlayer5, ui->controllerPlayer6, ui->controllerPlayer7, ui->controllerPlayer8,
+ };
+
+ led_patterns_boxes = {{
+ {ui->checkboxPlayer1LED1, ui->checkboxPlayer1LED2, ui->checkboxPlayer1LED3,
+ ui->checkboxPlayer1LED4},
+ {ui->checkboxPlayer2LED1, ui->checkboxPlayer2LED2, ui->checkboxPlayer2LED3,
+ ui->checkboxPlayer2LED4},
+ {ui->checkboxPlayer3LED1, ui->checkboxPlayer3LED2, ui->checkboxPlayer3LED3,
+ ui->checkboxPlayer3LED4},
+ {ui->checkboxPlayer4LED1, ui->checkboxPlayer4LED2, ui->checkboxPlayer4LED3,
+ ui->checkboxPlayer4LED4},
+ {ui->checkboxPlayer5LED1, ui->checkboxPlayer5LED2, ui->checkboxPlayer5LED3,
+ ui->checkboxPlayer5LED4},
+ {ui->checkboxPlayer6LED1, ui->checkboxPlayer6LED2, ui->checkboxPlayer6LED3,
+ ui->checkboxPlayer6LED4},
+ {ui->checkboxPlayer7LED1, ui->checkboxPlayer7LED2, ui->checkboxPlayer7LED3,
+ ui->checkboxPlayer7LED4},
+ {ui->checkboxPlayer8LED1, ui->checkboxPlayer8LED2, ui->checkboxPlayer8LED3,
+ ui->checkboxPlayer8LED4},
+ }};
+
+ explain_text_labels = {
+ ui->labelPlayer1Explain, ui->labelPlayer2Explain, ui->labelPlayer3Explain,
+ ui->labelPlayer4Explain, ui->labelPlayer5Explain, ui->labelPlayer6Explain,
+ ui->labelPlayer7Explain, ui->labelPlayer8Explain,
+ };
+
+ emulated_controllers = {
+ ui->comboPlayer1Emulated, ui->comboPlayer2Emulated, ui->comboPlayer3Emulated,
+ ui->comboPlayer4Emulated, ui->comboPlayer5Emulated, ui->comboPlayer6Emulated,
+ ui->comboPlayer7Emulated, ui->comboPlayer8Emulated,
+ };
+
+ player_labels = {
+ ui->labelPlayer1, ui->labelPlayer2, ui->labelPlayer3, ui->labelPlayer4,
+ ui->labelPlayer5, ui->labelPlayer6, ui->labelPlayer7, ui->labelPlayer8,
+ };
+
+ connected_controller_labels = {
+ ui->labelConnectedPlayer1, ui->labelConnectedPlayer2, ui->labelConnectedPlayer3,
+ ui->labelConnectedPlayer4, ui->labelConnectedPlayer5, ui->labelConnectedPlayer6,
+ ui->labelConnectedPlayer7, ui->labelConnectedPlayer8,
+ };
+
+ connected_controller_checkboxes = {
+ ui->checkboxPlayer1Connected, ui->checkboxPlayer2Connected, ui->checkboxPlayer3Connected,
+ ui->checkboxPlayer4Connected, ui->checkboxPlayer5Connected, ui->checkboxPlayer6Connected,
+ ui->checkboxPlayer7Connected, ui->checkboxPlayer8Connected,
+ };
+
+ // Setup/load everything prior to setting up connections.
+ // This avoids unintentionally changing the states of elements while loading them in.
+ SetSupportedControllers();
+ DisableUnsupportedPlayers();
+
+ for (std::size_t player_index = 0; player_index < NUM_PLAYERS; ++player_index) {
+ SetEmulatedControllers(player_index);
+ }
+
+ LoadConfiguration();
+
+ for (std::size_t i = 0; i < NUM_PLAYERS; ++i) {
+ SetExplainText(i);
+ UpdateControllerIcon(i);
+ UpdateLEDPattern(i);
+ UpdateBorderColor(i);
+
+ connect(player_groupboxes[i], &QGroupBox::toggled, [this, i](bool checked) {
+ if (checked) {
+ for (std::size_t index = 0; index <= i; ++index) {
+ connected_controller_checkboxes[index]->setChecked(checked);
+ }
+ } else {
+ for (std::size_t index = i; index < NUM_PLAYERS; ++index) {
+ connected_controller_checkboxes[index]->setChecked(checked);
+ }
+ }
+ });
+
+ connect(emulated_controllers[i], qOverload<int>(&QComboBox::currentIndexChanged),
+ [this, i](int) {
+ UpdateControllerIcon(i);
+ UpdateControllerState(i);
+ UpdateLEDPattern(i);
+ CheckIfParametersMet();
+ });
+
+ connect(connected_controller_checkboxes[i], &QCheckBox::stateChanged, [this, i](int state) {
+ player_groupboxes[i]->setChecked(state == Qt::Checked);
+ UpdateControllerIcon(i);
+ UpdateControllerState(i);
+ UpdateLEDPattern(i);
+ UpdateBorderColor(i);
+ CheckIfParametersMet();
+ });
+
+ if (i == 0) {
+ connect(emulated_controllers[i], qOverload<int>(&QComboBox::currentIndexChanged),
+ [this, i](int index) {
+ UpdateDockedState(GetControllerTypeFromIndex(index, i) ==
+ Settings::ControllerType::Handheld);
+ });
+ }
+ }
+
+ connect(ui->vibrationButton, &QPushButton::clicked, this,
+ &QtControllerSelectorDialog::CallConfigureVibrationDialog);
+
+ connect(ui->motionButton, &QPushButton::clicked, this,
+ &QtControllerSelectorDialog::CallConfigureMotionTouchDialog);
+
+ connect(ui->inputConfigButton, &QPushButton::clicked, this,
+ &QtControllerSelectorDialog::CallConfigureInputProfileDialog);
+
+ connect(ui->buttonBox, &QDialogButtonBox::accepted, this,
+ &QtControllerSelectorDialog::ApplyConfiguration);
+
+ // Enhancement: Check if the parameters have already been met before disconnecting controllers.
+ // If all the parameters are met AND the applet only allows a single player,
+ // stop the constructor here as we do not need to continue.
+ if (CheckIfParametersMet() && parameters.enable_single_mode) {
+ return;
+ }
+
+ // If keep_controllers_connected is false, forcefully disconnect all controllers
+ if (!parameters.keep_controllers_connected) {
+ for (auto player : player_groupboxes) {
+ player->setChecked(false);
+ }
+ }
+
+ resize(0, 0);
+}
+
+QtControllerSelectorDialog::~QtControllerSelectorDialog() = default;
+
+int QtControllerSelectorDialog::exec() {
+ if (parameters_met && parameters.enable_single_mode) {
+ return QDialog::Accepted;
+ }
+ return QDialog::exec();
+}
+
+void QtControllerSelectorDialog::ApplyConfiguration() {
+ const bool pre_docked_mode = Settings::values.use_docked_mode.GetValue();
+ Settings::values.use_docked_mode.SetValue(ui->radioDocked->isChecked());
+ OnDockedModeChanged(pre_docked_mode, Settings::values.use_docked_mode.GetValue());
+
+ Settings::values.vibration_enabled.SetValue(ui->vibrationGroup->isChecked());
+ Settings::values.motion_enabled.SetValue(ui->motionGroup->isChecked());
+}
+
+void QtControllerSelectorDialog::LoadConfiguration() {
+ for (std::size_t index = 0; index < NUM_PLAYERS; ++index) {
+ const auto connected =
+ Settings::values.players.GetValue()[index].connected ||
+ (index == 0 && Settings::values.players.GetValue()[HANDHELD_INDEX].connected);
+ player_groupboxes[index]->setChecked(connected);
+ connected_controller_checkboxes[index]->setChecked(connected);
+ emulated_controllers[index]->setCurrentIndex(GetIndexFromControllerType(
+ Settings::values.players.GetValue()[index].controller_type, index));
+ }
+
+ UpdateDockedState(Settings::values.players.GetValue()[HANDHELD_INDEX].connected);
+
+ ui->vibrationGroup->setChecked(Settings::values.vibration_enabled.GetValue());
+ ui->motionGroup->setChecked(Settings::values.motion_enabled.GetValue());
+}
+
+void QtControllerSelectorDialog::CallConfigureVibrationDialog() {
+ ConfigureVibration dialog(this);
+
+ dialog.setWindowFlags(Qt::Dialog | Qt::CustomizeWindowHint | Qt::WindowTitleHint |
+ Qt::WindowSystemMenuHint);
+ dialog.setWindowModality(Qt::WindowModal);
+
+ if (dialog.exec() == QDialog::Accepted) {
+ dialog.ApplyConfiguration();
+ }
+}
+
+void QtControllerSelectorDialog::CallConfigureMotionTouchDialog() {
+ ConfigureMotionTouch dialog(this, input_subsystem);
+
+ dialog.setWindowFlags(Qt::Dialog | Qt::CustomizeWindowHint | Qt::WindowTitleHint |
+ Qt::WindowSystemMenuHint);
+ dialog.setWindowModality(Qt::WindowModal);
+
+ if (dialog.exec() == QDialog::Accepted) {
+ dialog.ApplyConfiguration();
+ }
+}
+
+void QtControllerSelectorDialog::CallConfigureInputProfileDialog() {
+ ConfigureInputProfileDialog dialog(this, input_subsystem, input_profiles.get());
+
+ dialog.setWindowFlags(Qt::Dialog | Qt::CustomizeWindowHint | Qt::WindowTitleHint |
+ Qt::WindowSystemMenuHint);
+ dialog.setWindowModality(Qt::WindowModal);
+ dialog.exec();
+}
+
+bool QtControllerSelectorDialog::CheckIfParametersMet() {
+ // Here, we check and validate the current configuration against all applicable parameters.
+ const auto num_connected_players = static_cast<int>(
+ std::count_if(player_groupboxes.begin(), player_groupboxes.end(),
+ [this](const QGroupBox* player) { return player->isChecked(); }));
+
+ const auto min_supported_players = parameters.enable_single_mode ? 1 : parameters.min_players;
+ const auto max_supported_players = parameters.enable_single_mode ? 1 : parameters.max_players;
+
+ // First, check against the number of connected players.
+ if (num_connected_players < min_supported_players ||
+ num_connected_players > max_supported_players) {
+ parameters_met = false;
+ ui->buttonBox->setEnabled(parameters_met);
+ return parameters_met;
+ }
+
+ // Next, check against all connected controllers.
+ const auto all_controllers_compatible = [this] {
+ for (std::size_t index = 0; index < NUM_PLAYERS; ++index) {
+ // Skip controllers that are not used; we only care about the currently connected ones.
+ if (!player_groupboxes[index]->isChecked() || !player_groupboxes[index]->isEnabled()) {
+ continue;
+ }
+
+ const auto compatible = IsControllerCompatible(
+ GetControllerTypeFromIndex(emulated_controllers[index]->currentIndex(), index),
+ parameters);
+
+ // If any controller is found to be incompatible, return false early.
+ if (!compatible) {
+ return false;
+ }
+ }
+
+ // Reaching here means all currently connected controllers are compatible.
+ return true;
+ }();
+
+ parameters_met = all_controllers_compatible;
+ ui->buttonBox->setEnabled(parameters_met);
+ return parameters_met;
+}
+
+void QtControllerSelectorDialog::SetSupportedControllers() {
+ const QString theme = [] {
+ if (QIcon::themeName().contains(QStringLiteral("dark"))) {
+ return QStringLiteral("_dark");
+ } else if (QIcon::themeName().contains(QStringLiteral("midnight"))) {
+ return QStringLiteral("_midnight");
+ } else {
+ return QString{};
+ }
+ }();
+
+ if (parameters.enable_single_mode && parameters.allow_handheld) {
+ ui->controllerSupported1->setStyleSheet(
+ QStringLiteral("image: url(:/controller/applet_handheld%0); ").arg(theme));
+ } else {
+ ui->controllerSupported1->setStyleSheet(
+ QStringLiteral("image: url(:/controller/applet_handheld%0_disabled); ").arg(theme));
+ }
+
+ if (parameters.allow_dual_joycons) {
+ ui->controllerSupported2->setStyleSheet(
+ QStringLiteral("image: url(:/controller/applet_dual_joycon%0); ").arg(theme));
+ } else {
+ ui->controllerSupported2->setStyleSheet(
+ QStringLiteral("image: url(:/controller/applet_dual_joycon%0_disabled); ").arg(theme));
+ }
+
+ if (parameters.allow_left_joycon) {
+ ui->controllerSupported3->setStyleSheet(
+ QStringLiteral("image: url(:/controller/applet_joycon_left%0); ").arg(theme));
+ } else {
+ ui->controllerSupported3->setStyleSheet(
+ QStringLiteral("image: url(:/controller/applet_joycon_left%0_disabled); ").arg(theme));
+ }
+
+ if (parameters.allow_right_joycon) {
+ ui->controllerSupported4->setStyleSheet(
+ QStringLiteral("image: url(:/controller/applet_joycon_right%0); ").arg(theme));
+ } else {
+ ui->controllerSupported4->setStyleSheet(
+ QStringLiteral("image: url(:/controller/applet_joycon_right%0_disabled); ").arg(theme));
+ }
+
+ if (parameters.allow_pro_controller || parameters.allow_gamecube_controller) {
+ ui->controllerSupported5->setStyleSheet(
+ QStringLiteral("image: url(:/controller/applet_pro_controller%0); ").arg(theme));
+ } else {
+ ui->controllerSupported5->setStyleSheet(
+ QStringLiteral("image: url(:/controller/applet_pro_controller%0_disabled); ")
+ .arg(theme));
+ }
+
+ // enable_single_mode overrides min_players and max_players.
+ if (parameters.enable_single_mode) {
+ ui->numberSupportedLabel->setText(QStringLiteral("1"));
+ return;
+ }
+
+ if (parameters.min_players == parameters.max_players) {
+ ui->numberSupportedLabel->setText(QStringLiteral("%1").arg(parameters.max_players));
+ } else {
+ ui->numberSupportedLabel->setText(
+ QStringLiteral("%1 - %2").arg(parameters.min_players).arg(parameters.max_players));
+ }
+}
+
+void QtControllerSelectorDialog::SetEmulatedControllers(std::size_t player_index) {
+ auto& pairs = index_controller_type_pairs[player_index];
+
+ pairs.clear();
+ emulated_controllers[player_index]->clear();
+
+ pairs.emplace_back(emulated_controllers[player_index]->count(),
+ Settings::ControllerType::ProController);
+ emulated_controllers[player_index]->addItem(tr("Pro Controller"));
+
+ pairs.emplace_back(emulated_controllers[player_index]->count(),
+ Settings::ControllerType::DualJoyconDetached);
+ emulated_controllers[player_index]->addItem(tr("Dual Joycons"));
+
+ pairs.emplace_back(emulated_controllers[player_index]->count(),
+ Settings::ControllerType::LeftJoycon);
+ emulated_controllers[player_index]->addItem(tr("Left Joycon"));
+
+ pairs.emplace_back(emulated_controllers[player_index]->count(),
+ Settings::ControllerType::RightJoycon);
+ emulated_controllers[player_index]->addItem(tr("Right Joycon"));
+
+ if (player_index == 0) {
+ pairs.emplace_back(emulated_controllers[player_index]->count(),
+ Settings::ControllerType::Handheld);
+ emulated_controllers[player_index]->addItem(tr("Handheld"));
+ }
+
+ pairs.emplace_back(emulated_controllers[player_index]->count(),
+ Settings::ControllerType::GameCube);
+ emulated_controllers[player_index]->addItem(tr("GameCube Controller"));
+}
+
+Settings::ControllerType QtControllerSelectorDialog::GetControllerTypeFromIndex(
+ int index, std::size_t player_index) const {
+ const auto& pairs = index_controller_type_pairs[player_index];
+
+ const auto it = std::find_if(pairs.begin(), pairs.end(),
+ [index](const auto& pair) { return pair.first == index; });
+
+ if (it == pairs.end()) {
+ return Settings::ControllerType::ProController;
+ }
+
+ return it->second;
+}
+
+int QtControllerSelectorDialog::GetIndexFromControllerType(Settings::ControllerType type,
+ std::size_t player_index) const {
+ const auto& pairs = index_controller_type_pairs[player_index];
+
+ const auto it = std::find_if(pairs.begin(), pairs.end(),
+ [type](const auto& pair) { return pair.second == type; });
+
+ if (it == pairs.end()) {
+ return 0;
+ }
+
+ return it->first;
+}
+
+void QtControllerSelectorDialog::UpdateControllerIcon(std::size_t player_index) {
+ if (!player_groupboxes[player_index]->isChecked()) {
+ connected_controller_icons[player_index]->setStyleSheet(QString{});
+ player_labels[player_index]->show();
+ return;
+ }
+
+ const QString stylesheet = [this, player_index] {
+ switch (GetControllerTypeFromIndex(emulated_controllers[player_index]->currentIndex(),
+ player_index)) {
+ case Settings::ControllerType::ProController:
+ case Settings::ControllerType::GameCube:
+ return QStringLiteral("image: url(:/controller/applet_pro_controller%0); ");
+ case Settings::ControllerType::DualJoyconDetached:
+ return QStringLiteral("image: url(:/controller/applet_dual_joycon%0); ");
+ case Settings::ControllerType::LeftJoycon:
+ return QStringLiteral("image: url(:/controller/applet_joycon_left%0); ");
+ case Settings::ControllerType::RightJoycon:
+ return QStringLiteral("image: url(:/controller/applet_joycon_right%0); ");
+ case Settings::ControllerType::Handheld:
+ return QStringLiteral("image: url(:/controller/applet_handheld%0); ");
+ default:
+ return QString{};
+ }
+ }();
+
+ if (stylesheet.isEmpty()) {
+ connected_controller_icons[player_index]->setStyleSheet(QString{});
+ player_labels[player_index]->show();
+ return;
+ }
+
+ const QString theme = [] {
+ if (QIcon::themeName().contains(QStringLiteral("dark"))) {
+ return QStringLiteral("_dark");
+ } else if (QIcon::themeName().contains(QStringLiteral("midnight"))) {
+ return QStringLiteral("_midnight");
+ } else {
+ return QString{};
+ }
+ }();
+
+ connected_controller_icons[player_index]->setStyleSheet(stylesheet.arg(theme));
+ player_labels[player_index]->hide();
+}
+
+void QtControllerSelectorDialog::UpdateControllerState(std::size_t player_index) {
+ auto& player = Settings::values.players.GetValue()[player_index];
+
+ const auto controller_type = GetControllerTypeFromIndex(
+ emulated_controllers[player_index]->currentIndex(), player_index);
+ const auto player_connected = player_groupboxes[player_index]->isChecked() &&
+ controller_type != Settings::ControllerType::Handheld;
+
+ if (player.controller_type == controller_type && player.connected == player_connected) {
+ // Set vibration devices in the event that the input device has changed.
+ ConfigureVibration::SetVibrationDevices(player_index);
+ return;
+ }
+
+ // Disconnect the controller first.
+ UpdateController(controller_type, player_index, false);
+
+ player.controller_type = controller_type;
+ player.connected = player_connected;
+
+ ConfigureVibration::SetVibrationDevices(player_index);
+
+ // Handheld
+ if (player_index == 0) {
+ auto& handheld = Settings::values.players.GetValue()[HANDHELD_INDEX];
+ if (controller_type == Settings::ControllerType::Handheld) {
+ handheld = player;
+ }
+ handheld.connected = player_groupboxes[player_index]->isChecked() &&
+ controller_type == Settings::ControllerType::Handheld;
+ UpdateController(Settings::ControllerType::Handheld, 8, handheld.connected);
+ }
+
+ if (!player.connected) {
+ return;
+ }
+
+ // This emulates a delay between disconnecting and reconnecting controllers as some games
+ // do not respond to a change in controller type if it was instantaneous.
+ using namespace std::chrono_literals;
+ std::this_thread::sleep_for(60ms);
+
+ UpdateController(controller_type, player_index, player_connected);
+}
+
+void QtControllerSelectorDialog::UpdateLEDPattern(std::size_t player_index) {
+ if (!player_groupboxes[player_index]->isChecked() ||
+ GetControllerTypeFromIndex(emulated_controllers[player_index]->currentIndex(),
+ player_index) == Settings::ControllerType::Handheld) {
+ led_patterns_boxes[player_index][0]->setChecked(false);
+ led_patterns_boxes[player_index][1]->setChecked(false);
+ led_patterns_boxes[player_index][2]->setChecked(false);
+ led_patterns_boxes[player_index][3]->setChecked(false);
+ return;
+ }
+
+ led_patterns_boxes[player_index][0]->setChecked(led_patterns[player_index][0]);
+ led_patterns_boxes[player_index][1]->setChecked(led_patterns[player_index][1]);
+ led_patterns_boxes[player_index][2]->setChecked(led_patterns[player_index][2]);
+ led_patterns_boxes[player_index][3]->setChecked(led_patterns[player_index][3]);
+}
+
+void QtControllerSelectorDialog::UpdateBorderColor(std::size_t player_index) {
+ if (!parameters.enable_border_color ||
+ player_index >= static_cast<std::size_t>(parameters.max_players) ||
+ player_groupboxes[player_index]->styleSheet().contains(QStringLiteral("QGroupBox"))) {
+ return;
+ }
+
+ player_groupboxes[player_index]->setStyleSheet(
+ player_groupboxes[player_index]->styleSheet().append(
+ QStringLiteral("QGroupBox#groupPlayer%1Connected:checked "
+ "{ border: 1px solid rgba(%2, %3, %4, %5); }")
+ .arg(player_index + 1)
+ .arg(parameters.border_colors[player_index][0])
+ .arg(parameters.border_colors[player_index][1])
+ .arg(parameters.border_colors[player_index][2])
+ .arg(parameters.border_colors[player_index][3])));
+}
+
+void QtControllerSelectorDialog::SetExplainText(std::size_t player_index) {
+ if (!parameters.enable_explain_text ||
+ player_index >= static_cast<std::size_t>(parameters.max_players)) {
+ return;
+ }
+
+ explain_text_labels[player_index]->setText(QString::fromStdString(
+ Common::StringFromFixedZeroTerminatedBuffer(parameters.explain_text[player_index].data(),
+ parameters.explain_text[player_index].size())));
+}
+
+void QtControllerSelectorDialog::UpdateDockedState(bool is_handheld) {
+ // Disallow changing the console mode if the controller type is handheld.
+ ui->radioDocked->setEnabled(!is_handheld);
+ ui->radioUndocked->setEnabled(!is_handheld);
+
+ ui->radioDocked->setChecked(Settings::values.use_docked_mode.GetValue());
+ ui->radioUndocked->setChecked(!Settings::values.use_docked_mode.GetValue());
+
+ // Also force into undocked mode if the controller type is handheld.
+ if (is_handheld) {
+ ui->radioUndocked->setChecked(true);
+ }
+}
+
+void QtControllerSelectorDialog::DisableUnsupportedPlayers() {
+ const auto max_supported_players = parameters.enable_single_mode ? 1 : parameters.max_players;
+
+ switch (max_supported_players) {
+ case 0:
+ default:
+ UNREACHABLE();
+ return;
+ case 1:
+ ui->widgetSpacer->hide();
+ ui->widgetSpacer2->hide();
+ ui->widgetSpacer3->hide();
+ ui->widgetSpacer4->hide();
+ break;
+ case 2:
+ ui->widgetSpacer->hide();
+ ui->widgetSpacer2->hide();
+ ui->widgetSpacer3->hide();
+ break;
+ case 3:
+ ui->widgetSpacer->hide();
+ ui->widgetSpacer2->hide();
+ break;
+ case 4:
+ ui->widgetSpacer->hide();
+ break;
+ case 5:
+ case 6:
+ case 7:
+ case 8:
+ break;
+ }
+
+ for (std::size_t index = max_supported_players; index < NUM_PLAYERS; ++index) {
+ // Disconnect any unsupported players here and disable or hide them if applicable.
+ Settings::values.players.GetValue()[index].connected = false;
+ UpdateController(Settings::values.players.GetValue()[index].controller_type, index, false);
+        // Hide the player widgets when max_supported_players is less than or equal to 4.
+ if (max_supported_players <= 4) {
+ player_widgets[index]->hide();
+ }
+
+        // Disable and hide the following to prevent any interaction with them.
+ player_widgets[index]->setDisabled(true);
+ connected_controller_checkboxes[index]->setDisabled(true);
+ connected_controller_labels[index]->hide();
+ connected_controller_checkboxes[index]->hide();
+ }
+}
+
+QtControllerSelector::QtControllerSelector(GMainWindow& parent) {
+ connect(this, &QtControllerSelector::MainWindowReconfigureControllers, &parent,
+ &GMainWindow::ControllerSelectorReconfigureControllers, Qt::QueuedConnection);
+ connect(&parent, &GMainWindow::ControllerSelectorReconfigureFinished, this,
+ &QtControllerSelector::MainWindowReconfigureFinished, Qt::QueuedConnection);
+}
+
+QtControllerSelector::~QtControllerSelector() = default;
+
+void QtControllerSelector::ReconfigureControllers(
+ std::function<void()> callback_, const Core::Frontend::ControllerParameters& parameters) const {
+ callback = std::move(callback_);
+ emit MainWindowReconfigureControllers(parameters);
+}
+
+void QtControllerSelector::MainWindowReconfigureFinished() {
+ // Acquire the HLE mutex
+ std::lock_guard lock(HLE::g_hle_lock);
+ callback();
+}
diff --git a/src/yuzu/applets/controller.h b/src/yuzu/applets/qt_controller.h
index 9b57aea1a..9b57aea1a 100644
--- a/src/yuzu/applets/controller.h
+++ b/src/yuzu/applets/qt_controller.h
diff --git a/src/yuzu/applets/controller.ui b/src/yuzu/applets/qt_controller.ui
index c8cb6bcf3..c8cb6bcf3 100644
--- a/src/yuzu/applets/controller.ui
+++ b/src/yuzu/applets/qt_controller.ui
diff --git a/src/yuzu/applets/qt_error.cpp b/src/yuzu/applets/qt_error.cpp
new file mode 100644
index 000000000..45cf64603
--- /dev/null
+++ b/src/yuzu/applets/qt_error.cpp
@@ -0,0 +1,63 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <QDateTime>
+#include "core/hle/lock.h"
+#include "yuzu/applets/qt_error.h"
+#include "yuzu/main.h"
+
+QtErrorDisplay::QtErrorDisplay(GMainWindow& parent) {
+ connect(this, &QtErrorDisplay::MainWindowDisplayError, &parent,
+ &GMainWindow::ErrorDisplayDisplayError, Qt::QueuedConnection);
+ connect(&parent, &GMainWindow::ErrorDisplayFinished, this,
+ &QtErrorDisplay::MainWindowFinishedError, Qt::DirectConnection);
+}
+
+QtErrorDisplay::~QtErrorDisplay() = default;
+
+void QtErrorDisplay::ShowError(ResultCode error, std::function<void()> finished) const {
+ callback = std::move(finished);
+ emit MainWindowDisplayError(
+ tr("Error Code: %1-%2 (0x%3)")
+ .arg(static_cast<u32>(error.module.Value()) + 2000, 4, 10, QChar::fromLatin1('0'))
+ .arg(error.description, 4, 10, QChar::fromLatin1('0'))
+ .arg(error.raw, 8, 16, QChar::fromLatin1('0')),
+ tr("An error has occurred.\nPlease try again or contact the developer of the software."));
+}
+
+void QtErrorDisplay::ShowErrorWithTimestamp(ResultCode error, std::chrono::seconds time,
+ std::function<void()> finished) const {
+ callback = std::move(finished);
+
+ const QDateTime date_time = QDateTime::fromSecsSinceEpoch(time.count());
+ emit MainWindowDisplayError(
+ tr("Error Code: %1-%2 (0x%3)")
+ .arg(static_cast<u32>(error.module.Value()) + 2000, 4, 10, QChar::fromLatin1('0'))
+ .arg(error.description, 4, 10, QChar::fromLatin1('0'))
+ .arg(error.raw, 8, 16, QChar::fromLatin1('0')),
+ tr("An error occurred on %1 at %2.\nPlease try again or contact the developer of the "
+ "software.")
+ .arg(date_time.toString(QStringLiteral("dddd, MMMM d, yyyy")))
+ .arg(date_time.toString(QStringLiteral("h:mm:ss A"))));
+}
+
+void QtErrorDisplay::ShowCustomErrorText(ResultCode error, std::string dialog_text,
+ std::string fullscreen_text,
+ std::function<void()> finished) const {
+ callback = std::move(finished);
+ emit MainWindowDisplayError(
+ tr("Error Code: %1-%2 (0x%3)")
+ .arg(static_cast<u32>(error.module.Value()) + 2000, 4, 10, QChar::fromLatin1('0'))
+ .arg(error.description, 4, 10, QChar::fromLatin1('0'))
+ .arg(error.raw, 8, 16, QChar::fromLatin1('0')),
+ tr("An error has occurred.\n\n%1\n\n%2")
+ .arg(QString::fromStdString(dialog_text))
+ .arg(QString::fromStdString(fullscreen_text)));
+}
+
+void QtErrorDisplay::MainWindowFinishedError() {
+ // Acquire the HLE mutex
+ std::lock_guard lock{HLE::g_hle_lock};
+ callback();
+}
diff --git a/src/yuzu/applets/error.h b/src/yuzu/applets/qt_error.h
index 8bd895a32..8bd895a32 100644
--- a/src/yuzu/applets/error.h
+++ b/src/yuzu/applets/qt_error.h
diff --git a/src/yuzu/applets/qt_profile_select.cpp b/src/yuzu/applets/qt_profile_select.cpp
new file mode 100644
index 000000000..a56638e21
--- /dev/null
+++ b/src/yuzu/applets/qt_profile_select.cpp
@@ -0,0 +1,163 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <mutex>
+#include <QDialogButtonBox>
+#include <QHeaderView>
+#include <QLabel>
+#include <QLineEdit>
+#include <QScrollArea>
+#include <QStandardItemModel>
+#include <QVBoxLayout>
+#include "common/fs/path_util.h"
+#include "common/string_util.h"
+#include "core/constants.h"
+#include "core/hle/lock.h"
+#include "yuzu/applets/qt_profile_select.h"
+#include "yuzu/main.h"
+
+namespace {
+QString FormatUserEntryText(const QString& username, Common::UUID uuid) {
+ return QtProfileSelectionDialog::tr(
+ "%1\n%2", "%1 is the profile username, %2 is the formatted UUID (e.g. "
+                  "00112233-4455-6677-8899-AABBCCDDEEFF)")
+ .arg(username, QString::fromStdString(uuid.FormatSwitch()));
+}
+
+QString GetImagePath(Common::UUID uuid) {
+ const auto path =
+ Common::FS::GetYuzuPath(Common::FS::YuzuPath::NANDDir) /
+ fmt::format("system/save/8000000000000010/su/avators/{}.jpg", uuid.FormatSwitch());
+ return QString::fromStdString(Common::FS::PathToUTF8String(path));
+}
+
+QPixmap GetIcon(Common::UUID uuid) {
+ QPixmap icon{GetImagePath(uuid)};
+
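+    // If the user has no profile image on the emulated NAND, fall back to the bundled backup avatar.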
+ if (!icon) {
+ icon.fill(Qt::black);
+ icon.loadFromData(Core::Constants::ACCOUNT_BACKUP_JPEG.data(),
+ static_cast<u32>(Core::Constants::ACCOUNT_BACKUP_JPEG.size()));
+ }
+
+ return icon.scaled(64, 64, Qt::IgnoreAspectRatio, Qt::SmoothTransformation);
+}
+} // Anonymous namespace
+
+QtProfileSelectionDialog::QtProfileSelectionDialog(QWidget* parent)
+ : QDialog(parent), profile_manager(std::make_unique<Service::Account::ProfileManager>()) {
+ outer_layout = new QVBoxLayout;
+
+ instruction_label = new QLabel(tr("Select a user:"));
+
+ scroll_area = new QScrollArea;
+
+ buttons = new QDialogButtonBox(QDialogButtonBox::Cancel | QDialogButtonBox::Ok);
+ connect(buttons, &QDialogButtonBox::accepted, this, &QtProfileSelectionDialog::accept);
+ connect(buttons, &QDialogButtonBox::rejected, this, &QtProfileSelectionDialog::reject);
+
+ outer_layout->addWidget(instruction_label);
+ outer_layout->addWidget(scroll_area);
+ outer_layout->addWidget(buttons);
+
+ layout = new QVBoxLayout;
+ tree_view = new QTreeView;
+ item_model = new QStandardItemModel(tree_view);
+ tree_view->setModel(item_model);
+
+ tree_view->setAlternatingRowColors(true);
+ tree_view->setSelectionMode(QHeaderView::SingleSelection);
+ tree_view->setSelectionBehavior(QHeaderView::SelectRows);
+ tree_view->setVerticalScrollMode(QHeaderView::ScrollPerPixel);
+ tree_view->setHorizontalScrollMode(QHeaderView::ScrollPerPixel);
+ tree_view->setSortingEnabled(true);
+ tree_view->setEditTriggers(QHeaderView::NoEditTriggers);
+ tree_view->setUniformRowHeights(true);
+ tree_view->setIconSize({64, 64});
+ tree_view->setContextMenuPolicy(Qt::NoContextMenu);
+
+ item_model->insertColumns(0, 1);
+ item_model->setHeaderData(0, Qt::Horizontal, tr("Users"));
+
+    // We must register all custom types with the Qt meta-type system so that they can be used
+    // with signals/slots. In this case, QList<QStandardItem*> falls under the umbrella of
+    // custom types.
+ qRegisterMetaType<QList<QStandardItem*>>("QList<QStandardItem*>");
+
+ layout->setContentsMargins(0, 0, 0, 0);
+ layout->setSpacing(0);
+ layout->addWidget(tree_view);
+
+ scroll_area->setLayout(layout);
+
+ connect(tree_view, &QTreeView::clicked, this, &QtProfileSelectionDialog::SelectUser);
+
+ const auto& profiles = profile_manager->GetAllUsers();
+ for (const auto& user : profiles) {
+ Service::Account::ProfileBase profile{};
+ if (!profile_manager->GetProfileBase(user, profile))
+ continue;
+
+ const auto username = Common::StringFromFixedZeroTerminatedBuffer(
+ reinterpret_cast<const char*>(profile.username.data()), profile.username.size());
+
+ list_items.push_back(QList<QStandardItem*>{new QStandardItem{
+ GetIcon(user), FormatUserEntryText(QString::fromStdString(username), user)}});
+ }
+
+ for (const auto& item : list_items)
+ item_model->appendRow(item);
+
+ setLayout(outer_layout);
+ setWindowTitle(tr("Profile Selector"));
+ resize(550, 400);
+}
+
+QtProfileSelectionDialog::~QtProfileSelectionDialog() = default;
+
+int QtProfileSelectionDialog::exec() {
+ // Skip profile selection when there's only one.
+ if (profile_manager->GetUserCount() == 1) {
+ user_index = 0;
+ return QDialog::Accepted;
+ }
+ return QDialog::exec();
+}
+
+void QtProfileSelectionDialog::accept() {
+ QDialog::accept();
+}
+
+void QtProfileSelectionDialog::reject() {
+ user_index = 0;
+ QDialog::reject();
+}
+
+int QtProfileSelectionDialog::GetIndex() const {
+ return user_index;
+}
+
+void QtProfileSelectionDialog::SelectUser(const QModelIndex& index) {
+ user_index = index.row();
+}
+
+QtProfileSelector::QtProfileSelector(GMainWindow& parent) {
+ connect(this, &QtProfileSelector::MainWindowSelectProfile, &parent,
+ &GMainWindow::ProfileSelectorSelectProfile, Qt::QueuedConnection);
+ connect(&parent, &GMainWindow::ProfileSelectorFinishedSelection, this,
+ &QtProfileSelector::MainWindowFinishedSelection, Qt::DirectConnection);
+}
+
+QtProfileSelector::~QtProfileSelector() = default;
+
+void QtProfileSelector::SelectProfile(
+ std::function<void(std::optional<Common::UUID>)> callback_) const {
+ callback = std::move(callback_);
+ emit MainWindowSelectProfile();
+}
+
+void QtProfileSelector::MainWindowFinishedSelection(std::optional<Common::UUID> uuid) {
+ // Acquire the HLE mutex
+ std::lock_guard lock{HLE::g_hle_lock};
+ callback(uuid);
+}
diff --git a/src/yuzu/applets/profile_select.h b/src/yuzu/applets/qt_profile_select.h
index 4e9037488..4e9037488 100644
--- a/src/yuzu/applets/profile_select.h
+++ b/src/yuzu/applets/qt_profile_select.h
diff --git a/src/yuzu/applets/qt_software_keyboard.cpp b/src/yuzu/applets/qt_software_keyboard.cpp
new file mode 100644
index 000000000..848801cec
--- /dev/null
+++ b/src/yuzu/applets/qt_software_keyboard.cpp
@@ -0,0 +1,1620 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <QCursor>
+#include <QKeyEvent>
+#include <QScreen>
+
+#include "common/logging/log.h"
+#include "common/settings.h"
+#include "common/string_util.h"
+#include "core/core.h"
+#include "core/frontend/input_interpreter.h"
+#include "ui_qt_software_keyboard.h"
+#include "yuzu/applets/qt_software_keyboard.h"
+#include "yuzu/main.h"
+#include "yuzu/util/overlay_dialog.h"
+
+namespace {
+
+using namespace Service::AM::Applets;
+
+constexpr float BASE_HEADER_FONT_SIZE = 23.0f;
+constexpr float BASE_SUB_FONT_SIZE = 17.0f;
+constexpr float BASE_EDITOR_FONT_SIZE = 26.0f;
+constexpr float BASE_CHAR_BUTTON_FONT_SIZE = 28.0f;
+constexpr float BASE_LABEL_BUTTON_FONT_SIZE = 18.0f;
+constexpr float BASE_ICON_BUTTON_SIZE = 36.0f;
+[[maybe_unused]] constexpr float BASE_WIDTH = 1280.0f;
+constexpr float BASE_HEIGHT = 720.0f;
+
+} // Anonymous namespace
+
+QtSoftwareKeyboardDialog::QtSoftwareKeyboardDialog(
+ QWidget* parent, Core::System& system_, bool is_inline_,
+ Core::Frontend::KeyboardInitializeParameters initialize_parameters_)
+ : QDialog(parent), ui{std::make_unique<Ui::QtSoftwareKeyboardDialog>()}, system{system_},
+ is_inline{is_inline_}, initialize_parameters{std::move(initialize_parameters_)} {
+ ui->setupUi(this);
+
+ setWindowFlags(Qt::Dialog | Qt::FramelessWindowHint | Qt::WindowTitleHint |
+ Qt::WindowSystemMenuHint | Qt::CustomizeWindowHint);
+ setWindowModality(Qt::WindowModal);
+ setAttribute(Qt::WA_DeleteOnClose);
+ setAttribute(Qt::WA_TranslucentBackground);
+
+ keyboard_buttons = {{
+ {{
+ {
+ ui->button_1,
+ ui->button_2,
+ ui->button_3,
+ ui->button_4,
+ ui->button_5,
+ ui->button_6,
+ ui->button_7,
+ ui->button_8,
+ ui->button_9,
+ ui->button_0,
+ ui->button_minus,
+ ui->button_backspace,
+ },
+ {
+ ui->button_q,
+ ui->button_w,
+ ui->button_e,
+ ui->button_r,
+ ui->button_t,
+ ui->button_y,
+ ui->button_u,
+ ui->button_i,
+ ui->button_o,
+ ui->button_p,
+ ui->button_slash,
+ ui->button_return,
+ },
+ {
+ ui->button_a,
+ ui->button_s,
+ ui->button_d,
+ ui->button_f,
+ ui->button_g,
+ ui->button_h,
+ ui->button_j,
+ ui->button_k,
+ ui->button_l,
+ ui->button_colon,
+ ui->button_apostrophe,
+ ui->button_return,
+ },
+ {
+ ui->button_z,
+ ui->button_x,
+ ui->button_c,
+ ui->button_v,
+ ui->button_b,
+ ui->button_n,
+ ui->button_m,
+ ui->button_comma,
+ ui->button_dot,
+ ui->button_question,
+ ui->button_exclamation,
+ ui->button_ok,
+ },
+ {
+ ui->button_shift,
+ ui->button_shift,
+ ui->button_space,
+ ui->button_space,
+ ui->button_space,
+ ui->button_space,
+ ui->button_space,
+ ui->button_space,
+ ui->button_space,
+ ui->button_space,
+ ui->button_space,
+ ui->button_ok,
+ },
+ }},
+ {{
+ {
+ ui->button_hash,
+ ui->button_left_bracket,
+ ui->button_right_bracket,
+ ui->button_dollar,
+ ui->button_percent,
+ ui->button_circumflex,
+ ui->button_ampersand,
+ ui->button_asterisk,
+ ui->button_left_parenthesis,
+ ui->button_right_parenthesis,
+ ui->button_underscore,
+ ui->button_backspace_shift,
+ },
+ {
+ ui->button_q_shift,
+ ui->button_w_shift,
+ ui->button_e_shift,
+ ui->button_r_shift,
+ ui->button_t_shift,
+ ui->button_y_shift,
+ ui->button_u_shift,
+ ui->button_i_shift,
+ ui->button_o_shift,
+ ui->button_p_shift,
+ ui->button_at,
+ ui->button_return_shift,
+ },
+ {
+ ui->button_a_shift,
+ ui->button_s_shift,
+ ui->button_d_shift,
+ ui->button_f_shift,
+ ui->button_g_shift,
+ ui->button_h_shift,
+ ui->button_j_shift,
+ ui->button_k_shift,
+ ui->button_l_shift,
+ ui->button_semicolon,
+ ui->button_quotation,
+ ui->button_return_shift,
+ },
+ {
+ ui->button_z_shift,
+ ui->button_x_shift,
+ ui->button_c_shift,
+ ui->button_v_shift,
+ ui->button_b_shift,
+ ui->button_n_shift,
+ ui->button_m_shift,
+ ui->button_less_than,
+ ui->button_greater_than,
+ ui->button_plus,
+ ui->button_equal,
+ ui->button_ok_shift,
+ },
+ {
+ ui->button_shift_shift,
+ ui->button_shift_shift,
+ ui->button_space_shift,
+ ui->button_space_shift,
+ ui->button_space_shift,
+ ui->button_space_shift,
+ ui->button_space_shift,
+ ui->button_space_shift,
+ ui->button_space_shift,
+ ui->button_space_shift,
+ ui->button_space_shift,
+ ui->button_ok_shift,
+ },
+ }},
+ }};
+
+ numberpad_buttons = {{
+ {
+ ui->button_1_num,
+ ui->button_2_num,
+ ui->button_3_num,
+ ui->button_backspace_num,
+ },
+ {
+ ui->button_4_num,
+ ui->button_5_num,
+ ui->button_6_num,
+ ui->button_ok_num,
+ },
+ {
+ ui->button_7_num,
+ ui->button_8_num,
+ ui->button_9_num,
+ ui->button_ok_num,
+ },
+ {
+ nullptr,
+ ui->button_0_num,
+ nullptr,
+ ui->button_ok_num,
+ },
+ }};
+
+ all_buttons = {
+ ui->button_1,
+ ui->button_2,
+ ui->button_3,
+ ui->button_4,
+ ui->button_5,
+ ui->button_6,
+ ui->button_7,
+ ui->button_8,
+ ui->button_9,
+ ui->button_0,
+ ui->button_minus,
+ ui->button_backspace,
+ ui->button_q,
+ ui->button_w,
+ ui->button_e,
+ ui->button_r,
+ ui->button_t,
+ ui->button_y,
+ ui->button_u,
+ ui->button_i,
+ ui->button_o,
+ ui->button_p,
+ ui->button_slash,
+ ui->button_return,
+ ui->button_a,
+ ui->button_s,
+ ui->button_d,
+ ui->button_f,
+ ui->button_g,
+ ui->button_h,
+ ui->button_j,
+ ui->button_k,
+ ui->button_l,
+ ui->button_colon,
+ ui->button_apostrophe,
+ ui->button_z,
+ ui->button_x,
+ ui->button_c,
+ ui->button_v,
+ ui->button_b,
+ ui->button_n,
+ ui->button_m,
+ ui->button_comma,
+ ui->button_dot,
+ ui->button_question,
+ ui->button_exclamation,
+ ui->button_ok,
+ ui->button_shift,
+ ui->button_space,
+ ui->button_hash,
+ ui->button_left_bracket,
+ ui->button_right_bracket,
+ ui->button_dollar,
+ ui->button_percent,
+ ui->button_circumflex,
+ ui->button_ampersand,
+ ui->button_asterisk,
+ ui->button_left_parenthesis,
+ ui->button_right_parenthesis,
+ ui->button_underscore,
+ ui->button_backspace_shift,
+ ui->button_q_shift,
+ ui->button_w_shift,
+ ui->button_e_shift,
+ ui->button_r_shift,
+ ui->button_t_shift,
+ ui->button_y_shift,
+ ui->button_u_shift,
+ ui->button_i_shift,
+ ui->button_o_shift,
+ ui->button_p_shift,
+ ui->button_at,
+ ui->button_return_shift,
+ ui->button_a_shift,
+ ui->button_s_shift,
+ ui->button_d_shift,
+ ui->button_f_shift,
+ ui->button_g_shift,
+ ui->button_h_shift,
+ ui->button_j_shift,
+ ui->button_k_shift,
+ ui->button_l_shift,
+ ui->button_semicolon,
+ ui->button_quotation,
+ ui->button_z_shift,
+ ui->button_x_shift,
+ ui->button_c_shift,
+ ui->button_v_shift,
+ ui->button_b_shift,
+ ui->button_n_shift,
+ ui->button_m_shift,
+ ui->button_less_than,
+ ui->button_greater_than,
+ ui->button_plus,
+ ui->button_equal,
+ ui->button_ok_shift,
+ ui->button_shift_shift,
+ ui->button_space_shift,
+ ui->button_1_num,
+ ui->button_2_num,
+ ui->button_3_num,
+ ui->button_backspace_num,
+ ui->button_4_num,
+ ui->button_5_num,
+ ui->button_6_num,
+ ui->button_ok_num,
+ ui->button_7_num,
+ ui->button_8_num,
+ ui->button_9_num,
+ ui->button_0_num,
+ };
+
+ SetupMouseHover();
+
+ if (!initialize_parameters.ok_text.empty()) {
+ ui->button_ok->setText(QString::fromStdU16String(initialize_parameters.ok_text));
+ }
+
+ ui->label_header->setText(QString::fromStdU16String(initialize_parameters.header_text));
+ ui->label_sub->setText(QString::fromStdU16String(initialize_parameters.sub_text));
+
+ current_text = initialize_parameters.initial_text;
+ cursor_position = initialize_parameters.initial_cursor_position;
+
+ SetTextDrawType();
+
+ for (auto* button : all_buttons) {
+ connect(button, &QPushButton::clicked, this, [this, button](bool) {
+ if (is_inline) {
+ InlineKeyboardButtonClicked(button);
+ } else {
+ NormalKeyboardButtonClicked(button);
+ }
+ });
+ }
+
+ // TODO (Morph): Remove this when InputInterpreter no longer relies on the HID backend
+ if (system.IsPoweredOn()) {
+ input_interpreter = std::make_unique<InputInterpreter>(system);
+ }
+}
+
+QtSoftwareKeyboardDialog::~QtSoftwareKeyboardDialog() {
+ StopInputThread();
+}
+
+void QtSoftwareKeyboardDialog::ShowNormalKeyboard(QPoint pos, QSize size) {
+ if (isVisible()) {
+ return;
+ }
+
+ MoveAndResizeWindow(pos, size);
+
+ SetKeyboardType();
+ SetPasswordMode();
+ SetControllerImage();
+ DisableKeyboardButtons();
+ SetBackspaceOkEnabled();
+
+ open();
+}
+
+void QtSoftwareKeyboardDialog::ShowTextCheckDialog(
+ Service::AM::Applets::SwkbdTextCheckResult text_check_result,
+ std::u16string text_check_message) {
+ switch (text_check_result) {
+ case SwkbdTextCheckResult::Success:
+ case SwkbdTextCheckResult::Silent:
+ default:
+ break;
+ case SwkbdTextCheckResult::Failure: {
+ StopInputThread();
+
+ OverlayDialog dialog(this, system, QString{}, QString::fromStdU16String(text_check_message),
+ QString{}, tr("OK"), Qt::AlignCenter);
+ dialog.exec();
+
+ StartInputThread();
+ break;
+ }
+ case SwkbdTextCheckResult::Confirm: {
+ StopInputThread();
+
+ OverlayDialog dialog(this, system, QString{}, QString::fromStdU16String(text_check_message),
+ tr("Cancel"), tr("OK"), Qt::AlignCenter);
+ if (dialog.exec() != QDialog::Accepted) {
+ StartInputThread();
+ break;
+ }
+
+ auto text = ui->topOSK->currentIndex() == 1
+ ? ui->text_edit_osk->toPlainText().toStdU16String()
+ : ui->line_edit_osk->text().toStdU16String();
+
+ emit SubmitNormalText(SwkbdResult::Ok, std::move(text));
+ break;
+ }
+ }
+}
+
+void QtSoftwareKeyboardDialog::ShowInlineKeyboard(
+ Core::Frontend::InlineAppearParameters appear_parameters, QPoint pos, QSize size) {
+ MoveAndResizeWindow(pos, size);
+
+ ui->topOSK->setStyleSheet(QStringLiteral("background: rgba(0, 0, 0, 0);"));
+
+ ui->headerOSK->hide();
+ ui->subOSK->hide();
+ ui->inputOSK->hide();
+ ui->charactersOSK->hide();
+ ui->inputBoxOSK->hide();
+ ui->charactersBoxOSK->hide();
+
+ initialize_parameters.max_text_length = appear_parameters.max_text_length;
+ initialize_parameters.min_text_length = appear_parameters.min_text_length;
+ initialize_parameters.type = appear_parameters.type;
+ initialize_parameters.key_disable_flags = appear_parameters.key_disable_flags;
+ initialize_parameters.enable_backspace_button = appear_parameters.enable_backspace_button;
+ initialize_parameters.enable_return_button = appear_parameters.enable_return_button;
+    initialize_parameters.disable_cancel_button = appear_parameters.disable_cancel_button;
+
+ SetKeyboardType();
+ SetControllerImage();
+ DisableKeyboardButtons();
+ SetBackspaceOkEnabled();
+
+ open();
+}
+
+void QtSoftwareKeyboardDialog::HideInlineKeyboard() {
+ StopInputThread();
+ QDialog::hide();
+}
+
+void QtSoftwareKeyboardDialog::InlineTextChanged(
+ Core::Frontend::InlineTextParameters text_parameters) {
+ current_text = text_parameters.input_text;
+ cursor_position = text_parameters.cursor_position;
+
+ SetBackspaceOkEnabled();
+}
+
+void QtSoftwareKeyboardDialog::ExitKeyboard() {
+ StopInputThread();
+ QDialog::done(QDialog::Accepted);
+}
+
+void QtSoftwareKeyboardDialog::open() {
+ QDialog::open();
+
+ row = 0;
+ column = 0;
+
+ const auto* const curr_button =
+ keyboard_buttons[static_cast<int>(bottom_osk_index)][row][column];
+
+ // This is a workaround for setFocus() randomly not showing focus in the UI
+ QCursor::setPos(curr_button->mapToGlobal(curr_button->rect().center()));
+
+ StartInputThread();
+}
+
+void QtSoftwareKeyboardDialog::reject() {
+    // Pressing the ESC key in a dialog calls QDialog::reject().
+    // Override this behavior so that ESC maps to the software keyboard's "Cancel" action instead.
+ TranslateButtonPress(HIDButton::X);
+}
+
+void QtSoftwareKeyboardDialog::keyPressEvent(QKeyEvent* event) {
+ if (!is_inline) {
+ QDialog::keyPressEvent(event);
+ return;
+ }
+
+ const auto entered_key = event->key();
+
+ switch (entered_key) {
+ case Qt::Key_Escape:
+ QDialog::keyPressEvent(event);
+ return;
+ case Qt::Key_Backspace:
+ switch (bottom_osk_index) {
+ case BottomOSKIndex::LowerCase:
+ ui->button_backspace->click();
+ break;
+ case BottomOSKIndex::UpperCase:
+ ui->button_backspace_shift->click();
+ break;
+ case BottomOSKIndex::NumberPad:
+ ui->button_backspace_num->click();
+ break;
+ default:
+ break;
+ }
+ return;
+ case Qt::Key_Return:
+ switch (bottom_osk_index) {
+ case BottomOSKIndex::LowerCase:
+ ui->button_ok->click();
+ break;
+ case BottomOSKIndex::UpperCase:
+ ui->button_ok_shift->click();
+ break;
+ case BottomOSKIndex::NumberPad:
+ ui->button_ok_num->click();
+ break;
+ default:
+ break;
+ }
+ return;
+ case Qt::Key_Left:
+ MoveTextCursorDirection(Direction::Left);
+ return;
+ case Qt::Key_Right:
+ MoveTextCursorDirection(Direction::Right);
+ return;
+ default:
+ break;
+ }
+
+ const auto entered_text = event->text();
+
+ if (entered_text.isEmpty()) {
+ return;
+ }
+
+ InlineTextInsertString(entered_text.toStdU16String());
+}
+
+void QtSoftwareKeyboardDialog::MoveAndResizeWindow(QPoint pos, QSize size) {
+ QDialog::move(pos);
+ QDialog::resize(size);
+
+    // Scale the keyboard elements relative to a 96 DPI baseline to account for high-DPI displays.
+ const float dpi_scale = qApp->screenAt(pos)->logicalDotsPerInch() / 96.0f;
+
+ RescaleKeyboardElements(size.width(), size.height(), dpi_scale);
+}
+
+void QtSoftwareKeyboardDialog::RescaleKeyboardElements(float width, float height, float dpi_scale) {
+ const auto header_font_size = BASE_HEADER_FONT_SIZE * (height / BASE_HEIGHT) / dpi_scale;
+ const auto sub_font_size = BASE_SUB_FONT_SIZE * (height / BASE_HEIGHT) / dpi_scale;
+ const auto editor_font_size = BASE_EDITOR_FONT_SIZE * (height / BASE_HEIGHT) / dpi_scale;
+ const auto char_button_font_size =
+ BASE_CHAR_BUTTON_FONT_SIZE * (height / BASE_HEIGHT) / dpi_scale;
+ const auto label_button_font_size =
+ BASE_LABEL_BUTTON_FONT_SIZE * (height / BASE_HEIGHT) / dpi_scale;
+
+ QFont header_font(QStringLiteral("MS Shell Dlg 2"), header_font_size, QFont::Normal);
+ QFont sub_font(QStringLiteral("MS Shell Dlg 2"), sub_font_size, QFont::Normal);
+ QFont editor_font(QStringLiteral("MS Shell Dlg 2"), editor_font_size, QFont::Normal);
+ QFont char_button_font(QStringLiteral("MS Shell Dlg 2"), char_button_font_size, QFont::Normal);
+ QFont label_button_font(QStringLiteral("MS Shell Dlg 2"), label_button_font_size,
+ QFont::Normal);
+
+ ui->label_header->setFont(header_font);
+ ui->label_sub->setFont(sub_font);
+ ui->line_edit_osk->setFont(editor_font);
+ ui->text_edit_osk->setFont(editor_font);
+ ui->label_characters->setFont(sub_font);
+ ui->label_characters_box->setFont(sub_font);
+
+ ui->label_shift->setFont(label_button_font);
+ ui->label_shift_shift->setFont(label_button_font);
+ ui->label_cancel->setFont(label_button_font);
+ ui->label_cancel_shift->setFont(label_button_font);
+ ui->label_cancel_num->setFont(label_button_font);
+ ui->label_enter->setFont(label_button_font);
+ ui->label_enter_shift->setFont(label_button_font);
+ ui->label_enter_num->setFont(label_button_font);
+
+ for (auto* button : all_buttons) {
+ if (button == ui->button_return || button == ui->button_return_shift) {
+ button->setFont(label_button_font);
+ continue;
+ }
+
+ if (button == ui->button_space || button == ui->button_space_shift) {
+ button->setFont(label_button_font);
+ continue;
+ }
+
+ if (button == ui->button_shift || button == ui->button_shift_shift) {
+ button->setFont(label_button_font);
+ button->setIconSize(QSize(BASE_ICON_BUTTON_SIZE, BASE_ICON_BUTTON_SIZE) *
+ (height / BASE_HEIGHT));
+ continue;
+ }
+
+ if (button == ui->button_backspace || button == ui->button_backspace_shift ||
+ button == ui->button_backspace_num) {
+ button->setFont(label_button_font);
+ button->setIconSize(QSize(BASE_ICON_BUTTON_SIZE, BASE_ICON_BUTTON_SIZE) *
+ (height / BASE_HEIGHT));
+ continue;
+ }
+
+ if (button == ui->button_ok || button == ui->button_ok_shift ||
+ button == ui->button_ok_num) {
+ button->setFont(label_button_font);
+ continue;
+ }
+
+ button->setFont(char_button_font);
+ }
+}
+
+void QtSoftwareKeyboardDialog::SetKeyboardType() {
+ switch (initialize_parameters.type) {
+ case SwkbdType::Normal:
+ case SwkbdType::Qwerty:
+ case SwkbdType::Unknown3:
+ case SwkbdType::Latin:
+ case SwkbdType::SimplifiedChinese:
+ case SwkbdType::TraditionalChinese:
+ case SwkbdType::Korean:
+ default: {
+ bottom_osk_index = BottomOSKIndex::LowerCase;
+ ui->bottomOSK->setCurrentIndex(static_cast<int>(bottom_osk_index));
+
+ ui->verticalLayout_2->setStretch(0, 320);
+ ui->verticalLayout_2->setStretch(1, 400);
+
+ ui->gridLineOSK->setRowStretch(5, 94);
+ ui->gridBoxOSK->setRowStretch(2, 81);
+ break;
+ }
+ case SwkbdType::NumberPad: {
+ bottom_osk_index = BottomOSKIndex::NumberPad;
+ ui->bottomOSK->setCurrentIndex(static_cast<int>(bottom_osk_index));
+
+ ui->verticalLayout_2->setStretch(0, 370);
+ ui->verticalLayout_2->setStretch(1, 350);
+
+ ui->gridLineOSK->setRowStretch(5, 144);
+ ui->gridBoxOSK->setRowStretch(2, 131);
+ break;
+ }
+ }
+}
+
+void QtSoftwareKeyboardDialog::SetPasswordMode() {
+ switch (initialize_parameters.password_mode) {
+ case SwkbdPasswordMode::Disabled:
+ default:
+ ui->line_edit_osk->setEchoMode(QLineEdit::Normal);
+ break;
+ case SwkbdPasswordMode::Enabled:
+ ui->line_edit_osk->setEchoMode(QLineEdit::Password);
+ break;
+ }
+}
+
+void QtSoftwareKeyboardDialog::SetTextDrawType() {
+ switch (initialize_parameters.text_draw_type) {
+ case SwkbdTextDrawType::Line:
+ case SwkbdTextDrawType::DownloadCode: {
+ ui->topOSK->setCurrentIndex(0);
+
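+        // Short inputs (10 characters or fewer) use a narrower, centered line edit;
+        // longer inputs span nearly the full keyboard width.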
+ if (initialize_parameters.max_text_length <= 10) {
+ ui->gridLineOSK->setColumnStretch(0, 390);
+ ui->gridLineOSK->setColumnStretch(1, 500);
+ ui->gridLineOSK->setColumnStretch(2, 390);
+ } else {
+ ui->gridLineOSK->setColumnStretch(0, 130);
+ ui->gridLineOSK->setColumnStretch(1, 1020);
+ ui->gridLineOSK->setColumnStretch(2, 130);
+ }
+
+ if (is_inline) {
+ return;
+ }
+
+ connect(ui->line_edit_osk, &QLineEdit::textChanged, [this](const QString& changed_string) {
+ const auto is_valid = ValidateInputText(changed_string);
+
+ const auto text_length = static_cast<u32>(changed_string.length());
+
+ ui->label_characters->setText(QStringLiteral("%1/%2")
+ .arg(text_length)
+ .arg(initialize_parameters.max_text_length));
+
+ ui->button_ok->setEnabled(is_valid);
+ ui->button_ok_shift->setEnabled(is_valid);
+ ui->button_ok_num->setEnabled(is_valid);
+
+ ui->line_edit_osk->setFocus();
+ });
+
+ connect(ui->line_edit_osk, &QLineEdit::cursorPositionChanged,
+ [this](int old_cursor_position, int new_cursor_position) {
+ ui->button_backspace->setEnabled(
+ initialize_parameters.enable_backspace_button && new_cursor_position > 0);
+ ui->button_backspace_shift->setEnabled(
+ initialize_parameters.enable_backspace_button && new_cursor_position > 0);
+ ui->button_backspace_num->setEnabled(
+ initialize_parameters.enable_backspace_button && new_cursor_position > 0);
+
+ ui->line_edit_osk->setFocus();
+ });
+
+ connect(
+ ui->line_edit_osk, &QLineEdit::returnPressed, this,
+ [this] { TranslateButtonPress(HIDButton::Plus); }, Qt::QueuedConnection);
+
+ ui->line_edit_osk->setPlaceholderText(
+ QString::fromStdU16String(initialize_parameters.guide_text));
+ ui->line_edit_osk->setText(QString::fromStdU16String(initialize_parameters.initial_text));
+ ui->line_edit_osk->setMaxLength(initialize_parameters.max_text_length);
+ ui->line_edit_osk->setCursorPosition(initialize_parameters.initial_cursor_position);
+
+ ui->label_characters->setText(QStringLiteral("%1/%2")
+ .arg(initialize_parameters.initial_text.size())
+ .arg(initialize_parameters.max_text_length));
+ break;
+ }
+ case SwkbdTextDrawType::Box:
+ default: {
+ ui->topOSK->setCurrentIndex(1);
+
+ if (is_inline) {
+ return;
+ }
+
+ connect(ui->text_edit_osk, &QTextEdit::textChanged, [this] {
+ if (static_cast<u32>(ui->text_edit_osk->toPlainText().length()) >
+ initialize_parameters.max_text_length) {
+ auto text_cursor = ui->text_edit_osk->textCursor();
+ ui->text_edit_osk->setTextCursor(text_cursor);
+ text_cursor.deletePreviousChar();
+ }
+
+ const auto is_valid = ValidateInputText(ui->text_edit_osk->toPlainText());
+
+ const auto text_length = static_cast<u32>(ui->text_edit_osk->toPlainText().length());
+
+ ui->label_characters_box->setText(QStringLiteral("%1/%2")
+ .arg(text_length)
+ .arg(initialize_parameters.max_text_length));
+
+ ui->button_ok->setEnabled(is_valid);
+ ui->button_ok_shift->setEnabled(is_valid);
+ ui->button_ok_num->setEnabled(is_valid);
+
+ ui->text_edit_osk->setFocus();
+ });
+
+ connect(ui->text_edit_osk, &QTextEdit::cursorPositionChanged, [this] {
+ const auto new_cursor_position = ui->text_edit_osk->textCursor().position();
+
+ ui->button_backspace->setEnabled(initialize_parameters.enable_backspace_button &&
+ new_cursor_position > 0);
+ ui->button_backspace_shift->setEnabled(initialize_parameters.enable_backspace_button &&
+ new_cursor_position > 0);
+ ui->button_backspace_num->setEnabled(initialize_parameters.enable_backspace_button &&
+ new_cursor_position > 0);
+
+ ui->text_edit_osk->setFocus();
+ });
+
+ ui->text_edit_osk->setPlaceholderText(
+ QString::fromStdU16String(initialize_parameters.guide_text));
+ ui->text_edit_osk->setText(QString::fromStdU16String(initialize_parameters.initial_text));
+ ui->text_edit_osk->moveCursor(initialize_parameters.initial_cursor_position == 0
+ ? QTextCursor::Start
+ : QTextCursor::End);
+
+ ui->label_characters_box->setText(QStringLiteral("%1/%2")
+ .arg(initialize_parameters.initial_text.size())
+ .arg(initialize_parameters.max_text_length));
+ break;
+ }
+ }
+}
+
+void QtSoftwareKeyboardDialog::SetControllerImage() {
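+    // Player slot 8 holds the handheld controller; fall back to player 1's controller type
+    // when the handheld controller is not connected.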
+ const auto controller_type = Settings::values.players.GetValue()[8].connected
+ ? Settings::values.players.GetValue()[8].controller_type
+ : Settings::values.players.GetValue()[0].controller_type;
+
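+    // Use the dark variants of the controller images when a dark or midnight theme is active.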
+ const QString theme = [] {
+ if (QIcon::themeName().contains(QStringLiteral("dark")) ||
+ QIcon::themeName().contains(QStringLiteral("midnight"))) {
+ return QStringLiteral("_dark");
+ } else {
+ return QString{};
+ }
+ }();
+
+ switch (controller_type) {
+ case Settings::ControllerType::ProController:
+ case Settings::ControllerType::GameCube:
+ ui->icon_controller->setStyleSheet(
+ QStringLiteral("image: url(:/overlay/controller_pro%1.png);").arg(theme));
+ ui->icon_controller_shift->setStyleSheet(
+ QStringLiteral("image: url(:/overlay/controller_pro%1.png);").arg(theme));
+ ui->icon_controller_num->setStyleSheet(
+ QStringLiteral("image: url(:/overlay/controller_pro%1.png);").arg(theme));
+ break;
+ case Settings::ControllerType::DualJoyconDetached:
+ ui->icon_controller->setStyleSheet(
+ QStringLiteral("image: url(:/overlay/controller_dual_joycon%1.png);").arg(theme));
+ ui->icon_controller_shift->setStyleSheet(
+ QStringLiteral("image: url(:/overlay/controller_dual_joycon%1.png);").arg(theme));
+ ui->icon_controller_num->setStyleSheet(
+ QStringLiteral("image: url(:/overlay/controller_dual_joycon%1.png);").arg(theme));
+ break;
+ case Settings::ControllerType::LeftJoycon:
+ ui->icon_controller->setStyleSheet(
+ QStringLiteral("image: url(:/overlay/controller_single_joycon_left%1.png);")
+ .arg(theme));
+ ui->icon_controller_shift->setStyleSheet(
+ QStringLiteral("image: url(:/overlay/controller_single_joycon_left%1.png);")
+ .arg(theme));
+ ui->icon_controller_num->setStyleSheet(
+ QStringLiteral("image: url(:/overlay/controller_single_joycon_left%1.png);")
+ .arg(theme));
+ break;
+ case Settings::ControllerType::RightJoycon:
+ ui->icon_controller->setStyleSheet(
+ QStringLiteral("image: url(:/overlay/controller_single_joycon_right%1.png);")
+ .arg(theme));
+ ui->icon_controller_shift->setStyleSheet(
+ QStringLiteral("image: url(:/overlay/controller_single_joycon_right%1.png);")
+ .arg(theme));
+ ui->icon_controller_num->setStyleSheet(
+ QStringLiteral("image: url(:/overlay/controller_single_joycon_right%1.png);")
+ .arg(theme));
+ break;
+ case Settings::ControllerType::Handheld:
+ ui->icon_controller->setStyleSheet(
+ QStringLiteral("image: url(:/overlay/controller_handheld%1.png);").arg(theme));
+ ui->icon_controller_shift->setStyleSheet(
+ QStringLiteral("image: url(:/overlay/controller_handheld%1.png);").arg(theme));
+ ui->icon_controller_num->setStyleSheet(
+ QStringLiteral("image: url(:/overlay/controller_handheld%1.png);").arg(theme));
+ break;
+ default:
+ break;
+ }
+}
+
+void QtSoftwareKeyboardDialog::DisableKeyboardButtons() {
+ switch (bottom_osk_index) {
+ case BottomOSKIndex::LowerCase:
+ case BottomOSKIndex::UpperCase:
+ default: {
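+        // Re-enable every key first, then apply the per-key disable flags from the
+        // initialize parameters.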
+ for (const auto& keys : keyboard_buttons) {
+ for (const auto& rows : keys) {
+ for (auto* button : rows) {
+ if (!button) {
+ continue;
+ }
+
+ button->setEnabled(true);
+ }
+ }
+ }
+
+ const auto& key_disable_flags = initialize_parameters.key_disable_flags;
+
+ ui->button_space->setDisabled(key_disable_flags.space);
+ ui->button_space_shift->setDisabled(key_disable_flags.space);
+
+ ui->button_at->setDisabled(key_disable_flags.at || key_disable_flags.username);
+
+ ui->button_percent->setDisabled(key_disable_flags.percent || key_disable_flags.username);
+
+ ui->button_slash->setDisabled(key_disable_flags.slash);
+
+ ui->button_1->setDisabled(key_disable_flags.numbers);
+ ui->button_2->setDisabled(key_disable_flags.numbers);
+ ui->button_3->setDisabled(key_disable_flags.numbers);
+ ui->button_4->setDisabled(key_disable_flags.numbers);
+ ui->button_5->setDisabled(key_disable_flags.numbers);
+ ui->button_6->setDisabled(key_disable_flags.numbers);
+ ui->button_7->setDisabled(key_disable_flags.numbers);
+ ui->button_8->setDisabled(key_disable_flags.numbers);
+ ui->button_9->setDisabled(key_disable_flags.numbers);
+ ui->button_0->setDisabled(key_disable_flags.numbers);
+
+ ui->button_return->setEnabled(initialize_parameters.enable_return_button);
+ ui->button_return_shift->setEnabled(initialize_parameters.enable_return_button);
+ break;
+ }
+ case BottomOSKIndex::NumberPad: {
+ for (const auto& rows : numberpad_buttons) {
+ for (auto* button : rows) {
+ if (!button) {
+ continue;
+ }
+
+ button->setEnabled(true);
+ }
+ }
+ break;
+ }
+ }
+}
+
+void QtSoftwareKeyboardDialog::SetBackspaceOkEnabled() {
+ if (is_inline) {
+ ui->button_ok->setEnabled(current_text.size() >= initialize_parameters.min_text_length);
+ ui->button_ok_shift->setEnabled(current_text.size() >=
+ initialize_parameters.min_text_length);
+ ui->button_ok_num->setEnabled(current_text.size() >= initialize_parameters.min_text_length);
+
+ ui->button_backspace->setEnabled(initialize_parameters.enable_backspace_button &&
+ cursor_position > 0);
+ ui->button_backspace_shift->setEnabled(initialize_parameters.enable_backspace_button &&
+ cursor_position > 0);
+ ui->button_backspace_num->setEnabled(initialize_parameters.enable_backspace_button &&
+ cursor_position > 0);
+ } else {
+ const auto text_length = [this] {
+ if (ui->topOSK->currentIndex() == 1) {
+ return static_cast<u32>(ui->text_edit_osk->toPlainText().length());
+ } else {
+ return static_cast<u32>(ui->line_edit_osk->text().length());
+ }
+ }();
+
+ const auto normal_cursor_position = [this] {
+ if (ui->topOSK->currentIndex() == 1) {
+ return ui->text_edit_osk->textCursor().position();
+ } else {
+ return ui->line_edit_osk->cursorPosition();
+ }
+ }();
+
+ ui->button_ok->setEnabled(text_length >= initialize_parameters.min_text_length);
+ ui->button_ok_shift->setEnabled(text_length >= initialize_parameters.min_text_length);
+ ui->button_ok_num->setEnabled(text_length >= initialize_parameters.min_text_length);
+
+ ui->button_backspace->setEnabled(initialize_parameters.enable_backspace_button &&
+ normal_cursor_position > 0);
+ ui->button_backspace_shift->setEnabled(initialize_parameters.enable_backspace_button &&
+ normal_cursor_position > 0);
+ ui->button_backspace_num->setEnabled(initialize_parameters.enable_backspace_button &&
+ normal_cursor_position > 0);
+ }
+}
+
+bool QtSoftwareKeyboardDialog::ValidateInputText(const QString& input_text) {
+ const auto& key_disable_flags = initialize_parameters.key_disable_flags;
+
+ const auto input_text_length = static_cast<u32>(input_text.length());
+
+ if (input_text_length < initialize_parameters.min_text_length ||
+ input_text_length > initialize_parameters.max_text_length) {
+ return false;
+ }
+
+ if (key_disable_flags.space && input_text.contains(QLatin1Char{' '})) {
+ return false;
+ }
+
+ if ((key_disable_flags.at || key_disable_flags.username) &&
+ input_text.contains(QLatin1Char{'@'})) {
+ return false;
+ }
+
+ if ((key_disable_flags.percent || key_disable_flags.username) &&
+ input_text.contains(QLatin1Char{'%'})) {
+ return false;
+ }
+
+ if (key_disable_flags.slash && input_text.contains(QLatin1Char{'/'})) {
+ return false;
+ }
+
+ if ((key_disable_flags.backslash || key_disable_flags.username) &&
+ input_text.contains(QLatin1Char('\\'))) {
+ return false;
+ }
+
+ if (key_disable_flags.numbers &&
+ std::any_of(input_text.begin(), input_text.end(), [](QChar c) { return c.isDigit(); })) {
+ return false;
+ }
+
+ if (bottom_osk_index == BottomOSKIndex::NumberPad &&
+ std::any_of(input_text.begin(), input_text.end(), [](QChar c) { return !c.isDigit(); })) {
+ return false;
+ }
+
+ return true;
+}
+
+void QtSoftwareKeyboardDialog::ChangeBottomOSKIndex() {
+ switch (bottom_osk_index) {
+ case BottomOSKIndex::LowerCase:
+ bottom_osk_index = BottomOSKIndex::UpperCase;
+ ui->bottomOSK->setCurrentIndex(static_cast<int>(bottom_osk_index));
+
+ ui->button_shift_shift->setStyleSheet(
+ QStringLiteral("image: url(:/overlay/osk_button_shift_lock_off.png);"
+ "\nimage-position: left;"));
+
+ ui->button_shift_shift->setIconSize(ui->button_shift->iconSize());
+ ui->button_backspace_shift->setIconSize(ui->button_backspace->iconSize());
+ break;
+ case BottomOSKIndex::UpperCase:
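+        // A second shift press while in uppercase toggles caps lock; turning caps lock off
+        // returns the keyboard to lowercase.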
+ if (caps_lock_enabled) {
+ caps_lock_enabled = false;
+
+ ui->button_shift_shift->setStyleSheet(
+ QStringLiteral("image: url(:/overlay/osk_button_shift_lock_off.png);"
+ "\nimage-position: left;"));
+
+ ui->button_shift_shift->setIconSize(ui->button_shift->iconSize());
+ ui->button_backspace_shift->setIconSize(ui->button_backspace->iconSize());
+
+ ui->label_shift_shift->setText(QStringLiteral("Caps Lock"));
+
+ bottom_osk_index = BottomOSKIndex::LowerCase;
+ ui->bottomOSK->setCurrentIndex(static_cast<int>(bottom_osk_index));
+ } else {
+ caps_lock_enabled = true;
+
+ ui->button_shift_shift->setStyleSheet(
+ QStringLiteral("image: url(:/overlay/osk_button_shift_lock_on.png);"
+ "\nimage-position: left;"));
+
+ ui->button_shift_shift->setIconSize(ui->button_shift->iconSize());
+ ui->button_backspace_shift->setIconSize(ui->button_backspace->iconSize());
+
+ ui->label_shift_shift->setText(QStringLiteral("Caps Lock Off"));
+ }
+ break;
+ case BottomOSKIndex::NumberPad:
+ default:
+ break;
+ }
+}
+
+void QtSoftwareKeyboardDialog::NormalKeyboardButtonClicked(QPushButton* button) {
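+    // The ampersand is handled separately, likely because a lone '&' in a QPushButton's text
+    // is treated as a mnemonic marker by Qt rather than as a literal character.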
+ if (button == ui->button_ampersand) {
+ if (ui->topOSK->currentIndex() == 1) {
+ ui->text_edit_osk->insertPlainText(QStringLiteral("&"));
+ } else {
+ ui->line_edit_osk->insert(QStringLiteral("&"));
+ }
+ return;
+ }
+
+ if (button == ui->button_return || button == ui->button_return_shift) {
+ if (ui->topOSK->currentIndex() == 1) {
+ ui->text_edit_osk->insertPlainText(QStringLiteral("\n"));
+ } else {
+ ui->line_edit_osk->insert(QStringLiteral("\n"));
+ }
+ return;
+ }
+
+ if (button == ui->button_space || button == ui->button_space_shift) {
+ if (ui->topOSK->currentIndex() == 1) {
+ ui->text_edit_osk->insertPlainText(QStringLiteral(" "));
+ } else {
+ ui->line_edit_osk->insert(QStringLiteral(" "));
+ }
+ return;
+ }
+
+ if (button == ui->button_shift || button == ui->button_shift_shift) {
+ ChangeBottomOSKIndex();
+ return;
+ }
+
+ if (button == ui->button_backspace || button == ui->button_backspace_shift ||
+ button == ui->button_backspace_num) {
+ if (ui->topOSK->currentIndex() == 1) {
+ auto text_cursor = ui->text_edit_osk->textCursor();
+ ui->text_edit_osk->setTextCursor(text_cursor);
+ text_cursor.deletePreviousChar();
+ } else {
+ ui->line_edit_osk->backspace();
+ }
+ return;
+ }
+
+ if (button == ui->button_ok || button == ui->button_ok_shift || button == ui->button_ok_num) {
+ auto text = ui->topOSK->currentIndex() == 1
+ ? ui->text_edit_osk->toPlainText().toStdU16String()
+ : ui->line_edit_osk->text().toStdU16String();
+
+ emit SubmitNormalText(SwkbdResult::Ok, std::move(text));
+ return;
+ }
+
+ if (ui->topOSK->currentIndex() == 1) {
+ ui->text_edit_osk->insertPlainText(button->text());
+ } else {
+ ui->line_edit_osk->insert(button->text());
+ }
+
+ // Revert the keyboard to lowercase if the shift key is active.
+ if (bottom_osk_index == BottomOSKIndex::UpperCase && !caps_lock_enabled) {
+        // Temporarily enable caps_lock_enabled so that ChangeBottomOSKIndex takes the path
+        // that switches bottom_osk_index from UpperCase back to LowerCase.
+ caps_lock_enabled = true;
+ ChangeBottomOSKIndex();
+ }
+}
+
+void QtSoftwareKeyboardDialog::InlineKeyboardButtonClicked(QPushButton* button) {
+ if (!button->isEnabled()) {
+ return;
+ }
+
+ if (button == ui->button_ampersand) {
+ InlineTextInsertString(u"&");
+ return;
+ }
+
+ if (button == ui->button_return || button == ui->button_return_shift) {
+ InlineTextInsertString(u"\n");
+ return;
+ }
+
+ if (button == ui->button_space || button == ui->button_space_shift) {
+ InlineTextInsertString(u" ");
+ return;
+ }
+
+ if (button == ui->button_shift || button == ui->button_shift_shift) {
+ ChangeBottomOSKIndex();
+ return;
+ }
+
+ if (button == ui->button_backspace || button == ui->button_backspace_shift ||
+ button == ui->button_backspace_num) {
+ if (cursor_position <= 0 || current_text.empty()) {
+ cursor_position = 0;
+ return;
+ }
+
+ --cursor_position;
+
+ current_text.erase(cursor_position, 1);
+
+ SetBackspaceOkEnabled();
+
+ emit SubmitInlineText(SwkbdReplyType::ChangedString, current_text, cursor_position);
+ return;
+ }
+
+ if (button == ui->button_ok || button == ui->button_ok_shift || button == ui->button_ok_num) {
+ emit SubmitInlineText(SwkbdReplyType::DecidedEnter, current_text, cursor_position);
+ return;
+ }
+
+ InlineTextInsertString(button->text().toStdU16String());
+
+ // Revert the keyboard to lowercase if the shift key is active.
+ if (bottom_osk_index == BottomOSKIndex::UpperCase && !caps_lock_enabled) {
+        // Temporarily enable caps_lock_enabled so that ChangeBottomOSKIndex takes the path
+        // that switches bottom_osk_index from UpperCase back to LowerCase.
+ caps_lock_enabled = true;
+ ChangeBottomOSKIndex();
+ }
+}
+
+void QtSoftwareKeyboardDialog::InlineTextInsertString(std::u16string_view string) {
+ if ((current_text.size() + string.size()) > initialize_parameters.max_text_length) {
+ return;
+ }
+
+ current_text.insert(cursor_position, string);
+
+ cursor_position += static_cast<s32>(string.size());
+
+ SetBackspaceOkEnabled();
+
+ emit SubmitInlineText(SwkbdReplyType::ChangedString, current_text, cursor_position);
+}
+
+void QtSoftwareKeyboardDialog::SetupMouseHover() {
+    // setFocus() has a bug where continuously changing focus can cause the focus indicator to
+    // mysteriously disappear. As a workaround, mouse hover is used in place of button focus.
+    // This requires setting a blank cursor while hovering over the buttons and giving them a
+    // NoFocus policy so that they do not retain keyboard focus on top of the mouse hover.
+ for (auto* button : all_buttons) {
+ button->setCursor(QCursor(Qt::BlankCursor));
+ button->setFocusPolicy(Qt::NoFocus);
+ }
+}
+
+template <HIDButton... T>
+void QtSoftwareKeyboardDialog::HandleButtonPressedOnce() {
+ const auto f = [this](HIDButton button) {
+ if (input_interpreter->IsButtonPressedOnce(button)) {
+ TranslateButtonPress(button);
+ }
+ };
+
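+    // Unary right fold over the template parameter pack: calls f once for each HIDButton in T.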
+ (f(T), ...);
+}
+
+template <HIDButton... T>
+void QtSoftwareKeyboardDialog::HandleButtonHold() {
+ const auto f = [this](HIDButton button) {
+ if (input_interpreter->IsButtonHeld(button)) {
+ TranslateButtonPress(button);
+ }
+ };
+
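+    // Same fold-expression dispatch as HandleButtonPressedOnce, but for held buttons.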
+ (f(T), ...);
+}
+
+void QtSoftwareKeyboardDialog::TranslateButtonPress(HIDButton button) {
+ switch (button) {
+ case HIDButton::A:
+ switch (bottom_osk_index) {
+ case BottomOSKIndex::LowerCase:
+ case BottomOSKIndex::UpperCase:
+ keyboard_buttons[static_cast<std::size_t>(bottom_osk_index)][row][column]->click();
+ break;
+ case BottomOSKIndex::NumberPad:
+ numberpad_buttons[row][column]->click();
+ break;
+ default:
+ break;
+ }
+ break;
+ case HIDButton::B:
+ switch (bottom_osk_index) {
+ case BottomOSKIndex::LowerCase:
+ ui->button_backspace->click();
+ break;
+ case BottomOSKIndex::UpperCase:
+ ui->button_backspace_shift->click();
+ break;
+ case BottomOSKIndex::NumberPad:
+ ui->button_backspace_num->click();
+ break;
+ default:
+ break;
+ }
+ break;
+ case HIDButton::X:
+ if (is_inline) {
+ emit SubmitInlineText(SwkbdReplyType::DecidedCancel, current_text, cursor_position);
+ } else {
+ auto text = ui->topOSK->currentIndex() == 1
+ ? ui->text_edit_osk->toPlainText().toStdU16String()
+ : ui->line_edit_osk->text().toStdU16String();
+
+ emit SubmitNormalText(SwkbdResult::Cancel, std::move(text));
+ }
+ break;
+ case HIDButton::Y:
+ switch (bottom_osk_index) {
+ case BottomOSKIndex::LowerCase:
+ ui->button_space->click();
+ break;
+ case BottomOSKIndex::UpperCase:
+ ui->button_space_shift->click();
+ break;
+ case BottomOSKIndex::NumberPad:
+ default:
+ break;
+ }
+ break;
+ case HIDButton::LStick:
+ case HIDButton::RStick:
+ switch (bottom_osk_index) {
+ case BottomOSKIndex::LowerCase:
+ ui->button_shift->click();
+ break;
+ case BottomOSKIndex::UpperCase:
+ ui->button_shift_shift->click();
+ break;
+ case BottomOSKIndex::NumberPad:
+ default:
+ break;
+ }
+ break;
+ case HIDButton::L:
+ MoveTextCursorDirection(Direction::Left);
+ break;
+ case HIDButton::R:
+ MoveTextCursorDirection(Direction::Right);
+ break;
+ case HIDButton::Plus:
+ switch (bottom_osk_index) {
+ case BottomOSKIndex::LowerCase:
+ ui->button_ok->click();
+ break;
+ case BottomOSKIndex::UpperCase:
+ ui->button_ok_shift->click();
+ break;
+ case BottomOSKIndex::NumberPad:
+ ui->button_ok_num->click();
+ break;
+ default:
+ break;
+ }
+ break;
+ case HIDButton::DLeft:
+ case HIDButton::LStickLeft:
+ case HIDButton::RStickLeft:
+ MoveButtonDirection(Direction::Left);
+ break;
+ case HIDButton::DUp:
+ case HIDButton::LStickUp:
+ case HIDButton::RStickUp:
+ MoveButtonDirection(Direction::Up);
+ break;
+ case HIDButton::DRight:
+ case HIDButton::LStickRight:
+ case HIDButton::RStickRight:
+ MoveButtonDirection(Direction::Right);
+ break;
+ case HIDButton::DDown:
+ case HIDButton::LStickDown:
+ case HIDButton::RStickDown:
+ MoveButtonDirection(Direction::Down);
+ break;
+ default:
+ break;
+ }
+}
+
+void QtSoftwareKeyboardDialog::MoveButtonDirection(Direction direction) {
+ // Changes the row or column index depending on the direction.
+ auto move_direction = [this, direction](std::size_t max_rows, std::size_t max_columns) {
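+        // Adding (max - 1) before the modulo wraps the index to the last row/column when
+        // moving past zero.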
+ switch (direction) {
+ case Direction::Left:
+ column = (column + max_columns - 1) % max_columns;
+ break;
+ case Direction::Up:
+ row = (row + max_rows - 1) % max_rows;
+ break;
+ case Direction::Right:
+ column = (column + 1) % max_columns;
+ break;
+ case Direction::Down:
+ row = (row + 1) % max_rows;
+ break;
+ default:
+ break;
+ }
+ };
+
+ switch (bottom_osk_index) {
+ case BottomOSKIndex::LowerCase:
+ case BottomOSKIndex::UpperCase: {
+ const auto index = static_cast<std::size_t>(bottom_osk_index);
+
+ const auto* const prev_button = keyboard_buttons[index][row][column];
+ move_direction(NUM_ROWS_NORMAL, NUM_COLUMNS_NORMAL);
+ auto* curr_button = keyboard_buttons[index][row][column];
+
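+        // Keep moving in the same direction until a distinct, enabled button is reached;
+        // this skips disabled keys and the duplicated entries used by multi-cell keys such
+        // as space and shift.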
+ while (!curr_button || !curr_button->isEnabled() || curr_button == prev_button) {
+ move_direction(NUM_ROWS_NORMAL, NUM_COLUMNS_NORMAL);
+ curr_button = keyboard_buttons[index][row][column];
+ }
+
+ // This is a workaround for setFocus() randomly not showing focus in the UI
+ QCursor::setPos(curr_button->mapToGlobal(curr_button->rect().center()));
+ break;
+ }
+ case BottomOSKIndex::NumberPad: {
+ const auto* const prev_button = numberpad_buttons[row][column];
+ move_direction(NUM_ROWS_NUMPAD, NUM_COLUMNS_NUMPAD);
+ auto* curr_button = numberpad_buttons[row][column];
+
+ while (!curr_button || !curr_button->isEnabled() || curr_button == prev_button) {
+ move_direction(NUM_ROWS_NUMPAD, NUM_COLUMNS_NUMPAD);
+ curr_button = numberpad_buttons[row][column];
+ }
+
+ // This is a workaround for setFocus() randomly not showing focus in the UI
+ QCursor::setPos(curr_button->mapToGlobal(curr_button->rect().center()));
+ break;
+ }
+ default:
+ break;
+ }
+}
+
+void QtSoftwareKeyboardDialog::MoveTextCursorDirection(Direction direction) {
+ switch (direction) {
+ case Direction::Left:
+ if (is_inline) {
+ if (cursor_position <= 0) {
+ cursor_position = 0;
+ } else {
+ --cursor_position;
+ emit SubmitInlineText(SwkbdReplyType::MovedCursor, current_text, cursor_position);
+ }
+ } else {
+ if (ui->topOSK->currentIndex() == 1) {
+ ui->text_edit_osk->moveCursor(QTextCursor::Left);
+ } else {
+ ui->line_edit_osk->setCursorPosition(ui->line_edit_osk->cursorPosition() - 1);
+ }
+ }
+ break;
+ case Direction::Right:
+ if (is_inline) {
+ if (cursor_position >= static_cast<s32>(current_text.size())) {
+ cursor_position = static_cast<s32>(current_text.size());
+ } else {
+ ++cursor_position;
+ emit SubmitInlineText(SwkbdReplyType::MovedCursor, current_text, cursor_position);
+ }
+ } else {
+ if (ui->topOSK->currentIndex() == 1) {
+ ui->text_edit_osk->moveCursor(QTextCursor::Right);
+ } else {
+ ui->line_edit_osk->setCursorPosition(ui->line_edit_osk->cursorPosition() + 1);
+ }
+ }
+ break;
+ default:
+ break;
+ }
+}
+
+void QtSoftwareKeyboardDialog::StartInputThread() {
+ if (input_thread_running) {
+ return;
+ }
+
+ input_thread_running = true;
+
+ input_thread = std::thread(&QtSoftwareKeyboardDialog::InputThread, this);
+}
+
+void QtSoftwareKeyboardDialog::StopInputThread() {
+ input_thread_running = false;
+
+ if (input_thread.joinable()) {
+ input_thread.join();
+ }
+
+ if (input_interpreter) {
+ input_interpreter->ResetButtonStates();
+ }
+}
+
+void QtSoftwareKeyboardDialog::InputThread() {
+ while (input_thread_running) {
+ input_interpreter->PollInput();
+
+ HandleButtonPressedOnce<HIDButton::A, HIDButton::B, HIDButton::X, HIDButton::Y,
+ HIDButton::LStick, HIDButton::RStick, HIDButton::L, HIDButton::R,
+ HIDButton::Plus, HIDButton::DLeft, HIDButton::DUp,
+ HIDButton::DRight, HIDButton::DDown, HIDButton::LStickLeft,
+ HIDButton::LStickUp, HIDButton::LStickRight, HIDButton::LStickDown,
+ HIDButton::RStickLeft, HIDButton::RStickUp, HIDButton::RStickRight,
+ HIDButton::RStickDown>();
+
+ HandleButtonHold<HIDButton::B, HIDButton::L, HIDButton::R, HIDButton::DLeft, HIDButton::DUp,
+ HIDButton::DRight, HIDButton::DDown, HIDButton::LStickLeft,
+ HIDButton::LStickUp, HIDButton::LStickRight, HIDButton::LStickDown,
+ HIDButton::RStickLeft, HIDButton::RStickUp, HIDButton::RStickRight,
+ HIDButton::RStickDown>();
+
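+        // Poll at roughly 20 Hz to keep button navigation responsive without busy-waiting.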
+ std::this_thread::sleep_for(std::chrono::milliseconds(50));
+ }
+}
+
+QtSoftwareKeyboard::QtSoftwareKeyboard(GMainWindow& main_window) {
+ connect(this, &QtSoftwareKeyboard::MainWindowInitializeKeyboard, &main_window,
+ &GMainWindow::SoftwareKeyboardInitialize, Qt::QueuedConnection);
+ connect(this, &QtSoftwareKeyboard::MainWindowShowNormalKeyboard, &main_window,
+ &GMainWindow::SoftwareKeyboardShowNormal, Qt::QueuedConnection);
+ connect(this, &QtSoftwareKeyboard::MainWindowShowTextCheckDialog, &main_window,
+ &GMainWindow::SoftwareKeyboardShowTextCheck, Qt::QueuedConnection);
+ connect(this, &QtSoftwareKeyboard::MainWindowShowInlineKeyboard, &main_window,
+ &GMainWindow::SoftwareKeyboardShowInline, Qt::QueuedConnection);
+ connect(this, &QtSoftwareKeyboard::MainWindowHideInlineKeyboard, &main_window,
+ &GMainWindow::SoftwareKeyboardHideInline, Qt::QueuedConnection);
+ connect(this, &QtSoftwareKeyboard::MainWindowInlineTextChanged, &main_window,
+ &GMainWindow::SoftwareKeyboardInlineTextChanged, Qt::QueuedConnection);
+ connect(this, &QtSoftwareKeyboard::MainWindowExitKeyboard, &main_window,
+ &GMainWindow::SoftwareKeyboardExit, Qt::QueuedConnection);
+ connect(&main_window, &GMainWindow::SoftwareKeyboardSubmitNormalText, this,
+ &QtSoftwareKeyboard::SubmitNormalText, Qt::QueuedConnection);
+ connect(&main_window, &GMainWindow::SoftwareKeyboardSubmitInlineText, this,
+ &QtSoftwareKeyboard::SubmitInlineText, Qt::QueuedConnection);
+}
+
+QtSoftwareKeyboard::~QtSoftwareKeyboard() = default;
+
+void QtSoftwareKeyboard::InitializeKeyboard(
+ bool is_inline, Core::Frontend::KeyboardInitializeParameters initialize_parameters,
+ std::function<void(Service::AM::Applets::SwkbdResult, std::u16string)> submit_normal_callback_,
+ std::function<void(Service::AM::Applets::SwkbdReplyType, std::u16string, s32)>
+ submit_inline_callback_) {
+ if (is_inline) {
+ submit_inline_callback = std::move(submit_inline_callback_);
+ } else {
+ submit_normal_callback = std::move(submit_normal_callback_);
+ }
+
+ LOG_INFO(Service_AM,
+ "\nKeyboardInitializeParameters:"
+ "\nok_text={}"
+ "\nheader_text={}"
+ "\nsub_text={}"
+ "\nguide_text={}"
+ "\ninitial_text={}"
+ "\nmax_text_length={}"
+ "\nmin_text_length={}"
+ "\ninitial_cursor_position={}"
+ "\ntype={}"
+ "\npassword_mode={}"
+ "\ntext_draw_type={}"
+ "\nkey_disable_flags={}"
+ "\nuse_blur_background={}"
+ "\nenable_backspace_button={}"
+ "\nenable_return_button={}"
+ "\ndisable_cancel_button={}",
+ Common::UTF16ToUTF8(initialize_parameters.ok_text),
+ Common::UTF16ToUTF8(initialize_parameters.header_text),
+ Common::UTF16ToUTF8(initialize_parameters.sub_text),
+ Common::UTF16ToUTF8(initialize_parameters.guide_text),
+ Common::UTF16ToUTF8(initialize_parameters.initial_text),
+ initialize_parameters.max_text_length, initialize_parameters.min_text_length,
+ initialize_parameters.initial_cursor_position, initialize_parameters.type,
+ initialize_parameters.password_mode, initialize_parameters.text_draw_type,
+ initialize_parameters.key_disable_flags.raw, initialize_parameters.use_blur_background,
+ initialize_parameters.enable_backspace_button,
+ initialize_parameters.enable_return_button,
+ initialize_parameters.disable_cancel_button);
+
+ emit MainWindowInitializeKeyboard(is_inline, std::move(initialize_parameters));
+}
+
+void QtSoftwareKeyboard::ShowNormalKeyboard() const {
+ emit MainWindowShowNormalKeyboard();
+}
+
+void QtSoftwareKeyboard::ShowTextCheckDialog(
+ Service::AM::Applets::SwkbdTextCheckResult text_check_result,
+ std::u16string text_check_message) const {
+ emit MainWindowShowTextCheckDialog(text_check_result, std::move(text_check_message));
+}
+
+void QtSoftwareKeyboard::ShowInlineKeyboard(
+ Core::Frontend::InlineAppearParameters appear_parameters) const {
+ LOG_INFO(Service_AM,
+ "\nInlineAppearParameters:"
+ "\nmax_text_length={}"
+ "\nmin_text_length={}"
+ "\nkey_top_scale_x={}"
+ "\nkey_top_scale_y={}"
+ "\nkey_top_translate_x={}"
+ "\nkey_top_translate_y={}"
+ "\ntype={}"
+ "\nkey_disable_flags={}"
+ "\nkey_top_as_floating={}"
+ "\nenable_backspace_button={}"
+ "\nenable_return_button={}"
+ "\ndisable_cancel_button={}",
+ appear_parameters.max_text_length, appear_parameters.min_text_length,
+ appear_parameters.key_top_scale_x, appear_parameters.key_top_scale_y,
+ appear_parameters.key_top_translate_x, appear_parameters.key_top_translate_y,
+ appear_parameters.type, appear_parameters.key_disable_flags.raw,
+ appear_parameters.key_top_as_floating, appear_parameters.enable_backspace_button,
+ appear_parameters.enable_return_button, appear_parameters.disable_cancel_button);
+
+ emit MainWindowShowInlineKeyboard(std::move(appear_parameters));
+}
+
+void QtSoftwareKeyboard::HideInlineKeyboard() const {
+ emit MainWindowHideInlineKeyboard();
+}
+
+void QtSoftwareKeyboard::InlineTextChanged(
+ Core::Frontend::InlineTextParameters text_parameters) const {
+ LOG_INFO(Service_AM,
+ "\nInlineTextParameters:"
+ "\ninput_text={}"
+ "\ncursor_position={}",
+ Common::UTF16ToUTF8(text_parameters.input_text), text_parameters.cursor_position);
+
+ emit MainWindowInlineTextChanged(std::move(text_parameters));
+}
+
+void QtSoftwareKeyboard::ExitKeyboard() const {
+ emit MainWindowExitKeyboard();
+}
+
+void QtSoftwareKeyboard::SubmitNormalText(Service::AM::Applets::SwkbdResult result,
+ std::u16string submitted_text) const {
+ submit_normal_callback(result, submitted_text);
+}
+
+void QtSoftwareKeyboard::SubmitInlineText(Service::AM::Applets::SwkbdReplyType reply_type,
+ std::u16string submitted_text,
+ s32 cursor_position) const {
+ submit_inline_callback(reply_type, submitted_text, cursor_position);
+}
diff --git a/src/yuzu/applets/software_keyboard.h b/src/yuzu/applets/qt_software_keyboard.h
index 1a03c098c..1a03c098c 100644
--- a/src/yuzu/applets/software_keyboard.h
+++ b/src/yuzu/applets/qt_software_keyboard.h
diff --git a/src/yuzu/applets/software_keyboard.ui b/src/yuzu/applets/qt_software_keyboard.ui
index b0a1fcde9..b0a1fcde9 100644
--- a/src/yuzu/applets/software_keyboard.ui
+++ b/src/yuzu/applets/qt_software_keyboard.ui
diff --git a/src/yuzu/applets/qt_web_browser.cpp b/src/yuzu/applets/qt_web_browser.cpp
new file mode 100644
index 000000000..b112dd7b0
--- /dev/null
+++ b/src/yuzu/applets/qt_web_browser.cpp
@@ -0,0 +1,417 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#ifdef YUZU_USE_QT_WEB_ENGINE
+#include <QKeyEvent>
+
+#include <QWebEngineProfile>
+#include <QWebEngineScript>
+#include <QWebEngineScriptCollection>
+#include <QWebEngineSettings>
+#include <QWebEngineUrlScheme>
+#endif
+
+#include "common/fs/path_util.h"
+#include "core/core.h"
+#include "core/frontend/input_interpreter.h"
+#include "input_common/keyboard.h"
+#include "input_common/main.h"
+#include "yuzu/applets/qt_web_browser.h"
+#include "yuzu/applets/qt_web_browser_scripts.h"
+#include "yuzu/main.h"
+#include "yuzu/util/url_request_interceptor.h"
+
+#ifdef YUZU_USE_QT_WEB_ENGINE
+
+namespace {
+
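+// Maps directional HID buttons (D-pad and left stick) to the corresponding Qt arrow keys.
+// Buttons without a mapping return 0, which SendKeyPressEvent treats as "no key".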
+constexpr int HIDButtonToKey(HIDButton button) {
+ switch (button) {
+ case HIDButton::DLeft:
+ case HIDButton::LStickLeft:
+ return Qt::Key_Left;
+ case HIDButton::DUp:
+ case HIDButton::LStickUp:
+ return Qt::Key_Up;
+ case HIDButton::DRight:
+ case HIDButton::LStickRight:
+ return Qt::Key_Right;
+ case HIDButton::DDown:
+ case HIDButton::LStickDown:
+ return Qt::Key_Down;
+ default:
+ return 0;
+ }
+}
+
+} // Anonymous namespace
+
+QtNXWebEngineView::QtNXWebEngineView(QWidget* parent, Core::System& system,
+ InputCommon::InputSubsystem* input_subsystem_)
+ : QWebEngineView(parent), input_subsystem{input_subsystem_},
+ url_interceptor(std::make_unique<UrlRequestInterceptor>()),
+ input_interpreter(std::make_unique<InputInterpreter>(system)),
+ default_profile{QWebEngineProfile::defaultProfile()},
+ global_settings{QWebEngineSettings::globalSettings()} {
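+    // The gamepad and window_nx scripts are injected into every frame at document creation so
+    // that they are available before any page script runs.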
+ QWebEngineScript gamepad;
+ QWebEngineScript window_nx;
+
+ gamepad.setName(QStringLiteral("gamepad_script.js"));
+ window_nx.setName(QStringLiteral("window_nx_script.js"));
+
+ gamepad.setSourceCode(QString::fromStdString(GAMEPAD_SCRIPT));
+ window_nx.setSourceCode(QString::fromStdString(WINDOW_NX_SCRIPT));
+
+ gamepad.setInjectionPoint(QWebEngineScript::DocumentCreation);
+ window_nx.setInjectionPoint(QWebEngineScript::DocumentCreation);
+
+ gamepad.setWorldId(QWebEngineScript::MainWorld);
+ window_nx.setWorldId(QWebEngineScript::MainWorld);
+
+ gamepad.setRunsOnSubFrames(true);
+ window_nx.setRunsOnSubFrames(true);
+
+ default_profile->scripts()->insert(gamepad);
+ default_profile->scripts()->insert(window_nx);
+
+ default_profile->setRequestInterceptor(url_interceptor.get());
+
+ global_settings->setAttribute(QWebEngineSettings::LocalContentCanAccessRemoteUrls, true);
+ global_settings->setAttribute(QWebEngineSettings::FullScreenSupportEnabled, true);
+ global_settings->setAttribute(QWebEngineSettings::AllowRunningInsecureContent, true);
+ global_settings->setAttribute(QWebEngineSettings::FocusOnNavigationEnabled, true);
+ global_settings->setAttribute(QWebEngineSettings::AllowWindowActivationFromJavaScript, true);
+ global_settings->setAttribute(QWebEngineSettings::ShowScrollBars, false);
+
+ global_settings->setFontFamily(QWebEngineSettings::StandardFont, QStringLiteral("Roboto"));
+
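+    // A window close request only finishes the applet when the current page matches the last
+    // URL seen by the request interceptor.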
+ connect(
+ page(), &QWebEnginePage::windowCloseRequested, page(),
+ [this] {
+ if (page()->url() == url_interceptor->GetRequestedURL()) {
+ SetFinished(true);
+ SetExitReason(Service::AM::Applets::WebExitReason::WindowClosed);
+ }
+ },
+ Qt::QueuedConnection);
+}
+
+QtNXWebEngineView::~QtNXWebEngineView() {
+ SetFinished(true);
+ StopInputThread();
+}
+
+void QtNXWebEngineView::LoadLocalWebPage(const std::string& main_url,
+ const std::string& additional_args) {
+ is_local = true;
+
+ LoadExtractedFonts();
+ SetUserAgent(UserAgent::WebApplet);
+ SetFinished(false);
+ SetExitReason(Service::AM::Applets::WebExitReason::EndButtonPressed);
+ SetLastURL("http://localhost/");
+ StartInputThread();
+
+ load(QUrl(QUrl::fromLocalFile(QString::fromStdString(main_url)).toString() +
+ QString::fromStdString(additional_args)));
+}
+
+void QtNXWebEngineView::LoadExternalWebPage(const std::string& main_url,
+ const std::string& additional_args) {
+ is_local = false;
+
+ SetUserAgent(UserAgent::WebApplet);
+ SetFinished(false);
+ SetExitReason(Service::AM::Applets::WebExitReason::EndButtonPressed);
+ SetLastURL("http://localhost/");
+ StartInputThread();
+
+ load(QUrl(QString::fromStdString(main_url) + QString::fromStdString(additional_args)));
+}
+
+void QtNXWebEngineView::SetUserAgent(UserAgent user_agent) {
+ const QString user_agent_str = [user_agent] {
+ switch (user_agent) {
+ case UserAgent::WebApplet:
+ default:
+ return QStringLiteral("WebApplet");
+ case UserAgent::ShopN:
+ return QStringLiteral("ShopN");
+ case UserAgent::LoginApplet:
+ return QStringLiteral("LoginApplet");
+ case UserAgent::ShareApplet:
+ return QStringLiteral("ShareApplet");
+ case UserAgent::LobbyApplet:
+ return QStringLiteral("LobbyApplet");
+ case UserAgent::WifiWebAuthApplet:
+ return QStringLiteral("WifiWebAuthApplet");
+ }
+ }();
+
+ QWebEngineProfile::defaultProfile()->setHttpUserAgent(
+ QStringLiteral("Mozilla/5.0 (Nintendo Switch; %1) AppleWebKit/606.4 "
+ "(KHTML, like Gecko) NF/6.0.1.15.4 NintendoBrowser/5.1.0.20389")
+ .arg(user_agent_str));
+}
+
+bool QtNXWebEngineView::IsFinished() const {
+ return finished;
+}
+
+void QtNXWebEngineView::SetFinished(bool finished_) {
+ finished = finished_;
+}
+
+Service::AM::Applets::WebExitReason QtNXWebEngineView::GetExitReason() const {
+ return exit_reason;
+}
+
+void QtNXWebEngineView::SetExitReason(Service::AM::Applets::WebExitReason exit_reason_) {
+ exit_reason = exit_reason_;
+}
+
+const std::string& QtNXWebEngineView::GetLastURL() const {
+ return last_url;
+}
+
+void QtNXWebEngineView::SetLastURL(std::string last_url_) {
+ last_url = std::move(last_url_);
+}
+
+QString QtNXWebEngineView::GetCurrentURL() const {
+ return url_interceptor->GetRequestedURL().toString();
+}
+
+void QtNXWebEngineView::hide() {
+ SetFinished(true);
+ StopInputThread();
+
+ QWidget::hide();
+}
+
+void QtNXWebEngineView::keyPressEvent(QKeyEvent* event) {
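+    // Only offline (local) pages forward raw key presses to the emulated keyboard through the
+    // input subsystem; external pages handle keyboard input themselves.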
+ if (is_local) {
+ input_subsystem->GetKeyboard()->PressKey(event->key());
+ }
+}
+
+void QtNXWebEngineView::keyReleaseEvent(QKeyEvent* event) {
+ if (is_local) {
+ input_subsystem->GetKeyboard()->ReleaseKey(event->key());
+ }
+}
+
+template <HIDButton... T>
+void QtNXWebEngineView::HandleWindowFooterButtonPressedOnce() {
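+    // For each footer button: if the page has not registered a yuzu_key_callbacks entry, fall
+    // back to synthesizing native key presses; otherwise invoke the page's registered callback.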
+ const auto f = [this](HIDButton button) {
+ if (input_interpreter->IsButtonPressedOnce(button)) {
+ page()->runJavaScript(
+ QStringLiteral("yuzu_key_callbacks[%1] == null;").arg(static_cast<u8>(button)),
+ [&](const QVariant& variant) {
+ if (variant.toBool()) {
+ switch (button) {
+ case HIDButton::A:
+ SendMultipleKeyPressEvents<Qt::Key_A, Qt::Key_Space, Qt::Key_Return>();
+ break;
+ case HIDButton::B:
+ SendKeyPressEvent(Qt::Key_B);
+ break;
+ case HIDButton::X:
+ SendKeyPressEvent(Qt::Key_X);
+ break;
+ case HIDButton::Y:
+ SendKeyPressEvent(Qt::Key_Y);
+ break;
+ default:
+ break;
+ }
+ }
+ });
+
+ page()->runJavaScript(
+ QStringLiteral("if (yuzu_key_callbacks[%1] != null) { yuzu_key_callbacks[%1](); }")
+ .arg(static_cast<u8>(button)));
+ }
+ };
+
+ (f(T), ...);
+}
+
+template <HIDButton... T>
+void QtNXWebEngineView::HandleWindowKeyButtonPressedOnce() {
+ const auto f = [this](HIDButton button) {
+ if (input_interpreter->IsButtonPressedOnce(button)) {
+ SendKeyPressEvent(HIDButtonToKey(button));
+ }
+ };
+
+ (f(T), ...);
+}
+
+template <HIDButton... T>
+void QtNXWebEngineView::HandleWindowKeyButtonHold() {
+ const auto f = [this](HIDButton button) {
+ if (input_interpreter->IsButtonHeld(button)) {
+ SendKeyPressEvent(HIDButtonToKey(button));
+ }
+ };
+
+ (f(T), ...);
+}
+
+void QtNXWebEngineView::SendKeyPressEvent(int key) {
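+    // Post a matching press/release pair to the view's focus proxy, which is the widget that
+    // actually receives input events for the web view. A key of 0 (no mapping) is ignored.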
+ if (key == 0) {
+ return;
+ }
+
+ QCoreApplication::postEvent(focusProxy(),
+ new QKeyEvent(QKeyEvent::KeyPress, key, Qt::NoModifier));
+ QCoreApplication::postEvent(focusProxy(),
+ new QKeyEvent(QKeyEvent::KeyRelease, key, Qt::NoModifier));
+}
+
+void QtNXWebEngineView::StartInputThread() {
+ if (input_thread_running) {
+ return;
+ }
+
+ input_thread_running = true;
+ input_thread = std::thread(&QtNXWebEngineView::InputThread, this);
+}
+
+void QtNXWebEngineView::StopInputThread() {
+ if (is_local) {
+ QWidget::releaseKeyboard();
+ }
+
+ input_thread_running = false;
+ if (input_thread.joinable()) {
+ input_thread.join();
+ }
+}
+
+void QtNXWebEngineView::InputThread() {
+ // Wait for 1 second before allowing any inputs to be processed.
+ std::this_thread::sleep_for(std::chrono::seconds(1));
+
+ if (is_local) {
+ QWidget::grabKeyboard();
+ }
+
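+    // Poll controller state roughly every 50 ms and translate it into footer callbacks and
+    // navigation key events for the page.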
+ while (input_thread_running) {
+ input_interpreter->PollInput();
+
+ HandleWindowFooterButtonPressedOnce<HIDButton::A, HIDButton::B, HIDButton::X, HIDButton::Y,
+ HIDButton::L, HIDButton::R>();
+
+ HandleWindowKeyButtonPressedOnce<HIDButton::DLeft, HIDButton::DUp, HIDButton::DRight,
+ HIDButton::DDown, HIDButton::LStickLeft,
+ HIDButton::LStickUp, HIDButton::LStickRight,
+ HIDButton::LStickDown>();
+
+ HandleWindowKeyButtonHold<HIDButton::DLeft, HIDButton::DUp, HIDButton::DRight,
+ HIDButton::DDown, HIDButton::LStickLeft, HIDButton::LStickUp,
+ HIDButton::LStickRight, HIDButton::LStickDown>();
+
+ std::this_thread::sleep_for(std::chrono::milliseconds(50));
+ }
+}
+
+void QtNXWebEngineView::LoadExtractedFonts() {
+ QWebEngineScript nx_font_css;
+ QWebEngineScript load_nx_font;
+
+ auto fonts_dir_str = Common::FS::PathToUTF8String(
+ Common::FS::GetYuzuPath(Common::FS::YuzuPath::CacheDir) / "fonts/");
+
+ std::replace(fonts_dir_str.begin(), fonts_dir_str.end(), '\\', '/');
+
+ const auto fonts_dir = QString::fromStdString(fonts_dir_str);
+
+ nx_font_css.setName(QStringLiteral("nx_font_css.js"));
+ load_nx_font.setName(QStringLiteral("load_nx_font.js"));
+
+ nx_font_css.setSourceCode(
+ QString::fromStdString(NX_FONT_CSS)
+ .arg(fonts_dir + QStringLiteral("FontStandard.ttf"))
+ .arg(fonts_dir + QStringLiteral("FontChineseSimplified.ttf"))
+ .arg(fonts_dir + QStringLiteral("FontExtendedChineseSimplified.ttf"))
+ .arg(fonts_dir + QStringLiteral("FontChineseTraditional.ttf"))
+ .arg(fonts_dir + QStringLiteral("FontKorean.ttf"))
+ .arg(fonts_dir + QStringLiteral("FontNintendoExtended.ttf"))
+ .arg(fonts_dir + QStringLiteral("FontNintendoExtended2.ttf")));
+ load_nx_font.setSourceCode(QString::fromStdString(LOAD_NX_FONT));
+
+ nx_font_css.setInjectionPoint(QWebEngineScript::DocumentReady);
+ load_nx_font.setInjectionPoint(QWebEngineScript::Deferred);
+
+ nx_font_css.setWorldId(QWebEngineScript::MainWorld);
+ load_nx_font.setWorldId(QWebEngineScript::MainWorld);
+
+ nx_font_css.setRunsOnSubFrames(true);
+ load_nx_font.setRunsOnSubFrames(true);
+
+ default_profile->scripts()->insert(nx_font_css);
+ default_profile->scripts()->insert(load_nx_font);
+
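+    // Re-run the font loader script shortly after every frame change so that newly loaded
+    // documents also pick up the extracted NX fonts.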
+ connect(
+ url_interceptor.get(), &UrlRequestInterceptor::FrameChanged, url_interceptor.get(),
+ [this] {
+ std::this_thread::sleep_for(std::chrono::milliseconds(50));
+ page()->runJavaScript(QString::fromStdString(LOAD_NX_FONT));
+ },
+ Qt::QueuedConnection);
+}
+
+#endif
+
+QtWebBrowser::QtWebBrowser(GMainWindow& main_window) {
+ connect(this, &QtWebBrowser::MainWindowOpenWebPage, &main_window,
+ &GMainWindow::WebBrowserOpenWebPage, Qt::QueuedConnection);
+ connect(&main_window, &GMainWindow::WebBrowserExtractOfflineRomFS, this,
+ &QtWebBrowser::MainWindowExtractOfflineRomFS, Qt::QueuedConnection);
+ connect(&main_window, &GMainWindow::WebBrowserClosed, this,
+ &QtWebBrowser::MainWindowWebBrowserClosed, Qt::QueuedConnection);
+}
+
+QtWebBrowser::~QtWebBrowser() = default;
+
+void QtWebBrowser::OpenLocalWebPage(
+ const std::string& local_url, std::function<void()> extract_romfs_callback_,
+ std::function<void(Service::AM::Applets::WebExitReason, std::string)> callback_) const {
+ extract_romfs_callback = std::move(extract_romfs_callback_);
+ callback = std::move(callback_);
+
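+    // Split the URL at the query separator so the page path and its arguments are passed to the
+    // main window separately.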
+ const auto index = local_url.find('?');
+
+ if (index == std::string::npos) {
+ emit MainWindowOpenWebPage(local_url, "", true);
+ } else {
+ emit MainWindowOpenWebPage(local_url.substr(0, index), local_url.substr(index), true);
+ }
+}
+
+void QtWebBrowser::OpenExternalWebPage(
+ const std::string& external_url,
+ std::function<void(Service::AM::Applets::WebExitReason, std::string)> callback_) const {
+ callback = std::move(callback_);
+
+ const auto index = external_url.find('?');
+
+ if (index == std::string::npos) {
+ emit MainWindowOpenWebPage(external_url, "", false);
+ } else {
+ emit MainWindowOpenWebPage(external_url.substr(0, index), external_url.substr(index),
+ false);
+ }
+}
+
+void QtWebBrowser::MainWindowExtractOfflineRomFS() {
+ extract_romfs_callback();
+}
+
+void QtWebBrowser::MainWindowWebBrowserClosed(Service::AM::Applets::WebExitReason exit_reason,
+ std::string last_url) {
+ callback(exit_reason, last_url);
+}
diff --git a/src/yuzu/applets/web_browser.h b/src/yuzu/applets/qt_web_browser.h
index 7ad07409f..7ad07409f 100644
--- a/src/yuzu/applets/web_browser.h
+++ b/src/yuzu/applets/qt_web_browser.h
diff --git a/src/yuzu/applets/web_browser_scripts.h b/src/yuzu/applets/qt_web_browser_scripts.h
index 992837a85..992837a85 100644
--- a/src/yuzu/applets/web_browser_scripts.h
+++ b/src/yuzu/applets/qt_web_browser_scripts.h
diff --git a/src/yuzu/applets/software_keyboard.cpp b/src/yuzu/applets/software_keyboard.cpp
deleted file mode 100644
index aa453a79f..000000000
--- a/src/yuzu/applets/software_keyboard.cpp
+++ /dev/null
@@ -1,1620 +0,0 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <QCursor>
-#include <QKeyEvent>
-#include <QScreen>
-
-#include "common/logging/log.h"
-#include "common/settings.h"
-#include "common/string_util.h"
-#include "core/core.h"
-#include "core/frontend/input_interpreter.h"
-#include "ui_software_keyboard.h"
-#include "yuzu/applets/software_keyboard.h"
-#include "yuzu/main.h"
-#include "yuzu/util/overlay_dialog.h"
-
-namespace {
-
-using namespace Service::AM::Applets;
-
-constexpr float BASE_HEADER_FONT_SIZE = 23.0f;
-constexpr float BASE_SUB_FONT_SIZE = 17.0f;
-constexpr float BASE_EDITOR_FONT_SIZE = 26.0f;
-constexpr float BASE_CHAR_BUTTON_FONT_SIZE = 28.0f;
-constexpr float BASE_LABEL_BUTTON_FONT_SIZE = 18.0f;
-constexpr float BASE_ICON_BUTTON_SIZE = 36.0f;
-[[maybe_unused]] constexpr float BASE_WIDTH = 1280.0f;
-constexpr float BASE_HEIGHT = 720.0f;
-
-} // Anonymous namespace
-
-QtSoftwareKeyboardDialog::QtSoftwareKeyboardDialog(
- QWidget* parent, Core::System& system_, bool is_inline_,
- Core::Frontend::KeyboardInitializeParameters initialize_parameters_)
- : QDialog(parent), ui{std::make_unique<Ui::QtSoftwareKeyboardDialog>()}, system{system_},
- is_inline{is_inline_}, initialize_parameters{std::move(initialize_parameters_)} {
- ui->setupUi(this);
-
- setWindowFlags(Qt::Dialog | Qt::FramelessWindowHint | Qt::WindowTitleHint |
- Qt::WindowSystemMenuHint | Qt::CustomizeWindowHint);
- setWindowModality(Qt::WindowModal);
- setAttribute(Qt::WA_DeleteOnClose);
- setAttribute(Qt::WA_TranslucentBackground);
-
- keyboard_buttons = {{
- {{
- {
- ui->button_1,
- ui->button_2,
- ui->button_3,
- ui->button_4,
- ui->button_5,
- ui->button_6,
- ui->button_7,
- ui->button_8,
- ui->button_9,
- ui->button_0,
- ui->button_minus,
- ui->button_backspace,
- },
- {
- ui->button_q,
- ui->button_w,
- ui->button_e,
- ui->button_r,
- ui->button_t,
- ui->button_y,
- ui->button_u,
- ui->button_i,
- ui->button_o,
- ui->button_p,
- ui->button_slash,
- ui->button_return,
- },
- {
- ui->button_a,
- ui->button_s,
- ui->button_d,
- ui->button_f,
- ui->button_g,
- ui->button_h,
- ui->button_j,
- ui->button_k,
- ui->button_l,
- ui->button_colon,
- ui->button_apostrophe,
- ui->button_return,
- },
- {
- ui->button_z,
- ui->button_x,
- ui->button_c,
- ui->button_v,
- ui->button_b,
- ui->button_n,
- ui->button_m,
- ui->button_comma,
- ui->button_dot,
- ui->button_question,
- ui->button_exclamation,
- ui->button_ok,
- },
- {
- ui->button_shift,
- ui->button_shift,
- ui->button_space,
- ui->button_space,
- ui->button_space,
- ui->button_space,
- ui->button_space,
- ui->button_space,
- ui->button_space,
- ui->button_space,
- ui->button_space,
- ui->button_ok,
- },
- }},
- {{
- {
- ui->button_hash,
- ui->button_left_bracket,
- ui->button_right_bracket,
- ui->button_dollar,
- ui->button_percent,
- ui->button_circumflex,
- ui->button_ampersand,
- ui->button_asterisk,
- ui->button_left_parenthesis,
- ui->button_right_parenthesis,
- ui->button_underscore,
- ui->button_backspace_shift,
- },
- {
- ui->button_q_shift,
- ui->button_w_shift,
- ui->button_e_shift,
- ui->button_r_shift,
- ui->button_t_shift,
- ui->button_y_shift,
- ui->button_u_shift,
- ui->button_i_shift,
- ui->button_o_shift,
- ui->button_p_shift,
- ui->button_at,
- ui->button_return_shift,
- },
- {
- ui->button_a_shift,
- ui->button_s_shift,
- ui->button_d_shift,
- ui->button_f_shift,
- ui->button_g_shift,
- ui->button_h_shift,
- ui->button_j_shift,
- ui->button_k_shift,
- ui->button_l_shift,
- ui->button_semicolon,
- ui->button_quotation,
- ui->button_return_shift,
- },
- {
- ui->button_z_shift,
- ui->button_x_shift,
- ui->button_c_shift,
- ui->button_v_shift,
- ui->button_b_shift,
- ui->button_n_shift,
- ui->button_m_shift,
- ui->button_less_than,
- ui->button_greater_than,
- ui->button_plus,
- ui->button_equal,
- ui->button_ok_shift,
- },
- {
- ui->button_shift_shift,
- ui->button_shift_shift,
- ui->button_space_shift,
- ui->button_space_shift,
- ui->button_space_shift,
- ui->button_space_shift,
- ui->button_space_shift,
- ui->button_space_shift,
- ui->button_space_shift,
- ui->button_space_shift,
- ui->button_space_shift,
- ui->button_ok_shift,
- },
- }},
- }};
-
- numberpad_buttons = {{
- {
- ui->button_1_num,
- ui->button_2_num,
- ui->button_3_num,
- ui->button_backspace_num,
- },
- {
- ui->button_4_num,
- ui->button_5_num,
- ui->button_6_num,
- ui->button_ok_num,
- },
- {
- ui->button_7_num,
- ui->button_8_num,
- ui->button_9_num,
- ui->button_ok_num,
- },
- {
- nullptr,
- ui->button_0_num,
- nullptr,
- ui->button_ok_num,
- },
- }};
-
- all_buttons = {
- ui->button_1,
- ui->button_2,
- ui->button_3,
- ui->button_4,
- ui->button_5,
- ui->button_6,
- ui->button_7,
- ui->button_8,
- ui->button_9,
- ui->button_0,
- ui->button_minus,
- ui->button_backspace,
- ui->button_q,
- ui->button_w,
- ui->button_e,
- ui->button_r,
- ui->button_t,
- ui->button_y,
- ui->button_u,
- ui->button_i,
- ui->button_o,
- ui->button_p,
- ui->button_slash,
- ui->button_return,
- ui->button_a,
- ui->button_s,
- ui->button_d,
- ui->button_f,
- ui->button_g,
- ui->button_h,
- ui->button_j,
- ui->button_k,
- ui->button_l,
- ui->button_colon,
- ui->button_apostrophe,
- ui->button_z,
- ui->button_x,
- ui->button_c,
- ui->button_v,
- ui->button_b,
- ui->button_n,
- ui->button_m,
- ui->button_comma,
- ui->button_dot,
- ui->button_question,
- ui->button_exclamation,
- ui->button_ok,
- ui->button_shift,
- ui->button_space,
- ui->button_hash,
- ui->button_left_bracket,
- ui->button_right_bracket,
- ui->button_dollar,
- ui->button_percent,
- ui->button_circumflex,
- ui->button_ampersand,
- ui->button_asterisk,
- ui->button_left_parenthesis,
- ui->button_right_parenthesis,
- ui->button_underscore,
- ui->button_backspace_shift,
- ui->button_q_shift,
- ui->button_w_shift,
- ui->button_e_shift,
- ui->button_r_shift,
- ui->button_t_shift,
- ui->button_y_shift,
- ui->button_u_shift,
- ui->button_i_shift,
- ui->button_o_shift,
- ui->button_p_shift,
- ui->button_at,
- ui->button_return_shift,
- ui->button_a_shift,
- ui->button_s_shift,
- ui->button_d_shift,
- ui->button_f_shift,
- ui->button_g_shift,
- ui->button_h_shift,
- ui->button_j_shift,
- ui->button_k_shift,
- ui->button_l_shift,
- ui->button_semicolon,
- ui->button_quotation,
- ui->button_z_shift,
- ui->button_x_shift,
- ui->button_c_shift,
- ui->button_v_shift,
- ui->button_b_shift,
- ui->button_n_shift,
- ui->button_m_shift,
- ui->button_less_than,
- ui->button_greater_than,
- ui->button_plus,
- ui->button_equal,
- ui->button_ok_shift,
- ui->button_shift_shift,
- ui->button_space_shift,
- ui->button_1_num,
- ui->button_2_num,
- ui->button_3_num,
- ui->button_backspace_num,
- ui->button_4_num,
- ui->button_5_num,
- ui->button_6_num,
- ui->button_ok_num,
- ui->button_7_num,
- ui->button_8_num,
- ui->button_9_num,
- ui->button_0_num,
- };
-
- SetupMouseHover();
-
- if (!initialize_parameters.ok_text.empty()) {
- ui->button_ok->setText(QString::fromStdU16String(initialize_parameters.ok_text));
- }
-
- ui->label_header->setText(QString::fromStdU16String(initialize_parameters.header_text));
- ui->label_sub->setText(QString::fromStdU16String(initialize_parameters.sub_text));
-
- current_text = initialize_parameters.initial_text;
- cursor_position = initialize_parameters.initial_cursor_position;
-
- SetTextDrawType();
-
- for (auto* button : all_buttons) {
- connect(button, &QPushButton::clicked, this, [this, button](bool) {
- if (is_inline) {
- InlineKeyboardButtonClicked(button);
- } else {
- NormalKeyboardButtonClicked(button);
- }
- });
- }
-
- // TODO (Morph): Remove this when InputInterpreter no longer relies on the HID backend
- if (system.IsPoweredOn()) {
- input_interpreter = std::make_unique<InputInterpreter>(system);
- }
-}
-
-QtSoftwareKeyboardDialog::~QtSoftwareKeyboardDialog() {
- StopInputThread();
-}
-
-void QtSoftwareKeyboardDialog::ShowNormalKeyboard(QPoint pos, QSize size) {
- if (isVisible()) {
- return;
- }
-
- MoveAndResizeWindow(pos, size);
-
- SetKeyboardType();
- SetPasswordMode();
- SetControllerImage();
- DisableKeyboardButtons();
- SetBackspaceOkEnabled();
-
- open();
-}
-
-void QtSoftwareKeyboardDialog::ShowTextCheckDialog(
- Service::AM::Applets::SwkbdTextCheckResult text_check_result,
- std::u16string text_check_message) {
- switch (text_check_result) {
- case SwkbdTextCheckResult::Success:
- case SwkbdTextCheckResult::Silent:
- default:
- break;
- case SwkbdTextCheckResult::Failure: {
- StopInputThread();
-
- OverlayDialog dialog(this, system, QString{}, QString::fromStdU16String(text_check_message),
- QString{}, tr("OK"), Qt::AlignCenter);
- dialog.exec();
-
- StartInputThread();
- break;
- }
- case SwkbdTextCheckResult::Confirm: {
- StopInputThread();
-
- OverlayDialog dialog(this, system, QString{}, QString::fromStdU16String(text_check_message),
- tr("Cancel"), tr("OK"), Qt::AlignCenter);
- if (dialog.exec() != QDialog::Accepted) {
- StartInputThread();
- break;
- }
-
- auto text = ui->topOSK->currentIndex() == 1
- ? ui->text_edit_osk->toPlainText().toStdU16String()
- : ui->line_edit_osk->text().toStdU16String();
-
- emit SubmitNormalText(SwkbdResult::Ok, std::move(text));
- break;
- }
- }
-}
-
-void QtSoftwareKeyboardDialog::ShowInlineKeyboard(
- Core::Frontend::InlineAppearParameters appear_parameters, QPoint pos, QSize size) {
- MoveAndResizeWindow(pos, size);
-
- ui->topOSK->setStyleSheet(QStringLiteral("background: rgba(0, 0, 0, 0);"));
-
- ui->headerOSK->hide();
- ui->subOSK->hide();
- ui->inputOSK->hide();
- ui->charactersOSK->hide();
- ui->inputBoxOSK->hide();
- ui->charactersBoxOSK->hide();
-
- initialize_parameters.max_text_length = appear_parameters.max_text_length;
- initialize_parameters.min_text_length = appear_parameters.min_text_length;
- initialize_parameters.type = appear_parameters.type;
- initialize_parameters.key_disable_flags = appear_parameters.key_disable_flags;
- initialize_parameters.enable_backspace_button = appear_parameters.enable_backspace_button;
- initialize_parameters.enable_return_button = appear_parameters.enable_return_button;
- initialize_parameters.disable_cancel_button = initialize_parameters.disable_cancel_button;
-
- SetKeyboardType();
- SetControllerImage();
- DisableKeyboardButtons();
- SetBackspaceOkEnabled();
-
- open();
-}
-
-void QtSoftwareKeyboardDialog::HideInlineKeyboard() {
- StopInputThread();
- QDialog::hide();
-}
-
-void QtSoftwareKeyboardDialog::InlineTextChanged(
- Core::Frontend::InlineTextParameters text_parameters) {
- current_text = text_parameters.input_text;
- cursor_position = text_parameters.cursor_position;
-
- SetBackspaceOkEnabled();
-}
-
-void QtSoftwareKeyboardDialog::ExitKeyboard() {
- StopInputThread();
- QDialog::done(QDialog::Accepted);
-}
-
-void QtSoftwareKeyboardDialog::open() {
- QDialog::open();
-
- row = 0;
- column = 0;
-
- const auto* const curr_button =
- keyboard_buttons[static_cast<int>(bottom_osk_index)][row][column];
-
- // This is a workaround for setFocus() randomly not showing focus in the UI
- QCursor::setPos(curr_button->mapToGlobal(curr_button->rect().center()));
-
- StartInputThread();
-}
-
-void QtSoftwareKeyboardDialog::reject() {
- // Pressing the ESC key in a dialog calls QDialog::reject().
- // We will override this behavior to the "Cancel" action on the software keyboard.
- TranslateButtonPress(HIDButton::X);
-}
-
-void QtSoftwareKeyboardDialog::keyPressEvent(QKeyEvent* event) {
- if (!is_inline) {
- QDialog::keyPressEvent(event);
- return;
- }
-
- const auto entered_key = event->key();
-
- switch (entered_key) {
- case Qt::Key_Escape:
- QDialog::keyPressEvent(event);
- return;
- case Qt::Key_Backspace:
- switch (bottom_osk_index) {
- case BottomOSKIndex::LowerCase:
- ui->button_backspace->click();
- break;
- case BottomOSKIndex::UpperCase:
- ui->button_backspace_shift->click();
- break;
- case BottomOSKIndex::NumberPad:
- ui->button_backspace_num->click();
- break;
- default:
- break;
- }
- return;
- case Qt::Key_Return:
- switch (bottom_osk_index) {
- case BottomOSKIndex::LowerCase:
- ui->button_ok->click();
- break;
- case BottomOSKIndex::UpperCase:
- ui->button_ok_shift->click();
- break;
- case BottomOSKIndex::NumberPad:
- ui->button_ok_num->click();
- break;
- default:
- break;
- }
- return;
- case Qt::Key_Left:
- MoveTextCursorDirection(Direction::Left);
- return;
- case Qt::Key_Right:
- MoveTextCursorDirection(Direction::Right);
- return;
- default:
- break;
- }
-
- const auto entered_text = event->text();
-
- if (entered_text.isEmpty()) {
- return;
- }
-
- InlineTextInsertString(entered_text.toStdU16String());
-}
-
-void QtSoftwareKeyboardDialog::MoveAndResizeWindow(QPoint pos, QSize size) {
- QDialog::move(pos);
- QDialog::resize(size);
-
- // High DPI
- const float dpi_scale = qApp->screenAt(pos)->logicalDotsPerInch() / 96.0f;
-
- RescaleKeyboardElements(size.width(), size.height(), dpi_scale);
-}
-
-void QtSoftwareKeyboardDialog::RescaleKeyboardElements(float width, float height, float dpi_scale) {
- const auto header_font_size = BASE_HEADER_FONT_SIZE * (height / BASE_HEIGHT) / dpi_scale;
- const auto sub_font_size = BASE_SUB_FONT_SIZE * (height / BASE_HEIGHT) / dpi_scale;
- const auto editor_font_size = BASE_EDITOR_FONT_SIZE * (height / BASE_HEIGHT) / dpi_scale;
- const auto char_button_font_size =
- BASE_CHAR_BUTTON_FONT_SIZE * (height / BASE_HEIGHT) / dpi_scale;
- const auto label_button_font_size =
- BASE_LABEL_BUTTON_FONT_SIZE * (height / BASE_HEIGHT) / dpi_scale;
-
- QFont header_font(QStringLiteral("MS Shell Dlg 2"), header_font_size, QFont::Normal);
- QFont sub_font(QStringLiteral("MS Shell Dlg 2"), sub_font_size, QFont::Normal);
- QFont editor_font(QStringLiteral("MS Shell Dlg 2"), editor_font_size, QFont::Normal);
- QFont char_button_font(QStringLiteral("MS Shell Dlg 2"), char_button_font_size, QFont::Normal);
- QFont label_button_font(QStringLiteral("MS Shell Dlg 2"), label_button_font_size,
- QFont::Normal);
-
- ui->label_header->setFont(header_font);
- ui->label_sub->setFont(sub_font);
- ui->line_edit_osk->setFont(editor_font);
- ui->text_edit_osk->setFont(editor_font);
- ui->label_characters->setFont(sub_font);
- ui->label_characters_box->setFont(sub_font);
-
- ui->label_shift->setFont(label_button_font);
- ui->label_shift_shift->setFont(label_button_font);
- ui->label_cancel->setFont(label_button_font);
- ui->label_cancel_shift->setFont(label_button_font);
- ui->label_cancel_num->setFont(label_button_font);
- ui->label_enter->setFont(label_button_font);
- ui->label_enter_shift->setFont(label_button_font);
- ui->label_enter_num->setFont(label_button_font);
-
- for (auto* button : all_buttons) {
- if (button == ui->button_return || button == ui->button_return_shift) {
- button->setFont(label_button_font);
- continue;
- }
-
- if (button == ui->button_space || button == ui->button_space_shift) {
- button->setFont(label_button_font);
- continue;
- }
-
- if (button == ui->button_shift || button == ui->button_shift_shift) {
- button->setFont(label_button_font);
- button->setIconSize(QSize(BASE_ICON_BUTTON_SIZE, BASE_ICON_BUTTON_SIZE) *
- (height / BASE_HEIGHT));
- continue;
- }
-
- if (button == ui->button_backspace || button == ui->button_backspace_shift ||
- button == ui->button_backspace_num) {
- button->setFont(label_button_font);
- button->setIconSize(QSize(BASE_ICON_BUTTON_SIZE, BASE_ICON_BUTTON_SIZE) *
- (height / BASE_HEIGHT));
- continue;
- }
-
- if (button == ui->button_ok || button == ui->button_ok_shift ||
- button == ui->button_ok_num) {
- button->setFont(label_button_font);
- continue;
- }
-
- button->setFont(char_button_font);
- }
-}
-
-void QtSoftwareKeyboardDialog::SetKeyboardType() {
- switch (initialize_parameters.type) {
- case SwkbdType::Normal:
- case SwkbdType::Qwerty:
- case SwkbdType::Unknown3:
- case SwkbdType::Latin:
- case SwkbdType::SimplifiedChinese:
- case SwkbdType::TraditionalChinese:
- case SwkbdType::Korean:
- default: {
- bottom_osk_index = BottomOSKIndex::LowerCase;
- ui->bottomOSK->setCurrentIndex(static_cast<int>(bottom_osk_index));
-
- ui->verticalLayout_2->setStretch(0, 320);
- ui->verticalLayout_2->setStretch(1, 400);
-
- ui->gridLineOSK->setRowStretch(5, 94);
- ui->gridBoxOSK->setRowStretch(2, 81);
- break;
- }
- case SwkbdType::NumberPad: {
- bottom_osk_index = BottomOSKIndex::NumberPad;
- ui->bottomOSK->setCurrentIndex(static_cast<int>(bottom_osk_index));
-
- ui->verticalLayout_2->setStretch(0, 370);
- ui->verticalLayout_2->setStretch(1, 350);
-
- ui->gridLineOSK->setRowStretch(5, 144);
- ui->gridBoxOSK->setRowStretch(2, 131);
- break;
- }
- }
-}
-
-void QtSoftwareKeyboardDialog::SetPasswordMode() {
- switch (initialize_parameters.password_mode) {
- case SwkbdPasswordMode::Disabled:
- default:
- ui->line_edit_osk->setEchoMode(QLineEdit::Normal);
- break;
- case SwkbdPasswordMode::Enabled:
- ui->line_edit_osk->setEchoMode(QLineEdit::Password);
- break;
- }
-}
-
-void QtSoftwareKeyboardDialog::SetTextDrawType() {
- switch (initialize_parameters.text_draw_type) {
- case SwkbdTextDrawType::Line:
- case SwkbdTextDrawType::DownloadCode: {
- ui->topOSK->setCurrentIndex(0);
-
- if (initialize_parameters.max_text_length <= 10) {
- ui->gridLineOSK->setColumnStretch(0, 390);
- ui->gridLineOSK->setColumnStretch(1, 500);
- ui->gridLineOSK->setColumnStretch(2, 390);
- } else {
- ui->gridLineOSK->setColumnStretch(0, 130);
- ui->gridLineOSK->setColumnStretch(1, 1020);
- ui->gridLineOSK->setColumnStretch(2, 130);
- }
-
- if (is_inline) {
- return;
- }
-
- connect(ui->line_edit_osk, &QLineEdit::textChanged, [this](const QString& changed_string) {
- const auto is_valid = ValidateInputText(changed_string);
-
- const auto text_length = static_cast<u32>(changed_string.length());
-
- ui->label_characters->setText(QStringLiteral("%1/%2")
- .arg(text_length)
- .arg(initialize_parameters.max_text_length));
-
- ui->button_ok->setEnabled(is_valid);
- ui->button_ok_shift->setEnabled(is_valid);
- ui->button_ok_num->setEnabled(is_valid);
-
- ui->line_edit_osk->setFocus();
- });
-
- connect(ui->line_edit_osk, &QLineEdit::cursorPositionChanged,
- [this](int old_cursor_position, int new_cursor_position) {
- ui->button_backspace->setEnabled(
- initialize_parameters.enable_backspace_button && new_cursor_position > 0);
- ui->button_backspace_shift->setEnabled(
- initialize_parameters.enable_backspace_button && new_cursor_position > 0);
- ui->button_backspace_num->setEnabled(
- initialize_parameters.enable_backspace_button && new_cursor_position > 0);
-
- ui->line_edit_osk->setFocus();
- });
-
- connect(
- ui->line_edit_osk, &QLineEdit::returnPressed, this,
- [this] { TranslateButtonPress(HIDButton::Plus); }, Qt::QueuedConnection);
-
- ui->line_edit_osk->setPlaceholderText(
- QString::fromStdU16String(initialize_parameters.guide_text));
- ui->line_edit_osk->setText(QString::fromStdU16String(initialize_parameters.initial_text));
- ui->line_edit_osk->setMaxLength(initialize_parameters.max_text_length);
- ui->line_edit_osk->setCursorPosition(initialize_parameters.initial_cursor_position);
-
- ui->label_characters->setText(QStringLiteral("%1/%2")
- .arg(initialize_parameters.initial_text.size())
- .arg(initialize_parameters.max_text_length));
- break;
- }
- case SwkbdTextDrawType::Box:
- default: {
- ui->topOSK->setCurrentIndex(1);
-
- if (is_inline) {
- return;
- }
-
- connect(ui->text_edit_osk, &QTextEdit::textChanged, [this] {
- if (static_cast<u32>(ui->text_edit_osk->toPlainText().length()) >
- initialize_parameters.max_text_length) {
- auto text_cursor = ui->text_edit_osk->textCursor();
- ui->text_edit_osk->setTextCursor(text_cursor);
- text_cursor.deletePreviousChar();
- }
-
- const auto is_valid = ValidateInputText(ui->text_edit_osk->toPlainText());
-
- const auto text_length = static_cast<u32>(ui->text_edit_osk->toPlainText().length());
-
- ui->label_characters_box->setText(QStringLiteral("%1/%2")
- .arg(text_length)
- .arg(initialize_parameters.max_text_length));
-
- ui->button_ok->setEnabled(is_valid);
- ui->button_ok_shift->setEnabled(is_valid);
- ui->button_ok_num->setEnabled(is_valid);
-
- ui->text_edit_osk->setFocus();
- });
-
- connect(ui->text_edit_osk, &QTextEdit::cursorPositionChanged, [this] {
- const auto new_cursor_position = ui->text_edit_osk->textCursor().position();
-
- ui->button_backspace->setEnabled(initialize_parameters.enable_backspace_button &&
- new_cursor_position > 0);
- ui->button_backspace_shift->setEnabled(initialize_parameters.enable_backspace_button &&
- new_cursor_position > 0);
- ui->button_backspace_num->setEnabled(initialize_parameters.enable_backspace_button &&
- new_cursor_position > 0);
-
- ui->text_edit_osk->setFocus();
- });
-
- ui->text_edit_osk->setPlaceholderText(
- QString::fromStdU16String(initialize_parameters.guide_text));
- ui->text_edit_osk->setText(QString::fromStdU16String(initialize_parameters.initial_text));
- ui->text_edit_osk->moveCursor(initialize_parameters.initial_cursor_position == 0
- ? QTextCursor::Start
- : QTextCursor::End);
-
- ui->label_characters_box->setText(QStringLiteral("%1/%2")
- .arg(initialize_parameters.initial_text.size())
- .arg(initialize_parameters.max_text_length));
- break;
- }
- }
-}
-
-void QtSoftwareKeyboardDialog::SetControllerImage() {
- const auto controller_type = Settings::values.players.GetValue()[8].connected
- ? Settings::values.players.GetValue()[8].controller_type
- : Settings::values.players.GetValue()[0].controller_type;
-
- const QString theme = [] {
- if (QIcon::themeName().contains(QStringLiteral("dark")) ||
- QIcon::themeName().contains(QStringLiteral("midnight"))) {
- return QStringLiteral("_dark");
- } else {
- return QString{};
- }
- }();
-
- switch (controller_type) {
- case Settings::ControllerType::ProController:
- case Settings::ControllerType::GameCube:
- ui->icon_controller->setStyleSheet(
- QStringLiteral("image: url(:/overlay/controller_pro%1.png);").arg(theme));
- ui->icon_controller_shift->setStyleSheet(
- QStringLiteral("image: url(:/overlay/controller_pro%1.png);").arg(theme));
- ui->icon_controller_num->setStyleSheet(
- QStringLiteral("image: url(:/overlay/controller_pro%1.png);").arg(theme));
- break;
- case Settings::ControllerType::DualJoyconDetached:
- ui->icon_controller->setStyleSheet(
- QStringLiteral("image: url(:/overlay/controller_dual_joycon%1.png);").arg(theme));
- ui->icon_controller_shift->setStyleSheet(
- QStringLiteral("image: url(:/overlay/controller_dual_joycon%1.png);").arg(theme));
- ui->icon_controller_num->setStyleSheet(
- QStringLiteral("image: url(:/overlay/controller_dual_joycon%1.png);").arg(theme));
- break;
- case Settings::ControllerType::LeftJoycon:
- ui->icon_controller->setStyleSheet(
- QStringLiteral("image: url(:/overlay/controller_single_joycon_left%1.png);")
- .arg(theme));
- ui->icon_controller_shift->setStyleSheet(
- QStringLiteral("image: url(:/overlay/controller_single_joycon_left%1.png);")
- .arg(theme));
- ui->icon_controller_num->setStyleSheet(
- QStringLiteral("image: url(:/overlay/controller_single_joycon_left%1.png);")
- .arg(theme));
- break;
- case Settings::ControllerType::RightJoycon:
- ui->icon_controller->setStyleSheet(
- QStringLiteral("image: url(:/overlay/controller_single_joycon_right%1.png);")
- .arg(theme));
- ui->icon_controller_shift->setStyleSheet(
- QStringLiteral("image: url(:/overlay/controller_single_joycon_right%1.png);")
- .arg(theme));
- ui->icon_controller_num->setStyleSheet(
- QStringLiteral("image: url(:/overlay/controller_single_joycon_right%1.png);")
- .arg(theme));
- break;
- case Settings::ControllerType::Handheld:
- ui->icon_controller->setStyleSheet(
- QStringLiteral("image: url(:/overlay/controller_handheld%1.png);").arg(theme));
- ui->icon_controller_shift->setStyleSheet(
- QStringLiteral("image: url(:/overlay/controller_handheld%1.png);").arg(theme));
- ui->icon_controller_num->setStyleSheet(
- QStringLiteral("image: url(:/overlay/controller_handheld%1.png);").arg(theme));
- break;
- default:
- break;
- }
-}
-
-void QtSoftwareKeyboardDialog::DisableKeyboardButtons() {
- switch (bottom_osk_index) {
- case BottomOSKIndex::LowerCase:
- case BottomOSKIndex::UpperCase:
- default: {
- for (const auto& keys : keyboard_buttons) {
- for (const auto& rows : keys) {
- for (auto* button : rows) {
- if (!button) {
- continue;
- }
-
- button->setEnabled(true);
- }
- }
- }
-
- const auto& key_disable_flags = initialize_parameters.key_disable_flags;
-
- ui->button_space->setDisabled(key_disable_flags.space);
- ui->button_space_shift->setDisabled(key_disable_flags.space);
-
- ui->button_at->setDisabled(key_disable_flags.at || key_disable_flags.username);
-
- ui->button_percent->setDisabled(key_disable_flags.percent || key_disable_flags.username);
-
- ui->button_slash->setDisabled(key_disable_flags.slash);
-
- ui->button_1->setDisabled(key_disable_flags.numbers);
- ui->button_2->setDisabled(key_disable_flags.numbers);
- ui->button_3->setDisabled(key_disable_flags.numbers);
- ui->button_4->setDisabled(key_disable_flags.numbers);
- ui->button_5->setDisabled(key_disable_flags.numbers);
- ui->button_6->setDisabled(key_disable_flags.numbers);
- ui->button_7->setDisabled(key_disable_flags.numbers);
- ui->button_8->setDisabled(key_disable_flags.numbers);
- ui->button_9->setDisabled(key_disable_flags.numbers);
- ui->button_0->setDisabled(key_disable_flags.numbers);
-
- ui->button_return->setEnabled(initialize_parameters.enable_return_button);
- ui->button_return_shift->setEnabled(initialize_parameters.enable_return_button);
- break;
- }
- case BottomOSKIndex::NumberPad: {
- for (const auto& rows : numberpad_buttons) {
- for (auto* button : rows) {
- if (!button) {
- continue;
- }
-
- button->setEnabled(true);
- }
- }
- break;
- }
- }
-}
-
-void QtSoftwareKeyboardDialog::SetBackspaceOkEnabled() {
- if (is_inline) {
- ui->button_ok->setEnabled(current_text.size() >= initialize_parameters.min_text_length);
- ui->button_ok_shift->setEnabled(current_text.size() >=
- initialize_parameters.min_text_length);
- ui->button_ok_num->setEnabled(current_text.size() >= initialize_parameters.min_text_length);
-
- ui->button_backspace->setEnabled(initialize_parameters.enable_backspace_button &&
- cursor_position > 0);
- ui->button_backspace_shift->setEnabled(initialize_parameters.enable_backspace_button &&
- cursor_position > 0);
- ui->button_backspace_num->setEnabled(initialize_parameters.enable_backspace_button &&
- cursor_position > 0);
- } else {
- const auto text_length = [this] {
- if (ui->topOSK->currentIndex() == 1) {
- return static_cast<u32>(ui->text_edit_osk->toPlainText().length());
- } else {
- return static_cast<u32>(ui->line_edit_osk->text().length());
- }
- }();
-
- const auto normal_cursor_position = [this] {
- if (ui->topOSK->currentIndex() == 1) {
- return ui->text_edit_osk->textCursor().position();
- } else {
- return ui->line_edit_osk->cursorPosition();
- }
- }();
-
- ui->button_ok->setEnabled(text_length >= initialize_parameters.min_text_length);
- ui->button_ok_shift->setEnabled(text_length >= initialize_parameters.min_text_length);
- ui->button_ok_num->setEnabled(text_length >= initialize_parameters.min_text_length);
-
- ui->button_backspace->setEnabled(initialize_parameters.enable_backspace_button &&
- normal_cursor_position > 0);
- ui->button_backspace_shift->setEnabled(initialize_parameters.enable_backspace_button &&
- normal_cursor_position > 0);
- ui->button_backspace_num->setEnabled(initialize_parameters.enable_backspace_button &&
- normal_cursor_position > 0);
- }
-}
-
-bool QtSoftwareKeyboardDialog::ValidateInputText(const QString& input_text) {
- const auto& key_disable_flags = initialize_parameters.key_disable_flags;
-
- const auto input_text_length = static_cast<u32>(input_text.length());
-
- if (input_text_length < initialize_parameters.min_text_length ||
- input_text_length > initialize_parameters.max_text_length) {
- return false;
- }
-
- if (key_disable_flags.space && input_text.contains(QLatin1Char{' '})) {
- return false;
- }
-
- if ((key_disable_flags.at || key_disable_flags.username) &&
- input_text.contains(QLatin1Char{'@'})) {
- return false;
- }
-
- if ((key_disable_flags.percent || key_disable_flags.username) &&
- input_text.contains(QLatin1Char{'%'})) {
- return false;
- }
-
- if (key_disable_flags.slash && input_text.contains(QLatin1Char{'/'})) {
- return false;
- }
-
- if ((key_disable_flags.backslash || key_disable_flags.username) &&
- input_text.contains(QLatin1Char('\\'))) {
- return false;
- }
-
- if (key_disable_flags.numbers &&
- std::any_of(input_text.begin(), input_text.end(), [](QChar c) { return c.isDigit(); })) {
- return false;
- }
-
- if (bottom_osk_index == BottomOSKIndex::NumberPad &&
- std::any_of(input_text.begin(), input_text.end(), [](QChar c) { return !c.isDigit(); })) {
- return false;
- }
-
- return true;
-}
-
-void QtSoftwareKeyboardDialog::ChangeBottomOSKIndex() {
- switch (bottom_osk_index) {
- case BottomOSKIndex::LowerCase:
- bottom_osk_index = BottomOSKIndex::UpperCase;
- ui->bottomOSK->setCurrentIndex(static_cast<int>(bottom_osk_index));
-
- ui->button_shift_shift->setStyleSheet(
- QStringLiteral("image: url(:/overlay/osk_button_shift_lock_off.png);"
- "\nimage-position: left;"));
-
- ui->button_shift_shift->setIconSize(ui->button_shift->iconSize());
- ui->button_backspace_shift->setIconSize(ui->button_backspace->iconSize());
- break;
- case BottomOSKIndex::UpperCase:
- if (caps_lock_enabled) {
- caps_lock_enabled = false;
-
- ui->button_shift_shift->setStyleSheet(
- QStringLiteral("image: url(:/overlay/osk_button_shift_lock_off.png);"
- "\nimage-position: left;"));
-
- ui->button_shift_shift->setIconSize(ui->button_shift->iconSize());
- ui->button_backspace_shift->setIconSize(ui->button_backspace->iconSize());
-
- ui->label_shift_shift->setText(QStringLiteral("Caps Lock"));
-
- bottom_osk_index = BottomOSKIndex::LowerCase;
- ui->bottomOSK->setCurrentIndex(static_cast<int>(bottom_osk_index));
- } else {
- caps_lock_enabled = true;
-
- ui->button_shift_shift->setStyleSheet(
- QStringLiteral("image: url(:/overlay/osk_button_shift_lock_on.png);"
- "\nimage-position: left;"));
-
- ui->button_shift_shift->setIconSize(ui->button_shift->iconSize());
- ui->button_backspace_shift->setIconSize(ui->button_backspace->iconSize());
-
- ui->label_shift_shift->setText(QStringLiteral("Caps Lock Off"));
- }
- break;
- case BottomOSKIndex::NumberPad:
- default:
- break;
- }
-}
-
-void QtSoftwareKeyboardDialog::NormalKeyboardButtonClicked(QPushButton* button) {
- if (button == ui->button_ampersand) {
- if (ui->topOSK->currentIndex() == 1) {
- ui->text_edit_osk->insertPlainText(QStringLiteral("&"));
- } else {
- ui->line_edit_osk->insert(QStringLiteral("&"));
- }
- return;
- }
-
- if (button == ui->button_return || button == ui->button_return_shift) {
- if (ui->topOSK->currentIndex() == 1) {
- ui->text_edit_osk->insertPlainText(QStringLiteral("\n"));
- } else {
- ui->line_edit_osk->insert(QStringLiteral("\n"));
- }
- return;
- }
-
- if (button == ui->button_space || button == ui->button_space_shift) {
- if (ui->topOSK->currentIndex() == 1) {
- ui->text_edit_osk->insertPlainText(QStringLiteral(" "));
- } else {
- ui->line_edit_osk->insert(QStringLiteral(" "));
- }
- return;
- }
-
- if (button == ui->button_shift || button == ui->button_shift_shift) {
- ChangeBottomOSKIndex();
- return;
- }
-
- if (button == ui->button_backspace || button == ui->button_backspace_shift ||
- button == ui->button_backspace_num) {
- if (ui->topOSK->currentIndex() == 1) {
- auto text_cursor = ui->text_edit_osk->textCursor();
- ui->text_edit_osk->setTextCursor(text_cursor);
- text_cursor.deletePreviousChar();
- } else {
- ui->line_edit_osk->backspace();
- }
- return;
- }
-
- if (button == ui->button_ok || button == ui->button_ok_shift || button == ui->button_ok_num) {
- auto text = ui->topOSK->currentIndex() == 1
- ? ui->text_edit_osk->toPlainText().toStdU16String()
- : ui->line_edit_osk->text().toStdU16String();
-
- emit SubmitNormalText(SwkbdResult::Ok, std::move(text));
- return;
- }
-
- if (ui->topOSK->currentIndex() == 1) {
- ui->text_edit_osk->insertPlainText(button->text());
- } else {
- ui->line_edit_osk->insert(button->text());
- }
-
- // Revert the keyboard to lowercase if the shift key is active.
- if (bottom_osk_index == BottomOSKIndex::UpperCase && !caps_lock_enabled) {
- // This is set to true since ChangeBottomOSKIndex will change bottom_osk_index to LowerCase
- // if bottom_osk_index is UpperCase and caps_lock_enabled is true.
- caps_lock_enabled = true;
- ChangeBottomOSKIndex();
- }
-}
-
-void QtSoftwareKeyboardDialog::InlineKeyboardButtonClicked(QPushButton* button) {
- if (!button->isEnabled()) {
- return;
- }
-
- if (button == ui->button_ampersand) {
- InlineTextInsertString(u"&");
- return;
- }
-
- if (button == ui->button_return || button == ui->button_return_shift) {
- InlineTextInsertString(u"\n");
- return;
- }
-
- if (button == ui->button_space || button == ui->button_space_shift) {
- InlineTextInsertString(u" ");
- return;
- }
-
- if (button == ui->button_shift || button == ui->button_shift_shift) {
- ChangeBottomOSKIndex();
- return;
- }
-
- if (button == ui->button_backspace || button == ui->button_backspace_shift ||
- button == ui->button_backspace_num) {
- if (cursor_position <= 0 || current_text.empty()) {
- cursor_position = 0;
- return;
- }
-
- --cursor_position;
-
- current_text.erase(cursor_position, 1);
-
- SetBackspaceOkEnabled();
-
- emit SubmitInlineText(SwkbdReplyType::ChangedString, current_text, cursor_position);
- return;
- }
-
- if (button == ui->button_ok || button == ui->button_ok_shift || button == ui->button_ok_num) {
- emit SubmitInlineText(SwkbdReplyType::DecidedEnter, current_text, cursor_position);
- return;
- }
-
- InlineTextInsertString(button->text().toStdU16String());
-
- // Revert the keyboard to lowercase if the shift key is active.
- if (bottom_osk_index == BottomOSKIndex::UpperCase && !caps_lock_enabled) {
- // This is set to true since ChangeBottomOSKIndex will change bottom_osk_index to LowerCase
- // if bottom_osk_index is UpperCase and caps_lock_enabled is true.
- caps_lock_enabled = true;
- ChangeBottomOSKIndex();
- }
-}
-
-void QtSoftwareKeyboardDialog::InlineTextInsertString(std::u16string_view string) {
- if ((current_text.size() + string.size()) > initialize_parameters.max_text_length) {
- return;
- }
-
- current_text.insert(cursor_position, string);
-
- cursor_position += static_cast<s32>(string.size());
-
- SetBackspaceOkEnabled();
-
- emit SubmitInlineText(SwkbdReplyType::ChangedString, current_text, cursor_position);
-}
-
-void QtSoftwareKeyboardDialog::SetupMouseHover() {
- // setFocus() has a bug where continuously changing focus will cause the focus UI to
- // mysteriously disappear. A workaround we have found is using the mouse to hover over
- // the buttons to act in place of the button focus. As a result, we will have to set
- // a blank cursor when hovering over all the buttons and set a no focus policy so the
- // buttons do not stay in focus in addition to the mouse hover.
- for (auto* button : all_buttons) {
- button->setCursor(QCursor(Qt::BlankCursor));
- button->setFocusPolicy(Qt::NoFocus);
- }
-}
-
-template <HIDButton... T>
-void QtSoftwareKeyboardDialog::HandleButtonPressedOnce() {
- const auto f = [this](HIDButton button) {
- if (input_interpreter->IsButtonPressedOnce(button)) {
- TranslateButtonPress(button);
- }
- };
-
- (f(T), ...);
-}
-
-template <HIDButton... T>
-void QtSoftwareKeyboardDialog::HandleButtonHold() {
- const auto f = [this](HIDButton button) {
- if (input_interpreter->IsButtonHeld(button)) {
- TranslateButtonPress(button);
- }
- };
-
- (f(T), ...);
-}
-
-void QtSoftwareKeyboardDialog::TranslateButtonPress(HIDButton button) {
- switch (button) {
- case HIDButton::A:
- switch (bottom_osk_index) {
- case BottomOSKIndex::LowerCase:
- case BottomOSKIndex::UpperCase:
- keyboard_buttons[static_cast<std::size_t>(bottom_osk_index)][row][column]->click();
- break;
- case BottomOSKIndex::NumberPad:
- numberpad_buttons[row][column]->click();
- break;
- default:
- break;
- }
- break;
- case HIDButton::B:
- switch (bottom_osk_index) {
- case BottomOSKIndex::LowerCase:
- ui->button_backspace->click();
- break;
- case BottomOSKIndex::UpperCase:
- ui->button_backspace_shift->click();
- break;
- case BottomOSKIndex::NumberPad:
- ui->button_backspace_num->click();
- break;
- default:
- break;
- }
- break;
- case HIDButton::X:
- if (is_inline) {
- emit SubmitInlineText(SwkbdReplyType::DecidedCancel, current_text, cursor_position);
- } else {
- auto text = ui->topOSK->currentIndex() == 1
- ? ui->text_edit_osk->toPlainText().toStdU16String()
- : ui->line_edit_osk->text().toStdU16String();
-
- emit SubmitNormalText(SwkbdResult::Cancel, std::move(text));
- }
- break;
- case HIDButton::Y:
- switch (bottom_osk_index) {
- case BottomOSKIndex::LowerCase:
- ui->button_space->click();
- break;
- case BottomOSKIndex::UpperCase:
- ui->button_space_shift->click();
- break;
- case BottomOSKIndex::NumberPad:
- default:
- break;
- }
- break;
- case HIDButton::LStick:
- case HIDButton::RStick:
- switch (bottom_osk_index) {
- case BottomOSKIndex::LowerCase:
- ui->button_shift->click();
- break;
- case BottomOSKIndex::UpperCase:
- ui->button_shift_shift->click();
- break;
- case BottomOSKIndex::NumberPad:
- default:
- break;
- }
- break;
- case HIDButton::L:
- MoveTextCursorDirection(Direction::Left);
- break;
- case HIDButton::R:
- MoveTextCursorDirection(Direction::Right);
- break;
- case HIDButton::Plus:
- switch (bottom_osk_index) {
- case BottomOSKIndex::LowerCase:
- ui->button_ok->click();
- break;
- case BottomOSKIndex::UpperCase:
- ui->button_ok_shift->click();
- break;
- case BottomOSKIndex::NumberPad:
- ui->button_ok_num->click();
- break;
- default:
- break;
- }
- break;
- case HIDButton::DLeft:
- case HIDButton::LStickLeft:
- case HIDButton::RStickLeft:
- MoveButtonDirection(Direction::Left);
- break;
- case HIDButton::DUp:
- case HIDButton::LStickUp:
- case HIDButton::RStickUp:
- MoveButtonDirection(Direction::Up);
- break;
- case HIDButton::DRight:
- case HIDButton::LStickRight:
- case HIDButton::RStickRight:
- MoveButtonDirection(Direction::Right);
- break;
- case HIDButton::DDown:
- case HIDButton::LStickDown:
- case HIDButton::RStickDown:
- MoveButtonDirection(Direction::Down);
- break;
- default:
- break;
- }
-}
-
-void QtSoftwareKeyboardDialog::MoveButtonDirection(Direction direction) {
- // Changes the row or column index depending on the direction.
- auto move_direction = [this, direction](std::size_t max_rows, std::size_t max_columns) {
- switch (direction) {
- case Direction::Left:
- column = (column + max_columns - 1) % max_columns;
- break;
- case Direction::Up:
- row = (row + max_rows - 1) % max_rows;
- break;
- case Direction::Right:
- column = (column + 1) % max_columns;
- break;
- case Direction::Down:
- row = (row + 1) % max_rows;
- break;
- default:
- break;
- }
- };
-
- switch (bottom_osk_index) {
- case BottomOSKIndex::LowerCase:
- case BottomOSKIndex::UpperCase: {
- const auto index = static_cast<std::size_t>(bottom_osk_index);
-
- const auto* const prev_button = keyboard_buttons[index][row][column];
- move_direction(NUM_ROWS_NORMAL, NUM_COLUMNS_NORMAL);
- auto* curr_button = keyboard_buttons[index][row][column];
-
- while (!curr_button || !curr_button->isEnabled() || curr_button == prev_button) {
- move_direction(NUM_ROWS_NORMAL, NUM_COLUMNS_NORMAL);
- curr_button = keyboard_buttons[index][row][column];
- }
-
- // This is a workaround for setFocus() randomly not showing focus in the UI
- QCursor::setPos(curr_button->mapToGlobal(curr_button->rect().center()));
- break;
- }
- case BottomOSKIndex::NumberPad: {
- const auto* const prev_button = numberpad_buttons[row][column];
- move_direction(NUM_ROWS_NUMPAD, NUM_COLUMNS_NUMPAD);
- auto* curr_button = numberpad_buttons[row][column];
-
- while (!curr_button || !curr_button->isEnabled() || curr_button == prev_button) {
- move_direction(NUM_ROWS_NUMPAD, NUM_COLUMNS_NUMPAD);
- curr_button = numberpad_buttons[row][column];
- }
-
- // This is a workaround for setFocus() randomly not showing focus in the UI
- QCursor::setPos(curr_button->mapToGlobal(curr_button->rect().center()));
- break;
- }
- default:
- break;
- }
-}
-
-void QtSoftwareKeyboardDialog::MoveTextCursorDirection(Direction direction) {
- switch (direction) {
- case Direction::Left:
- if (is_inline) {
- if (cursor_position <= 0) {
- cursor_position = 0;
- } else {
- --cursor_position;
- emit SubmitInlineText(SwkbdReplyType::MovedCursor, current_text, cursor_position);
- }
- } else {
- if (ui->topOSK->currentIndex() == 1) {
- ui->text_edit_osk->moveCursor(QTextCursor::Left);
- } else {
- ui->line_edit_osk->setCursorPosition(ui->line_edit_osk->cursorPosition() - 1);
- }
- }
- break;
- case Direction::Right:
- if (is_inline) {
- if (cursor_position >= static_cast<s32>(current_text.size())) {
- cursor_position = static_cast<s32>(current_text.size());
- } else {
- ++cursor_position;
- emit SubmitInlineText(SwkbdReplyType::MovedCursor, current_text, cursor_position);
- }
- } else {
- if (ui->topOSK->currentIndex() == 1) {
- ui->text_edit_osk->moveCursor(QTextCursor::Right);
- } else {
- ui->line_edit_osk->setCursorPosition(ui->line_edit_osk->cursorPosition() + 1);
- }
- }
- break;
- default:
- break;
- }
-}
-
-void QtSoftwareKeyboardDialog::StartInputThread() {
- if (input_thread_running) {
- return;
- }
-
- input_thread_running = true;
-
- input_thread = std::thread(&QtSoftwareKeyboardDialog::InputThread, this);
-}
-
-void QtSoftwareKeyboardDialog::StopInputThread() {
- input_thread_running = false;
-
- if (input_thread.joinable()) {
- input_thread.join();
- }
-
- if (input_interpreter) {
- input_interpreter->ResetButtonStates();
- }
-}
-
-void QtSoftwareKeyboardDialog::InputThread() {
- while (input_thread_running) {
- input_interpreter->PollInput();
-
- HandleButtonPressedOnce<HIDButton::A, HIDButton::B, HIDButton::X, HIDButton::Y,
- HIDButton::LStick, HIDButton::RStick, HIDButton::L, HIDButton::R,
- HIDButton::Plus, HIDButton::DLeft, HIDButton::DUp,
- HIDButton::DRight, HIDButton::DDown, HIDButton::LStickLeft,
- HIDButton::LStickUp, HIDButton::LStickRight, HIDButton::LStickDown,
- HIDButton::RStickLeft, HIDButton::RStickUp, HIDButton::RStickRight,
- HIDButton::RStickDown>();
-
- HandleButtonHold<HIDButton::B, HIDButton::L, HIDButton::R, HIDButton::DLeft, HIDButton::DUp,
- HIDButton::DRight, HIDButton::DDown, HIDButton::LStickLeft,
- HIDButton::LStickUp, HIDButton::LStickRight, HIDButton::LStickDown,
- HIDButton::RStickLeft, HIDButton::RStickUp, HIDButton::RStickRight,
- HIDButton::RStickDown>();
-
- std::this_thread::sleep_for(std::chrono::milliseconds(50));
- }
-}
-
-QtSoftwareKeyboard::QtSoftwareKeyboard(GMainWindow& main_window) {
- connect(this, &QtSoftwareKeyboard::MainWindowInitializeKeyboard, &main_window,
- &GMainWindow::SoftwareKeyboardInitialize, Qt::QueuedConnection);
- connect(this, &QtSoftwareKeyboard::MainWindowShowNormalKeyboard, &main_window,
- &GMainWindow::SoftwareKeyboardShowNormal, Qt::QueuedConnection);
- connect(this, &QtSoftwareKeyboard::MainWindowShowTextCheckDialog, &main_window,
- &GMainWindow::SoftwareKeyboardShowTextCheck, Qt::QueuedConnection);
- connect(this, &QtSoftwareKeyboard::MainWindowShowInlineKeyboard, &main_window,
- &GMainWindow::SoftwareKeyboardShowInline, Qt::QueuedConnection);
- connect(this, &QtSoftwareKeyboard::MainWindowHideInlineKeyboard, &main_window,
- &GMainWindow::SoftwareKeyboardHideInline, Qt::QueuedConnection);
- connect(this, &QtSoftwareKeyboard::MainWindowInlineTextChanged, &main_window,
- &GMainWindow::SoftwareKeyboardInlineTextChanged, Qt::QueuedConnection);
- connect(this, &QtSoftwareKeyboard::MainWindowExitKeyboard, &main_window,
- &GMainWindow::SoftwareKeyboardExit, Qt::QueuedConnection);
- connect(&main_window, &GMainWindow::SoftwareKeyboardSubmitNormalText, this,
- &QtSoftwareKeyboard::SubmitNormalText, Qt::QueuedConnection);
- connect(&main_window, &GMainWindow::SoftwareKeyboardSubmitInlineText, this,
- &QtSoftwareKeyboard::SubmitInlineText, Qt::QueuedConnection);
-}
-
-QtSoftwareKeyboard::~QtSoftwareKeyboard() = default;
-
-void QtSoftwareKeyboard::InitializeKeyboard(
- bool is_inline, Core::Frontend::KeyboardInitializeParameters initialize_parameters,
- std::function<void(Service::AM::Applets::SwkbdResult, std::u16string)> submit_normal_callback_,
- std::function<void(Service::AM::Applets::SwkbdReplyType, std::u16string, s32)>
- submit_inline_callback_) {
- if (is_inline) {
- submit_inline_callback = std::move(submit_inline_callback_);
- } else {
- submit_normal_callback = std::move(submit_normal_callback_);
- }
-
- LOG_INFO(Service_AM,
- "\nKeyboardInitializeParameters:"
- "\nok_text={}"
- "\nheader_text={}"
- "\nsub_text={}"
- "\nguide_text={}"
- "\ninitial_text={}"
- "\nmax_text_length={}"
- "\nmin_text_length={}"
- "\ninitial_cursor_position={}"
- "\ntype={}"
- "\npassword_mode={}"
- "\ntext_draw_type={}"
- "\nkey_disable_flags={}"
- "\nuse_blur_background={}"
- "\nenable_backspace_button={}"
- "\nenable_return_button={}"
- "\ndisable_cancel_button={}",
- Common::UTF16ToUTF8(initialize_parameters.ok_text),
- Common::UTF16ToUTF8(initialize_parameters.header_text),
- Common::UTF16ToUTF8(initialize_parameters.sub_text),
- Common::UTF16ToUTF8(initialize_parameters.guide_text),
- Common::UTF16ToUTF8(initialize_parameters.initial_text),
- initialize_parameters.max_text_length, initialize_parameters.min_text_length,
- initialize_parameters.initial_cursor_position, initialize_parameters.type,
- initialize_parameters.password_mode, initialize_parameters.text_draw_type,
- initialize_parameters.key_disable_flags.raw, initialize_parameters.use_blur_background,
- initialize_parameters.enable_backspace_button,
- initialize_parameters.enable_return_button,
- initialize_parameters.disable_cancel_button);
-
- emit MainWindowInitializeKeyboard(is_inline, std::move(initialize_parameters));
-}
-
-void QtSoftwareKeyboard::ShowNormalKeyboard() const {
- emit MainWindowShowNormalKeyboard();
-}
-
-void QtSoftwareKeyboard::ShowTextCheckDialog(
- Service::AM::Applets::SwkbdTextCheckResult text_check_result,
- std::u16string text_check_message) const {
- emit MainWindowShowTextCheckDialog(text_check_result, std::move(text_check_message));
-}
-
-void QtSoftwareKeyboard::ShowInlineKeyboard(
- Core::Frontend::InlineAppearParameters appear_parameters) const {
- LOG_INFO(Service_AM,
- "\nInlineAppearParameters:"
- "\nmax_text_length={}"
- "\nmin_text_length={}"
- "\nkey_top_scale_x={}"
- "\nkey_top_scale_y={}"
- "\nkey_top_translate_x={}"
- "\nkey_top_translate_y={}"
- "\ntype={}"
- "\nkey_disable_flags={}"
- "\nkey_top_as_floating={}"
- "\nenable_backspace_button={}"
- "\nenable_return_button={}"
- "\ndisable_cancel_button={}",
- appear_parameters.max_text_length, appear_parameters.min_text_length,
- appear_parameters.key_top_scale_x, appear_parameters.key_top_scale_y,
- appear_parameters.key_top_translate_x, appear_parameters.key_top_translate_y,
- appear_parameters.type, appear_parameters.key_disable_flags.raw,
- appear_parameters.key_top_as_floating, appear_parameters.enable_backspace_button,
- appear_parameters.enable_return_button, appear_parameters.disable_cancel_button);
-
- emit MainWindowShowInlineKeyboard(std::move(appear_parameters));
-}
-
-void QtSoftwareKeyboard::HideInlineKeyboard() const {
- emit MainWindowHideInlineKeyboard();
-}
-
-void QtSoftwareKeyboard::InlineTextChanged(
- Core::Frontend::InlineTextParameters text_parameters) const {
- LOG_INFO(Service_AM,
- "\nInlineTextParameters:"
- "\ninput_text={}"
- "\ncursor_position={}",
- Common::UTF16ToUTF8(text_parameters.input_text), text_parameters.cursor_position);
-
- emit MainWindowInlineTextChanged(std::move(text_parameters));
-}
-
-void QtSoftwareKeyboard::ExitKeyboard() const {
- emit MainWindowExitKeyboard();
-}
-
-void QtSoftwareKeyboard::SubmitNormalText(Service::AM::Applets::SwkbdResult result,
- std::u16string submitted_text) const {
- submit_normal_callback(result, submitted_text);
-}
-
-void QtSoftwareKeyboard::SubmitInlineText(Service::AM::Applets::SwkbdReplyType reply_type,
- std::u16string submitted_text,
- s32 cursor_position) const {
- submit_inline_callback(reply_type, submitted_text, cursor_position);
-}
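
The MoveButtonDirection logic deleted above reduces to two pieces: wrap-around indexing over a fixed 2D button grid, and a scan that keeps stepping until it lands on an enabled button. A minimal standalone sketch of that pattern follows; the grid dimensions and the enabled matrix are illustrative placeholders, not the applet's actual keyboard layout.

    #include <array>
    #include <cstddef>

    enum class Direction { Left, Up, Right, Down };

    // Illustrative grid size; the real keyboard uses NUM_ROWS_NORMAL/NUM_COLUMNS_NORMAL etc.
    constexpr std::size_t NUM_ROWS = 4;
    constexpr std::size_t NUM_COLUMNS = 12;

    struct GridCursor {
        std::size_t row = 0;
        std::size_t column = 0;

        // One step in the given direction, wrapping around the grid edges.
        void Step(Direction direction) {
            switch (direction) {
            case Direction::Left:
                column = (column + NUM_COLUMNS - 1) % NUM_COLUMNS;
                break;
            case Direction::Up:
                row = (row + NUM_ROWS - 1) % NUM_ROWS;
                break;
            case Direction::Right:
                column = (column + 1) % NUM_COLUMNS;
                break;
            case Direction::Down:
                row = (row + 1) % NUM_ROWS;
                break;
            }
        }

        // Keep stepping until an enabled cell is reached, mirroring the
        // "skip disabled buttons" loop in the deleted code. Stops after a full
        // lap so a grid with no enabled cells cannot spin forever.
        void StepToEnabled(Direction direction,
                           const std::array<std::array<bool, NUM_COLUMNS>, NUM_ROWS>& enabled) {
            const std::size_t start_row = row;
            const std::size_t start_column = column;
            do {
                Step(direction);
            } while (!enabled[row][column] && !(row == start_row && column == start_column));
        }
    };
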
diff --git a/src/yuzu/applets/web_browser.cpp b/src/yuzu/applets/web_browser.cpp
deleted file mode 100644
index 34d3feb55..000000000
--- a/src/yuzu/applets/web_browser.cpp
+++ /dev/null
@@ -1,417 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#ifdef YUZU_USE_QT_WEB_ENGINE
-#include <QKeyEvent>
-
-#include <QWebEngineProfile>
-#include <QWebEngineScript>
-#include <QWebEngineScriptCollection>
-#include <QWebEngineSettings>
-#include <QWebEngineUrlScheme>
-#endif
-
-#include "common/fs/path_util.h"
-#include "core/core.h"
-#include "core/frontend/input_interpreter.h"
-#include "input_common/keyboard.h"
-#include "input_common/main.h"
-#include "yuzu/applets/web_browser.h"
-#include "yuzu/applets/web_browser_scripts.h"
-#include "yuzu/main.h"
-#include "yuzu/util/url_request_interceptor.h"
-
-#ifdef YUZU_USE_QT_WEB_ENGINE
-
-namespace {
-
-constexpr int HIDButtonToKey(HIDButton button) {
- switch (button) {
- case HIDButton::DLeft:
- case HIDButton::LStickLeft:
- return Qt::Key_Left;
- case HIDButton::DUp:
- case HIDButton::LStickUp:
- return Qt::Key_Up;
- case HIDButton::DRight:
- case HIDButton::LStickRight:
- return Qt::Key_Right;
- case HIDButton::DDown:
- case HIDButton::LStickDown:
- return Qt::Key_Down;
- default:
- return 0;
- }
-}
-
-} // Anonymous namespace
-
-QtNXWebEngineView::QtNXWebEngineView(QWidget* parent, Core::System& system,
- InputCommon::InputSubsystem* input_subsystem_)
- : QWebEngineView(parent), input_subsystem{input_subsystem_},
- url_interceptor(std::make_unique<UrlRequestInterceptor>()),
- input_interpreter(std::make_unique<InputInterpreter>(system)),
- default_profile{QWebEngineProfile::defaultProfile()},
- global_settings{QWebEngineSettings::globalSettings()} {
- QWebEngineScript gamepad;
- QWebEngineScript window_nx;
-
- gamepad.setName(QStringLiteral("gamepad_script.js"));
- window_nx.setName(QStringLiteral("window_nx_script.js"));
-
- gamepad.setSourceCode(QString::fromStdString(GAMEPAD_SCRIPT));
- window_nx.setSourceCode(QString::fromStdString(WINDOW_NX_SCRIPT));
-
- gamepad.setInjectionPoint(QWebEngineScript::DocumentCreation);
- window_nx.setInjectionPoint(QWebEngineScript::DocumentCreation);
-
- gamepad.setWorldId(QWebEngineScript::MainWorld);
- window_nx.setWorldId(QWebEngineScript::MainWorld);
-
- gamepad.setRunsOnSubFrames(true);
- window_nx.setRunsOnSubFrames(true);
-
- default_profile->scripts()->insert(gamepad);
- default_profile->scripts()->insert(window_nx);
-
- default_profile->setRequestInterceptor(url_interceptor.get());
-
- global_settings->setAttribute(QWebEngineSettings::LocalContentCanAccessRemoteUrls, true);
- global_settings->setAttribute(QWebEngineSettings::FullScreenSupportEnabled, true);
- global_settings->setAttribute(QWebEngineSettings::AllowRunningInsecureContent, true);
- global_settings->setAttribute(QWebEngineSettings::FocusOnNavigationEnabled, true);
- global_settings->setAttribute(QWebEngineSettings::AllowWindowActivationFromJavaScript, true);
- global_settings->setAttribute(QWebEngineSettings::ShowScrollBars, false);
-
- global_settings->setFontFamily(QWebEngineSettings::StandardFont, QStringLiteral("Roboto"));
-
- connect(
- page(), &QWebEnginePage::windowCloseRequested, page(),
- [this] {
- if (page()->url() == url_interceptor->GetRequestedURL()) {
- SetFinished(true);
- SetExitReason(Service::AM::Applets::WebExitReason::WindowClosed);
- }
- },
- Qt::QueuedConnection);
-}
-
-QtNXWebEngineView::~QtNXWebEngineView() {
- SetFinished(true);
- StopInputThread();
-}
-
-void QtNXWebEngineView::LoadLocalWebPage(const std::string& main_url,
- const std::string& additional_args) {
- is_local = true;
-
- LoadExtractedFonts();
- SetUserAgent(UserAgent::WebApplet);
- SetFinished(false);
- SetExitReason(Service::AM::Applets::WebExitReason::EndButtonPressed);
- SetLastURL("http://localhost/");
- StartInputThread();
-
- load(QUrl(QUrl::fromLocalFile(QString::fromStdString(main_url)).toString() +
- QString::fromStdString(additional_args)));
-}
-
-void QtNXWebEngineView::LoadExternalWebPage(const std::string& main_url,
- const std::string& additional_args) {
- is_local = false;
-
- SetUserAgent(UserAgent::WebApplet);
- SetFinished(false);
- SetExitReason(Service::AM::Applets::WebExitReason::EndButtonPressed);
- SetLastURL("http://localhost/");
- StartInputThread();
-
- load(QUrl(QString::fromStdString(main_url) + QString::fromStdString(additional_args)));
-}
-
-void QtNXWebEngineView::SetUserAgent(UserAgent user_agent) {
- const QString user_agent_str = [user_agent] {
- switch (user_agent) {
- case UserAgent::WebApplet:
- default:
- return QStringLiteral("WebApplet");
- case UserAgent::ShopN:
- return QStringLiteral("ShopN");
- case UserAgent::LoginApplet:
- return QStringLiteral("LoginApplet");
- case UserAgent::ShareApplet:
- return QStringLiteral("ShareApplet");
- case UserAgent::LobbyApplet:
- return QStringLiteral("LobbyApplet");
- case UserAgent::WifiWebAuthApplet:
- return QStringLiteral("WifiWebAuthApplet");
- }
- }();
-
- QWebEngineProfile::defaultProfile()->setHttpUserAgent(
- QStringLiteral("Mozilla/5.0 (Nintendo Switch; %1) AppleWebKit/606.4 "
- "(KHTML, like Gecko) NF/6.0.1.15.4 NintendoBrowser/5.1.0.20389")
- .arg(user_agent_str));
-}
-
-bool QtNXWebEngineView::IsFinished() const {
- return finished;
-}
-
-void QtNXWebEngineView::SetFinished(bool finished_) {
- finished = finished_;
-}
-
-Service::AM::Applets::WebExitReason QtNXWebEngineView::GetExitReason() const {
- return exit_reason;
-}
-
-void QtNXWebEngineView::SetExitReason(Service::AM::Applets::WebExitReason exit_reason_) {
- exit_reason = exit_reason_;
-}
-
-const std::string& QtNXWebEngineView::GetLastURL() const {
- return last_url;
-}
-
-void QtNXWebEngineView::SetLastURL(std::string last_url_) {
- last_url = std::move(last_url_);
-}
-
-QString QtNXWebEngineView::GetCurrentURL() const {
- return url_interceptor->GetRequestedURL().toString();
-}
-
-void QtNXWebEngineView::hide() {
- SetFinished(true);
- StopInputThread();
-
- QWidget::hide();
-}
-
-void QtNXWebEngineView::keyPressEvent(QKeyEvent* event) {
- if (is_local) {
- input_subsystem->GetKeyboard()->PressKey(event->key());
- }
-}
-
-void QtNXWebEngineView::keyReleaseEvent(QKeyEvent* event) {
- if (is_local) {
- input_subsystem->GetKeyboard()->ReleaseKey(event->key());
- }
-}
-
-template <HIDButton... T>
-void QtNXWebEngineView::HandleWindowFooterButtonPressedOnce() {
- const auto f = [this](HIDButton button) {
- if (input_interpreter->IsButtonPressedOnce(button)) {
- page()->runJavaScript(
- QStringLiteral("yuzu_key_callbacks[%1] == null;").arg(static_cast<u8>(button)),
- [&](const QVariant& variant) {
- if (variant.toBool()) {
- switch (button) {
- case HIDButton::A:
- SendMultipleKeyPressEvents<Qt::Key_A, Qt::Key_Space, Qt::Key_Return>();
- break;
- case HIDButton::B:
- SendKeyPressEvent(Qt::Key_B);
- break;
- case HIDButton::X:
- SendKeyPressEvent(Qt::Key_X);
- break;
- case HIDButton::Y:
- SendKeyPressEvent(Qt::Key_Y);
- break;
- default:
- break;
- }
- }
- });
-
- page()->runJavaScript(
- QStringLiteral("if (yuzu_key_callbacks[%1] != null) { yuzu_key_callbacks[%1](); }")
- .arg(static_cast<u8>(button)));
- }
- };
-
- (f(T), ...);
-}
-
-template <HIDButton... T>
-void QtNXWebEngineView::HandleWindowKeyButtonPressedOnce() {
- const auto f = [this](HIDButton button) {
- if (input_interpreter->IsButtonPressedOnce(button)) {
- SendKeyPressEvent(HIDButtonToKey(button));
- }
- };
-
- (f(T), ...);
-}
-
-template <HIDButton... T>
-void QtNXWebEngineView::HandleWindowKeyButtonHold() {
- const auto f = [this](HIDButton button) {
- if (input_interpreter->IsButtonHeld(button)) {
- SendKeyPressEvent(HIDButtonToKey(button));
- }
- };
-
- (f(T), ...);
-}
-
-void QtNXWebEngineView::SendKeyPressEvent(int key) {
- if (key == 0) {
- return;
- }
-
- QCoreApplication::postEvent(focusProxy(),
- new QKeyEvent(QKeyEvent::KeyPress, key, Qt::NoModifier));
- QCoreApplication::postEvent(focusProxy(),
- new QKeyEvent(QKeyEvent::KeyRelease, key, Qt::NoModifier));
-}
-
-void QtNXWebEngineView::StartInputThread() {
- if (input_thread_running) {
- return;
- }
-
- input_thread_running = true;
- input_thread = std::thread(&QtNXWebEngineView::InputThread, this);
-}
-
-void QtNXWebEngineView::StopInputThread() {
- if (is_local) {
- QWidget::releaseKeyboard();
- }
-
- input_thread_running = false;
- if (input_thread.joinable()) {
- input_thread.join();
- }
-}
-
-void QtNXWebEngineView::InputThread() {
- // Wait for 1 second before allowing any inputs to be processed.
- std::this_thread::sleep_for(std::chrono::seconds(1));
-
- if (is_local) {
- QWidget::grabKeyboard();
- }
-
- while (input_thread_running) {
- input_interpreter->PollInput();
-
- HandleWindowFooterButtonPressedOnce<HIDButton::A, HIDButton::B, HIDButton::X, HIDButton::Y,
- HIDButton::L, HIDButton::R>();
-
- HandleWindowKeyButtonPressedOnce<HIDButton::DLeft, HIDButton::DUp, HIDButton::DRight,
- HIDButton::DDown, HIDButton::LStickLeft,
- HIDButton::LStickUp, HIDButton::LStickRight,
- HIDButton::LStickDown>();
-
- HandleWindowKeyButtonHold<HIDButton::DLeft, HIDButton::DUp, HIDButton::DRight,
- HIDButton::DDown, HIDButton::LStickLeft, HIDButton::LStickUp,
- HIDButton::LStickRight, HIDButton::LStickDown>();
-
- std::this_thread::sleep_for(std::chrono::milliseconds(50));
- }
-}
-
-void QtNXWebEngineView::LoadExtractedFonts() {
- QWebEngineScript nx_font_css;
- QWebEngineScript load_nx_font;
-
- auto fonts_dir_str = Common::FS::PathToUTF8String(
- Common::FS::GetYuzuPath(Common::FS::YuzuPath::CacheDir) / "fonts/");
-
- std::replace(fonts_dir_str.begin(), fonts_dir_str.end(), '\\', '/');
-
- const auto fonts_dir = QString::fromStdString(fonts_dir_str);
-
- nx_font_css.setName(QStringLiteral("nx_font_css.js"));
- load_nx_font.setName(QStringLiteral("load_nx_font.js"));
-
- nx_font_css.setSourceCode(
- QString::fromStdString(NX_FONT_CSS)
- .arg(fonts_dir + QStringLiteral("FontStandard.ttf"))
- .arg(fonts_dir + QStringLiteral("FontChineseSimplified.ttf"))
- .arg(fonts_dir + QStringLiteral("FontExtendedChineseSimplified.ttf"))
- .arg(fonts_dir + QStringLiteral("FontChineseTraditional.ttf"))
- .arg(fonts_dir + QStringLiteral("FontKorean.ttf"))
- .arg(fonts_dir + QStringLiteral("FontNintendoExtended.ttf"))
- .arg(fonts_dir + QStringLiteral("FontNintendoExtended2.ttf")));
- load_nx_font.setSourceCode(QString::fromStdString(LOAD_NX_FONT));
-
- nx_font_css.setInjectionPoint(QWebEngineScript::DocumentReady);
- load_nx_font.setInjectionPoint(QWebEngineScript::Deferred);
-
- nx_font_css.setWorldId(QWebEngineScript::MainWorld);
- load_nx_font.setWorldId(QWebEngineScript::MainWorld);
-
- nx_font_css.setRunsOnSubFrames(true);
- load_nx_font.setRunsOnSubFrames(true);
-
- default_profile->scripts()->insert(nx_font_css);
- default_profile->scripts()->insert(load_nx_font);
-
- connect(
- url_interceptor.get(), &UrlRequestInterceptor::FrameChanged, url_interceptor.get(),
- [this] {
- std::this_thread::sleep_for(std::chrono::milliseconds(50));
- page()->runJavaScript(QString::fromStdString(LOAD_NX_FONT));
- },
- Qt::QueuedConnection);
-}
-
-#endif
-
-QtWebBrowser::QtWebBrowser(GMainWindow& main_window) {
- connect(this, &QtWebBrowser::MainWindowOpenWebPage, &main_window,
- &GMainWindow::WebBrowserOpenWebPage, Qt::QueuedConnection);
- connect(&main_window, &GMainWindow::WebBrowserExtractOfflineRomFS, this,
- &QtWebBrowser::MainWindowExtractOfflineRomFS, Qt::QueuedConnection);
- connect(&main_window, &GMainWindow::WebBrowserClosed, this,
- &QtWebBrowser::MainWindowWebBrowserClosed, Qt::QueuedConnection);
-}
-
-QtWebBrowser::~QtWebBrowser() = default;
-
-void QtWebBrowser::OpenLocalWebPage(
- const std::string& local_url, std::function<void()> extract_romfs_callback_,
- std::function<void(Service::AM::Applets::WebExitReason, std::string)> callback_) const {
- extract_romfs_callback = std::move(extract_romfs_callback_);
- callback = std::move(callback_);
-
- const auto index = local_url.find('?');
-
- if (index == std::string::npos) {
- emit MainWindowOpenWebPage(local_url, "", true);
- } else {
- emit MainWindowOpenWebPage(local_url.substr(0, index), local_url.substr(index), true);
- }
-}
-
-void QtWebBrowser::OpenExternalWebPage(
- const std::string& external_url,
- std::function<void(Service::AM::Applets::WebExitReason, std::string)> callback_) const {
- callback = std::move(callback_);
-
- const auto index = external_url.find('?');
-
- if (index == std::string::npos) {
- emit MainWindowOpenWebPage(external_url, "", false);
- } else {
- emit MainWindowOpenWebPage(external_url.substr(0, index), external_url.substr(index),
- false);
- }
-}
-
-void QtWebBrowser::MainWindowExtractOfflineRomFS() {
- extract_romfs_callback();
-}
-
-void QtWebBrowser::MainWindowWebBrowserClosed(Service::AM::Applets::WebExitReason exit_reason,
- std::string last_url) {
- callback(exit_reason, last_url);
-}
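
The deleted web browser applet, like the software keyboard above it, dispatches input handling over a compile-time list of buttons: each Handle*<HIDButton...>() helper expands the C++17 fold expression (f(T), ...) into one call per button in the pack. A small self-contained sketch of that dispatch, with a stand-in for InputInterpreter::IsButtonPressedOnce():

    #include <cstdio>

    enum class HIDButton { A, B, DLeft, DUp, DRight, DDown };

    // Stand-in for the real InputInterpreter query; always false here.
    bool IsButtonPressedOnce(HIDButton) {
        return false;
    }

    template <HIDButton... T>
    void HandleButtonsPressedOnce() {
        const auto handle = [](HIDButton button) {
            if (IsButtonPressedOnce(button)) {
                std::printf("button %d pressed\n", static_cast<int>(button));
            }
        };

        // C++17 fold expression over the comma operator:
        // expands to handle(A), handle(B), ... in pack order.
        (handle(T), ...);
    }

    int main() {
        HandleButtonsPressedOnce<HIDButton::A, HIDButton::B, HIDButton::DLeft>();
    }
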
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index 86495803e..25b658b2a 100644
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -51,11 +51,11 @@ void EmuThread::run() {
Common::SetCurrentThreadName(name.c_str());
auto& system = Core::System::GetInstance();
+ auto& gpu = system.GPU();
+ auto stop_token = stop_source.get_token();
system.RegisterHostThread();
- auto& gpu = system.GPU();
-
// Main process has been loaded. Make the context current to this thread and begin GPU and CPU
// execution.
gpu.Start();
@@ -64,12 +64,13 @@ void EmuThread::run() {
emit LoadProgress(VideoCore::LoadCallbackStage::Prepare, 0, 0);
- system.Renderer().ReadRasterizer()->LoadDiskResources(
- system.CurrentProcess()->GetTitleID(), stop_run,
- [this](VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total) {
- emit LoadProgress(stage, value, total);
- });
-
+ if (Settings::values.use_disk_shader_cache.GetValue()) {
+ system.Renderer().ReadRasterizer()->LoadDiskResources(
+ system.CurrentProcess()->GetTitleID(), stop_token,
+ [this](VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total) {
+ emit LoadProgress(stage, value, total);
+ });
+ }
emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0);
gpu.ReleaseContext();
@@ -78,7 +79,7 @@ void EmuThread::run() {
// so that the DebugModeLeft signal can be emitted before the
// next execution step
bool was_active = false;
- while (!stop_run) {
+ while (!stop_token.stop_requested()) {
if (running) {
if (was_active) {
emit DebugModeLeft();
@@ -100,7 +101,7 @@ void EmuThread::run() {
}
running_guard = false;
- if (!stop_run) {
+ if (!stop_token.stop_requested()) {
was_active = true;
emit DebugModeEntered();
}
@@ -108,7 +109,7 @@ void EmuThread::run() {
UNIMPLEMENTED();
} else {
std::unique_lock lock{running_mutex};
- running_cv.wait(lock, [this] { return IsRunning() || exec_step || stop_run; });
+ running_cv.wait(lock, stop_token, [this] { return IsRunning() || exec_step; });
}
}
@@ -411,8 +412,9 @@ void GRenderWindow::mousePressEvent(QMouseEvent* event) {
if (event->source() == Qt::MouseEventSynthesizedBySystem) {
return;
}
-
- auto pos = event->pos();
+ // Qt sometimes returns the parent coordinates. To avoid this we read the global mouse
+ // coordinates and map them to the current render area
+ const auto pos = mapFromGlobal(QCursor::pos());
const auto [x, y] = ScaleTouch(pos);
const auto button = QtButtonToMouseButton(event->button());
input_subsystem->GetMouse()->PressButton(x, y, button);
@@ -429,7 +431,9 @@ void GRenderWindow::mouseMoveEvent(QMouseEvent* event) {
if (event->source() == Qt::MouseEventSynthesizedBySystem) {
return;
}
- auto pos = event->pos();
+ // Qt sometimes returns the parent coordinates. To avoid this we read the global mouse
+ // coordinates and map them to the current render area
+ const auto pos = mapFromGlobal(QCursor::pos());
const auto [x, y] = ScaleTouch(pos);
const int center_x = width() / 2;
const int center_y = height() / 2;
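
Both mouse hunks above replace event->pos() with a global cursor read mapped back into the widget, because event->pos() can be reported relative to the parent. A minimal sketch of that mapping, using only stock Qt calls (QCursor::pos() for screen coordinates, QWidget::mapFromGlobal() to convert them to local coordinates):

    #include <QCursor>
    #include <QPoint>
    #include <QWidget>

    // Returns the cursor position in the widget's own coordinate system,
    // independent of which widget Qt attributed the mouse event to.
    QPoint LocalCursorPosition(const QWidget& widget) {
        return widget.mapFromGlobal(QCursor::pos());
    }
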
@@ -564,6 +568,12 @@ std::unique_ptr<Core::Frontend::GraphicsContext> GRenderWindow::CreateSharedCont
bool GRenderWindow::InitRenderTarget() {
ReleaseRenderTarget();
+ {
+ // Create a dummy render widget so that Qt
+ // places the render window at the correct position.
+ const RenderWidget dummy_widget{this};
+ }
+
first_frame = false;
switch (Settings::values.renderer_backend.GetValue()) {
diff --git a/src/yuzu/bootmanager.h b/src/yuzu/bootmanager.h
index acfe2bc8c..402dd2ee1 100644
--- a/src/yuzu/bootmanager.h
+++ b/src/yuzu/bootmanager.h
@@ -89,16 +89,16 @@ public:
* Requests for the emulation thread to stop running
*/
void RequestStop() {
- stop_run = true;
+ stop_source.request_stop();
SetRunning(false);
}
private:
bool exec_step = false;
bool running = false;
- std::atomic_bool stop_run{false};
+ std::stop_source stop_source;
std::mutex running_mutex;
- std::condition_variable running_cv;
+ std::condition_variable_any running_cv;
Common::Event running_wait{};
std::atomic_bool running_guard{false};
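
The bootmanager changes above swap the hand-rolled std::atomic_bool stop_run flag for C++20 cooperative cancellation: the owner holds a std::stop_source, the worker loop checks stop_token::stop_requested(), and std::condition_variable_any::wait(lock, stop_token, pred) is woken automatically once a stop is requested. A compact sketch of the same pattern, with an illustrative Worker class rather than EmuThread itself:

    #include <condition_variable>
    #include <mutex>
    #include <stop_token>

    class Worker {
    public:
        void Run() {
            const std::stop_token token = stop_source.get_token();
            while (!token.stop_requested()) {
                std::unique_lock lock{mutex};
                // Returns when running becomes true or a stop is requested;
                // RequestStop() wakes this wait without an explicit notify.
                cv.wait(lock, token, [this] { return running; });
                if (token.stop_requested()) {
                    break;
                }
                // ... one iteration of work would go here ...
            }
        }

        void RequestStop() {
            stop_source.request_stop();
        }

        void SetRunning(bool running_) {
            {
                std::scoped_lock lock{mutex};
                running = running_;
            }
            cv.notify_all();
        }

    private:
        std::stop_source stop_source;
        std::mutex mutex;
        std::condition_variable_any cv;
        bool running = false;
    };

RequestStop() alone is enough to break out of the wait, because the stop-token overload of condition_variable_any::wait() registers a stop callback that notifies the condition variable internally.
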
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index 916a22724..72027e773 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -221,7 +221,7 @@ const std::array<int, Settings::NativeKeyboard::NumKeyboardMods> Config::default
// This must be in alphabetical order according to action name as it must have the same order as
// UISetting::values.shortcuts, which is alphabetically ordered.
// clang-format off
-const std::array<UISettings::Shortcut, 17> Config::default_hotkeys{{
+const std::array<UISettings::Shortcut, 18> Config::default_hotkeys{{
{QStringLiteral("Capture Screenshot"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+P"), Qt::WidgetWithChildrenShortcut}},
{QStringLiteral("Change Docked Mode"), QStringLiteral("Main Window"), {QStringLiteral("F10"), Qt::ApplicationShortcut}},
{QStringLiteral("Continue/Pause Emulation"), QStringLiteral("Main Window"), {QStringLiteral("F4"), Qt::WindowShortcut}},
@@ -236,6 +236,7 @@ const std::array<UISettings::Shortcut, 17> Config::default_hotkeys{{
{QStringLiteral("Restart Emulation"), QStringLiteral("Main Window"), {QStringLiteral("F6"), Qt::WindowShortcut}},
{QStringLiteral("Stop Emulation"), QStringLiteral("Main Window"), {QStringLiteral("F5"), Qt::WindowShortcut}},
{QStringLiteral("Toggle Filter Bar"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+F"), Qt::WindowShortcut}},
+ {QStringLiteral("Toggle Framerate Limit"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+U"), Qt::ApplicationShortcut}},
{QStringLiteral("Toggle Mouse Panning"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+F9"), Qt::ApplicationShortcut}},
{QStringLiteral("Toggle Speed Limit"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+Z"), Qt::ApplicationShortcut}},
{QStringLiteral("Toggle Status Bar"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+S"), Qt::WindowShortcut}},
@@ -271,6 +272,66 @@ void Config::Initialize(const std::string& config_name) {
}
}
+/* The {Read,Write}BasicSetting and WriteGlobalSetting templates must be defined here, before
+ * their first use later in this file. This also allows explicit specializations for types that
+ * don't work nicely with the generic version.
+ */
+
+// Explicit std::string definition: Qt can't implicitly convert a std::string to a QVariant, nor
+// can it implicitly convert a QVariant back to a {std::,Q}string
+template <>
+void Config::ReadBasicSetting(Settings::BasicSetting<std::string>& setting) {
+ const QString name = QString::fromStdString(setting.GetLabel());
+ const auto default_value = QString::fromStdString(setting.GetDefault());
+ if (qt_config->value(name + QStringLiteral("/default"), false).toBool()) {
+ setting.SetValue(default_value.toStdString());
+ } else {
+ setting.SetValue(qt_config->value(name, default_value).toString().toStdString());
+ }
+}
+
+template <typename Type>
+void Config::ReadBasicSetting(Settings::BasicSetting<Type>& setting) {
+ const QString name = QString::fromStdString(setting.GetLabel());
+ const Type default_value = setting.GetDefault();
+ if (qt_config->value(name + QStringLiteral("/default"), false).toBool()) {
+ setting.SetValue(default_value);
+ } else {
+ setting.SetValue(
+ static_cast<QVariant>(qt_config->value(name, default_value)).value<Type>());
+ }
+}
+
+// Explicit std::string definition: Qt can't implicitly convert a std::string to a QVariant
+template <>
+void Config::WriteBasicSetting(const Settings::BasicSetting<std::string>& setting) {
+ const QString name = QString::fromStdString(setting.GetLabel());
+ const std::string& value = setting.GetValue();
+ qt_config->setValue(name + QStringLiteral("/default"), value == setting.GetDefault());
+ qt_config->setValue(name, QString::fromStdString(value));
+}
+
+template <typename Type>
+void Config::WriteBasicSetting(const Settings::BasicSetting<Type>& setting) {
+ const QString name = QString::fromStdString(setting.GetLabel());
+ const Type value = setting.GetValue();
+ qt_config->setValue(name + QStringLiteral("/default"), value == setting.GetDefault());
+ qt_config->setValue(name, value);
+}
+
+template <typename Type>
+void Config::WriteGlobalSetting(const Settings::Setting<Type>& setting) {
+ const QString name = QString::fromStdString(setting.GetLabel());
+ const Type& value = setting.GetValue(global);
+ if (!global) {
+ qt_config->setValue(name + QStringLiteral("/use_global"), setting.UsingGlobal());
+ }
+ if (global || !setting.UsingGlobal()) {
+ qt_config->setValue(name + QStringLiteral("/default"), value == setting.GetDefault());
+ qt_config->setValue(name, value);
+ }
+}
+
void Config::ReadPlayerValue(std::size_t player_index) {
const QString player_prefix = [this, player_index] {
if (type == ConfigType::InputProfile) {
@@ -394,8 +455,7 @@ void Config::ReadPlayerValue(std::size_t player_index) {
}
void Config::ReadDebugValues() {
- Settings::values.debug_pad_enabled =
- ReadSetting(QStringLiteral("debug_pad_enabled"), false).toBool();
+ ReadBasicSetting(Settings::values.debug_pad_enabled);
for (int i = 0; i < Settings::NativeButton::NumButtons; ++i) {
const std::string default_param = InputCommon::GenerateKeyboardParam(default_buttons[i]);
@@ -431,8 +491,7 @@ void Config::ReadDebugValues() {
}
void Config::ReadKeyboardValues() {
- Settings::values.keyboard_enabled =
- ReadSetting(QStringLiteral("keyboard_enabled"), false).toBool();
+ ReadBasicSetting(Settings::values.keyboard_enabled);
std::transform(default_keyboard_keys.begin(), default_keyboard_keys.end(),
Settings::values.keyboard_keys.begin(), InputCommon::GenerateKeyboardParam);
@@ -445,7 +504,7 @@ void Config::ReadKeyboardValues() {
}
void Config::ReadMouseValues() {
- Settings::values.mouse_enabled = ReadSetting(QStringLiteral("mouse_enabled"), false).toBool();
+ ReadBasicSetting(Settings::values.mouse_enabled);
for (int i = 0; i < Settings::NativeMouseButton::NumMouseButtons; ++i) {
const std::string default_param =
@@ -480,18 +539,11 @@ void Config::ReadAudioValues() {
qt_config->beginGroup(QStringLiteral("Audio"));
if (global) {
- Settings::values.sink_id =
- ReadSetting(QStringLiteral("output_engine"), QStringLiteral("auto"))
- .toString()
- .toStdString();
- Settings::values.audio_device_id =
- ReadSetting(QStringLiteral("output_device"), QStringLiteral("auto"))
- .toString()
- .toStdString();
+ ReadBasicSetting(Settings::values.audio_device_id);
+ ReadBasicSetting(Settings::values.sink_id);
}
- ReadSettingGlobal(Settings::values.enable_audio_stretching,
- QStringLiteral("enable_audio_stretching"), true);
- ReadSettingGlobal(Settings::values.volume, QStringLiteral("volume"), 1);
+ ReadGlobalSetting(Settings::values.enable_audio_stretching);
+ ReadGlobalSetting(Settings::values.volume);
qt_config->endGroup();
}
@@ -508,13 +560,11 @@ void Config::ReadControlValues() {
ReadTouchscreenValues();
ReadMotionTouchValues();
- Settings::values.emulate_analog_keyboard =
- ReadSetting(QStringLiteral("emulate_analog_keyboard"), false).toBool();
+ ReadBasicSetting(Settings::values.emulate_analog_keyboard);
Settings::values.mouse_panning = false;
- Settings::values.mouse_panning_sensitivity =
- ReadSetting(QStringLiteral("mouse_panning_sensitivity"), 1).toFloat();
+ ReadBasicSetting(Settings::values.mouse_panning_sensitivity);
- ReadSettingGlobal(Settings::values.use_docked_mode, QStringLiteral("use_docked_mode"), true);
+ ReadGlobalSetting(Settings::values.use_docked_mode);
// Disable docked mode if handheld is selected
const auto controller_type = Settings::values.players.GetValue()[0].controller_type;
@@ -522,11 +572,9 @@ void Config::ReadControlValues() {
Settings::values.use_docked_mode.SetValue(false);
}
- ReadSettingGlobal(Settings::values.vibration_enabled, QStringLiteral("vibration_enabled"),
- true);
- ReadSettingGlobal(Settings::values.enable_accurate_vibrations,
- QStringLiteral("enable_accurate_vibrations"), false);
- ReadSettingGlobal(Settings::values.motion_enabled, QStringLiteral("motion_enabled"), true);
+ ReadGlobalSetting(Settings::values.vibration_enabled);
+ ReadGlobalSetting(Settings::values.enable_accurate_vibrations);
+ ReadGlobalSetting(Settings::values.motion_enabled);
qt_config->endGroup();
}
@@ -564,33 +612,19 @@ void Config::ReadMotionTouchValues() {
}
qt_config->endArray();
- Settings::values.motion_device =
- ReadSetting(QStringLiteral("motion_device"),
- QStringLiteral("engine:motion_emu,update_period:100,sensitivity:0.01"))
- .toString()
- .toStdString();
- Settings::values.touch_device =
- ReadSetting(QStringLiteral("touch_device"),
- QStringLiteral("min_x:100,min_y:50,max_x:1800,max_y:850"))
- .toString()
- .toStdString();
- Settings::values.use_touch_from_button =
- ReadSetting(QStringLiteral("use_touch_from_button"), false).toBool();
- Settings::values.touch_from_button_map_index =
- ReadSetting(QStringLiteral("touch_from_button_map"), 0).toInt();
- Settings::values.touch_from_button_map_index =
- std::clamp(Settings::values.touch_from_button_map_index, 0, num_touch_from_button_maps - 1);
- Settings::values.udp_input_servers =
- ReadSetting(QStringLiteral("udp_input_servers"),
- QString::fromUtf8(InputCommon::CemuhookUDP::DEFAULT_SRV))
- .toString()
- .toStdString();
+ ReadBasicSetting(Settings::values.motion_device);
+ ReadBasicSetting(Settings::values.touch_device);
+ ReadBasicSetting(Settings::values.use_touch_from_button);
+ ReadBasicSetting(Settings::values.touch_from_button_map_index);
+ Settings::values.touch_from_button_map_index = std::clamp(
+ Settings::values.touch_from_button_map_index.GetValue(), 0, num_touch_from_button_maps - 1);
+ ReadBasicSetting(Settings::values.udp_input_servers);
}
void Config::ReadCoreValues() {
qt_config->beginGroup(QStringLiteral("Core"));
- ReadSettingGlobal(Settings::values.use_multi_core, QStringLiteral("use_multi_core"), true);
+ ReadGlobalSetting(Settings::values.use_multi_core);
qt_config->endGroup();
}
@@ -598,7 +632,7 @@ void Config::ReadCoreValues() {
void Config::ReadDataStorageValues() {
qt_config->beginGroup(QStringLiteral("Data Storage"));
- Settings::values.use_virtual_sd = ReadSetting(QStringLiteral("use_virtual_sd"), true).toBool();
+ ReadBasicSetting(Settings::values.use_virtual_sd);
FS::SetYuzuPath(
FS::YuzuPath::NANDDir,
qt_config
@@ -627,12 +661,9 @@ void Config::ReadDataStorageValues() {
QString::fromStdString(FS::GetYuzuPathString(FS::YuzuPath::DumpDir)))
.toString()
.toStdString());
- Settings::values.gamecard_inserted =
- ReadSetting(QStringLiteral("gamecard_inserted"), false).toBool();
- Settings::values.gamecard_current_game =
- ReadSetting(QStringLiteral("gamecard_current_game"), false).toBool();
- Settings::values.gamecard_path =
- ReadSetting(QStringLiteral("gamecard_path"), QString{}).toString().toStdString();
+ ReadBasicSetting(Settings::values.gamecard_inserted);
+ ReadBasicSetting(Settings::values.gamecard_current_game);
+ ReadBasicSetting(Settings::values.gamecard_path);
qt_config->endGroup();
}
@@ -643,34 +674,24 @@ void Config::ReadDebuggingValues() {
// Intentionally not using the QT default setting as this is intended to be changed in the ini
Settings::values.record_frame_times =
qt_config->value(QStringLiteral("record_frame_times"), false).toBool();
- Settings::values.program_args =
- ReadSetting(QStringLiteral("program_args"), QString{}).toString().toStdString();
- Settings::values.dump_exefs = ReadSetting(QStringLiteral("dump_exefs"), false).toBool();
- Settings::values.dump_nso = ReadSetting(QStringLiteral("dump_nso"), false).toBool();
- Settings::values.enable_fs_access_log =
- ReadSetting(QStringLiteral("enable_fs_access_log"), false).toBool();
- Settings::values.reporting_services =
- ReadSetting(QStringLiteral("reporting_services"), false).toBool();
- Settings::values.quest_flag = ReadSetting(QStringLiteral("quest_flag"), false).toBool();
- Settings::values.disable_macro_jit =
- ReadSetting(QStringLiteral("disable_macro_jit"), false).toBool();
- Settings::values.extended_logging =
- ReadSetting(QStringLiteral("extended_logging"), false).toBool();
- Settings::values.use_debug_asserts =
- ReadSetting(QStringLiteral("use_debug_asserts"), false).toBool();
- Settings::values.use_auto_stub = ReadSetting(QStringLiteral("use_auto_stub"), false).toBool();
+ ReadBasicSetting(Settings::values.program_args);
+ ReadBasicSetting(Settings::values.dump_exefs);
+ ReadBasicSetting(Settings::values.dump_nso);
+ ReadBasicSetting(Settings::values.enable_fs_access_log);
+ ReadBasicSetting(Settings::values.reporting_services);
+ ReadBasicSetting(Settings::values.quest_flag);
+ ReadBasicSetting(Settings::values.disable_macro_jit);
+ ReadBasicSetting(Settings::values.extended_logging);
+ ReadBasicSetting(Settings::values.use_debug_asserts);
+ ReadBasicSetting(Settings::values.use_auto_stub);
qt_config->endGroup();
}
void Config::ReadServiceValues() {
qt_config->beginGroup(QStringLiteral("Services"));
- Settings::values.bcat_backend =
- ReadSetting(QStringLiteral("bcat_backend"), QStringLiteral("none"))
- .toString()
- .toStdString();
- Settings::values.bcat_boxcat_local =
- ReadSetting(QStringLiteral("bcat_boxcat_local"), false).toBool();
+ ReadBasicSetting(Settings::values.bcat_backend);
+ ReadBasicSetting(Settings::values.bcat_boxcat_local);
qt_config->endGroup();
}
@@ -696,11 +717,8 @@ void Config::ReadDisabledAddOnValues() {
void Config::ReadMiscellaneousValues() {
qt_config->beginGroup(QStringLiteral("Miscellaneous"));
- Settings::values.log_filter =
- ReadSetting(QStringLiteral("log_filter"), QStringLiteral("*:Info"))
- .toString()
- .toStdString();
- Settings::values.use_dev_keys = ReadSetting(QStringLiteral("use_dev_keys"), false).toBool();
+ ReadBasicSetting(Settings::values.log_filter);
+ ReadBasicSetting(Settings::values.use_dev_keys);
qt_config->endGroup();
}
@@ -750,36 +768,31 @@ void Config::ReadPathValues() {
void Config::ReadCpuValues() {
qt_config->beginGroup(QStringLiteral("Cpu"));
- ReadSettingGlobal(Settings::values.cpu_accuracy, QStringLiteral("cpu_accuracy"), 0);
+ ReadBasicSetting(Settings::values.cpu_accuracy_first_time);
+ if (Settings::values.cpu_accuracy_first_time) {
+ Settings::values.cpu_accuracy.SetValue(Settings::values.cpu_accuracy.GetDefault());
+ Settings::values.cpu_accuracy_first_time.SetValue(false);
+ } else {
+ ReadGlobalSetting(Settings::values.cpu_accuracy);
+ }
- ReadSettingGlobal(Settings::values.cpuopt_unsafe_unfuse_fma,
- QStringLiteral("cpuopt_unsafe_unfuse_fma"), true);
- ReadSettingGlobal(Settings::values.cpuopt_unsafe_reduce_fp_error,
- QStringLiteral("cpuopt_unsafe_reduce_fp_error"), true);
- ReadSettingGlobal(Settings::values.cpuopt_unsafe_inaccurate_nan,
- QStringLiteral("cpuopt_unsafe_inaccurate_nan"), true);
- ReadSettingGlobal(Settings::values.cpuopt_unsafe_fastmem_check,
- QStringLiteral("cpuopt_unsafe_fastmem_check"), true);
+ ReadGlobalSetting(Settings::values.cpuopt_unsafe_unfuse_fma);
+ ReadGlobalSetting(Settings::values.cpuopt_unsafe_reduce_fp_error);
+ ReadGlobalSetting(Settings::values.cpuopt_unsafe_ignore_standard_fpcr);
+ ReadGlobalSetting(Settings::values.cpuopt_unsafe_inaccurate_nan);
+ ReadGlobalSetting(Settings::values.cpuopt_unsafe_fastmem_check);
if (global) {
- Settings::values.cpuopt_page_tables =
- ReadSetting(QStringLiteral("cpuopt_page_tables"), true).toBool();
- Settings::values.cpuopt_block_linking =
- ReadSetting(QStringLiteral("cpuopt_block_linking"), true).toBool();
- Settings::values.cpuopt_return_stack_buffer =
- ReadSetting(QStringLiteral("cpuopt_return_stack_buffer"), true).toBool();
- Settings::values.cpuopt_fast_dispatcher =
- ReadSetting(QStringLiteral("cpuopt_fast_dispatcher"), true).toBool();
- Settings::values.cpuopt_context_elimination =
- ReadSetting(QStringLiteral("cpuopt_context_elimination"), true).toBool();
- Settings::values.cpuopt_const_prop =
- ReadSetting(QStringLiteral("cpuopt_const_prop"), true).toBool();
- Settings::values.cpuopt_misc_ir =
- ReadSetting(QStringLiteral("cpuopt_misc_ir"), true).toBool();
- Settings::values.cpuopt_reduce_misalign_checks =
- ReadSetting(QStringLiteral("cpuopt_reduce_misalign_checks"), true).toBool();
- Settings::values.cpuopt_fastmem =
- ReadSetting(QStringLiteral("cpuopt_fastmem"), true).toBool();
+ ReadBasicSetting(Settings::values.cpu_debug_mode);
+ ReadBasicSetting(Settings::values.cpuopt_page_tables);
+ ReadBasicSetting(Settings::values.cpuopt_block_linking);
+ ReadBasicSetting(Settings::values.cpuopt_return_stack_buffer);
+ ReadBasicSetting(Settings::values.cpuopt_fast_dispatcher);
+ ReadBasicSetting(Settings::values.cpuopt_context_elimination);
+ ReadBasicSetting(Settings::values.cpuopt_const_prop);
+ ReadBasicSetting(Settings::values.cpuopt_misc_ir);
+ ReadBasicSetting(Settings::values.cpuopt_reduce_misalign_checks);
+ ReadBasicSetting(Settings::values.cpuopt_fastmem);
}
qt_config->endGroup();
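
The cpu_accuracy block above introduces a one-shot migration: a persisted cpu_accuracy_first_time flag forces the setting back to its default exactly once, then clears itself so later runs honour whatever the user picks. The same pattern in isolation; the key names mirror the ones used above, and the literal 0 stands in for the setting's real default:

    #include <QSettings>
    #include <QString>

    void ApplyCpuAccuracyMigration(QSettings& config) {
        const bool first_time =
            config.value(QStringLiteral("cpu_accuracy_first_time"), true).toBool();
        if (first_time) {
            // 0 is a placeholder for Settings::values.cpu_accuracy.GetDefault().
            config.setValue(QStringLiteral("cpu_accuracy"), 0);
            config.setValue(QStringLiteral("cpu_accuracy_first_time"), false);
        }
    }
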
@@ -788,38 +801,33 @@ void Config::ReadCpuValues() {
void Config::ReadRendererValues() {
qt_config->beginGroup(QStringLiteral("Renderer"));
- ReadSettingGlobal(Settings::values.renderer_backend, QStringLiteral("backend"), 0);
- ReadSettingGlobal(Settings::values.renderer_debug, QStringLiteral("debug"), false);
- ReadSettingGlobal(Settings::values.vulkan_device, QStringLiteral("vulkan_device"), 0);
-#ifdef _WIN32
- ReadSettingGlobal(Settings::values.fullscreen_mode, QStringLiteral("fullscreen_mode"), 0);
-#else
- // *nix platforms may have issues with the borderless windowed fullscreen mode.
- // Default to exclusive fullscreen on these platforms for now.
- ReadSettingGlobal(Settings::values.fullscreen_mode, QStringLiteral("fullscreen_mode"), 1);
-#endif
- ReadSettingGlobal(Settings::values.aspect_ratio, QStringLiteral("aspect_ratio"), 0);
- ReadSettingGlobal(Settings::values.max_anisotropy, QStringLiteral("max_anisotropy"), 0);
- ReadSettingGlobal(Settings::values.use_frame_limit, QStringLiteral("use_frame_limit"), true);
- ReadSettingGlobal(Settings::values.frame_limit, QStringLiteral("frame_limit"), 100);
- ReadSettingGlobal(Settings::values.use_disk_shader_cache,
- QStringLiteral("use_disk_shader_cache"), true);
- ReadSettingGlobal(Settings::values.gpu_accuracy, QStringLiteral("gpu_accuracy"), 1);
- ReadSettingGlobal(Settings::values.use_asynchronous_gpu_emulation,
- QStringLiteral("use_asynchronous_gpu_emulation"), true);
- ReadSettingGlobal(Settings::values.use_nvdec_emulation, QStringLiteral("use_nvdec_emulation"),
- true);
- ReadSettingGlobal(Settings::values.accelerate_astc, QStringLiteral("accelerate_astc"), true);
- ReadSettingGlobal(Settings::values.use_vsync, QStringLiteral("use_vsync"), true);
- ReadSettingGlobal(Settings::values.use_assembly_shaders, QStringLiteral("use_assembly_shaders"),
- false);
- ReadSettingGlobal(Settings::values.use_asynchronous_shaders,
- QStringLiteral("use_asynchronous_shaders"), false);
- ReadSettingGlobal(Settings::values.use_fast_gpu_time, QStringLiteral("use_fast_gpu_time"),
- true);
- ReadSettingGlobal(Settings::values.bg_red, QStringLiteral("bg_red"), 0.0);
- ReadSettingGlobal(Settings::values.bg_green, QStringLiteral("bg_green"), 0.0);
- ReadSettingGlobal(Settings::values.bg_blue, QStringLiteral("bg_blue"), 0.0);
+ ReadGlobalSetting(Settings::values.renderer_backend);
+ ReadGlobalSetting(Settings::values.vulkan_device);
+ ReadGlobalSetting(Settings::values.fullscreen_mode);
+ ReadGlobalSetting(Settings::values.aspect_ratio);
+ ReadGlobalSetting(Settings::values.max_anisotropy);
+ ReadGlobalSetting(Settings::values.use_speed_limit);
+ ReadGlobalSetting(Settings::values.speed_limit);
+ ReadGlobalSetting(Settings::values.use_disk_shader_cache);
+ ReadGlobalSetting(Settings::values.gpu_accuracy);
+ ReadGlobalSetting(Settings::values.use_asynchronous_gpu_emulation);
+ ReadGlobalSetting(Settings::values.use_nvdec_emulation);
+ ReadGlobalSetting(Settings::values.accelerate_astc);
+ ReadGlobalSetting(Settings::values.use_vsync);
+ ReadGlobalSetting(Settings::values.shader_backend);
+ ReadGlobalSetting(Settings::values.use_asynchronous_shaders);
+ ReadGlobalSetting(Settings::values.use_fast_gpu_time);
+ ReadGlobalSetting(Settings::values.use_caches_gc);
+ ReadGlobalSetting(Settings::values.bg_red);
+ ReadGlobalSetting(Settings::values.bg_green);
+ ReadGlobalSetting(Settings::values.bg_blue);
+
+ if (global) {
+ ReadBasicSetting(Settings::values.fps_cap);
+ ReadBasicSetting(Settings::values.renderer_debug);
+ ReadBasicSetting(Settings::values.enable_nsight_aftermath);
+ ReadBasicSetting(Settings::values.disable_shader_loop_safety_checks);
+ }
qt_config->endGroup();
}
@@ -864,15 +872,15 @@ void Config::ReadShortcutValues() {
void Config::ReadSystemValues() {
qt_config->beginGroup(QStringLiteral("System"));
- ReadSettingGlobal(Settings::values.current_user, QStringLiteral("current_user"), 0);
- Settings::values.current_user =
- std::clamp<int>(Settings::values.current_user, 0, Service::Account::MAX_USERS - 1);
+ ReadBasicSetting(Settings::values.current_user);
+ Settings::values.current_user = std::clamp<int>(Settings::values.current_user.GetValue(), 0,
+ Service::Account::MAX_USERS - 1);
- ReadSettingGlobal(Settings::values.language_index, QStringLiteral("language_index"), 1);
+ ReadGlobalSetting(Settings::values.language_index);
- ReadSettingGlobal(Settings::values.region_index, QStringLiteral("region_index"), 1);
+ ReadGlobalSetting(Settings::values.region_index);
- ReadSettingGlobal(Settings::values.time_zone_index, QStringLiteral("time_zone_index"), 0);
+ ReadGlobalSetting(Settings::values.time_zone_index);
bool rng_seed_enabled;
ReadSettingGlobal(rng_seed_enabled, QStringLiteral("rng_seed_enabled"), false);
@@ -898,7 +906,7 @@ void Config::ReadSystemValues() {
}
}
- ReadSettingGlobal(Settings::values.sound_index, QStringLiteral("sound_index"), 1);
+ ReadGlobalSetting(Settings::values.sound_index);
qt_config->endGroup();
}
@@ -909,10 +917,8 @@ void Config::ReadUIValues() {
UISettings::values.theme =
ReadSetting(QStringLiteral("theme"), QString::fromUtf8(UISettings::themes[0].second))
.toString();
- UISettings::values.enable_discord_presence =
- ReadSetting(QStringLiteral("enable_discord_presence"), true).toBool();
- UISettings::values.select_user_on_boot =
- ReadSetting(QStringLiteral("select_user_on_boot"), false).toBool();
+ ReadBasicSetting(UISettings::values.enable_discord_presence);
+ ReadBasicSetting(UISettings::values.select_user_on_boot);
ReadUIGamelistValues();
ReadUILayoutValues();
@@ -920,24 +926,17 @@ void Config::ReadUIValues() {
ReadScreenshotValues();
ReadShortcutValues();
- UISettings::values.single_window_mode =
- ReadSetting(QStringLiteral("singleWindowMode"), true).toBool();
- UISettings::values.fullscreen = ReadSetting(QStringLiteral("fullscreen"), false).toBool();
- UISettings::values.display_titlebar =
- ReadSetting(QStringLiteral("displayTitleBars"), true).toBool();
- UISettings::values.show_filter_bar =
- ReadSetting(QStringLiteral("showFilterBar"), true).toBool();
- UISettings::values.show_status_bar =
- ReadSetting(QStringLiteral("showStatusBar"), true).toBool();
- UISettings::values.confirm_before_closing =
- ReadSetting(QStringLiteral("confirmClose"), true).toBool();
- UISettings::values.first_start = ReadSetting(QStringLiteral("firstStart"), true).toBool();
- UISettings::values.callout_flags = ReadSetting(QStringLiteral("calloutFlags"), 0).toUInt();
- UISettings::values.show_console = ReadSetting(QStringLiteral("showConsole"), false).toBool();
- UISettings::values.pause_when_in_background =
- ReadSetting(QStringLiteral("pauseWhenInBackground"), false).toBool();
- UISettings::values.hide_mouse =
- ReadSetting(QStringLiteral("hideInactiveMouse"), false).toBool();
+ ReadBasicSetting(UISettings::values.single_window_mode);
+ ReadBasicSetting(UISettings::values.fullscreen);
+ ReadBasicSetting(UISettings::values.display_titlebar);
+ ReadBasicSetting(UISettings::values.show_filter_bar);
+ ReadBasicSetting(UISettings::values.show_status_bar);
+ ReadBasicSetting(UISettings::values.confirm_before_closing);
+ ReadBasicSetting(UISettings::values.first_start);
+ ReadBasicSetting(UISettings::values.callout_flags);
+ ReadBasicSetting(UISettings::values.show_console);
+ ReadBasicSetting(UISettings::values.pause_when_in_background);
+ ReadBasicSetting(UISettings::values.hide_mouse);
qt_config->endGroup();
}
@@ -945,12 +944,11 @@ void Config::ReadUIValues() {
void Config::ReadUIGamelistValues() {
qt_config->beginGroup(QStringLiteral("UIGameList"));
- UISettings::values.show_add_ons = ReadSetting(QStringLiteral("show_add_ons"), true).toBool();
- UISettings::values.icon_size = ReadSetting(QStringLiteral("icon_size"), 64).toUInt();
- UISettings::values.row_1_text_id = ReadSetting(QStringLiteral("row_1_text_id"), 3).toUInt();
- UISettings::values.row_2_text_id = ReadSetting(QStringLiteral("row_2_text_id"), 2).toUInt();
- UISettings::values.cache_game_list =
- ReadSetting(QStringLiteral("cache_game_list"), true).toBool();
+ ReadBasicSetting(UISettings::values.show_add_ons);
+ ReadBasicSetting(UISettings::values.icon_size);
+ ReadBasicSetting(UISettings::values.row_1_text_id);
+ ReadBasicSetting(UISettings::values.row_2_text_id);
+ ReadBasicSetting(UISettings::values.cache_game_list);
const int favorites_size = qt_config->beginReadArray(QStringLiteral("favorites"));
for (int i = 0; i < favorites_size; i++) {
qt_config->setArrayIndex(i);
@@ -973,8 +971,7 @@ void Config::ReadUILayoutValues() {
ReadSetting(QStringLiteral("gameListHeaderState")).toByteArray();
UISettings::values.microprofile_geometry =
ReadSetting(QStringLiteral("microProfileDialogGeometry")).toByteArray();
- UISettings::values.microprofile_visible =
- ReadSetting(QStringLiteral("microProfileDialogVisible"), false).toBool();
+ ReadBasicSetting(UISettings::values.microprofile_visible);
qt_config->endGroup();
}
@@ -982,16 +979,10 @@ void Config::ReadUILayoutValues() {
void Config::ReadWebServiceValues() {
qt_config->beginGroup(QStringLiteral("WebService"));
- Settings::values.enable_telemetry =
- ReadSetting(QStringLiteral("enable_telemetry"), true).toBool();
- Settings::values.web_api_url =
- ReadSetting(QStringLiteral("web_api_url"), QStringLiteral("https://api.yuzu-emu.org"))
- .toString()
- .toStdString();
- Settings::values.yuzu_username =
- ReadSetting(QStringLiteral("yuzu_username")).toString().toStdString();
- Settings::values.yuzu_token =
- ReadSetting(QStringLiteral("yuzu_token")).toString().toStdString();
+ ReadBasicSetting(Settings::values.enable_telemetry);
+ ReadBasicSetting(Settings::values.web_api_url);
+ ReadBasicSetting(Settings::values.yuzu_username);
+ ReadBasicSetting(Settings::values.yuzu_token);
qt_config->endGroup();
}
@@ -1077,7 +1068,7 @@ void Config::SavePlayerValue(std::size_t player_index) {
}
void Config::SaveDebugValues() {
- WriteSetting(QStringLiteral("debug_pad_enabled"), Settings::values.debug_pad_enabled, false);
+ WriteBasicSetting(Settings::values.debug_pad_enabled);
for (int i = 0; i < Settings::NativeButton::NumButtons; ++i) {
const std::string default_param = InputCommon::GenerateKeyboardParam(default_buttons[i]);
WriteSetting(QStringLiteral("debug_pad_") +
@@ -1097,7 +1088,7 @@ void Config::SaveDebugValues() {
}
void Config::SaveMouseValues() {
- WriteSetting(QStringLiteral("mouse_enabled"), Settings::values.mouse_enabled, false);
+ WriteBasicSetting(Settings::values.mouse_enabled);
for (int i = 0; i < Settings::NativeMouseButton::NumMouseButtons; ++i) {
const std::string default_param =
@@ -1120,19 +1111,11 @@ void Config::SaveTouchscreenValues() {
}
void Config::SaveMotionTouchValues() {
- WriteSetting(QStringLiteral("motion_device"),
- QString::fromStdString(Settings::values.motion_device),
- QStringLiteral("engine:motion_emu,update_period:100,sensitivity:0.01"));
- WriteSetting(QStringLiteral("touch_device"),
- QString::fromStdString(Settings::values.touch_device),
- QStringLiteral("engine:emu_window"));
- WriteSetting(QStringLiteral("use_touch_from_button"), Settings::values.use_touch_from_button,
- false);
- WriteSetting(QStringLiteral("touch_from_button_map"),
- Settings::values.touch_from_button_map_index, 0);
- WriteSetting(QStringLiteral("udp_input_servers"),
- QString::fromStdString(Settings::values.udp_input_servers),
- QString::fromUtf8(InputCommon::CemuhookUDP::DEFAULT_SRV));
+ WriteBasicSetting(Settings::values.motion_device);
+ WriteBasicSetting(Settings::values.touch_device);
+ WriteBasicSetting(Settings::values.use_touch_from_button);
+ WriteBasicSetting(Settings::values.touch_from_button_map_index);
+ WriteBasicSetting(Settings::values.udp_input_servers);
qt_config->beginWriteArray(QStringLiteral("touch_from_button_maps"));
for (std::size_t p = 0; p < Settings::values.touch_from_button_maps.size(); ++p) {
@@ -1175,15 +1158,11 @@ void Config::SaveAudioValues() {
qt_config->beginGroup(QStringLiteral("Audio"));
if (global) {
- WriteSetting(QStringLiteral("output_engine"),
- QString::fromStdString(Settings::values.sink_id), QStringLiteral("auto"));
- WriteSetting(QStringLiteral("output_device"),
- QString::fromStdString(Settings::values.audio_device_id),
- QStringLiteral("auto"));
+ WriteBasicSetting(Settings::values.sink_id);
+ WriteBasicSetting(Settings::values.audio_device_id);
}
- WriteSettingGlobal(QStringLiteral("enable_audio_stretching"),
- Settings::values.enable_audio_stretching, true);
- WriteSettingGlobal(QStringLiteral("volume"), Settings::values.volume, 1.0f);
+ WriteGlobalSetting(Settings::values.enable_audio_stretching);
+ WriteGlobalSetting(Settings::values.volume);
qt_config->endGroup();
}
@@ -1199,30 +1178,21 @@ void Config::SaveControlValues() {
SaveTouchscreenValues();
SaveMotionTouchValues();
- WriteSettingGlobal(QStringLiteral("use_docked_mode"), Settings::values.use_docked_mode, true);
- WriteSettingGlobal(QStringLiteral("vibration_enabled"), Settings::values.vibration_enabled,
- true);
- WriteSettingGlobal(QStringLiteral("enable_accurate_vibrations"),
- Settings::values.enable_accurate_vibrations, false);
- WriteSettingGlobal(QStringLiteral("motion_enabled"), Settings::values.motion_enabled, true);
- WriteSetting(QStringLiteral("motion_device"),
- QString::fromStdString(Settings::values.motion_device),
- QStringLiteral("engine:motion_emu,update_period:100,sensitivity:0.01"));
- WriteSetting(QStringLiteral("touch_device"),
- QString::fromStdString(Settings::values.touch_device),
- QStringLiteral("engine:emu_window"));
- WriteSetting(QStringLiteral("keyboard_enabled"), Settings::values.keyboard_enabled, false);
- WriteSetting(QStringLiteral("emulate_analog_keyboard"),
- Settings::values.emulate_analog_keyboard, false);
- WriteSetting(QStringLiteral("mouse_panning_sensitivity"),
- Settings::values.mouse_panning_sensitivity, 1.0f);
+ WriteGlobalSetting(Settings::values.use_docked_mode);
+ WriteGlobalSetting(Settings::values.vibration_enabled);
+ WriteGlobalSetting(Settings::values.enable_accurate_vibrations);
+ WriteGlobalSetting(Settings::values.motion_enabled);
+ WriteBasicSetting(Settings::values.keyboard_enabled);
+ WriteBasicSetting(Settings::values.emulate_analog_keyboard);
+ WriteBasicSetting(Settings::values.mouse_panning_sensitivity);
+
qt_config->endGroup();
}
void Config::SaveCoreValues() {
qt_config->beginGroup(QStringLiteral("Core"));
- WriteSettingGlobal(QStringLiteral("use_multi_core"), Settings::values.use_multi_core, true);
+ WriteGlobalSetting(Settings::values.use_multi_core);
qt_config->endGroup();
}
@@ -1230,7 +1200,7 @@ void Config::SaveCoreValues() {
void Config::SaveDataStorageValues() {
qt_config->beginGroup(QStringLiteral("Data Storage"));
- WriteSetting(QStringLiteral("use_virtual_sd"), Settings::values.use_virtual_sd, true);
+ WriteBasicSetting(Settings::values.use_virtual_sd);
WriteSetting(QStringLiteral("nand_directory"),
QString::fromStdString(FS::GetYuzuPathString(FS::YuzuPath::NANDDir)),
QString::fromStdString(FS::GetYuzuPathString(FS::YuzuPath::NANDDir)));
@@ -1243,11 +1213,9 @@ void Config::SaveDataStorageValues() {
WriteSetting(QStringLiteral("dump_directory"),
QString::fromStdString(FS::GetYuzuPathString(FS::YuzuPath::DumpDir)),
QString::fromStdString(FS::GetYuzuPathString(FS::YuzuPath::DumpDir)));
- WriteSetting(QStringLiteral("gamecard_inserted"), Settings::values.gamecard_inserted, false);
- WriteSetting(QStringLiteral("gamecard_current_game"), Settings::values.gamecard_current_game,
- false);
- WriteSetting(QStringLiteral("gamecard_path"),
- QString::fromStdString(Settings::values.gamecard_path), QString{});
+ WriteBasicSetting(Settings::values.gamecard_inserted);
+ WriteBasicSetting(Settings::values.gamecard_current_game);
+ WriteBasicSetting(Settings::values.gamecard_path);
qt_config->endGroup();
}
@@ -1257,24 +1225,23 @@ void Config::SaveDebuggingValues() {
    // Intentionally not using the Qt default setting as this is intended to be changed in the ini
qt_config->setValue(QStringLiteral("record_frame_times"), Settings::values.record_frame_times);
- WriteSetting(QStringLiteral("program_args"),
- QString::fromStdString(Settings::values.program_args), QString{});
- WriteSetting(QStringLiteral("dump_exefs"), Settings::values.dump_exefs, false);
- WriteSetting(QStringLiteral("dump_nso"), Settings::values.dump_nso, false);
- WriteSetting(QStringLiteral("enable_fs_access_log"), Settings::values.enable_fs_access_log,
- false);
- WriteSetting(QStringLiteral("quest_flag"), Settings::values.quest_flag, false);
- WriteSetting(QStringLiteral("use_debug_asserts"), Settings::values.use_debug_asserts, false);
- WriteSetting(QStringLiteral("disable_macro_jit"), Settings::values.disable_macro_jit, false);
+ WriteBasicSetting(Settings::values.program_args);
+ WriteBasicSetting(Settings::values.dump_exefs);
+ WriteBasicSetting(Settings::values.dump_nso);
+ WriteBasicSetting(Settings::values.enable_fs_access_log);
+ WriteBasicSetting(Settings::values.quest_flag);
+ WriteBasicSetting(Settings::values.use_debug_asserts);
+ WriteBasicSetting(Settings::values.disable_macro_jit);
qt_config->endGroup();
}
void Config::SaveServiceValues() {
qt_config->beginGroup(QStringLiteral("Services"));
- WriteSetting(QStringLiteral("bcat_backend"),
- QString::fromStdString(Settings::values.bcat_backend), QStringLiteral("none"));
- WriteSetting(QStringLiteral("bcat_boxcat_local"), Settings::values.bcat_boxcat_local, false);
+
+ WriteBasicSetting(Settings::values.bcat_backend);
+ WriteBasicSetting(Settings::values.bcat_boxcat_local);
+
qt_config->endGroup();
}
@@ -1300,9 +1267,8 @@ void Config::SaveDisabledAddOnValues() {
void Config::SaveMiscellaneousValues() {
qt_config->beginGroup(QStringLiteral("Miscellaneous"));
- WriteSetting(QStringLiteral("log_filter"), QString::fromStdString(Settings::values.log_filter),
- QStringLiteral("*:Info"));
- WriteSetting(QStringLiteral("use_dev_keys"), Settings::values.use_dev_keys, false);
+ WriteBasicSetting(Settings::values.log_filter);
+ WriteBasicSetting(Settings::values.use_dev_keys);
qt_config->endGroup();
}
@@ -1330,36 +1296,29 @@ void Config::SavePathValues() {
void Config::SaveCpuValues() {
qt_config->beginGroup(QStringLiteral("Cpu"));
- WriteSettingGlobal(QStringLiteral("cpu_accuracy"),
- static_cast<u32>(Settings::values.cpu_accuracy.GetValue(global)),
- Settings::values.cpu_accuracy.UsingGlobal(),
- static_cast<u32>(Settings::CPUAccuracy::Accurate));
+ WriteBasicSetting(Settings::values.cpu_accuracy_first_time);
+ WriteSetting(QStringLiteral("cpu_accuracy"),
+ static_cast<u32>(Settings::values.cpu_accuracy.GetValue(global)),
+ static_cast<u32>(Settings::values.cpu_accuracy.GetDefault()),
+ Settings::values.cpu_accuracy.UsingGlobal());
- WriteSettingGlobal(QStringLiteral("cpuopt_unsafe_unfuse_fma"),
- Settings::values.cpuopt_unsafe_unfuse_fma, true);
- WriteSettingGlobal(QStringLiteral("cpuopt_unsafe_reduce_fp_error"),
- Settings::values.cpuopt_unsafe_reduce_fp_error, true);
- WriteSettingGlobal(QStringLiteral("cpuopt_unsafe_inaccurate_nan"),
- Settings::values.cpuopt_unsafe_inaccurate_nan, true);
- WriteSettingGlobal(QStringLiteral("cpuopt_unsafe_fastmem_check"),
- Settings::values.cpuopt_unsafe_fastmem_check, true);
+ WriteGlobalSetting(Settings::values.cpuopt_unsafe_unfuse_fma);
+ WriteGlobalSetting(Settings::values.cpuopt_unsafe_reduce_fp_error);
+ WriteGlobalSetting(Settings::values.cpuopt_unsafe_ignore_standard_fpcr);
+ WriteGlobalSetting(Settings::values.cpuopt_unsafe_inaccurate_nan);
+ WriteGlobalSetting(Settings::values.cpuopt_unsafe_fastmem_check);
if (global) {
- WriteSetting(QStringLiteral("cpuopt_page_tables"), Settings::values.cpuopt_page_tables,
- true);
- WriteSetting(QStringLiteral("cpuopt_block_linking"), Settings::values.cpuopt_block_linking,
- true);
- WriteSetting(QStringLiteral("cpuopt_return_stack_buffer"),
- Settings::values.cpuopt_return_stack_buffer, true);
- WriteSetting(QStringLiteral("cpuopt_fast_dispatcher"),
- Settings::values.cpuopt_fast_dispatcher, true);
- WriteSetting(QStringLiteral("cpuopt_context_elimination"),
- Settings::values.cpuopt_context_elimination, true);
- WriteSetting(QStringLiteral("cpuopt_const_prop"), Settings::values.cpuopt_const_prop, true);
- WriteSetting(QStringLiteral("cpuopt_misc_ir"), Settings::values.cpuopt_misc_ir, true);
- WriteSetting(QStringLiteral("cpuopt_reduce_misalign_checks"),
- Settings::values.cpuopt_reduce_misalign_checks, true);
- WriteSetting(QStringLiteral("cpuopt_fastmem"), Settings::values.cpuopt_fastmem, true);
+ WriteBasicSetting(Settings::values.cpu_debug_mode);
+ WriteBasicSetting(Settings::values.cpuopt_page_tables);
+ WriteBasicSetting(Settings::values.cpuopt_block_linking);
+ WriteBasicSetting(Settings::values.cpuopt_return_stack_buffer);
+ WriteBasicSetting(Settings::values.cpuopt_fast_dispatcher);
+ WriteBasicSetting(Settings::values.cpuopt_context_elimination);
+ WriteBasicSetting(Settings::values.cpuopt_const_prop);
+ WriteBasicSetting(Settings::values.cpuopt_misc_ir);
+ WriteBasicSetting(Settings::values.cpuopt_reduce_misalign_checks);
+ WriteBasicSetting(Settings::values.cpuopt_fastmem);
}
qt_config->endGroup();
@@ -1368,43 +1327,42 @@ void Config::SaveCpuValues() {
void Config::SaveRendererValues() {
qt_config->beginGroup(QStringLiteral("Renderer"));
- WriteSettingGlobal(QStringLiteral("backend"),
- static_cast<int>(Settings::values.renderer_backend.GetValue(global)),
- Settings::values.renderer_backend.UsingGlobal(), 0);
- WriteSetting(QStringLiteral("debug"), Settings::values.renderer_debug, false);
- WriteSettingGlobal(QStringLiteral("vulkan_device"), Settings::values.vulkan_device, 0);
-#ifdef _WIN32
- WriteSettingGlobal(QStringLiteral("fullscreen_mode"), Settings::values.fullscreen_mode, 0);
-#else
- // *nix platforms may have issues with the borderless windowed fullscreen mode.
- // Default to exclusive fullscreen on these platforms for now.
- WriteSettingGlobal(QStringLiteral("fullscreen_mode"), Settings::values.fullscreen_mode, 1);
-#endif
- WriteSettingGlobal(QStringLiteral("aspect_ratio"), Settings::values.aspect_ratio, 0);
- WriteSettingGlobal(QStringLiteral("max_anisotropy"), Settings::values.max_anisotropy, 0);
- WriteSettingGlobal(QStringLiteral("use_frame_limit"), Settings::values.use_frame_limit, true);
- WriteSettingGlobal(QStringLiteral("frame_limit"), Settings::values.frame_limit, 100);
- WriteSettingGlobal(QStringLiteral("use_disk_shader_cache"),
- Settings::values.use_disk_shader_cache, true);
- WriteSettingGlobal(QStringLiteral("gpu_accuracy"),
- static_cast<int>(Settings::values.gpu_accuracy.GetValue(global)),
- Settings::values.gpu_accuracy.UsingGlobal(), 1);
- WriteSettingGlobal(QStringLiteral("use_asynchronous_gpu_emulation"),
- Settings::values.use_asynchronous_gpu_emulation, true);
- WriteSettingGlobal(QStringLiteral("use_nvdec_emulation"), Settings::values.use_nvdec_emulation,
- true);
- WriteSettingGlobal(QStringLiteral("accelerate_astc"), Settings::values.accelerate_astc, true);
- WriteSettingGlobal(QStringLiteral("use_vsync"), Settings::values.use_vsync, true);
- WriteSettingGlobal(QStringLiteral("use_assembly_shaders"),
- Settings::values.use_assembly_shaders, false);
- WriteSettingGlobal(QStringLiteral("use_asynchronous_shaders"),
- Settings::values.use_asynchronous_shaders, false);
- WriteSettingGlobal(QStringLiteral("use_fast_gpu_time"), Settings::values.use_fast_gpu_time,
- true);
- // Cast to double because Qt's written float values are not human-readable
- WriteSettingGlobal(QStringLiteral("bg_red"), Settings::values.bg_red, 0.0);
- WriteSettingGlobal(QStringLiteral("bg_green"), Settings::values.bg_green, 0.0);
- WriteSettingGlobal(QStringLiteral("bg_blue"), Settings::values.bg_blue, 0.0);
+ WriteSetting(QString::fromStdString(Settings::values.renderer_backend.GetLabel()),
+ static_cast<u32>(Settings::values.renderer_backend.GetValue(global)),
+ static_cast<u32>(Settings::values.renderer_backend.GetDefault()),
+ Settings::values.renderer_backend.UsingGlobal());
+ WriteGlobalSetting(Settings::values.vulkan_device);
+ WriteGlobalSetting(Settings::values.fullscreen_mode);
+ WriteGlobalSetting(Settings::values.aspect_ratio);
+ WriteGlobalSetting(Settings::values.max_anisotropy);
+ WriteGlobalSetting(Settings::values.use_speed_limit);
+ WriteGlobalSetting(Settings::values.speed_limit);
+ WriteGlobalSetting(Settings::values.use_disk_shader_cache);
+ WriteSetting(QString::fromStdString(Settings::values.gpu_accuracy.GetLabel()),
+ static_cast<u32>(Settings::values.gpu_accuracy.GetValue(global)),
+ static_cast<u32>(Settings::values.gpu_accuracy.GetDefault()),
+ Settings::values.gpu_accuracy.UsingGlobal());
+ WriteGlobalSetting(Settings::values.use_asynchronous_gpu_emulation);
+ WriteGlobalSetting(Settings::values.use_nvdec_emulation);
+ WriteGlobalSetting(Settings::values.accelerate_astc);
+ WriteGlobalSetting(Settings::values.use_vsync);
+ WriteSetting(QString::fromStdString(Settings::values.shader_backend.GetLabel()),
+ static_cast<u32>(Settings::values.shader_backend.GetValue(global)),
+ static_cast<u32>(Settings::values.shader_backend.GetDefault()),
+ Settings::values.shader_backend.UsingGlobal());
+ WriteGlobalSetting(Settings::values.use_asynchronous_shaders);
+ WriteGlobalSetting(Settings::values.use_fast_gpu_time);
+ WriteGlobalSetting(Settings::values.use_caches_gc);
+ WriteGlobalSetting(Settings::values.bg_red);
+ WriteGlobalSetting(Settings::values.bg_green);
+ WriteGlobalSetting(Settings::values.bg_blue);
+
+ if (global) {
+ WriteBasicSetting(Settings::values.fps_cap);
+ WriteBasicSetting(Settings::values.renderer_debug);
+ WriteBasicSetting(Settings::values.enable_nsight_aftermath);
+ WriteBasicSetting(Settings::values.disable_shader_loop_safety_checks);
+ }
qt_config->endGroup();
}
@@ -1412,8 +1370,7 @@ void Config::SaveRendererValues() {
void Config::SaveScreenshotValues() {
qt_config->beginGroup(QStringLiteral("Screenshots"));
- WriteSetting(QStringLiteral("enable_screenshot_save_as"),
- UISettings::values.enable_screenshot_save_as);
+ WriteBasicSetting(UISettings::values.enable_screenshot_save_as);
WriteSetting(QStringLiteral("screenshot_path"),
QString::fromStdString(FS::GetYuzuPathString(FS::YuzuPath::ScreenshotsDir)));
@@ -1443,17 +1400,16 @@ void Config::SaveShortcutValues() {
void Config::SaveSystemValues() {
qt_config->beginGroup(QStringLiteral("System"));
- WriteSetting(QStringLiteral("current_user"), Settings::values.current_user, 0);
- WriteSettingGlobal(QStringLiteral("language_index"), Settings::values.language_index, 1);
- WriteSettingGlobal(QStringLiteral("region_index"), Settings::values.region_index, 1);
- WriteSettingGlobal(QStringLiteral("time_zone_index"), Settings::values.time_zone_index, 0);
+ WriteBasicSetting(Settings::values.current_user);
+ WriteGlobalSetting(Settings::values.language_index);
+ WriteGlobalSetting(Settings::values.region_index);
+ WriteGlobalSetting(Settings::values.time_zone_index);
- WriteSettingGlobal(QStringLiteral("rng_seed_enabled"),
- Settings::values.rng_seed.GetValue(global).has_value(),
- Settings::values.rng_seed.UsingGlobal(), false);
- WriteSettingGlobal(QStringLiteral("rng_seed"),
- Settings::values.rng_seed.GetValue(global).value_or(0),
- Settings::values.rng_seed.UsingGlobal(), 0);
+ WriteSetting(QStringLiteral("rng_seed_enabled"),
+ Settings::values.rng_seed.GetValue(global).has_value(), false,
+ Settings::values.rng_seed.UsingGlobal());
+ WriteSetting(QStringLiteral("rng_seed"), Settings::values.rng_seed.GetValue(global).value_or(0),
+ 0, Settings::values.rng_seed.UsingGlobal());
if (global) {
WriteSetting(QStringLiteral("custom_rtc_enabled"), Settings::values.custom_rtc.has_value(),
@@ -1464,7 +1420,7 @@ void Config::SaveSystemValues() {
0);
}
- WriteSettingGlobal(QStringLiteral("sound_index"), Settings::values.sound_index, 1);
+ WriteGlobalSetting(Settings::values.sound_index);
qt_config->endGroup();
}
@@ -1474,10 +1430,8 @@ void Config::SaveUIValues() {
WriteSetting(QStringLiteral("theme"), UISettings::values.theme,
QString::fromUtf8(UISettings::themes[0].second));
- WriteSetting(QStringLiteral("enable_discord_presence"),
- UISettings::values.enable_discord_presence, true);
- WriteSetting(QStringLiteral("select_user_on_boot"), UISettings::values.select_user_on_boot,
- false);
+ WriteBasicSetting(UISettings::values.enable_discord_presence);
+ WriteBasicSetting(UISettings::values.select_user_on_boot);
SaveUIGamelistValues();
SaveUILayoutValues();
@@ -1485,18 +1439,17 @@ void Config::SaveUIValues() {
SaveScreenshotValues();
SaveShortcutValues();
- WriteSetting(QStringLiteral("singleWindowMode"), UISettings::values.single_window_mode, true);
- WriteSetting(QStringLiteral("fullscreen"), UISettings::values.fullscreen, false);
- WriteSetting(QStringLiteral("displayTitleBars"), UISettings::values.display_titlebar, true);
- WriteSetting(QStringLiteral("showFilterBar"), UISettings::values.show_filter_bar, true);
- WriteSetting(QStringLiteral("showStatusBar"), UISettings::values.show_status_bar, true);
- WriteSetting(QStringLiteral("confirmClose"), UISettings::values.confirm_before_closing, true);
- WriteSetting(QStringLiteral("firstStart"), UISettings::values.first_start, true);
- WriteSetting(QStringLiteral("calloutFlags"), UISettings::values.callout_flags, 0);
- WriteSetting(QStringLiteral("showConsole"), UISettings::values.show_console, false);
- WriteSetting(QStringLiteral("pauseWhenInBackground"),
- UISettings::values.pause_when_in_background, false);
- WriteSetting(QStringLiteral("hideInactiveMouse"), UISettings::values.hide_mouse, false);
+ WriteBasicSetting(UISettings::values.single_window_mode);
+ WriteBasicSetting(UISettings::values.fullscreen);
+ WriteBasicSetting(UISettings::values.display_titlebar);
+ WriteBasicSetting(UISettings::values.show_filter_bar);
+ WriteBasicSetting(UISettings::values.show_status_bar);
+ WriteBasicSetting(UISettings::values.confirm_before_closing);
+ WriteBasicSetting(UISettings::values.first_start);
+ WriteBasicSetting(UISettings::values.callout_flags);
+ WriteBasicSetting(UISettings::values.show_console);
+ WriteBasicSetting(UISettings::values.pause_when_in_background);
+ WriteBasicSetting(UISettings::values.hide_mouse);
qt_config->endGroup();
}
@@ -1504,11 +1457,11 @@ void Config::SaveUIValues() {
void Config::SaveUIGamelistValues() {
qt_config->beginGroup(QStringLiteral("UIGameList"));
- WriteSetting(QStringLiteral("show_add_ons"), UISettings::values.show_add_ons, true);
- WriteSetting(QStringLiteral("icon_size"), UISettings::values.icon_size, 64);
- WriteSetting(QStringLiteral("row_1_text_id"), UISettings::values.row_1_text_id, 3);
- WriteSetting(QStringLiteral("row_2_text_id"), UISettings::values.row_2_text_id, 2);
- WriteSetting(QStringLiteral("cache_game_list"), UISettings::values.cache_game_list, true);
+ WriteBasicSetting(UISettings::values.show_add_ons);
+ WriteBasicSetting(UISettings::values.icon_size);
+ WriteBasicSetting(UISettings::values.row_1_text_id);
+ WriteBasicSetting(UISettings::values.row_2_text_id);
+ WriteBasicSetting(UISettings::values.cache_game_list);
qt_config->beginWriteArray(QStringLiteral("favorites"));
for (int i = 0; i < UISettings::values.favorited_ids.size(); i++) {
qt_config->setArrayIndex(i);
@@ -1529,8 +1482,7 @@ void Config::SaveUILayoutValues() {
WriteSetting(QStringLiteral("gameListHeaderState"), UISettings::values.gamelist_header_state);
WriteSetting(QStringLiteral("microProfileDialogGeometry"),
UISettings::values.microprofile_geometry);
- WriteSetting(QStringLiteral("microProfileDialogVisible"),
- UISettings::values.microprofile_visible, false);
+ WriteBasicSetting(UISettings::values.microprofile_visible);
qt_config->endGroup();
}
@@ -1538,13 +1490,10 @@ void Config::SaveUILayoutValues() {
void Config::SaveWebServiceValues() {
qt_config->beginGroup(QStringLiteral("WebService"));
- WriteSetting(QStringLiteral("enable_telemetry"), Settings::values.enable_telemetry, true);
- WriteSetting(QStringLiteral("web_api_url"),
- QString::fromStdString(Settings::values.web_api_url),
- QStringLiteral("https://api.yuzu-emu.org"));
- WriteSetting(QStringLiteral("yuzu_username"),
- QString::fromStdString(Settings::values.yuzu_username));
- WriteSetting(QStringLiteral("yuzu_token"), QString::fromStdString(Settings::values.yuzu_token));
+ WriteBasicSetting(Settings::values.enable_telemetry);
+ WriteBasicSetting(Settings::values.web_api_url);
+ WriteBasicSetting(Settings::values.yuzu_username);
+ WriteBasicSetting(Settings::values.yuzu_token);
qt_config->endGroup();
}
@@ -1564,21 +1513,14 @@ QVariant Config::ReadSetting(const QString& name, const QVariant& default_value)
}
template <typename Type>
-void Config::ReadSettingGlobal(Settings::Setting<Type>& setting, const QString& name) {
+void Config::ReadGlobalSetting(Settings::Setting<Type>& setting) {
+ QString name = QString::fromStdString(setting.GetLabel());
const bool use_global = qt_config->value(name + QStringLiteral("/use_global"), true).toBool();
setting.SetGlobal(use_global);
if (global || !use_global) {
- setting.SetValue(ReadSetting(name).value<Type>());
- }
-}
-
-template <typename Type>
-void Config::ReadSettingGlobal(Settings::Setting<Type>& setting, const QString& name,
- const QVariant& default_value) {
- const bool use_global = qt_config->value(name + QStringLiteral("/use_global"), true).toBool();
- setting.SetGlobal(use_global);
- if (global || !use_global) {
- setting.SetValue(ReadSetting(name, default_value).value<Type>());
+ setting.SetValue(static_cast<QVariant>(
+ ReadSetting(name, QVariant::fromValue<Type>(setting.GetDefault())))
+ .value<Type>());
}
}
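
For context, the template above is presumably invoked from the various Read*Values() helpers with the setting object alone, since the config key and the default now come from the setting itself. A minimal usage sketch follows; it is not part of the diff, and ReadAudioValues is assumed to mirror the SaveAudioValues hunk shown earlier:

    // Hypothetical usage sketch: ReadGlobalSetting derives the key name ("enable_audio_stretching",
    // "volume") from each Setting's label and falls back to its GetDefault() value.
    void Config::ReadAudioValues() {
        qt_config->beginGroup(QStringLiteral("Audio"));
        ReadGlobalSetting(Settings::values.enable_audio_stretching);
        ReadGlobalSetting(Settings::values.volume);
        qt_config->endGroup();
    }
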
@@ -1601,31 +1543,8 @@ void Config::WriteSetting(const QString& name, const QVariant& value,
qt_config->setValue(name, value);
}
-template <typename Type>
-void Config::WriteSettingGlobal(const QString& name, const Settings::Setting<Type>& setting) {
- if (!global) {
- qt_config->setValue(name + QStringLiteral("/use_global"), setting.UsingGlobal());
- }
- if (global || !setting.UsingGlobal()) {
- qt_config->setValue(name, setting.GetValue(global));
- }
-}
-
-template <typename Type>
-void Config::WriteSettingGlobal(const QString& name, const Settings::Setting<Type>& setting,
- const QVariant& default_value) {
- if (!global) {
- qt_config->setValue(name + QStringLiteral("/use_global"), setting.UsingGlobal());
- }
- if (global || !setting.UsingGlobal()) {
- qt_config->setValue(name + QStringLiteral("/default"),
- setting.GetValue(global) == default_value.value<Type>());
- qt_config->setValue(name, setting.GetValue(global));
- }
-}
-
-void Config::WriteSettingGlobal(const QString& name, const QVariant& value, bool use_global,
- const QVariant& default_value) {
+void Config::WriteSetting(const QString& name, const QVariant& value, const QVariant& default_value,
+ bool use_global) {
if (!global) {
qt_config->setValue(name + QStringLiteral("/use_global"), use_global);
}
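
A note on the consolidated overload above: when writing a per-game (custom) config it first records whether the setting defers to the global value under the "<name>/use_global" key, which is how the rng_seed calls earlier in this diff use it. A hedged call-site sketch, with illustrative values only:

    // Hypothetical call from a custom config where the user overrides the global value:
    WriteSetting(QStringLiteral("rng_seed"), 1234, 0, /*use_global=*/false);
    // Records "rng_seed/use_global" = false; the value itself is then expected to be written as
    // well, per the old WriteSettingGlobal semantics this overload replaces (the remainder of the
    // function body falls outside this hunk).
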
diff --git a/src/yuzu/configuration/config.h b/src/yuzu/configuration/config.h
index ce3355588..4bbb9f1cd 100644
--- a/src/yuzu/configuration/config.h
+++ b/src/yuzu/configuration/config.h
@@ -42,7 +42,7 @@ public:
default_mouse_buttons;
static const std::array<int, Settings::NativeKeyboard::NumKeyboardKeys> default_keyboard_keys;
static const std::array<int, Settings::NativeKeyboard::NumKeyboardMods> default_keyboard_mods;
- static const std::array<UISettings::Shortcut, 17> default_hotkeys;
+ static const std::array<UISettings::Shortcut, 18> default_hotkeys;
private:
void Initialize(const std::string& config_name);
@@ -102,28 +102,75 @@ private:
void SaveUILayoutValues();
void SaveWebServiceValues();
+ /**
+ * Reads a setting from the qt_config.
+ *
+ * @param name The setting's identifier
+ * @param default_value The value to use when the setting is not already present in the config
+ */
QVariant ReadSetting(const QString& name) const;
QVariant ReadSetting(const QString& name, const QVariant& default_value) const;
- // Templated ReadSettingGlobal functions will also look for the use_global setting and set
- // both the value and the global state properly
- template <typename Type>
- void ReadSettingGlobal(Settings::Setting<Type>& setting, const QString& name);
- template <typename Type>
- void ReadSettingGlobal(Settings::Setting<Type>& setting, const QString& name,
- const QVariant& default_value);
+
+ /**
+ * Only reads a setting from the qt_config if the current config is a global config, or if the
+ * current config is a custom config and the setting is overriding the global setting. Otherwise
+ * it does nothing.
+ *
+ * @param setting The variable to be modified
+ * @param name The setting's identifier
+ * @param default_value The value to use when the setting is not already present in the config
+ */
template <typename Type>
void ReadSettingGlobal(Type& setting, const QString& name, const QVariant& default_value) const;
- // Templated WriteSettingGlobal functions will also write the global state if needed and will
- // skip writing the actual setting if it defers to the global value
+
+ /**
+ * Writes a setting to the qt_config.
+ *
+     * @param name The setting's identifier
+ * @param value Value of the setting
+ * @param default_value Default of the setting if not present in qt_config
+     * @param use_global Whether the setting defers to the global value (only relevant when
+     *                   writing a custom config)
+ */
void WriteSetting(const QString& name, const QVariant& value);
void WriteSetting(const QString& name, const QVariant& value, const QVariant& default_value);
+ void WriteSetting(const QString& name, const QVariant& value, const QVariant& default_value,
+ bool use_global);
+
+ /**
+ * Reads a value from the qt_config and applies it to the setting, using its label and default
+ * value. If the config is a custom config, this will also read the global state of the setting
+ * and apply that information to it.
+ *
+     * @param setting The setting to read into
+ */
+ template <typename Type>
+ void ReadGlobalSetting(Settings::Setting<Type>& setting);
+
+ /**
+     * Writes the setting's value to the qt_config using the setting's label and default value. If
+     * the config is a custom config, this also records the setting's global state and writes the
+     * custom value only when it overrides the global one.
+     *
+     * @param setting The setting to write
+ */
template <typename Type>
- void WriteSettingGlobal(const QString& name, const Settings::Setting<Type>& setting);
+ void WriteGlobalSetting(const Settings::Setting<Type>& setting);
+
+ /**
+ * Reads a value from the qt_config using the setting's label and default value and applies the
+ * value to the setting.
+ *
+     * @param setting The setting to read into
+ */
template <typename Type>
- void WriteSettingGlobal(const QString& name, const Settings::Setting<Type>& setting,
- const QVariant& default_value);
- void WriteSettingGlobal(const QString& name, const QVariant& value, bool use_global,
- const QVariant& default_value);
+ void ReadBasicSetting(Settings::BasicSetting<Type>& setting);
+
+    /** Writes the setting's value to the qt_config using the setting's label and default value.
+     *
+     * @param setting The setting to write
+ */
+ template <typename Type>
+ void WriteBasicSetting(const Settings::BasicSetting<Type>& setting);
ConfigType type;
std::unique_ptr<QSettings> qt_config;
@@ -133,5 +180,6 @@ private:
// These metatype declarations cannot be in common/settings.h because core is devoid of QT
Q_DECLARE_METATYPE(Settings::CPUAccuracy);
-Q_DECLARE_METATYPE(Settings::RendererBackend);
Q_DECLARE_METATYPE(Settings::GPUAccuracy);
+Q_DECLARE_METATYPE(Settings::RendererBackend);
+Q_DECLARE_METATYPE(Settings::ShaderBackend);
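
The four helper templates declared above (ReadGlobalSetting/WriteGlobalSetting for per-game-capable settings, ReadBasicSetting/WriteBasicSetting for plain ones) are what the rewritten Save*Values() functions in config.cpp call. Their definitions are not part of the hunks shown here; as a sketch only, WriteBasicSetting plausibly forwards to the existing WriteSetting overload, assuming BasicSetting exposes GetLabel()/GetValue()/GetDefault() as used elsewhere in this diff:

    // Sketch, not necessarily the commit's implementation. std::string-valued settings
    // (e.g. yuzu_username) would additionally need a QString::fromStdString conversion.
    template <typename Type>
    void Config::WriteBasicSetting(const Settings::BasicSetting<Type>& setting) {
        const QString name = QString::fromStdString(setting.GetLabel());
        WriteSetting(name, setting.GetValue(), setting.GetDefault());
    }
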
diff --git a/src/yuzu/configuration/configure.ui b/src/yuzu/configuration/configure.ui
index f92c3aff3..fca9aed5f 100644
--- a/src/yuzu/configuration/configure.ui
+++ b/src/yuzu/configuration/configure.ui
@@ -41,7 +41,7 @@
<item>
<widget class="QTabWidget" name="tabWidget">
<property name="currentIndex">
- <number>0</number>
+ <number>11</number>
</property>
<widget class="ConfigureGeneral" name="generalTab">
<property name="accessibleName">
@@ -107,14 +107,6 @@
<string>CPU</string>
</attribute>
</widget>
- <widget class="ConfigureCpuDebug" name="cpuDebugTab">
- <property name="accessibleName">
- <string>Debug</string>
- </property>
- <attribute name="title">
- <string>Debug</string>
- </attribute>
- </widget>
<widget class="ConfigureGraphics" name="graphicsTab">
<property name="accessibleName">
<string>Graphics</string>
@@ -139,7 +131,7 @@
<string>Audio</string>
</attribute>
</widget>
- <widget class="ConfigureDebug" name="debugTab">
+ <widget class="ConfigureDebugTab" name="debugTab">
<property name="accessibleName">
<string>Debug</string>
</property>
@@ -208,24 +200,12 @@
<container>1</container>
</customwidget>
<customwidget>
- <class>ConfigureDebug</class>
- <extends>QWidget</extends>
- <header>configuration/configure_debug.h</header>
- <container>1</container>
- </customwidget>
- <customwidget>
<class>ConfigureCpu</class>
<extends>QWidget</extends>
<header>configuration/configure_cpu.h</header>
<container>1</container>
</customwidget>
<customwidget>
- <class>ConfigureCpuDebug</class>
- <extends>QWidget</extends>
- <header>configuration/configure_cpu_debug.h</header>
- <container>1</container>
- </customwidget>
- <customwidget>
<class>ConfigureGraphics</class>
<extends>QWidget</extends>
<header>configuration/configure_graphics.h</header>
@@ -267,6 +247,12 @@
<header>configuration/configure_service.h</header>
<container>1</container>
</customwidget>
+ <customwidget>
+ <class>ConfigureDebugTab</class>
+ <extends>QWidget</extends>
+ <header>configuration/configure_debug_tab.h</header>
+ <container>1</container>
+ </customwidget>
</customwidgets>
<resources/>
<connections>
@@ -275,12 +261,32 @@
<signal>accepted()</signal>
<receiver>ConfigureDialog</receiver>
<slot>accept()</slot>
+ <hints>
+ <hint type="sourcelabel">
+ <x>20</x>
+ <y>20</y>
+ </hint>
+ <hint type="destinationlabel">
+ <x>20</x>
+ <y>20</y>
+ </hint>
+ </hints>
</connection>
<connection>
<sender>buttonBox</sender>
<signal>rejected()</signal>
<receiver>ConfigureDialog</receiver>
<slot>reject()</slot>
+ <hints>
+ <hint type="sourcelabel">
+ <x>20</x>
+ <y>20</y>
+ </hint>
+ <hint type="destinationlabel">
+ <x>20</x>
+ <y>20</y>
+ </hint>
+ </hints>
</connection>
</connections>
</ui>
diff --git a/src/yuzu/configuration/configure_audio.cpp b/src/yuzu/configuration/configure_audio.cpp
index fc0191432..1d84bf4ed 100644
--- a/src/yuzu/configuration/configure_audio.cpp
+++ b/src/yuzu/configuration/configure_audio.cpp
@@ -47,7 +47,8 @@ void ConfigureAudio::SetConfiguration() {
SetAudioDeviceFromDeviceID();
- ui->volume_slider->setValue(Settings::values.volume.GetValue() * ui->volume_slider->maximum());
+ const auto volume_value = static_cast<int>(Settings::values.volume.GetValue());
+ ui->volume_slider->setValue(volume_value);
ui->toggle_audio_stretching->setChecked(Settings::values.enable_audio_stretching.GetValue());
@@ -69,7 +70,7 @@ void ConfigureAudio::SetOutputSinkFromSinkID() {
[[maybe_unused]] const QSignalBlocker blocker(ui->output_sink_combo_box);
int new_sink_index = 0;
- const QString sink_id = QString::fromStdString(Settings::values.sink_id);
+ const QString sink_id = QString::fromStdString(Settings::values.sink_id.GetValue());
for (int index = 0; index < ui->output_sink_combo_box->count(); index++) {
if (ui->output_sink_combo_box->itemText(index) == sink_id) {
new_sink_index = index;
@@ -83,7 +84,7 @@ void ConfigureAudio::SetOutputSinkFromSinkID() {
void ConfigureAudio::SetAudioDeviceFromDeviceID() {
int new_device_index = -1;
- const QString device_id = QString::fromStdString(Settings::values.audio_device_id);
+ const QString device_id = QString::fromStdString(Settings::values.audio_device_id.GetValue());
for (int index = 0; index < ui->audio_device_combo_box->count(); index++) {
if (ui->audio_device_combo_box->itemText(index) == device_id) {
new_device_index = index;
@@ -106,24 +107,22 @@ void ConfigureAudio::ApplyConfiguration() {
Settings::values.sink_id =
ui->output_sink_combo_box->itemText(ui->output_sink_combo_box->currentIndex())
.toStdString();
- Settings::values.audio_device_id =
+ Settings::values.audio_device_id.SetValue(
ui->audio_device_combo_box->itemText(ui->audio_device_combo_box->currentIndex())
- .toStdString();
+ .toStdString());
// Guard if during game and set to game-specific value
if (Settings::values.volume.UsingGlobal()) {
- Settings::values.volume.SetValue(
- static_cast<float>(ui->volume_slider->sliderPosition()) /
- ui->volume_slider->maximum());
+ const auto volume = static_cast<u8>(ui->volume_slider->value());
+ Settings::values.volume.SetValue(volume);
}
} else {
if (ui->volume_combo_box->currentIndex() == 0) {
Settings::values.volume.SetGlobal(true);
} else {
Settings::values.volume.SetGlobal(false);
- Settings::values.volume.SetValue(
- static_cast<float>(ui->volume_slider->sliderPosition()) /
- ui->volume_slider->maximum());
+ const auto volume = static_cast<u8>(ui->volume_slider->value());
+ Settings::values.volume.SetValue(volume);
}
}
}
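
Taken together with the SetConfiguration hunk above, the volume round-trip is now an integer taken directly from the slider rather than a float normalized by the slider maximum. Both lines below already appear in the hunks above and are repeated here side by side only for orientation:

    ui->volume_slider->setValue(static_cast<int>(Settings::values.volume.GetValue()));  // load
    Settings::values.volume.SetValue(static_cast<u8>(ui->volume_slider->value()));      // apply
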
diff --git a/src/yuzu/configuration/configure_cpu.cpp b/src/yuzu/configuration/configure_cpu.cpp
index 22219cbad..8d7171487 100644
--- a/src/yuzu/configuration/configure_cpu.cpp
+++ b/src/yuzu/configuration/configure_cpu.cpp
@@ -20,8 +20,6 @@ ConfigureCpu::ConfigureCpu(QWidget* parent) : QWidget(parent), ui(new Ui::Config
SetConfiguration();
- connect(ui->accuracy, qOverload<int>(&QComboBox::activated), this,
- &ConfigureCpu::AccuracyUpdated);
connect(ui->accuracy, qOverload<int>(&QComboBox::currentIndexChanged), this,
&ConfigureCpu::UpdateGroup);
}
@@ -34,12 +32,15 @@ void ConfigureCpu::SetConfiguration() {
ui->accuracy->setEnabled(runtime_lock);
ui->cpuopt_unsafe_unfuse_fma->setEnabled(runtime_lock);
ui->cpuopt_unsafe_reduce_fp_error->setEnabled(runtime_lock);
+ ui->cpuopt_unsafe_ignore_standard_fpcr->setEnabled(runtime_lock);
ui->cpuopt_unsafe_inaccurate_nan->setEnabled(runtime_lock);
ui->cpuopt_unsafe_fastmem_check->setEnabled(runtime_lock);
ui->cpuopt_unsafe_unfuse_fma->setChecked(Settings::values.cpuopt_unsafe_unfuse_fma.GetValue());
ui->cpuopt_unsafe_reduce_fp_error->setChecked(
Settings::values.cpuopt_unsafe_reduce_fp_error.GetValue());
+ ui->cpuopt_unsafe_ignore_standard_fpcr->setChecked(
+ Settings::values.cpuopt_unsafe_ignore_standard_fpcr.GetValue());
ui->cpuopt_unsafe_inaccurate_nan->setChecked(
Settings::values.cpuopt_unsafe_inaccurate_nan.GetValue());
ui->cpuopt_unsafe_fastmem_check->setChecked(
@@ -55,20 +56,6 @@ void ConfigureCpu::SetConfiguration() {
UpdateGroup(ui->accuracy->currentIndex());
}
-void ConfigureCpu::AccuracyUpdated(int index) {
- if (Settings::IsConfiguringGlobal() &&
- static_cast<Settings::CPUAccuracy>(index) == Settings::CPUAccuracy::DebugMode) {
- const auto result = QMessageBox::warning(this, tr("Setting CPU to Debug Mode"),
- tr("CPU Debug Mode is only intended for developer "
- "use. Are you sure you want to enable this?"),
- QMessageBox::Yes | QMessageBox::No);
- if (result == QMessageBox::No) {
- ui->accuracy->setCurrentIndex(static_cast<int>(Settings::CPUAccuracy::Accurate));
- UpdateGroup(static_cast<int>(Settings::CPUAccuracy::Accurate));
- }
- }
-}
-
void ConfigureCpu::UpdateGroup(int index) {
if (!Settings::IsConfiguringGlobal()) {
index -= ConfigurationShared::USE_GLOBAL_OFFSET;
@@ -84,6 +71,9 @@ void ConfigureCpu::ApplyConfiguration() {
ConfigurationShared::ApplyPerGameSetting(&Settings::values.cpuopt_unsafe_reduce_fp_error,
ui->cpuopt_unsafe_reduce_fp_error,
cpuopt_unsafe_reduce_fp_error);
+ ConfigurationShared::ApplyPerGameSetting(&Settings::values.cpuopt_unsafe_ignore_standard_fpcr,
+ ui->cpuopt_unsafe_ignore_standard_fpcr,
+ cpuopt_unsafe_ignore_standard_fpcr);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.cpuopt_unsafe_inaccurate_nan,
ui->cpuopt_unsafe_inaccurate_nan,
cpuopt_unsafe_inaccurate_nan);
@@ -128,8 +118,6 @@ void ConfigureCpu::SetupPerGameUI() {
ConfigurationShared::SetColoredComboBox(
ui->accuracy, ui->widget_accuracy,
static_cast<u32>(Settings::values.cpu_accuracy.GetValue(true)));
- ui->accuracy->removeItem(static_cast<u32>(Settings::CPUAccuracy::DebugMode) +
- ConfigurationShared::USE_GLOBAL_OFFSET);
ConfigurationShared::SetColoredTristate(ui->cpuopt_unsafe_unfuse_fma,
Settings::values.cpuopt_unsafe_unfuse_fma,
@@ -137,6 +125,9 @@ void ConfigureCpu::SetupPerGameUI() {
ConfigurationShared::SetColoredTristate(ui->cpuopt_unsafe_reduce_fp_error,
Settings::values.cpuopt_unsafe_reduce_fp_error,
cpuopt_unsafe_reduce_fp_error);
+ ConfigurationShared::SetColoredTristate(ui->cpuopt_unsafe_ignore_standard_fpcr,
+ Settings::values.cpuopt_unsafe_ignore_standard_fpcr,
+ cpuopt_unsafe_ignore_standard_fpcr);
ConfigurationShared::SetColoredTristate(ui->cpuopt_unsafe_inaccurate_nan,
Settings::values.cpuopt_unsafe_inaccurate_nan,
cpuopt_unsafe_inaccurate_nan);
diff --git a/src/yuzu/configuration/configure_cpu.h b/src/yuzu/configuration/configure_cpu.h
index 57ff2772a..154931482 100644
--- a/src/yuzu/configuration/configure_cpu.h
+++ b/src/yuzu/configuration/configure_cpu.h
@@ -29,7 +29,6 @@ private:
void changeEvent(QEvent* event) override;
void RetranslateUI();
- void AccuracyUpdated(int index);
void UpdateGroup(int index);
void SetConfiguration();
@@ -40,6 +39,7 @@ private:
ConfigurationShared::CheckState cpuopt_unsafe_unfuse_fma;
ConfigurationShared::CheckState cpuopt_unsafe_reduce_fp_error;
+ ConfigurationShared::CheckState cpuopt_unsafe_ignore_standard_fpcr;
ConfigurationShared::CheckState cpuopt_unsafe_inaccurate_nan;
ConfigurationShared::CheckState cpuopt_unsafe_fastmem_check;
};
diff --git a/src/yuzu/configuration/configure_cpu.ui b/src/yuzu/configuration/configure_cpu.ui
index 31ef9e3f5..5b9457faf 100644
--- a/src/yuzu/configuration/configure_cpu.ui
+++ b/src/yuzu/configuration/configure_cpu.ui
@@ -6,8 +6,8 @@
<rect>
<x>0</x>
<y>0</y>
- <width>400</width>
- <height>321</height>
+ <width>448</width>
+ <height>433</height>
</rect>
</property>
<property name="windowTitle">
@@ -17,7 +17,7 @@
<item>
<layout class="QVBoxLayout">
<item>
- <widget class="QGroupBox">
+ <widget class="QGroupBox" name="groupBox">
<property name="title">
<string>General</string>
</property>
@@ -36,17 +36,17 @@
<widget class="QComboBox" name="accuracy">
<item>
<property name="text">
- <string>Accurate</string>
+ <string>Auto</string>
</property>
</item>
<item>
<property name="text">
- <string>Unsafe</string>
+ <string>Accurate</string>
</property>
</item>
<item>
<property name="text">
- <string>Enable Debug Mode</string>
+ <string>Unsafe</string>
</property>
</item>
</widget>
@@ -57,7 +57,7 @@
<item>
<widget class="QLabel" name="label_recommended_accuracy">
<property name="text">
- <string>We recommend setting accuracy to &quot;Accurate&quot;.</string>
+ <string>We recommend setting accuracy to &quot;Auto&quot;.</string>
</property>
<property name="wordWrap">
<bool>false</bool>
@@ -112,6 +112,18 @@
</widget>
</item>
<item>
+ <widget class="QCheckBox" name="cpuopt_unsafe_ignore_standard_fpcr">
+ <property name="toolTip">
+ <string>
+          &lt;div&gt;This option improves the speed of 32-bit ASIMD floating-point functions by running with incorrect rounding modes.&lt;/div&gt;
+ </string>
+ </property>
+ <property name="text">
+           <string>Faster ASIMD instructions (32-bit only)</string>
+ </property>
+ </widget>
+ </item>
+ <item>
<widget class="QCheckBox" name="cpuopt_unsafe_inaccurate_nan">
<property name="toolTip">
<string>
diff --git a/src/yuzu/configuration/configure_cpu_debug.cpp b/src/yuzu/configuration/configure_cpu_debug.cpp
index e25c52baf..98e2d2be5 100644
--- a/src/yuzu/configuration/configure_cpu_debug.cpp
+++ b/src/yuzu/configuration/configure_cpu_debug.cpp
@@ -24,23 +24,26 @@ void ConfigureCpuDebug::SetConfiguration() {
const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn();
ui->cpuopt_page_tables->setEnabled(runtime_lock);
- ui->cpuopt_page_tables->setChecked(Settings::values.cpuopt_page_tables);
+ ui->cpuopt_page_tables->setChecked(Settings::values.cpuopt_page_tables.GetValue());
ui->cpuopt_block_linking->setEnabled(runtime_lock);
- ui->cpuopt_block_linking->setChecked(Settings::values.cpuopt_block_linking);
+ ui->cpuopt_block_linking->setChecked(Settings::values.cpuopt_block_linking.GetValue());
ui->cpuopt_return_stack_buffer->setEnabled(runtime_lock);
- ui->cpuopt_return_stack_buffer->setChecked(Settings::values.cpuopt_return_stack_buffer);
+ ui->cpuopt_return_stack_buffer->setChecked(
+ Settings::values.cpuopt_return_stack_buffer.GetValue());
ui->cpuopt_fast_dispatcher->setEnabled(runtime_lock);
- ui->cpuopt_fast_dispatcher->setChecked(Settings::values.cpuopt_fast_dispatcher);
+ ui->cpuopt_fast_dispatcher->setChecked(Settings::values.cpuopt_fast_dispatcher.GetValue());
ui->cpuopt_context_elimination->setEnabled(runtime_lock);
- ui->cpuopt_context_elimination->setChecked(Settings::values.cpuopt_context_elimination);
+ ui->cpuopt_context_elimination->setChecked(
+ Settings::values.cpuopt_context_elimination.GetValue());
ui->cpuopt_const_prop->setEnabled(runtime_lock);
- ui->cpuopt_const_prop->setChecked(Settings::values.cpuopt_const_prop);
+ ui->cpuopt_const_prop->setChecked(Settings::values.cpuopt_const_prop.GetValue());
ui->cpuopt_misc_ir->setEnabled(runtime_lock);
- ui->cpuopt_misc_ir->setChecked(Settings::values.cpuopt_misc_ir);
+ ui->cpuopt_misc_ir->setChecked(Settings::values.cpuopt_misc_ir.GetValue());
ui->cpuopt_reduce_misalign_checks->setEnabled(runtime_lock);
- ui->cpuopt_reduce_misalign_checks->setChecked(Settings::values.cpuopt_reduce_misalign_checks);
+ ui->cpuopt_reduce_misalign_checks->setChecked(
+ Settings::values.cpuopt_reduce_misalign_checks.GetValue());
ui->cpuopt_fastmem->setEnabled(runtime_lock);
- ui->cpuopt_fastmem->setChecked(Settings::values.cpuopt_fastmem);
+ ui->cpuopt_fastmem->setChecked(Settings::values.cpuopt_fastmem.GetValue());
}
void ConfigureCpuDebug::ApplyConfiguration() {
diff --git a/src/yuzu/configuration/configure_cpu_debug.h b/src/yuzu/configuration/configure_cpu_debug.h
index 10de55099..1b0d8050c 100644
--- a/src/yuzu/configuration/configure_cpu_debug.h
+++ b/src/yuzu/configuration/configure_cpu_debug.h
@@ -6,7 +6,6 @@
#include <memory>
#include <QWidget>
-#include "common/settings.h"
namespace Ui {
class ConfigureCpuDebug;
diff --git a/src/yuzu/configuration/configure_cpu_debug.ui b/src/yuzu/configuration/configure_cpu_debug.ui
index c43f89a5a..abf469b55 100644
--- a/src/yuzu/configuration/configure_cpu_debug.ui
+++ b/src/yuzu/configuration/configure_cpu_debug.ui
@@ -6,8 +6,8 @@
<rect>
<x>0</x>
<y>0</y>
- <width>400</width>
- <height>321</height>
+ <width>592</width>
+ <height>503</height>
</rect>
</property>
<property name="windowTitle">
@@ -17,140 +17,132 @@
<item>
<layout class="QVBoxLayout">
<item>
- <widget class="QGroupBox">
+ <widget class="QGroupBox" name="groupBox">
<property name="title">
<string>Toggle CPU Optimizations</string>
</property>
<layout class="QVBoxLayout">
<item>
- <widget class="QLabel">
- <property name="wordWrap">
- <bool>1</bool>
- </property>
+ <widget class="QLabel" name="label">
<property name="text">
- <string>
- &lt;div&gt;
- &lt;b&gt;For debugging only.&lt;/b&gt;
- &lt;br&gt;
- If you're not sure what these do, keep all of these enabled.
- &lt;br&gt;
- These settings, when disabled, only take effect when CPU Accuracy is "Debug Mode".
- &lt;/div&gt;
- </string>
+ <string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;&lt;span style=&quot; font-weight:600;&quot;&gt;For debugging only.&lt;/span&gt;&lt;br/&gt;If you're not sure what these do, keep all of these enabled. &lt;br/&gt;These settings, when disabled, only take effect when CPU Debugging is enabled. &lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
+ </property>
+ <property name="wordWrap">
+ <bool>false</bool>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="cpuopt_page_tables">
- <property name="text">
- <string>Enable inline page tables</string>
- </property>
<property name="toolTip">
<string>
- &lt;div style="white-space: nowrap"&gt;This optimization speeds up memory accesses by the guest program.&lt;/div&gt;
- &lt;div style="white-space: nowrap"&gt;Enabling it inlines accesses to PageTable::pointers into emitted code.&lt;/div&gt;
- &lt;div style="white-space: nowrap"&gt;Disabling this forces all memory accesses to go through the Memory::Read/Memory::Write functions.&lt;/div&gt;
+ &lt;div style=&quot;white-space: nowrap&quot;&gt;This optimization speeds up memory accesses by the guest program.&lt;/div&gt;
+ &lt;div style=&quot;white-space: nowrap&quot;&gt;Enabling it inlines accesses to PageTable::pointers into emitted code.&lt;/div&gt;
+ &lt;div style=&quot;white-space: nowrap&quot;&gt;Disabling this forces all memory accesses to go through the Memory::Read/Memory::Write functions.&lt;/div&gt;
</string>
</property>
+ <property name="text">
+ <string>Enable inline page tables</string>
+ </property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="cpuopt_block_linking">
- <property name="text">
- <string>Enable block linking</string>
- </property>
<property name="toolTip">
<string>
&lt;div&gt;This optimization avoids dispatcher lookups by allowing emitted basic blocks to jump directly to other basic blocks if the destination PC is static.&lt;/div&gt;
</string>
</property>
+ <property name="text">
+ <string>Enable block linking</string>
+ </property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="cpuopt_return_stack_buffer">
- <property name="text">
- <string>Enable return stack buffer</string>
- </property>
<property name="toolTip">
<string>
            &lt;div&gt;This optimization avoids dispatcher lookups by keeping track of potential return addresses of BL instructions. This approximates what happens with a return stack buffer on a real CPU.&lt;/div&gt;
</string>
</property>
+ <property name="text">
+ <string>Enable return stack buffer</string>
+ </property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="cpuopt_fast_dispatcher">
- <property name="text">
- <string>Enable fast dispatcher</string>
- </property>
<property name="toolTip">
<string>
            &lt;div&gt;Enable a two-tiered dispatch system. A faster dispatcher written in assembly, backed by a small MRU cache of jump destinations, is used first. If that fails, dispatch falls back to the slower C++ dispatcher.&lt;/div&gt;
</string>
</property>
+ <property name="text">
+ <string>Enable fast dispatcher</string>
+ </property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="cpuopt_context_elimination">
- <property name="text">
- <string>Enable context elimination</string>
- </property>
<property name="toolTip">
<string>
&lt;div&gt;Enables an IR optimization that reduces unnecessary accesses to the CPU context structure.&lt;/div&gt;
</string>
</property>
+ <property name="text">
+ <string>Enable context elimination</string>
+ </property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="cpuopt_const_prop">
- <property name="text">
- <string>Enable constant propagation</string>
- </property>
<property name="toolTip">
<string>
&lt;div&gt;Enables IR optimizations that involve constant propagation.&lt;/div&gt;
</string>
</property>
+ <property name="text">
+ <string>Enable constant propagation</string>
+ </property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="cpuopt_misc_ir">
- <property name="text">
- <string>Enable miscellaneous optimizations</string>
- </property>
<property name="toolTip">
<string>
&lt;div&gt;Enables miscellaneous IR optimizations.&lt;/div&gt;
</string>
</property>
+ <property name="text">
+ <string>Enable miscellaneous optimizations</string>
+ </property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="cpuopt_reduce_misalign_checks">
- <property name="text">
- <string>Enable misalignment check reduction</string>
- </property>
<property name="toolTip">
<string>
- &lt;div style="white-space: nowrap"&gt;When enabled, a misalignment is only triggered when an access crosses a page boundary.&lt;/div&gt;
- &lt;div style="white-space: nowrap"&gt;When disabled, a misalignment is triggered on all misaligned accesses.&lt;/div&gt;
+ &lt;div style=&quot;white-space: nowrap&quot;&gt;When enabled, a misalignment is only triggered when an access crosses a page boundary.&lt;/div&gt;
+ &lt;div style=&quot;white-space: nowrap&quot;&gt;When disabled, a misalignment is triggered on all misaligned accesses.&lt;/div&gt;
</string>
</property>
+ <property name="text">
+ <string>Enable misalignment check reduction</string>
+ </property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="cpuopt_fastmem">
- <property name="text">
- <string>Enable Host MMU Emulation</string>
- </property>
<property name="toolTip">
<string>
- &lt;div style="white-space: nowrap"&gt;This optimization speeds up memory accesses by the guest program.&lt;/div&gt;
- &lt;div style="white-space: nowrap"&gt;Enabling it causes guest memory reads/writes to be done directly into memory and make use of Host's MMU.&lt;/div&gt;
- &lt;div style="white-space: nowrap"&gt;Disabling this forces all memory accesses to use Software MMU Emulation.&lt;/div&gt;
+ &lt;div style=&quot;white-space: nowrap&quot;&gt;This optimization speeds up memory accesses by the guest program.&lt;/div&gt;
+ &lt;div style=&quot;white-space: nowrap&quot;&gt;Enabling it causes guest memory reads/writes to be done directly into memory and make use of Host's MMU.&lt;/div&gt;
+ &lt;div style=&quot;white-space: nowrap&quot;&gt;Disabling this forces all memory accesses to use Software MMU Emulation.&lt;/div&gt;
</string>
</property>
+ <property name="text">
+ <string>Enable Host MMU Emulation</string>
+ </property>
</widget>
</item>
</layout>
diff --git a/src/yuzu/configuration/configure_debug.cpp b/src/yuzu/configuration/configure_debug.cpp
index 15d6a5ad7..f7e29dbd7 100644
--- a/src/yuzu/configuration/configure_debug.cpp
+++ b/src/yuzu/configuration/configure_debug.cpp
@@ -31,20 +31,28 @@ void ConfigureDebug::SetConfiguration() {
const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn();
ui->toggle_console->setEnabled(runtime_lock);
- ui->toggle_console->setChecked(UISettings::values.show_console);
- ui->log_filter_edit->setText(QString::fromStdString(Settings::values.log_filter));
- ui->homebrew_args_edit->setText(QString::fromStdString(Settings::values.program_args));
+ ui->toggle_console->setChecked(UISettings::values.show_console.GetValue());
+ ui->log_filter_edit->setText(QString::fromStdString(Settings::values.log_filter.GetValue()));
+ ui->homebrew_args_edit->setText(
+ QString::fromStdString(Settings::values.program_args.GetValue()));
ui->fs_access_log->setEnabled(runtime_lock);
- ui->fs_access_log->setChecked(Settings::values.enable_fs_access_log);
- ui->reporting_services->setChecked(Settings::values.reporting_services);
- ui->quest_flag->setChecked(Settings::values.quest_flag);
- ui->use_debug_asserts->setChecked(Settings::values.use_debug_asserts);
- ui->use_auto_stub->setChecked(Settings::values.use_auto_stub);
+ ui->fs_access_log->setChecked(Settings::values.enable_fs_access_log.GetValue());
+ ui->reporting_services->setChecked(Settings::values.reporting_services.GetValue());
+ ui->quest_flag->setChecked(Settings::values.quest_flag.GetValue());
+ ui->use_debug_asserts->setChecked(Settings::values.use_debug_asserts.GetValue());
+ ui->use_auto_stub->setChecked(Settings::values.use_auto_stub.GetValue());
ui->enable_graphics_debugging->setEnabled(runtime_lock);
- ui->enable_graphics_debugging->setChecked(Settings::values.renderer_debug);
+ ui->enable_graphics_debugging->setChecked(Settings::values.renderer_debug.GetValue());
+ ui->enable_cpu_debugging->setEnabled(runtime_lock);
+ ui->enable_cpu_debugging->setChecked(Settings::values.cpu_debug_mode.GetValue());
+ ui->enable_nsight_aftermath->setEnabled(runtime_lock);
+ ui->enable_nsight_aftermath->setChecked(Settings::values.enable_nsight_aftermath.GetValue());
ui->disable_macro_jit->setEnabled(runtime_lock);
- ui->disable_macro_jit->setChecked(Settings::values.disable_macro_jit);
- ui->extended_logging->setChecked(Settings::values.extended_logging);
+ ui->disable_macro_jit->setChecked(Settings::values.disable_macro_jit.GetValue());
+ ui->disable_loop_safety_checks->setEnabled(runtime_lock);
+ ui->disable_loop_safety_checks->setChecked(
+ Settings::values.disable_shader_loop_safety_checks.GetValue());
+ ui->extended_logging->setChecked(Settings::values.extended_logging.GetValue());
}
void ConfigureDebug::ApplyConfiguration() {
@@ -57,11 +65,15 @@ void ConfigureDebug::ApplyConfiguration() {
Settings::values.use_debug_asserts = ui->use_debug_asserts->isChecked();
Settings::values.use_auto_stub = ui->use_auto_stub->isChecked();
Settings::values.renderer_debug = ui->enable_graphics_debugging->isChecked();
+ Settings::values.cpu_debug_mode = ui->enable_cpu_debugging->isChecked();
+ Settings::values.enable_nsight_aftermath = ui->enable_nsight_aftermath->isChecked();
+ Settings::values.disable_shader_loop_safety_checks =
+ ui->disable_loop_safety_checks->isChecked();
Settings::values.disable_macro_jit = ui->disable_macro_jit->isChecked();
Settings::values.extended_logging = ui->extended_logging->isChecked();
Debugger::ToggleConsole();
Common::Log::Filter filter;
- filter.ParseFilterString(Settings::values.log_filter);
+ filter.ParseFilterString(Settings::values.log_filter.GetValue());
Common::Log::SetGlobalFilter(filter);
}
diff --git a/src/yuzu/configuration/configure_debug.ui b/src/yuzu/configuration/configure_debug.ui
index c8087542f..c8baf2921 100644
--- a/src/yuzu/configuration/configure_debug.ui
+++ b/src/yuzu/configuration/configure_debug.ui
@@ -7,7 +7,7 @@
<x>0</x>
<y>0</y>
<width>400</width>
- <height>486</height>
+ <height>777</height>
</rect>
</property>
<property name="windowTitle">
@@ -126,6 +126,16 @@
</widget>
</item>
<item>
+ <widget class="QCheckBox" name="enable_nsight_aftermath">
+ <property name="toolTip">
+ <string>When checked, it enables Nsight Aftermath crash dumps</string>
+ </property>
+ <property name="text">
+ <string>Enable Nsight Aftermath</string>
+ </property>
+ </widget>
+ </item>
+ <item>
<widget class="QCheckBox" name="disable_macro_jit">
<property name="enabled">
<bool>true</bool>
@@ -138,6 +148,16 @@
</property>
</widget>
</item>
+ <item>
+ <widget class="QCheckBox" name="disable_loop_safety_checks">
+ <property name="toolTip">
+ <string>When checked, it executes shaders without loop logic changes</string>
+ </property>
+ <property name="text">
+ <string>Disable Loop safety checks</string>
+ </property>
+ </widget>
+ </item>
</layout>
</widget>
</item>
@@ -192,34 +212,41 @@
</property>
</widget>
</item>
- <item>
- <widget class="QCheckBox" name="use_debug_asserts">
- <property name="text">
- <string>Enable Debug Asserts</string>
- </property>
- </widget>
- </item>
- <item>
- <widget class="QCheckBox" name="use_auto_stub">
- <property name="text">
- <string>Enable Auto-Stub</string>
- </property>
- </widget>
- </item>
<item>
- <widget class="QLabel" name="label_5">
- <property name="font">
- <font>
- <italic>true</italic>
- </font>
- </property>
- <property name="text">
- <string>This will be reset automatically when yuzu closes.</string>
- </property>
- <property name="indent">
- <number>20</number>
- </property>
- </widget>
+ <widget class="QCheckBox" name="enable_cpu_debugging">
+ <property name="text">
+ <string>Enable CPU Debugging</string>
+ </property>
+ </widget>
+ </item>
+ <item>
+ <widget class="QCheckBox" name="use_debug_asserts">
+ <property name="text">
+ <string>Enable Debug Asserts</string>
+ </property>
+ </widget>
+ </item>
+ <item>
+ <widget class="QCheckBox" name="use_auto_stub">
+ <property name="text">
+ <string>Enable Auto-Stub</string>
+ </property>
+ </widget>
+ </item>
+ <item>
+ <widget class="QLabel" name="label_5">
+ <property name="font">
+ <font>
+ <italic>true</italic>
+ </font>
+ </property>
+ <property name="text">
+ <string>This will be reset automatically when yuzu closes.</string>
+ </property>
+ <property name="indent">
+ <number>20</number>
+ </property>
+ </widget>
</item>
</layout>
</widget>
@@ -245,11 +272,17 @@
<tabstops>
<tabstop>log_filter_edit</tabstop>
<tabstop>toggle_console</tabstop>
+ <tabstop>extended_logging</tabstop>
<tabstop>open_log_button</tabstop>
<tabstop>homebrew_args_edit</tabstop>
<tabstop>enable_graphics_debugging</tabstop>
+ <tabstop>enable_nsight_aftermath</tabstop>
+ <tabstop>disable_macro_jit</tabstop>
+ <tabstop>disable_loop_safety_checks</tabstop>
<tabstop>reporting_services</tabstop>
<tabstop>quest_flag</tabstop>
+ <tabstop>use_debug_asserts</tabstop>
+ <tabstop>use_auto_stub</tabstop>
</tabstops>
<resources/>
<connections/>
diff --git a/src/yuzu/configuration/configure_debug_tab.cpp b/src/yuzu/configuration/configure_debug_tab.cpp
new file mode 100644
index 000000000..67d369249
--- /dev/null
+++ b/src/yuzu/configuration/configure_debug_tab.cpp
@@ -0,0 +1,38 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "ui_configure_debug_tab.h"
+#include "yuzu/configuration/configure_debug_tab.h"
+
+ConfigureDebugTab::ConfigureDebugTab(QWidget* parent)
+ : QWidget(parent), ui(new Ui::ConfigureDebugTab) {
+ ui->setupUi(this);
+
+ SetConfiguration();
+}
+
+ConfigureDebugTab::~ConfigureDebugTab() = default;
+
+void ConfigureDebugTab::ApplyConfiguration() {
+ ui->debugTab->ApplyConfiguration();
+ ui->cpuDebugTab->ApplyConfiguration();
+}
+
+void ConfigureDebugTab::SetCurrentIndex(int index) {
+ ui->tabWidget->setCurrentIndex(index);
+}
+
+void ConfigureDebugTab::changeEvent(QEvent* event) {
+ if (event->type() == QEvent::LanguageChange) {
+ RetranslateUI();
+ }
+
+ QWidget::changeEvent(event);
+}
+
+void ConfigureDebugTab::RetranslateUI() {
+ ui->retranslateUi(this);
+}
+
+void ConfigureDebugTab::SetConfiguration() {}
diff --git a/src/yuzu/configuration/configure_debug_tab.h b/src/yuzu/configuration/configure_debug_tab.h
new file mode 100644
index 000000000..0a96d43d0
--- /dev/null
+++ b/src/yuzu/configuration/configure_debug_tab.h
@@ -0,0 +1,32 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <QWidget>
+
+namespace Ui {
+class ConfigureDebugTab;
+}
+
+class ConfigureDebugTab : public QWidget {
+ Q_OBJECT
+
+public:
+ explicit ConfigureDebugTab(QWidget* parent = nullptr);
+ ~ConfigureDebugTab() override;
+
+ void ApplyConfiguration();
+
+ void SetCurrentIndex(int index);
+
+private:
+ void changeEvent(QEvent* event) override;
+ void RetranslateUI();
+
+ void SetConfiguration();
+
+ std::unique_ptr<Ui::ConfigureDebugTab> ui;
+};
diff --git a/src/yuzu/configuration/configure_debug_tab.ui b/src/yuzu/configuration/configure_debug_tab.ui
new file mode 100644
index 000000000..7dc6dd704
--- /dev/null
+++ b/src/yuzu/configuration/configure_debug_tab.ui
@@ -0,0 +1,52 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<ui version="4.0">
+ <class>ConfigureDebugTab</class>
+ <widget class="QWidget" name="ConfigureDebugTab">
+ <property name="geometry">
+ <rect>
+ <x>0</x>
+ <y>0</y>
+ <width>320</width>
+ <height>240</height>
+ </rect>
+ </property>
+ <property name="windowTitle">
+ <string>Form</string>
+ </property>
+ <layout class="QVBoxLayout" name="verticalLayout">
+ <item>
+ <widget class="QTabWidget" name="tabWidget">
+ <property name="currentIndex">
+ <number>1</number>
+ </property>
+ <widget class="ConfigureDebug" name="debugTab">
+ <attribute name="title">
+ <string>General</string>
+ </attribute>
+ </widget>
+ <widget class="ConfigureCpuDebug" name="cpuDebugTab">
+ <attribute name="title">
+ <string>CPU</string>
+ </attribute>
+ </widget>
+ </widget>
+ </item>
+ </layout>
+ </widget>
+ <customwidgets>
+ <customwidget>
+ <class>ConfigureDebug</class>
+ <extends>QWidget</extends>
+ <header>configuration/configure_debug.h</header>
+ <container>1</container>
+ </customwidget>
+ <customwidget>
+ <class>ConfigureCpuDebug</class>
+ <extends>QWidget</extends>
+ <header>configuration/configure_cpu_debug.h</header>
+ <container>1</container>
+ </customwidget>
+ </customwidgets>
+ <resources/>
+ <connections/>
+</ui>
diff --git a/src/yuzu/configuration/configure_dialog.cpp b/src/yuzu/configuration/configure_dialog.cpp
index 371bc01b1..bc009b6b3 100644
--- a/src/yuzu/configuration/configure_dialog.cpp
+++ b/src/yuzu/configuration/configure_dialog.cpp
@@ -8,6 +8,7 @@
#include <QListWidgetItem>
#include <QPushButton>
#include <QSignalBlocker>
+#include <QTabWidget>
#include "common/settings.h"
#include "core/core.h"
#include "ui_configure.h"
@@ -32,6 +33,8 @@ ConfigureDialog::ConfigureDialog(QWidget* parent, HotkeyRegistry& registry,
SetConfiguration();
PopulateSelectionList();
+ connect(ui->tabWidget, &QTabWidget::currentChanged, this,
+ [this]() { ui->debugTab->SetCurrentIndex(0); });
connect(ui->uiTab, &ConfigureUi::LanguageChanged, this, &ConfigureDialog::OnLanguageChanged);
connect(ui->selectorList, &QListWidget::itemSelectionChanged, this,
&ConfigureDialog::UpdateVisibleTabs);
@@ -59,7 +62,6 @@ void ConfigureDialog::ApplyConfiguration() {
ui->inputTab->ApplyConfiguration();
ui->hotkeysTab->ApplyConfiguration(registry);
ui->cpuTab->ApplyConfiguration();
- ui->cpuDebugTab->ApplyConfiguration();
ui->graphicsTab->ApplyConfiguration();
ui->graphicsAdvancedTab->ApplyConfiguration();
ui->audioTab->ApplyConfiguration();
@@ -102,7 +104,7 @@ void ConfigureDialog::PopulateSelectionList() {
const std::array<std::pair<QString, QList<QWidget*>>, 6> items{
{{tr("General"), {ui->generalTab, ui->hotkeysTab, ui->uiTab, ui->webTab, ui->debugTab}},
{tr("System"), {ui->systemTab, ui->profileManagerTab, ui->serviceTab, ui->filesystemTab}},
- {tr("CPU"), {ui->cpuTab, ui->cpuDebugTab}},
+ {tr("CPU"), {ui->cpuTab}},
{tr("Graphics"), {ui->graphicsTab, ui->graphicsAdvancedTab}},
{tr("Audio"), {ui->audioTab}},
{tr("Controls"), ui->inputTab->GetSubTabs()}},
diff --git a/src/yuzu/configuration/configure_filesystem.cpp b/src/yuzu/configuration/configure_filesystem.cpp
index d223c40ea..9cb317822 100644
--- a/src/yuzu/configuration/configure_filesystem.cpp
+++ b/src/yuzu/configuration/configure_filesystem.cpp
@@ -43,18 +43,19 @@ void ConfigureFilesystem::setConfiguration() {
QString::fromStdString(Common::FS::GetYuzuPathString(Common::FS::YuzuPath::NANDDir)));
ui->sdmc_directory_edit->setText(
QString::fromStdString(Common::FS::GetYuzuPathString(Common::FS::YuzuPath::SDMCDir)));
- ui->gamecard_path_edit->setText(QString::fromStdString(Settings::values.gamecard_path));
+ ui->gamecard_path_edit->setText(
+ QString::fromStdString(Settings::values.gamecard_path.GetValue()));
ui->dump_path_edit->setText(
QString::fromStdString(Common::FS::GetYuzuPathString(Common::FS::YuzuPath::DumpDir)));
ui->load_path_edit->setText(
QString::fromStdString(Common::FS::GetYuzuPathString(Common::FS::YuzuPath::LoadDir)));
- ui->gamecard_inserted->setChecked(Settings::values.gamecard_inserted);
- ui->gamecard_current_game->setChecked(Settings::values.gamecard_current_game);
- ui->dump_exefs->setChecked(Settings::values.dump_exefs);
- ui->dump_nso->setChecked(Settings::values.dump_nso);
+ ui->gamecard_inserted->setChecked(Settings::values.gamecard_inserted.GetValue());
+ ui->gamecard_current_game->setChecked(Settings::values.gamecard_current_game.GetValue());
+ ui->dump_exefs->setChecked(Settings::values.dump_exefs.GetValue());
+ ui->dump_nso->setChecked(Settings::values.dump_nso.GetValue());
- ui->cache_game_list->setChecked(UISettings::values.cache_game_list);
+ ui->cache_game_list->setChecked(UISettings::values.cache_game_list.GetValue());
UpdateEnabledControls();
}
diff --git a/src/yuzu/configuration/configure_general.cpp b/src/yuzu/configuration/configure_general.cpp
index 38edb4d8d..1f647a0d1 100644
--- a/src/yuzu/configuration/configure_general.cpp
+++ b/src/yuzu/configuration/configure_general.cpp
@@ -24,8 +24,8 @@ ConfigureGeneral::ConfigureGeneral(QWidget* parent)
SetConfiguration();
if (Settings::IsConfiguringGlobal()) {
- connect(ui->toggle_frame_limit, &QCheckBox::clicked, ui->frame_limit,
- [this]() { ui->frame_limit->setEnabled(ui->toggle_frame_limit->isChecked()); });
+ connect(ui->toggle_speed_limit, &QCheckBox::clicked, ui->speed_limit,
+ [this]() { ui->speed_limit->setEnabled(ui->toggle_speed_limit->isChecked()); });
}
connect(ui->button_reset_defaults, &QPushButton::clicked, this,
@@ -40,21 +40,23 @@ void ConfigureGeneral::SetConfiguration() {
ui->use_multi_core->setEnabled(runtime_lock);
ui->use_multi_core->setChecked(Settings::values.use_multi_core.GetValue());
- ui->toggle_check_exit->setChecked(UISettings::values.confirm_before_closing);
- ui->toggle_user_on_boot->setChecked(UISettings::values.select_user_on_boot);
- ui->toggle_background_pause->setChecked(UISettings::values.pause_when_in_background);
- ui->toggle_hide_mouse->setChecked(UISettings::values.hide_mouse);
+ ui->toggle_check_exit->setChecked(UISettings::values.confirm_before_closing.GetValue());
+ ui->toggle_user_on_boot->setChecked(UISettings::values.select_user_on_boot.GetValue());
+ ui->toggle_background_pause->setChecked(UISettings::values.pause_when_in_background.GetValue());
+ ui->toggle_hide_mouse->setChecked(UISettings::values.hide_mouse.GetValue());
- ui->toggle_frame_limit->setChecked(Settings::values.use_frame_limit.GetValue());
- ui->frame_limit->setValue(Settings::values.frame_limit.GetValue());
+ ui->toggle_speed_limit->setChecked(Settings::values.use_speed_limit.GetValue());
+ ui->speed_limit->setValue(Settings::values.speed_limit.GetValue());
+
+ ui->fps_cap->setValue(Settings::values.fps_cap.GetValue());
ui->button_reset_defaults->setEnabled(runtime_lock);
if (Settings::IsConfiguringGlobal()) {
- ui->frame_limit->setEnabled(Settings::values.use_frame_limit.GetValue());
+ ui->speed_limit->setEnabled(Settings::values.use_speed_limit.GetValue());
} else {
- ui->frame_limit->setEnabled(Settings::values.use_frame_limit.GetValue() &&
- use_frame_limit != ConfigurationShared::CheckState::Global);
+ ui->speed_limit->setEnabled(Settings::values.use_speed_limit.GetValue() &&
+ use_speed_limit != ConfigurationShared::CheckState::Global);
}
}
@@ -87,20 +89,22 @@ void ConfigureGeneral::ApplyConfiguration() {
UISettings::values.pause_when_in_background = ui->toggle_background_pause->isChecked();
UISettings::values.hide_mouse = ui->toggle_hide_mouse->isChecked();
+ Settings::values.fps_cap.SetValue(ui->fps_cap->value());
+
// Guard if during game and set to game-specific value
- if (Settings::values.use_frame_limit.UsingGlobal()) {
- Settings::values.use_frame_limit.SetValue(ui->toggle_frame_limit->checkState() ==
+ if (Settings::values.use_speed_limit.UsingGlobal()) {
+ Settings::values.use_speed_limit.SetValue(ui->toggle_speed_limit->checkState() ==
Qt::Checked);
- Settings::values.frame_limit.SetValue(ui->frame_limit->value());
+ Settings::values.speed_limit.SetValue(ui->speed_limit->value());
}
} else {
- bool global_frame_limit = use_frame_limit == ConfigurationShared::CheckState::Global;
- Settings::values.use_frame_limit.SetGlobal(global_frame_limit);
- Settings::values.frame_limit.SetGlobal(global_frame_limit);
- if (!global_frame_limit) {
- Settings::values.use_frame_limit.SetValue(ui->toggle_frame_limit->checkState() ==
+ bool global_speed_limit = use_speed_limit == ConfigurationShared::CheckState::Global;
+ Settings::values.use_speed_limit.SetGlobal(global_speed_limit);
+ Settings::values.speed_limit.SetGlobal(global_speed_limit);
+ if (!global_speed_limit) {
+ Settings::values.use_speed_limit.SetValue(ui->toggle_speed_limit->checkState() ==
Qt::Checked);
- Settings::values.frame_limit.SetValue(ui->frame_limit->value());
+ Settings::values.speed_limit.SetValue(ui->speed_limit->value());
}
}
}
@@ -122,8 +126,8 @@ void ConfigureGeneral::SetupPerGameUI() {
// Disables each setting if:
// - A game is running (thus settings in use), and
// - A non-global setting is applied.
- ui->toggle_frame_limit->setEnabled(Settings::values.use_frame_limit.UsingGlobal());
- ui->frame_limit->setEnabled(Settings::values.frame_limit.UsingGlobal());
+ ui->toggle_speed_limit->setEnabled(Settings::values.use_speed_limit.UsingGlobal());
+ ui->speed_limit->setEnabled(Settings::values.speed_limit.UsingGlobal());
return;
}
@@ -135,13 +139,13 @@ void ConfigureGeneral::SetupPerGameUI() {
ui->button_reset_defaults->setVisible(false);
- ConfigurationShared::SetColoredTristate(ui->toggle_frame_limit,
- Settings::values.use_frame_limit, use_frame_limit);
+ ConfigurationShared::SetColoredTristate(ui->toggle_speed_limit,
+ Settings::values.use_speed_limit, use_speed_limit);
ConfigurationShared::SetColoredTristate(ui->use_multi_core, Settings::values.use_multi_core,
use_multi_core);
- connect(ui->toggle_frame_limit, &QCheckBox::clicked, ui->frame_limit, [this]() {
- ui->frame_limit->setEnabled(ui->toggle_frame_limit->isChecked() &&
- (use_frame_limit != ConfigurationShared::CheckState::Global));
+ connect(ui->toggle_speed_limit, &QCheckBox::clicked, ui->speed_limit, [this]() {
+ ui->speed_limit->setEnabled(ui->toggle_speed_limit->isChecked() &&
+ (use_speed_limit != ConfigurationShared::CheckState::Global));
});
}
diff --git a/src/yuzu/configuration/configure_general.h b/src/yuzu/configuration/configure_general.h
index a0fd52492..c9df37d73 100644
--- a/src/yuzu/configuration/configure_general.h
+++ b/src/yuzu/configuration/configure_general.h
@@ -43,6 +43,6 @@ private:
std::unique_ptr<Ui::ConfigureGeneral> ui;
- ConfigurationShared::CheckState use_frame_limit;
+ ConfigurationShared::CheckState use_speed_limit;
ConfigurationShared::CheckState use_multi_core;
};
diff --git a/src/yuzu/configuration/configure_general.ui b/src/yuzu/configuration/configure_general.ui
index bc7041090..8ce97edec 100644
--- a/src/yuzu/configuration/configure_general.ui
+++ b/src/yuzu/configuration/configure_general.ui
@@ -27,14 +27,14 @@
<item>
<layout class="QHBoxLayout" name="horizontalLayout_2">
<item>
- <widget class="QCheckBox" name="toggle_frame_limit">
+ <widget class="QCheckBox" name="toggle_speed_limit">
<property name="text">
<string>Limit Speed Percent</string>
</property>
</widget>
</item>
<item>
- <widget class="QSpinBox" name="frame_limit">
+ <widget class="QSpinBox" name="speed_limit">
<property name="suffix">
<string>%</string>
</property>
@@ -52,6 +52,36 @@
</layout>
</item>
<item>
+ <layout class="QHBoxLayout" name="horizontalLayout_2">
+ <item>
+ <widget class="QLabel" name="fps_cap_label">
+ <property name="text">
+ <string>Framerate Cap</string>
+ </property>
+ <property name="toolTip">
+ <string>Requires the use of the FPS Limiter Toggle hotkey to take effect.</string>
+ </property>
+ </widget>
+ </item>
+ <item>
+ <widget class="QSpinBox" name="fps_cap">
+ <property name="suffix">
+ <string>x</string>
+ </property>
+ <property name="minimum">
+ <number>1</number>
+ </property>
+ <property name="maximum">
+ <number>1000</number>
+ </property>
+ <property name="value">
+ <number>500</number>
+ </property>
+ </widget>
+ </item>
+ </layout>
+ </item>
+ <item>
<widget class="QCheckBox" name="use_multi_core">
<property name="text">
<string>Multicore CPU Emulation</string>
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp
index 41a69d9b8..1bc477c96 100644
--- a/src/yuzu/configuration/configure_graphics.cpp
+++ b/src/yuzu/configuration/configure_graphics.cpp
@@ -26,19 +26,29 @@ ConfigureGraphics::ConfigureGraphics(QWidget* parent)
ui->setupUi(this);
+ for (const auto& device : vulkan_devices) {
+ ui->device->addItem(device);
+ }
+
+ ui->backend->addItem(QStringLiteral("GLSL"));
+ ui->backend->addItem(tr("GLASM (Assembly Shaders, NVIDIA Only)"));
+ ui->backend->addItem(QStringLiteral("SPIR-V (Experimental, Mesa Only)"));
+
SetupPerGameUI();
SetConfiguration();
connect(ui->api, qOverload<int>(&QComboBox::currentIndexChanged), this, [this] {
- UpdateDeviceComboBox();
+ UpdateAPILayout();
if (!Settings::IsConfiguringGlobal()) {
ConfigurationShared::SetHighlight(
- ui->api_layout, ui->api->currentIndex() != ConfigurationShared::USE_GLOBAL_INDEX);
+ ui->api_widget, ui->api->currentIndex() != ConfigurationShared::USE_GLOBAL_INDEX);
}
});
connect(ui->device, qOverload<int>(&QComboBox::activated), this,
[this](int device) { UpdateDeviceSelection(device); });
+ connect(ui->backend, qOverload<int>(&QComboBox::activated), this,
+ [this](int backend) { UpdateShaderBackendSelection(backend); });
connect(ui->bg_button, &QPushButton::clicked, this, [this] {
const QColor new_bg_color = QColorDialog::getColor(bg_color);
@@ -61,12 +71,21 @@ void ConfigureGraphics::UpdateDeviceSelection(int device) {
}
}
+void ConfigureGraphics::UpdateShaderBackendSelection(int backend) {
+ if (backend == -1) {
+ return;
+ }
+ if (GetCurrentGraphicsBackend() == Settings::RendererBackend::OpenGL) {
+ shader_backend = static_cast<Settings::ShaderBackend>(backend);
+ }
+}
+
ConfigureGraphics::~ConfigureGraphics() = default;
void ConfigureGraphics::SetConfiguration() {
const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn();
- ui->api->setEnabled(runtime_lock);
+ ui->api_widget->setEnabled(runtime_lock);
ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock);
ui->use_disk_shader_cache->setEnabled(runtime_lock);
ui->use_nvdec_emulation->setEnabled(runtime_lock);
@@ -83,7 +102,7 @@ void ConfigureGraphics::SetConfiguration() {
ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio.GetValue());
} else {
ConfigurationShared::SetPerGameSetting(ui->api, &Settings::values.renderer_backend);
- ConfigurationShared::SetHighlight(ui->api_layout,
+ ConfigurationShared::SetHighlight(ui->api_widget,
!Settings::values.renderer_backend.UsingGlobal());
ConfigurationShared::SetPerGameSetting(ui->fullscreen_mode_combobox,
@@ -100,11 +119,10 @@ void ConfigureGraphics::SetConfiguration() {
ui->bg_button->setEnabled(!Settings::values.bg_red.UsingGlobal());
ConfigurationShared::SetHighlight(ui->bg_layout, !Settings::values.bg_red.UsingGlobal());
}
-
- UpdateBackgroundColorButton(QColor::fromRgbF(Settings::values.bg_red.GetValue(),
- Settings::values.bg_green.GetValue(),
- Settings::values.bg_blue.GetValue()));
- UpdateDeviceComboBox();
+ UpdateBackgroundColorButton(QColor::fromRgb(Settings::values.bg_red.GetValue(),
+ Settings::values.bg_green.GetValue(),
+ Settings::values.bg_blue.GetValue()));
+ UpdateAPILayout();
}
void ConfigureGraphics::ApplyConfiguration() {
@@ -128,26 +146,36 @@ void ConfigureGraphics::ApplyConfiguration() {
if (Settings::values.renderer_backend.UsingGlobal()) {
Settings::values.renderer_backend.SetValue(GetCurrentGraphicsBackend());
}
+ if (Settings::values.shader_backend.UsingGlobal()) {
+ Settings::values.shader_backend.SetValue(shader_backend);
+ }
if (Settings::values.vulkan_device.UsingGlobal()) {
Settings::values.vulkan_device.SetValue(vulkan_device);
}
if (Settings::values.bg_red.UsingGlobal()) {
- Settings::values.bg_red.SetValue(static_cast<float>(bg_color.redF()));
- Settings::values.bg_green.SetValue(static_cast<float>(bg_color.greenF()));
- Settings::values.bg_blue.SetValue(static_cast<float>(bg_color.blueF()));
+ Settings::values.bg_red.SetValue(static_cast<u8>(bg_color.red()));
+ Settings::values.bg_green.SetValue(static_cast<u8>(bg_color.green()));
+ Settings::values.bg_blue.SetValue(static_cast<u8>(bg_color.blue()));
}
} else {
if (ui->api->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) {
Settings::values.renderer_backend.SetGlobal(true);
+ Settings::values.shader_backend.SetGlobal(true);
Settings::values.vulkan_device.SetGlobal(true);
} else {
Settings::values.renderer_backend.SetGlobal(false);
Settings::values.renderer_backend.SetValue(GetCurrentGraphicsBackend());
- if (GetCurrentGraphicsBackend() == Settings::RendererBackend::Vulkan) {
+ switch (GetCurrentGraphicsBackend()) {
+ case Settings::RendererBackend::OpenGL:
+ Settings::values.shader_backend.SetGlobal(false);
+ Settings::values.vulkan_device.SetGlobal(true);
+ Settings::values.shader_backend.SetValue(shader_backend);
+ break;
+ case Settings::RendererBackend::Vulkan:
+ Settings::values.shader_backend.SetGlobal(true);
Settings::values.vulkan_device.SetGlobal(false);
Settings::values.vulkan_device.SetValue(vulkan_device);
- } else {
- Settings::values.vulkan_device.SetGlobal(true);
+ break;
}
}
@@ -159,9 +187,9 @@ void ConfigureGraphics::ApplyConfiguration() {
Settings::values.bg_red.SetGlobal(false);
Settings::values.bg_green.SetGlobal(false);
Settings::values.bg_blue.SetGlobal(false);
- Settings::values.bg_red.SetValue(static_cast<float>(bg_color.redF()));
- Settings::values.bg_green.SetValue(static_cast<float>(bg_color.greenF()));
- Settings::values.bg_blue.SetValue(static_cast<float>(bg_color.blueF()));
+ Settings::values.bg_red.SetValue(static_cast<u8>(bg_color.red()));
+ Settings::values.bg_green.SetValue(static_cast<u8>(bg_color.green()));
+ Settings::values.bg_blue.SetValue(static_cast<u8>(bg_color.blue()));
}
}
}
@@ -188,32 +216,32 @@ void ConfigureGraphics::UpdateBackgroundColorButton(QColor color) {
ui->bg_button->setIcon(color_icon);
}
-void ConfigureGraphics::UpdateDeviceComboBox() {
- ui->device->clear();
-
- bool enabled = false;
-
+void ConfigureGraphics::UpdateAPILayout() {
if (!Settings::IsConfiguringGlobal() &&
ui->api->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) {
+ vulkan_device = Settings::values.vulkan_device.GetValue(true);
+ shader_backend = Settings::values.shader_backend.GetValue(true);
+ ui->device_widget->setEnabled(false);
+ ui->backend_widget->setEnabled(false);
+ } else {
vulkan_device = Settings::values.vulkan_device.GetValue();
+ shader_backend = Settings::values.shader_backend.GetValue();
+ ui->device_widget->setEnabled(true);
+ ui->backend_widget->setEnabled(true);
}
+
switch (GetCurrentGraphicsBackend()) {
case Settings::RendererBackend::OpenGL:
- ui->device->addItem(tr("OpenGL Graphics Device"));
- enabled = false;
+ ui->backend->setCurrentIndex(static_cast<u32>(shader_backend));
+ ui->device_widget->setVisible(false);
+ ui->backend_widget->setVisible(true);
break;
case Settings::RendererBackend::Vulkan:
- for (const auto& device : vulkan_devices) {
- ui->device->addItem(device);
- }
ui->device->setCurrentIndex(vulkan_device);
- enabled = !vulkan_devices.empty();
+ ui->device_widget->setVisible(true);
+ ui->backend_widget->setVisible(false);
break;
}
- // If in per-game config and use global is selected, don't enable.
- enabled &= !(!Settings::IsConfiguringGlobal() &&
- ui->api->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX);
- ui->device->setEnabled(enabled && !Core::System::GetInstance().IsPoweredOn());
}
void ConfigureGraphics::RetrieveVulkanDevices() try {
diff --git a/src/yuzu/configuration/configure_graphics.h b/src/yuzu/configuration/configure_graphics.h
index 6418115cf..c866b911b 100644
--- a/src/yuzu/configuration/configure_graphics.h
+++ b/src/yuzu/configuration/configure_graphics.h
@@ -34,8 +34,9 @@ private:
void SetConfiguration();
void UpdateBackgroundColorButton(QColor color);
- void UpdateDeviceComboBox();
+ void UpdateAPILayout();
void UpdateDeviceSelection(int device);
+ void UpdateShaderBackendSelection(int backend);
void RetrieveVulkanDevices();
@@ -53,4 +54,5 @@ private:
std::vector<QString> vulkan_devices;
u32 vulkan_device{};
+ Settings::ShaderBackend shader_backend{};
};
diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui
index 5b999d84d..099ddbb7c 100644
--- a/src/yuzu/configuration/configure_graphics.ui
+++ b/src/yuzu/configuration/configure_graphics.ui
@@ -23,7 +23,7 @@
</property>
<layout class="QVBoxLayout" name="verticalLayout_3">
<item>
- <widget class="QWidget" name="api_layout" native="true">
+ <widget class="QWidget" name="api_widget" native="true">
<layout class="QGridLayout" name="gridLayout">
<property name="leftMargin">
<number>0</number>
@@ -40,37 +40,107 @@
<property name="horizontalSpacing">
<number>6</number>
</property>
- <item row="0" column="0">
- <widget class="QLabel" name="api_label">
- <property name="text">
- <string>API:</string>
- </property>
+ <item row="4" column="0">
+ <widget class="QWidget" name="backend_widget" native="true">
+ <layout class="QHBoxLayout" name="backend_layout">
+ <property name="leftMargin">
+ <number>0</number>
+ </property>
+ <property name="topMargin">
+ <number>0</number>
+ </property>
+ <property name="rightMargin">
+ <number>0</number>
+ </property>
+ <property name="bottomMargin">
+ <number>0</number>
+ </property>
+ <item>
+ <widget class="QLabel" name="backend_label">
+ <property name="text">
+ <string>Shader Backend:</string>
+ </property>
+ </widget>
+ </item>
+ <item>
+ <widget class="QComboBox" name="backend"/>
+ </item>
+ </layout>
</widget>
</item>
- <item row="0" column="1">
- <widget class="QComboBox" name="api">
- <item>
- <property name="text">
- <string notr="true">OpenGL</string>
+ <item row="2" column="0">
+ <widget class="QWidget" name="device_widget" native="true">
+ <layout class="QHBoxLayout" name="device_layout">
+ <property name="leftMargin">
+ <number>0</number>
</property>
- </item>
- <item>
- <property name="text">
- <string notr="true">Vulkan</string>
+ <property name="topMargin">
+ <number>0</number>
</property>
- </item>
+ <property name="rightMargin">
+ <number>0</number>
+ </property>
+ <property name="bottomMargin">
+ <number>0</number>
+ </property>
+ <item>
+ <widget class="QLabel" name="device_label">
+ <property name="text">
+ <string>Device:</string>
+ </property>
+ </widget>
+ </item>
+ <item>
+ <widget class="QComboBox" name="device"/>
+ </item>
+ </layout>
</widget>
</item>
- <item row="1" column="0">
- <widget class="QLabel" name="device_label">
- <property name="text">
- <string>Device:</string>
- </property>
+ <item row="0" column="0">
+ <widget class="QWidget" name="api_layout_2" native="true">
+ <layout class="QHBoxLayout" name="api_layout">
+ <property name="leftMargin">
+ <number>0</number>
+ </property>
+ <property name="topMargin">
+ <number>0</number>
+ </property>
+ <property name="rightMargin">
+ <number>0</number>
+ </property>
+ <property name="bottomMargin">
+ <number>0</number>
+ </property>
+ <item>
+ <widget class="QLabel" name="api_label">
+ <property name="text">
+ <string>API:</string>
+ </property>
+ </widget>
+ </item>
+ <item>
+ <widget class="QComboBox" name="api">
+ <property name="sizePolicy">
+ <sizepolicy hsizetype="Preferred" vsizetype="Fixed">
+ <horstretch>0</horstretch>
+ <verstretch>0</verstretch>
+ </sizepolicy>
+ </property>
+ <item>
+ <property name="text">
+ <string notr="true">OpenGL</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string notr="true">Vulkan</string>
+ </property>
+ </item>
+ </widget>
+ </item>
+ </layout>
</widget>
</item>
- <item row="1" column="1">
- <widget class="QComboBox" name="device"/>
- </item>
</layout>
</widget>
</item>
diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp
index 35bf9c6be..38276feb1 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.cpp
+++ b/src/yuzu/configuration/configure_graphics_advanced.cpp
@@ -23,13 +23,12 @@ ConfigureGraphicsAdvanced::~ConfigureGraphicsAdvanced() = default;
void ConfigureGraphicsAdvanced::SetConfiguration() {
const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn();
ui->use_vsync->setEnabled(runtime_lock);
- ui->use_assembly_shaders->setEnabled(runtime_lock);
ui->use_asynchronous_shaders->setEnabled(runtime_lock);
ui->anisotropic_filtering_combobox->setEnabled(runtime_lock);
ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue());
- ui->use_assembly_shaders->setChecked(Settings::values.use_assembly_shaders.GetValue());
ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue());
+ ui->use_caches_gc->setChecked(Settings::values.use_caches_gc.GetValue());
ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue());
if (Settings::IsConfiguringGlobal()) {
@@ -57,11 +56,11 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() {
ConfigurationShared::ApplyPerGameSetting(&Settings::values.max_anisotropy,
ui->anisotropic_filtering_combobox);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_vsync, ui->use_vsync, use_vsync);
- ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_assembly_shaders,
- ui->use_assembly_shaders, use_assembly_shaders);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_shaders,
ui->use_asynchronous_shaders,
use_asynchronous_shaders);
+ ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_caches_gc, ui->use_caches_gc,
+ use_caches_gc);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_fast_gpu_time,
ui->use_fast_gpu_time, use_fast_gpu_time);
@@ -97,10 +96,10 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
if (Settings::IsConfiguringGlobal()) {
ui->gpu_accuracy->setEnabled(Settings::values.gpu_accuracy.UsingGlobal());
ui->use_vsync->setEnabled(Settings::values.use_vsync.UsingGlobal());
- ui->use_assembly_shaders->setEnabled(Settings::values.use_assembly_shaders.UsingGlobal());
ui->use_asynchronous_shaders->setEnabled(
Settings::values.use_asynchronous_shaders.UsingGlobal());
ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal());
+ ui->use_caches_gc->setEnabled(Settings::values.use_caches_gc.UsingGlobal());
ui->anisotropic_filtering_combobox->setEnabled(
Settings::values.max_anisotropy.UsingGlobal());
@@ -108,13 +107,13 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
}
ConfigurationShared::SetColoredTristate(ui->use_vsync, Settings::values.use_vsync, use_vsync);
- ConfigurationShared::SetColoredTristate(
- ui->use_assembly_shaders, Settings::values.use_assembly_shaders, use_assembly_shaders);
ConfigurationShared::SetColoredTristate(ui->use_asynchronous_shaders,
Settings::values.use_asynchronous_shaders,
use_asynchronous_shaders);
ConfigurationShared::SetColoredTristate(ui->use_fast_gpu_time,
Settings::values.use_fast_gpu_time, use_fast_gpu_time);
+ ConfigurationShared::SetColoredTristate(ui->use_caches_gc, Settings::values.use_caches_gc,
+ use_caches_gc);
ConfigurationShared::SetColoredComboBox(
ui->gpu_accuracy, ui->label_gpu_accuracy,
static_cast<int>(Settings::values.gpu_accuracy.GetValue(true)));
diff --git a/src/yuzu/configuration/configure_graphics_advanced.h b/src/yuzu/configuration/configure_graphics_advanced.h
index e61b571c7..7356e6916 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.h
+++ b/src/yuzu/configuration/configure_graphics_advanced.h
@@ -35,7 +35,7 @@ private:
std::unique_ptr<Ui::ConfigureGraphicsAdvanced> ui;
ConfigurationShared::CheckState use_vsync;
- ConfigurationShared::CheckState use_assembly_shaders;
ConfigurationShared::CheckState use_asynchronous_shaders;
ConfigurationShared::CheckState use_fast_gpu_time;
+ ConfigurationShared::CheckState use_caches_gc;
};
diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui
index 846a30586..379dc5d2e 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.ui
+++ b/src/yuzu/configuration/configure_graphics_advanced.ui
@@ -77,29 +77,29 @@
</widget>
</item>
<item>
- <widget class="QCheckBox" name="use_assembly_shaders">
+ <widget class="QCheckBox" name="use_asynchronous_shaders">
<property name="toolTip">
- <string>Enabling this reduces shader stutter. Enables OpenGL assembly shaders on supported Nvidia devices (NV_gpu_program5 is required). This feature is experimental.</string>
+ <string>Enables asynchronous shader compilation, which may reduce shader stutter. This feature is experimental.</string>
</property>
<property name="text">
- <string>Use assembly shaders (experimental, Nvidia OpenGL only)</string>
+ <string>Use asynchronous shader building</string>
</property>
</widget>
</item>
<item>
- <widget class="QCheckBox" name="use_asynchronous_shaders">
- <property name="toolTip">
- <string>Enables asynchronous shader compilation, which may reduce shader stutter. This feature is experimental.</string>
- </property>
+ <widget class="QCheckBox" name="use_fast_gpu_time">
<property name="text">
- <string>Use asynchronous shader building (experimental)</string>
+ <string>Use Fast GPU Time</string>
</property>
</widget>
</item>
<item>
- <widget class="QCheckBox" name="use_fast_gpu_time">
+ <widget class="QCheckBox" name="use_caches_gc">
+ <property name="toolTip">
+ <string>Enables garbage collection for the GPU caches. This will try to keep VRAM within 3-4 GB by flushing the least used textures/buffers. May cause issues in a few games.</string>
+ </property>
<property name="text">
- <string>Use Fast GPU Time</string>
+ <string>Enable GPU cache garbage collection (experimental)</string>
</property>
</widget>
</item>
@@ -134,22 +134,22 @@
</item>
<item>
<property name="text">
- <string>2x</string>
+ <string>2x (WILL BREAK THINGS)</string>
</property>
</item>
<item>
<property name="text">
- <string>4x</string>
+ <string>4x (WILL BREAK THINGS)</string>
</property>
</item>
<item>
<property name="text">
- <string>8x</string>
+ <string>8x (WILL BREAK THINGS)</string>
</property>
</item>
<item>
<property name="text">
- <string>16x</string>
+ <string>16x (WILL BREAK THINGS)</string>
</property>
</item>
</widget>
diff --git a/src/yuzu/configuration/configure_input_advanced.cpp b/src/yuzu/configuration/configure_input_advanced.cpp
index d8d3b83dc..2f1419b5b 100644
--- a/src/yuzu/configuration/configure_input_advanced.cpp
+++ b/src/yuzu/configuration/configure_input_advanced.cpp
@@ -148,12 +148,12 @@ void ConfigureInputAdvanced::LoadConfiguration() {
}
}
- ui->debug_enabled->setChecked(Settings::values.debug_pad_enabled);
- ui->mouse_enabled->setChecked(Settings::values.mouse_enabled);
- ui->keyboard_enabled->setChecked(Settings::values.keyboard_enabled);
- ui->emulate_analog_keyboard->setChecked(Settings::values.emulate_analog_keyboard);
- ui->mouse_panning->setChecked(Settings::values.mouse_panning);
- ui->mouse_panning_sensitivity->setValue(Settings::values.mouse_panning_sensitivity);
+ ui->debug_enabled->setChecked(Settings::values.debug_pad_enabled.GetValue());
+ ui->mouse_enabled->setChecked(Settings::values.mouse_enabled.GetValue());
+ ui->keyboard_enabled->setChecked(Settings::values.keyboard_enabled.GetValue());
+ ui->emulate_analog_keyboard->setChecked(Settings::values.emulate_analog_keyboard.GetValue());
+ ui->mouse_panning->setChecked(Settings::values.mouse_panning.GetValue());
+ ui->mouse_panning_sensitivity->setValue(Settings::values.mouse_panning_sensitivity.GetValue());
ui->touchscreen_enabled->setChecked(Settings::values.touchscreen.enabled);
UpdateUIEnabled();
diff --git a/src/yuzu/configuration/configure_input_advanced.ui b/src/yuzu/configuration/configure_input_advanced.ui
index 173130d8d..d3ef5bd06 100644
--- a/src/yuzu/configuration/configure_input_advanced.ui
+++ b/src/yuzu/configuration/configure_input_advanced.ui
@@ -2573,27 +2573,24 @@
</widget>
</item>
<item row="2" column="2">
- <widget class="QDoubleSpinBox" name="mouse_panning_sensitivity">
+ <widget class="QSpinBox" name="mouse_panning_sensitivity">
<property name="toolTip">
<string>Mouse sensitivity</string>
</property>
<property name="alignment">
<set>Qt::AlignCenter</set>
</property>
- <property name="decimals">
- <number>2</number>
+ <property name="suffix">
+ <string>%</string>
</property>
<property name="minimum">
- <double>0.100000000000000</double>
+ <number>1</number>
</property>
<property name="maximum">
- <double>16.000000000000000</double>
- </property>
- <property name="singleStep">
- <double>0.010000000000000</double>
+ <number>100</number>
</property>
<property name="value">
- <double>1.000000000000000</double>
+ <number>100</number>
</property>
</widget>
</item>
diff --git a/src/yuzu/configuration/configure_input_player.cpp b/src/yuzu/configuration/configure_input_player.cpp
index d5d624b96..6b9bd05f1 100644
--- a/src/yuzu/configuration/configure_input_player.cpp
+++ b/src/yuzu/configuration/configure_input_player.cpp
@@ -149,8 +149,9 @@ QString ButtonToText(const Common::ParamPackage& param) {
if (param.Has("button")) {
const QString button_str = QString::fromStdString(param.Get("button", ""));
+ const QString toggle = QString::fromStdString(param.Get("toggle", false) ? "~" : "");
- return QObject::tr("Button %1").arg(button_str);
+ return QObject::tr("%1Button %2").arg(toggle, button_str);
}
if (param.Has("motion")) {
@@ -313,6 +314,24 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i
buttons_param[button_id].Set("toggle", toggle_value);
button_map[button_id]->setText(ButtonToText(buttons_param[button_id]));
});
+ if (buttons_param[button_id].Has("threshold")) {
+ context_menu.addAction(tr("Set threshold"), [&] {
+ const int button_threshold = static_cast<int>(
+ buttons_param[button_id].Get("threshold", 0.5f) * 100.0f);
+ const int new_threshold = QInputDialog::getInt(
+ this, tr("Set threshold"), tr("Choose a value between 0% and 100%"),
+ button_threshold, 0, 100);
+ buttons_param[button_id].Set("threshold", new_threshold / 100.0f);
+
+ if (button_id == Settings::NativeButton::ZL) {
+ ui->sliderZLThreshold->setValue(new_threshold);
+ }
+ if (button_id == Settings::NativeButton::ZR) {
+ ui->sliderZRThreshold->setValue(new_threshold);
+ }
+ });
+ }
+
context_menu.exec(button_map[button_id]->mapToGlobal(menu_location));
ui->controllerFrame->SetPlayerInput(player_index, buttons_param, analogs_param);
});
@@ -341,6 +360,20 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i
});
}
+ connect(ui->sliderZLThreshold, &QSlider::valueChanged, [=, this] {
+ if (buttons_param[Settings::NativeButton::ZL].Has("threshold")) {
+ const auto slider_value = ui->sliderZLThreshold->value();
+ buttons_param[Settings::NativeButton::ZL].Set("threshold", slider_value / 100.0f);
+ }
+ });
+
+ connect(ui->sliderZRThreshold, &QSlider::valueChanged, [=, this] {
+ if (buttons_param[Settings::NativeButton::ZR].Has("threshold")) {
+ const auto slider_value = ui->sliderZRThreshold->value();
+ buttons_param[Settings::NativeButton::ZR].Set("threshold", slider_value / 100.0f);
+ }
+ });
+
for (int analog_id = 0; analog_id < Settings::NativeAnalog::NumAnalogs; ++analog_id) {
for (int sub_button_id = 0; sub_button_id < ANALOG_SUB_BUTTONS_NUM; ++sub_button_id) {
auto* const analog_button = analog_map_buttons[analog_id][sub_button_id];
@@ -849,6 +882,18 @@ void ConfigureInputPlayer::UpdateUI() {
button_map[button]->setText(ButtonToText(buttons_param[button]));
}
+ if (buttons_param[Settings::NativeButton::ZL].Has("threshold")) {
+ const int button_threshold = static_cast<int>(
+ buttons_param[Settings::NativeButton::ZL].Get("threshold", 0.5f) * 100.0f);
+ ui->sliderZLThreshold->setValue(button_threshold);
+ }
+
+ if (buttons_param[Settings::NativeButton::ZR].Has("threshold")) {
+ const int button_threshold = static_cast<int>(
+ buttons_param[Settings::NativeButton::ZR].Get("threshold", 0.5f) * 100.0f);
+ ui->sliderZRThreshold->setValue(button_threshold);
+ }
+
for (int motion_id = 0; motion_id < Settings::NativeMotion::NumMotions; ++motion_id) {
motion_map[motion_id]->setText(ButtonToText(motions_param[motion_id]));
}
diff --git a/src/yuzu/configuration/configure_input_player.ui b/src/yuzu/configuration/configure_input_player.ui
index e76aa484f..e7433912b 100644
--- a/src/yuzu/configuration/configure_input_player.ui
+++ b/src/yuzu/configuration/configure_input_player.ui
@@ -1334,6 +1334,12 @@
</item>
<item>
<widget class="QGroupBox" name="buttonShoulderButtonsButtonZLGroup">
+ <property name="sizePolicy">
+ <sizepolicy hsizetype="Maximum" vsizetype="Preferred">
+ <horstretch>0</horstretch>
+ <verstretch>0</verstretch>
+ </sizepolicy>
+ </property>
<property name="title">
<string>ZL</string>
</property>
@@ -1378,6 +1384,22 @@
</property>
</widget>
</item>
+ <item>
+ <widget class="QSlider" name="sliderZLThreshold">
+ <property name="maximumSize">
+ <size>
+ <width>70</width>
+ <height>15</height>
+ </size>
+ </property>
+ <property name="maximum">
+ <number>100</number>
+ </property>
+ <property name="orientation">
+ <enum>Qt::Horizontal</enum>
+ </property>
+ </widget>
+ </item>
</layout>
</widget>
</item>
@@ -1759,6 +1781,12 @@
</item>
<item>
<widget class="QGroupBox" name="buttonShoulderButtonsZRGroup">
+ <property name="sizePolicy">
+ <sizepolicy hsizetype="Maximum" vsizetype="Preferred">
+ <horstretch>0</horstretch>
+ <verstretch>0</verstretch>
+ </sizepolicy>
+ </property>
<property name="title">
<string>ZR</string>
</property>
@@ -1803,6 +1831,22 @@
</property>
</widget>
</item>
+ <item>
+ <widget class="QSlider" name="sliderZRThreshold">
+ <property name="maximumSize">
+ <size>
+ <width>70</width>
+ <height>15</height>
+ </size>
+ </property>
+ <property name="maximum">
+ <number>100</number>
+ </property>
+ <property name="orientation">
+ <enum>Qt::Horizontal</enum>
+ </property>
+ </widget>
+ </item>
</layout>
</widget>
</item>
diff --git a/src/yuzu/configuration/configure_motion_touch.cpp b/src/yuzu/configuration/configure_motion_touch.cpp
index 6a5d625df..f8e08c422 100644
--- a/src/yuzu/configuration/configure_motion_touch.cpp
+++ b/src/yuzu/configuration/configure_motion_touch.cpp
@@ -101,15 +101,16 @@ ConfigureMotionTouch::ConfigureMotionTouch(QWidget* parent,
ConfigureMotionTouch::~ConfigureMotionTouch() = default;
void ConfigureMotionTouch::SetConfiguration() {
- const Common::ParamPackage motion_param(Settings::values.motion_device);
- const Common::ParamPackage touch_param(Settings::values.touch_device);
+ const Common::ParamPackage motion_param(Settings::values.motion_device.GetValue());
+ const Common::ParamPackage touch_param(Settings::values.touch_device.GetValue());
- ui->touch_from_button_checkbox->setChecked(Settings::values.use_touch_from_button);
+ ui->touch_from_button_checkbox->setChecked(Settings::values.use_touch_from_button.GetValue());
touch_from_button_maps = Settings::values.touch_from_button_maps;
for (const auto& touch_map : touch_from_button_maps) {
ui->touch_from_button_map->addItem(QString::fromStdString(touch_map.name));
}
- ui->touch_from_button_map->setCurrentIndex(Settings::values.touch_from_button_map_index);
+ ui->touch_from_button_map->setCurrentIndex(
+ Settings::values.touch_from_button_map_index.GetValue());
ui->motion_sensitivity->setValue(motion_param.Get("sensitivity", 0.01f));
min_x = touch_param.Get("min_x", 100);
@@ -124,7 +125,7 @@ void ConfigureMotionTouch::SetConfiguration() {
udp_server_list_model->setStringList({});
ui->udp_server_list->setModel(udp_server_list_model);
- std::stringstream ss(Settings::values.udp_input_servers);
+ std::stringstream ss(Settings::values.udp_input_servers.GetValue());
std::string token;
while (std::getline(ss, token, ',')) {
diff --git a/src/yuzu/configuration/configure_per_game.cpp b/src/yuzu/configuration/configure_per_game.cpp
index a1d434aca..8c00eec59 100644
--- a/src/yuzu/configuration/configure_per_game.cpp
+++ b/src/yuzu/configuration/configure_per_game.cpp
@@ -47,6 +47,8 @@ ConfigurePerGame::ConfigurePerGame(QWidget* parent, u64 title_id, const std::str
ui->setupUi(this);
setFocusPolicy(Qt::ClickFocus);
setWindowTitle(tr("Properties"));
+ // Remove the Help question mark button from the title bar
+ setWindowFlags(windowFlags() & ~Qt::WindowContextHelpButtonHint);
ui->addonsTab->SetTitleId(title_id);
diff --git a/src/yuzu/configuration/configure_per_game.ui b/src/yuzu/configuration/configure_per_game.ui
index adf6d0b39..7da14146b 100644
--- a/src/yuzu/configuration/configure_per_game.ui
+++ b/src/yuzu/configuration/configure_per_game.ui
@@ -6,10 +6,15 @@
<rect>
<x>0</x>
<y>0</y>
- <width>800</width>
+ <width>900</width>
<height>600</height>
</rect>
</property>
+ <property name="minimumSize">
+ <size>
+ <width>900</width>
+ </size>
+ </property>
<property name="windowTitle">
<string>Dialog</string>
</property>
diff --git a/src/yuzu/configuration/configure_per_game_addons.cpp b/src/yuzu/configuration/configure_per_game_addons.cpp
index 9b709d405..ebb0f411c 100644
--- a/src/yuzu/configuration/configure_per_game_addons.cpp
+++ b/src/yuzu/configuration/configure_per_game_addons.cpp
@@ -79,8 +79,8 @@ void ConfigurePerGameAddons::ApplyConfiguration() {
std::sort(disabled_addons.begin(), disabled_addons.end());
std::sort(current.begin(), current.end());
if (disabled_addons != current) {
- void(Common::FS::RemoveFile(Common::FS::GetYuzuPath(Common::FS::YuzuPath::CacheDir) /
- "game_list" / fmt::format("{:016X}.pv.txt", title_id)));
+ Common::FS::RemoveFile(Common::FS::GetYuzuPath(Common::FS::YuzuPath::CacheDir) /
+ "game_list" / fmt::format("{:016X}.pv.txt", title_id));
}
Settings::values.disabled_addons[title_id] = disabled_addons;
diff --git a/src/yuzu/configuration/configure_profile_manager.cpp b/src/yuzu/configuration/configure_profile_manager.cpp
index f5881e58d..ac849b01d 100644
--- a/src/yuzu/configuration/configure_profile_manager.cpp
+++ b/src/yuzu/configuration/configure_profile_manager.cpp
@@ -166,7 +166,7 @@ void ConfigureProfileManager::PopulateUserList() {
void ConfigureProfileManager::UpdateCurrentUser() {
ui->pm_add->setEnabled(profile_manager->GetUserCount() < Service::Account::MAX_USERS);
- const auto& current_user = profile_manager->GetUser(Settings::values.current_user);
+ const auto& current_user = profile_manager->GetUser(Settings::values.current_user.GetValue());
ASSERT(current_user);
const auto username = GetAccountUsername(*profile_manager, *current_user);
@@ -245,15 +245,18 @@ void ConfigureProfileManager::DeleteUser() {
this, tr("Confirm Delete"),
tr("You are about to delete user with name \"%1\". Are you sure?").arg(username));
- if (confirm == QMessageBox::No)
+ if (confirm == QMessageBox::No) {
return;
+ }
- if (Settings::values.current_user == tree_view->currentIndex().row())
+ if (Settings::values.current_user.GetValue() == tree_view->currentIndex().row()) {
Settings::values.current_user = 0;
+ }
UpdateCurrentUser();
- if (!profile_manager->RemoveUser(*uuid))
+ if (!profile_manager->RemoveUser(*uuid)) {
return;
+ }
item_model->removeRows(tree_view->currentIndex().row(), 1);
tree_view->clearSelection();
diff --git a/src/yuzu/configuration/configure_service.cpp b/src/yuzu/configuration/configure_service.cpp
index 6d954a67f..4aa424803 100644
--- a/src/yuzu/configuration/configure_service.cpp
+++ b/src/yuzu/configuration/configure_service.cpp
@@ -65,7 +65,7 @@ void ConfigureService::RetranslateUi() {
void ConfigureService::SetConfiguration() {
const int index =
- ui->bcat_source->findData(QString::fromStdString(Settings::values.bcat_backend));
+ ui->bcat_source->findData(QString::fromStdString(Settings::values.bcat_backend.GetValue()));
ui->bcat_source->setCurrentIndex(index == -1 ? 0 : index);
}
diff --git a/src/yuzu/configuration/configure_ui.cpp b/src/yuzu/configuration/configure_ui.cpp
index 9674119e1..e8f41bf65 100644
--- a/src/yuzu/configuration/configure_ui.cpp
+++ b/src/yuzu/configuration/configure_ui.cpp
@@ -113,11 +113,12 @@ void ConfigureUi::SetConfiguration() {
ui->theme_combobox->setCurrentIndex(ui->theme_combobox->findData(UISettings::values.theme));
ui->language_combobox->setCurrentIndex(
ui->language_combobox->findData(UISettings::values.language));
- ui->show_add_ons->setChecked(UISettings::values.show_add_ons);
+ ui->show_add_ons->setChecked(UISettings::values.show_add_ons.GetValue());
ui->icon_size_combobox->setCurrentIndex(
- ui->icon_size_combobox->findData(UISettings::values.icon_size));
+ ui->icon_size_combobox->findData(UISettings::values.icon_size.GetValue()));
- ui->enable_screenshot_save_as->setChecked(UISettings::values.enable_screenshot_save_as);
+ ui->enable_screenshot_save_as->setChecked(
+ UISettings::values.enable_screenshot_save_as.GetValue());
ui->screenshot_path_edit->setText(QString::fromStdString(
Common::FS::GetYuzuPathString(Common::FS::YuzuPath::ScreenshotsDir)));
}
@@ -178,7 +179,7 @@ void ConfigureUi::InitializeRowComboBoxes() {
void ConfigureUi::UpdateFirstRowComboBox(bool init) {
const int currentIndex =
- init ? UISettings::values.row_1_text_id
+ init ? UISettings::values.row_1_text_id.GetValue()
: ui->row_1_text_combobox->findData(ui->row_1_text_combobox->currentData());
ui->row_1_text_combobox->clear();
@@ -197,7 +198,7 @@ void ConfigureUi::UpdateFirstRowComboBox(bool init) {
void ConfigureUi::UpdateSecondRowComboBox(bool init) {
const int currentIndex =
- init ? UISettings::values.row_2_text_id
+ init ? UISettings::values.row_2_text_id.GetValue()
: ui->row_2_text_combobox->findData(ui->row_2_text_combobox->currentData());
ui->row_2_text_combobox->clear();
diff --git a/src/yuzu/configuration/configure_web.cpp b/src/yuzu/configuration/configure_web.cpp
index f3f3b54d6..d779251b4 100644
--- a/src/yuzu/configuration/configure_web.cpp
+++ b/src/yuzu/configuration/configure_web.cpp
@@ -88,22 +88,22 @@ void ConfigureWeb::SetConfiguration() {
ui->web_signup_link->setOpenExternalLinks(true);
ui->web_token_info_link->setOpenExternalLinks(true);
- if (Settings::values.yuzu_username.empty()) {
+ if (Settings::values.yuzu_username.GetValue().empty()) {
ui->username->setText(tr("Unspecified"));
} else {
- ui->username->setText(QString::fromStdString(Settings::values.yuzu_username));
+ ui->username->setText(QString::fromStdString(Settings::values.yuzu_username.GetValue()));
}
- ui->toggle_telemetry->setChecked(Settings::values.enable_telemetry);
- ui->edit_token->setText(QString::fromStdString(
- GenerateDisplayToken(Settings::values.yuzu_username, Settings::values.yuzu_token)));
+ ui->toggle_telemetry->setChecked(Settings::values.enable_telemetry.GetValue());
+ ui->edit_token->setText(QString::fromStdString(GenerateDisplayToken(
+ Settings::values.yuzu_username.GetValue(), Settings::values.yuzu_token.GetValue())));
// Connect after setting the values, to avoid calling OnLoginChanged now
connect(ui->edit_token, &QLineEdit::textChanged, this, &ConfigureWeb::OnLoginChanged);
user_verified = true;
- ui->toggle_discordrpc->setChecked(UISettings::values.enable_discord_presence);
+ ui->toggle_discordrpc->setChecked(UISettings::values.enable_discord_presence.GetValue());
}
void ConfigureWeb::ApplyConfiguration() {
diff --git a/src/yuzu/debugger/console.cpp b/src/yuzu/debugger/console.cpp
index c11a326ac..22ca1285d 100644
--- a/src/yuzu/debugger/console.cpp
+++ b/src/yuzu/debugger/console.cpp
@@ -15,10 +15,10 @@
namespace Debugger {
void ToggleConsole() {
static bool console_shown = false;
- if (console_shown == UISettings::values.show_console) {
+ if (console_shown == UISettings::values.show_console.GetValue()) {
return;
} else {
- console_shown = UISettings::values.show_console;
+ console_shown = UISettings::values.show_console.GetValue();
}
#if defined(_WIN32) && !defined(_DEBUG)
diff --git a/src/yuzu/debugger/profiler.cpp b/src/yuzu/debugger/profiler.cpp
index efdc6aa50..7a6f84d96 100644
--- a/src/yuzu/debugger/profiler.cpp
+++ b/src/yuzu/debugger/profiler.cpp
@@ -143,24 +143,25 @@ void MicroProfileWidget::hideEvent(QHideEvent* ev) {
}
void MicroProfileWidget::mouseMoveEvent(QMouseEvent* ev) {
- MicroProfileMousePosition(ev->x() / x_scale, ev->y() / y_scale, 0);
+ MicroProfileMousePosition(ev->pos().x() / x_scale, ev->pos().y() / y_scale, 0);
ev->accept();
}
void MicroProfileWidget::mousePressEvent(QMouseEvent* ev) {
- MicroProfileMousePosition(ev->x() / x_scale, ev->y() / y_scale, 0);
+ MicroProfileMousePosition(ev->pos().x() / x_scale, ev->pos().y() / y_scale, 0);
MicroProfileMouseButton(ev->buttons() & Qt::LeftButton, ev->buttons() & Qt::RightButton);
ev->accept();
}
void MicroProfileWidget::mouseReleaseEvent(QMouseEvent* ev) {
- MicroProfileMousePosition(ev->x() / x_scale, ev->y() / y_scale, 0);
+ MicroProfileMousePosition(ev->pos().x() / x_scale, ev->pos().y() / y_scale, 0);
MicroProfileMouseButton(ev->buttons() & Qt::LeftButton, ev->buttons() & Qt::RightButton);
ev->accept();
}
void MicroProfileWidget::wheelEvent(QWheelEvent* ev) {
- MicroProfileMousePosition(ev->x() / x_scale, ev->y() / y_scale, ev->delta() / 120);
+ MicroProfileMousePosition(ev->pos().x() / x_scale, ev->pos().y() / y_scale,
+ ev->angleDelta().y() / 120);
ev->accept();
}
diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp
index da956c99b..f746bd85d 100644
--- a/src/yuzu/game_list.cpp
+++ b/src/yuzu/game_list.cpp
@@ -244,7 +244,8 @@ void GameList::OnUpdateThemedIcons() {
for (int i = 0; i < item_model->invisibleRootItem()->rowCount(); i++) {
QStandardItem* child = item_model->invisibleRootItem()->child(i);
- const int icon_size = std::min(static_cast<int>(UISettings::values.icon_size), 64);
+ const int icon_size =
+ std::min(static_cast<int>(UISettings::values.icon_size.GetValue()), 64);
switch (child->data(GameListItem::TypeRole).value<GameListItemType>()) {
case GameListItemType::SdmcDir:
child->setData(
@@ -403,9 +404,11 @@ void GameList::ValidateEntry(const QModelIndex& item) {
return;
}
+ const auto title_id = selected.data(GameListItemPath::ProgramIdRole).toULongLong();
+
// Users usually want to run a different game after closing one
search_field->clear();
- emit GameChosen(file_path);
+ emit GameChosen(file_path, title_id);
break;
}
case GameListItemType::AddDir:
@@ -517,11 +520,15 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri
QMenu* remove_menu = context_menu.addMenu(tr("Remove"));
QAction* remove_update = remove_menu->addAction(tr("Remove Installed Update"));
QAction* remove_dlc = remove_menu->addAction(tr("Remove All Installed DLC"));
- QAction* remove_shader_cache = remove_menu->addAction(tr("Remove Shader Cache"));
QAction* remove_custom_config = remove_menu->addAction(tr("Remove Custom Configuration"));
+ QAction* remove_gl_shader_cache = remove_menu->addAction(tr("Remove OpenGL Shader Cache"));
+ QAction* remove_vk_shader_cache = remove_menu->addAction(tr("Remove Vulkan Shader Cache"));
remove_menu->addSeparator();
+ QAction* remove_shader_cache = remove_menu->addAction(tr("Remove All Shader Caches"));
QAction* remove_all_content = remove_menu->addAction(tr("Remove All Installed Contents"));
- QAction* dump_romfs = context_menu.addAction(tr("Dump RomFS"));
+ QMenu* dump_romfs_menu = context_menu.addMenu(tr("Dump RomFS"));
+ QAction* dump_romfs = dump_romfs_menu->addAction(tr("Dump RomFS"));
+ QAction* dump_romfs_sdmc = dump_romfs_menu->addAction(tr("Dump RomFS to SDMC"));
QAction* copy_tid = context_menu.addAction(tr("Copy Title ID to Clipboard"));
QAction* navigate_to_gamedb_entry = context_menu.addAction(tr("Navigate to GameDB entry"));
context_menu.addSeparator();
@@ -535,6 +542,8 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri
open_transferable_shader_cache->setVisible(program_id != 0);
remove_update->setVisible(program_id != 0);
remove_dlc->setVisible(program_id != 0);
+ remove_gl_shader_cache->setVisible(program_id != 0);
+ remove_vk_shader_cache->setVisible(program_id != 0);
remove_shader_cache->setVisible(program_id != 0);
remove_all_content->setVisible(program_id != 0);
auto it = FindMatchingCompatibilityEntry(compatibility_list, program_id);
@@ -545,10 +554,10 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri
emit OpenFolderRequested(program_id, GameListOpenTarget::SaveData, path);
});
connect(start_game, &QAction::triggered, [this, path]() {
- emit BootGame(QString::fromStdString(path), 0, StartGameType::Normal);
+ emit BootGame(QString::fromStdString(path), 0, 0, StartGameType::Normal);
});
connect(start_game_global, &QAction::triggered, [this, path]() {
- emit BootGame(QString::fromStdString(path), 0, StartGameType::Global);
+ emit BootGame(QString::fromStdString(path), 0, 0, StartGameType::Global);
});
connect(open_mod_location, &QAction::triggered, [this, program_id, path]() {
emit OpenFolderRequested(program_id, GameListOpenTarget::ModData, path);
@@ -564,14 +573,24 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri
connect(remove_dlc, &QAction::triggered, [this, program_id]() {
emit RemoveInstalledEntryRequested(program_id, InstalledEntryType::AddOnContent);
});
+ connect(remove_gl_shader_cache, &QAction::triggered, [this, program_id, path]() {
+ emit RemoveFileRequested(program_id, GameListRemoveTarget::GlShaderCache, path);
+ });
+ connect(remove_vk_shader_cache, &QAction::triggered, [this, program_id, path]() {
+ emit RemoveFileRequested(program_id, GameListRemoveTarget::VkShaderCache, path);
+ });
connect(remove_shader_cache, &QAction::triggered, [this, program_id, path]() {
- emit RemoveFileRequested(program_id, GameListRemoveTarget::ShaderCache, path);
+ emit RemoveFileRequested(program_id, GameListRemoveTarget::AllShaderCache, path);
});
connect(remove_custom_config, &QAction::triggered, [this, program_id, path]() {
emit RemoveFileRequested(program_id, GameListRemoveTarget::CustomConfiguration, path);
});
- connect(dump_romfs, &QAction::triggered,
- [this, program_id, path]() { emit DumpRomFSRequested(program_id, path); });
+ connect(dump_romfs, &QAction::triggered, [this, program_id, path]() {
+ emit DumpRomFSRequested(program_id, path, DumpRomFSTarget::Normal);
+ });
+ connect(dump_romfs_sdmc, &QAction::triggered, [this, program_id, path]() {
+ emit DumpRomFSRequested(program_id, path, DumpRomFSTarget::SDMC);
+ });
connect(copy_tid, &QAction::triggered,
[this, program_id]() { emit CopyTIDRequested(program_id); });
connect(navigate_to_gamedb_entry, &QAction::triggered, [this, program_id]() {
diff --git a/src/yuzu/game_list.h b/src/yuzu/game_list.h
index b630e34ff..10339dcca 100644
--- a/src/yuzu/game_list.h
+++ b/src/yuzu/game_list.h
@@ -41,10 +41,17 @@ enum class GameListOpenTarget {
};
enum class GameListRemoveTarget {
- ShaderCache,
+ GlShaderCache,
+ VkShaderCache,
+ AllShaderCache,
CustomConfiguration,
};
+enum class DumpRomFSTarget {
+ Normal,
+ SDMC,
+};
+
enum class InstalledEntryType {
Game,
Update,
@@ -83,8 +90,9 @@ public:
static const QStringList supported_file_extensions;
signals:
- void BootGame(const QString& game_path, std::size_t program_index, StartGameType type);
- void GameChosen(const QString& game_path);
+ void BootGame(const QString& game_path, u64 program_id, std::size_t program_index,
+ StartGameType type);
+ void GameChosen(const QString& game_path, const u64 title_id = 0);
void ShouldCancelWorker();
void OpenFolderRequested(u64 program_id, GameListOpenTarget target,
const std::string& game_path);
@@ -92,7 +100,7 @@ signals:
void RemoveInstalledEntryRequested(u64 program_id, InstalledEntryType type);
void RemoveFileRequested(u64 program_id, GameListRemoveTarget target,
const std::string& game_path);
- void DumpRomFSRequested(u64 program_id, const std::string& game_path);
+ void DumpRomFSRequested(u64 program_id, const std::string& game_path, DumpRomFSTarget target);
void CopyTIDRequested(u64 program_id);
void NavigateToGamedbEntryRequested(u64 program_id,
const CompatibilityList& compatibility_list);
diff --git a/src/yuzu/game_list_p.h b/src/yuzu/game_list_p.h
index 978d27325..982c0789d 100644
--- a/src/yuzu/game_list_p.h
+++ b/src/yuzu/game_list_p.h
@@ -80,7 +80,7 @@ public:
setData(qulonglong(program_id), ProgramIdRole);
setData(game_type, FileTypeRole);
- const u32 size = UISettings::values.icon_size;
+ const u32 size = UISettings::values.icon_size.GetValue();
QPixmap picture;
if (!picture.loadFromData(picture_data.data(), static_cast<u32>(picture_data.size()))) {
@@ -108,8 +108,8 @@ public:
data(TitleRole).toString(),
}};
- const auto& row1 = row_data.at(UISettings::values.row_1_text_id);
- const int row2_id = UISettings::values.row_2_text_id;
+ const auto& row1 = row_data.at(UISettings::values.row_1_text_id.GetValue());
+ const int row2_id = UISettings::values.row_2_text_id.GetValue();
if (role == SortRole) {
return row1.toLower();
@@ -233,7 +233,8 @@ public:
UISettings::GameDir* game_dir = &directory;
setData(QVariant(UISettings::values.game_dirs.indexOf(directory)), GameDirRole);
- const int icon_size = std::min(static_cast<int>(UISettings::values.icon_size), 64);
+ const int icon_size =
+ std::min(static_cast<int>(UISettings::values.icon_size.GetValue()), 64);
switch (dir_type) {
case GameListItemType::SdmcDir:
setData(
@@ -294,7 +295,8 @@ public:
explicit GameListAddDir() {
setData(type(), TypeRole);
- const int icon_size = std::min(static_cast<int>(UISettings::values.icon_size), 64);
+ const int icon_size =
+ std::min(static_cast<int>(UISettings::values.icon_size.GetValue()), 64);
setData(QIcon::fromTheme(QStringLiteral("plus"))
.pixmap(icon_size)
.scaled(icon_size, icon_size, Qt::IgnoreAspectRatio, Qt::SmoothTransformation),
@@ -316,7 +318,8 @@ public:
explicit GameListFavorites() {
setData(type(), TypeRole);
- const int icon_size = std::min(static_cast<int>(UISettings::values.icon_size), 64);
+ const int icon_size =
+ std::min(static_cast<int>(UISettings::values.icon_size.GetValue()), 64);
setData(QIcon::fromTheme(QStringLiteral("star"))
.pixmap(icon_size)
.scaled(icon_size, icon_size, Qt::IgnoreAspectRatio, Qt::SmoothTransformation),
diff --git a/src/yuzu/game_list_worker.cpp b/src/yuzu/game_list_worker.cpp
index 33cc90d5a..2d5492157 100644
--- a/src/yuzu/game_list_worker.cpp
+++ b/src/yuzu/game_list_worker.cpp
@@ -336,18 +336,44 @@ void GameListWorker::ScanFileSystem(ScanTarget target, const std::string& dir_pa
}
}
} else {
- std::vector<u8> icon;
- [[maybe_unused]] const auto res1 = loader->ReadIcon(icon);
+ std::vector<u64> program_ids;
+ loader->ReadProgramIds(program_ids);
+
+ if (res2 == Loader::ResultStatus::Success && program_ids.size() > 1 &&
+ (file_type == Loader::FileType::XCI || file_type == Loader::FileType::NSP)) {
+ for (const auto id : program_ids) {
+ loader = Loader::GetLoader(system, file, id);
+ if (!loader) {
+ continue;
+ }
+
+ std::vector<u8> icon;
+ [[maybe_unused]] const auto res1 = loader->ReadIcon(icon);
- std::string name = " ";
- [[maybe_unused]] const auto res3 = loader->ReadTitle(name);
+ std::string name = " ";
+ [[maybe_unused]] const auto res3 = loader->ReadTitle(name);
- const FileSys::PatchManager patch{program_id, system.GetFileSystemController(),
- system.GetContentProvider()};
+ const FileSys::PatchManager patch{id, system.GetFileSystemController(),
+ system.GetContentProvider()};
+
+ emit EntryReady(MakeGameListEntry(physical_name, name, icon, *loader, id,
+ compatibility_list, patch),
+ parent_dir);
+ }
+ } else {
+ std::vector<u8> icon;
+ [[maybe_unused]] const auto res1 = loader->ReadIcon(icon);
- emit EntryReady(MakeGameListEntry(physical_name, name, icon, *loader, program_id,
- compatibility_list, patch),
- parent_dir);
+ std::string name = " ";
+ [[maybe_unused]] const auto res3 = loader->ReadTitle(name);
+
+ const FileSys::PatchManager patch{program_id, system.GetFileSystemController(),
+ system.GetContentProvider()};
+
+ emit EntryReady(MakeGameListEntry(physical_name, name, icon, *loader,
+ program_id, compatibility_list, patch),
+ parent_dir);
+ }
}
} else if (is_dir) {
watch_list.append(QString::fromStdString(physical_name));
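Pulled out of the scan loop for readability, the shape of the new multi-program branch is roughly the following; this is a condensed restatement of the hunk above (the program-id read-status check is omitted), not a drop-in replacement.

// XCI/NSP files that report more than one program ID get one game-list entry per ID,
// each backed by its own loader and a PatchManager keyed by that ID.
std::vector<u64> program_ids;
loader->ReadProgramIds(program_ids);

const bool multi_program = program_ids.size() > 1 &&
                           (file_type == Loader::FileType::XCI ||
                            file_type == Loader::FileType::NSP);
if (multi_program) {
    for (const u64 id : program_ids) {
        auto sub_loader = Loader::GetLoader(system, file, id);
        if (!sub_loader) {
            continue;
        }
        std::vector<u8> icon;
        std::string name = " ";
        (void)sub_loader->ReadIcon(icon);
        (void)sub_loader->ReadTitle(name);
        const FileSys::PatchManager patch{id, system.GetFileSystemController(),
                                          system.GetContentProvider()};
        emit EntryReady(MakeGameListEntry(physical_name, name, icon, *sub_loader, id,
                                          compatibility_list, patch),
                        parent_dir);
    }
}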
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index be8933c5c..e172d2ff4 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -11,11 +11,11 @@
#endif
// VFS includes must be before glad as they will conflict with Windows file api, which uses defines.
-#include "applets/controller.h"
-#include "applets/error.h"
-#include "applets/profile_select.h"
-#include "applets/software_keyboard.h"
-#include "applets/web_browser.h"
+#include "applets/qt_controller.h"
+#include "applets/qt_error.h"
+#include "applets/qt_profile_select.h"
+#include "applets/qt_software_keyboard.h"
+#include "applets/qt_web_browser.h"
#include "common/nvidia_flags.h"
#include "configuration/configure_input.h"
#include "configuration/configure_per_game.h"
@@ -104,6 +104,7 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual
#include "input_common/main.h"
#include "util/overlay_dialog.h"
#include "video_core/gpu.h"
+#include "video_core/renderer_base.h"
#include "video_core/shader_notify.h"
#include "yuzu/about_dialog.h"
#include "yuzu/bootmanager.h"
@@ -155,11 +156,13 @@ enum class CalloutFlag : uint32_t {
};
void GMainWindow::ShowTelemetryCallout() {
- if (UISettings::values.callout_flags & static_cast<uint32_t>(CalloutFlag::Telemetry)) {
+ if (UISettings::values.callout_flags.GetValue() &
+ static_cast<uint32_t>(CalloutFlag::Telemetry)) {
return;
}
- UISettings::values.callout_flags |= static_cast<uint32_t>(CalloutFlag::Telemetry);
+ UISettings::values.callout_flags =
+ UISettings::values.callout_flags.GetValue() | static_cast<uint32_t>(CalloutFlag::Telemetry);
const QString telemetry_message =
tr("<a href='https://yuzu-emu.org/help/feature/telemetry/'>Anonymous "
"data is collected</a> to help improve yuzu. "
@@ -176,7 +179,7 @@ static void InitializeLogging() {
using namespace Common;
Log::Filter log_filter;
- log_filter.ParseFilterString(Settings::values.log_filter);
+ log_filter.ParseFilterString(Settings::values.log_filter.GetValue());
Log::SetGlobalFilter(log_filter);
const auto log_dir = FS::GetYuzuPath(FS::YuzuPath::LogDir);
@@ -194,10 +197,10 @@ static void RemoveCachedContents() {
const auto offline_legal_information = cache_dir / "offline_web_applet_legal_information";
const auto offline_system_data = cache_dir / "offline_web_applet_system_data";
- void(Common::FS::RemoveDirRecursively(offline_fonts));
- void(Common::FS::RemoveDirRecursively(offline_manual));
- void(Common::FS::RemoveDirRecursively(offline_legal_information));
- void(Common::FS::RemoveDirRecursively(offline_system_data));
+ Common::FS::RemoveDirRecursively(offline_fonts);
+ Common::FS::RemoveDirRecursively(offline_manual);
+ Common::FS::RemoveDirRecursively(offline_legal_information);
+ Common::FS::RemoveDirRecursively(offline_system_data);
}
GMainWindow::GMainWindow()
@@ -215,7 +218,7 @@ GMainWindow::GMainWindow()
default_theme_paths = QIcon::themeSearchPaths();
UpdateUITheme();
- SetDiscordEnabled(UISettings::values.enable_discord_presence);
+ SetDiscordEnabled(UISettings::values.enable_discord_presence.GetValue());
discord_rpc->Update();
RegisterMetaTypes();
@@ -236,7 +239,8 @@ GMainWindow::GMainWindow()
const auto build_id = std::string(Common::g_build_id);
const auto yuzu_build = fmt::format("yuzu Development Build | {}-{}", branch_name, description);
- const auto override_build = fmt::format(std::string(Common::g_title_bar_format_idle), build_id);
+ const auto override_build =
+ fmt::format(fmt::runtime(std::string(Common::g_title_bar_format_idle)), build_id);
const auto yuzu_build_version = override_build.empty() ? yuzu_build : override_build;
LOG_INFO(Frontend, "yuzu Version: {}", yuzu_build_version);
@@ -785,41 +789,28 @@ void GMainWindow::InitializeWidgets() {
dock_status_button->setChecked(Settings::values.use_docked_mode.GetValue());
statusBar()->insertPermanentWidget(0, dock_status_button);
- // Setup ASync button
- async_status_button = new QPushButton();
- async_status_button->setObjectName(QStringLiteral("TogglableStatusBarButton"));
- async_status_button->setFocusPolicy(Qt::NoFocus);
- connect(async_status_button, &QPushButton::clicked, [&] {
- if (emulation_running) {
- return;
+ gpu_accuracy_button = new QPushButton();
+ gpu_accuracy_button->setObjectName(QStringLiteral("GPUStatusBarButton"));
+ gpu_accuracy_button->setCheckable(true);
+ gpu_accuracy_button->setFocusPolicy(Qt::NoFocus);
+ connect(gpu_accuracy_button, &QPushButton::clicked, [this] {
+ switch (Settings::values.gpu_accuracy.GetValue()) {
+ case Settings::GPUAccuracy::High: {
+ Settings::values.gpu_accuracy.SetValue(Settings::GPUAccuracy::Normal);
+ break;
}
- Settings::values.use_asynchronous_gpu_emulation.SetValue(
- !Settings::values.use_asynchronous_gpu_emulation.GetValue());
- async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation.GetValue());
- Core::System::GetInstance().ApplySettings();
- });
- async_status_button->setText(tr("ASYNC"));
- async_status_button->setCheckable(true);
- async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation.GetValue());
-
- // Setup Multicore button
- multicore_status_button = new QPushButton();
- multicore_status_button->setObjectName(QStringLiteral("TogglableStatusBarButton"));
- multicore_status_button->setFocusPolicy(Qt::NoFocus);
- connect(multicore_status_button, &QPushButton::clicked, [&] {
- if (emulation_running) {
- return;
+ case Settings::GPUAccuracy::Normal:
+ case Settings::GPUAccuracy::Extreme:
+ default: {
+ Settings::values.gpu_accuracy.SetValue(Settings::GPUAccuracy::High);
}
- Settings::values.use_multi_core.SetValue(!Settings::values.use_multi_core.GetValue());
- multicore_status_button->setChecked(Settings::values.use_multi_core.GetValue());
+ }
+
Core::System::GetInstance().ApplySettings();
+ UpdateGPUAccuracyButton();
});
- multicore_status_button->setText(tr("MULTICORE"));
- multicore_status_button->setCheckable(true);
- multicore_status_button->setChecked(Settings::values.use_multi_core.GetValue());
-
- statusBar()->insertPermanentWidget(0, multicore_status_button);
- statusBar()->insertPermanentWidget(0, async_status_button);
+ UpdateGPUAccuracyButton();
+ statusBar()->insertPermanentWidget(0, gpu_accuracy_button);
// Setup Renderer API button
renderer_status_button = new QPushButton();
@@ -981,23 +972,23 @@ void GMainWindow::InitializeHotkeys() {
});
connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Toggle Speed Limit"), this),
&QShortcut::activated, this, [&] {
- Settings::values.use_frame_limit.SetValue(
- !Settings::values.use_frame_limit.GetValue());
+ Settings::values.use_speed_limit.SetValue(
+ !Settings::values.use_speed_limit.GetValue());
UpdateStatusBar();
});
constexpr u16 SPEED_LIMIT_STEP = 5;
connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Increase Speed Limit"), this),
&QShortcut::activated, this, [&] {
- if (Settings::values.frame_limit.GetValue() < 9999 - SPEED_LIMIT_STEP) {
- Settings::values.frame_limit.SetValue(SPEED_LIMIT_STEP +
- Settings::values.frame_limit.GetValue());
+ if (Settings::values.speed_limit.GetValue() < 9999 - SPEED_LIMIT_STEP) {
+ Settings::values.speed_limit.SetValue(SPEED_LIMIT_STEP +
+ Settings::values.speed_limit.GetValue());
UpdateStatusBar();
}
});
connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Decrease Speed Limit"), this),
&QShortcut::activated, this, [&] {
- if (Settings::values.frame_limit.GetValue() > SPEED_LIMIT_STEP) {
- Settings::values.frame_limit.SetValue(Settings::values.frame_limit.GetValue() -
+ if (Settings::values.speed_limit.GetValue() > SPEED_LIMIT_STEP) {
+ Settings::values.speed_limit.SetValue(Settings::values.speed_limit.GetValue() -
SPEED_LIMIT_STEP);
UpdateStatusBar();
}
@@ -1025,7 +1016,11 @@ void GMainWindow::InitializeHotkeys() {
connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Mute Audio"), this),
&QShortcut::activated, this,
[] { Settings::values.audio_muted = !Settings::values.audio_muted; });
-
+ connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Toggle Framerate Limit"), this),
+ &QShortcut::activated, this, [] {
+ Settings::values.disable_fps_limit.SetValue(
+ !Settings::values.disable_fps_limit.GetValue());
+ });
connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Toggle Mouse Panning"), this),
&QShortcut::activated, this, [&] {
Settings::values.mouse_panning = !Settings::values.mouse_panning;
@@ -1054,23 +1049,24 @@ void GMainWindow::RestoreUIState() {
render_window->restoreGeometry(UISettings::values.renderwindow_geometry);
#if MICROPROFILE_ENABLED
microProfileDialog->restoreGeometry(UISettings::values.microprofile_geometry);
- microProfileDialog->setVisible(UISettings::values.microprofile_visible);
+ microProfileDialog->setVisible(UISettings::values.microprofile_visible.GetValue());
#endif
game_list->LoadInterfaceLayout();
- ui.action_Single_Window_Mode->setChecked(UISettings::values.single_window_mode);
+ ui.action_Single_Window_Mode->setChecked(UISettings::values.single_window_mode.GetValue());
ToggleWindowMode();
- ui.action_Fullscreen->setChecked(UISettings::values.fullscreen);
+ ui.action_Fullscreen->setChecked(UISettings::values.fullscreen.GetValue());
- ui.action_Display_Dock_Widget_Headers->setChecked(UISettings::values.display_titlebar);
+ ui.action_Display_Dock_Widget_Headers->setChecked(
+ UISettings::values.display_titlebar.GetValue());
OnDisplayTitleBars(ui.action_Display_Dock_Widget_Headers->isChecked());
- ui.action_Show_Filter_Bar->setChecked(UISettings::values.show_filter_bar);
+ ui.action_Show_Filter_Bar->setChecked(UISettings::values.show_filter_bar.GetValue());
game_list->SetFilterVisible(ui.action_Show_Filter_Bar->isChecked());
- ui.action_Show_Status_Bar->setChecked(UISettings::values.show_status_bar);
+ ui.action_Show_Status_Bar->setChecked(UISettings::values.show_status_bar.GetValue());
statusBar()->setVisible(ui.action_Show_Status_Bar->isChecked());
Debugger::ToggleConsole();
}
@@ -1212,7 +1208,7 @@ void GMainWindow::AllowOSSleep() {
#endif
}
-bool GMainWindow::LoadROM(const QString& filename, std::size_t program_index) {
+bool GMainWindow::LoadROM(const QString& filename, u64 program_id, std::size_t program_index) {
// Shutdown previous session if the emu thread is still active...
if (emu_thread != nullptr)
ShutdownGame();
@@ -1235,15 +1231,16 @@ bool GMainWindow::LoadROM(const QString& filename, std::size_t program_index) {
});
const Core::System::ResultStatus result{
- system.Load(*render_window, filename.toStdString(), program_index)};
+ system.Load(*render_window, filename.toStdString(), program_id, program_index)};
- const auto drd_callout =
- (UISettings::values.callout_flags & static_cast<u32>(CalloutFlag::DRDDeprecation)) == 0;
+ const auto drd_callout = (UISettings::values.callout_flags.GetValue() &
+ static_cast<u32>(CalloutFlag::DRDDeprecation)) == 0;
if (result == Core::System::ResultStatus::Success &&
system.GetAppLoader().GetFileType() == Loader::FileType::DeconstructedRomDirectory &&
drd_callout) {
- UISettings::values.callout_flags |= static_cast<u32>(CalloutFlag::DRDDeprecation);
+ UISettings::values.callout_flags = UISettings::values.callout_flags.GetValue() |
+ static_cast<u32>(CalloutFlag::DRDDeprecation);
QMessageBox::warning(
this, tr("Warning Outdated Game Format"),
tr("You are using the deconstructed ROM directory format for this game, which is an "
@@ -1321,7 +1318,8 @@ void GMainWindow::SelectAndSetCurrentUser() {
Settings::values.current_user = dialog.GetIndex();
}
-void GMainWindow::BootGame(const QString& filename, std::size_t program_index, StartGameType type) {
+void GMainWindow::BootGame(const QString& filename, u64 program_id, std::size_t program_index,
+ StartGameType type) {
LOG_INFO(Frontend, "yuzu starting...");
StoreRecentFile(filename); // Put the filename on top of the list
@@ -1331,7 +1329,7 @@ void GMainWindow::BootGame(const QString& filename, std::size_t program_index, S
auto& system = Core::System::GetInstance();
const auto v_file = Core::GetGameFileFromPath(vfs, filename.toUtf8().constData());
- const auto loader = Loader::GetLoader(system, v_file, program_index);
+ const auto loader = Loader::GetLoader(system, v_file, program_id, program_index);
if (loader != nullptr && loader->ReadProgramId(title_id) == Loader::ResultStatus::Success &&
type == StartGameType::Normal) {
@@ -1345,6 +1343,9 @@ void GMainWindow::BootGame(const QString& filename, std::size_t program_index, S
ConfigureVibration::SetAllVibrationDevices();
+ // Disable fps limit toggle when booting a new title
+ Settings::values.disable_fps_limit.SetValue(false);
+
// Save configurations
UpdateUISettings();
game_list->SaveInterfaceLayout();
@@ -1356,7 +1357,7 @@ void GMainWindow::BootGame(const QString& filename, std::size_t program_index, S
SelectAndSetCurrentUser();
}
- if (!LoadROM(filename, program_index))
+ if (!LoadROM(filename, program_id, program_index))
return;
// Create and start the emulation thread
@@ -1387,8 +1388,6 @@ void GMainWindow::BootGame(const QString& filename, std::size_t program_index, S
game_list_placeholder->hide();
}
status_bar_update_timer.start(500);
- async_status_button->setDisabled(true);
- multicore_status_button->setDisabled(true);
renderer_status_button->setDisabled(true);
if (UISettings::values.hide_mouse || Settings::values.mouse_panning) {
@@ -1417,8 +1416,14 @@ void GMainWindow::BootGame(const QString& filename, std::size_t program_index, S
title_name = Common::FS::PathToUTF8String(
std::filesystem::path{filename.toStdU16String()}.filename());
}
+ const bool is_64bit = system.Kernel().CurrentProcess()->Is64BitProcess();
+ const auto instruction_set_suffix = is_64bit ? tr("(64-bit)") : tr("(32-bit)");
+ title_name = tr("%1 %2", "%1 is the title name. %2 indicates if the title is 64-bit or 32-bit")
+ .arg(QString::fromStdString(title_name), instruction_set_suffix)
+ .toStdString();
LOG_INFO(Frontend, "Booting game: {:016X} | {} | {}", title_id, title_name, title_version);
- UpdateWindowTitle(title_name, title_version);
+ const auto gpu_vendor = system.GPU().Renderer().GetDeviceVendor();
+ UpdateWindowTitle(title_name, title_version, gpu_vendor);
loading_screen->Prepare(system.GetAppLoader());
loading_screen->show();
@@ -1486,8 +1491,6 @@ void GMainWindow::ShutdownGame() {
emu_speed_label->setVisible(false);
game_fps_label->setVisible(false);
emu_frametime_label->setVisible(false);
- async_status_button->setEnabled(true);
- multicore_status_button->setEnabled(true);
renderer_status_button->setEnabled(true);
emulation_running = false;
@@ -1529,8 +1532,8 @@ void GMainWindow::UpdateRecentFiles() {
ui.menu_recent_files->setEnabled(num_recent_files != 0);
}
-void GMainWindow::OnGameListLoadFile(QString game_path) {
- BootGame(game_path);
+void GMainWindow::OnGameListLoadFile(QString game_path, u64 program_id) {
+ BootGame(game_path, program_id);
}
void GMainWindow::OnGameListOpenFolder(u64 program_id, GameListOpenTarget target,
@@ -1634,35 +1637,15 @@ void GMainWindow::OnGameListOpenFolder(u64 program_id, GameListOpenTarget target
void GMainWindow::OnTransferableShaderCacheOpenFile(u64 program_id) {
const auto shader_cache_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir);
- const auto transferable_shader_cache_folder_path = shader_cache_dir / "opengl" / "transferable";
- const auto transferable_shader_cache_file_path =
- transferable_shader_cache_folder_path / fmt::format("{:016X}.bin", program_id);
-
- if (!Common::FS::Exists(transferable_shader_cache_file_path)) {
+ const auto shader_cache_folder_path{shader_cache_dir / fmt::format("{:016x}", program_id)};
+ if (!Common::FS::CreateDirs(shader_cache_folder_path)) {
QMessageBox::warning(this, tr("Error Opening Transferable Shader Cache"),
- tr("A shader cache for this title does not exist."));
+ tr("Filed to create the shader cache directory for this title."));
return;
}
-
- const auto qt_shader_cache_folder_path =
- QString::fromStdString(Common::FS::PathToUTF8String(transferable_shader_cache_folder_path));
- const auto qt_shader_cache_file_path =
- QString::fromStdString(Common::FS::PathToUTF8String(transferable_shader_cache_file_path));
-
- // Windows supports opening a folder with selecting a specified file in explorer. On every other
- // OS we just open the transferable shader cache folder without preselecting the transferable
- // shader cache file for the selected game.
-#if defined(Q_OS_WIN)
- const QString explorer = QStringLiteral("explorer");
- QStringList param;
- if (!QFileInfo(qt_shader_cache_file_path).isDir()) {
- param << QStringLiteral("/select,");
- }
- param << QDir::toNativeSeparators(qt_shader_cache_file_path);
- QProcess::startDetached(explorer, param);
-#else
- QDesktopServices::openUrl(QUrl::fromLocalFile(qt_shader_cache_folder_path));
-#endif
+ const auto shader_path_string{Common::FS::PathToUTF8String(shader_cache_folder_path)};
+ const auto qt_shader_cache_path = QString::fromStdString(shader_path_string);
+ QDesktopServices::openUrl(QUrl::fromLocalFile(qt_shader_cache_path));
}
static std::size_t CalculateRomFSEntrySize(const FileSys::VirtualDir& dir, bool full) {
@@ -1739,8 +1722,8 @@ void GMainWindow::OnGameListRemoveInstalledEntry(u64 program_id, InstalledEntryT
RemoveAddOnContent(program_id, entry_type);
break;
}
- void(Common::FS::RemoveDirRecursively(Common::FS::GetYuzuPath(Common::FS::YuzuPath::CacheDir) /
- "game_list"));
+ Common::FS::RemoveDirRecursively(Common::FS::GetYuzuPath(Common::FS::YuzuPath::CacheDir) /
+ "game_list");
game_list->PopulateAsync(UISettings::values.game_dirs);
}
@@ -1805,8 +1788,12 @@ void GMainWindow::OnGameListRemoveFile(u64 program_id, GameListRemoveTarget targ
const std::string& game_path) {
const QString question = [this, target] {
switch (target) {
- case GameListRemoveTarget::ShaderCache:
- return tr("Delete Transferable Shader Cache?");
+ case GameListRemoveTarget::GlShaderCache:
+ return tr("Delete OpenGL Transferable Shader Cache?");
+ case GameListRemoveTarget::VkShaderCache:
+ return tr("Delete Vulkan Transferable Shader Cache?");
+ case GameListRemoveTarget::AllShaderCache:
+ return tr("Delete All Transferable Shader Caches?");
case GameListRemoveTarget::CustomConfiguration:
return tr("Remove Custom Game Configuration?");
default:
@@ -1820,8 +1807,12 @@ void GMainWindow::OnGameListRemoveFile(u64 program_id, GameListRemoveTarget targ
}
switch (target) {
- case GameListRemoveTarget::ShaderCache:
- RemoveTransferableShaderCache(program_id);
+ case GameListRemoveTarget::GlShaderCache:
+ case GameListRemoveTarget::VkShaderCache:
+ RemoveTransferableShaderCache(program_id, target);
+ break;
+ case GameListRemoveTarget::AllShaderCache:
+ RemoveAllTransferableShaderCaches(program_id);
break;
case GameListRemoveTarget::CustomConfiguration:
RemoveCustomConfiguration(program_id, game_path);
@@ -1829,18 +1820,27 @@ void GMainWindow::OnGameListRemoveFile(u64 program_id, GameListRemoveTarget targ
}
}
-void GMainWindow::RemoveTransferableShaderCache(u64 program_id) {
+void GMainWindow::RemoveTransferableShaderCache(u64 program_id, GameListRemoveTarget target) {
+ const auto target_file_name = [target] {
+ switch (target) {
+ case GameListRemoveTarget::GlShaderCache:
+ return "opengl.bin";
+ case GameListRemoveTarget::VkShaderCache:
+ return "vulkan.bin";
+ default:
+ return "";
+ }
+ }();
const auto shader_cache_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir);
- const auto transferable_shader_cache_file_path =
- shader_cache_dir / "opengl" / "transferable" / fmt::format("{:016X}.bin", program_id);
+ const auto shader_cache_folder_path = shader_cache_dir / fmt::format("{:016x}", program_id);
+ const auto target_file = shader_cache_folder_path / target_file_name;
- if (!Common::FS::Exists(transferable_shader_cache_file_path)) {
+ if (!Common::FS::Exists(target_file)) {
QMessageBox::warning(this, tr("Error Removing Transferable Shader Cache"),
tr("A shader cache for this title does not exist."));
return;
}
-
- if (Common::FS::RemoveFile(transferable_shader_cache_file_path)) {
+ if (Common::FS::RemoveFile(target_file)) {
QMessageBox::information(this, tr("Successfully Removed"),
tr("Successfully removed the transferable shader cache."));
} else {
@@ -1849,6 +1849,24 @@ void GMainWindow::RemoveTransferableShaderCache(u64 program_id) {
}
}
+void GMainWindow::RemoveAllTransferableShaderCaches(u64 program_id) {
+ const auto shader_cache_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir);
+ const auto program_shader_cache_dir = shader_cache_dir / fmt::format("{:016x}", program_id);
+
+ if (!Common::FS::Exists(program_shader_cache_dir)) {
+ QMessageBox::warning(this, tr("Error Removing Transferable Shader Caches"),
+ tr("A shader cache for this title does not exist."));
+ return;
+ }
+ if (Common::FS::RemoveDirRecursively(program_shader_cache_dir)) {
+ QMessageBox::information(this, tr("Successfully Removed"),
+ tr("Successfully removed the transferable shader caches."));
+ } else {
+ QMessageBox::warning(this, tr("Error Removing Transferable Shader Caches"),
+ tr("Failed to remove the transferable shader cache directory."));
+ }
+}
+
void GMainWindow::RemoveCustomConfiguration(u64 program_id, const std::string& game_path) {
const auto file_path = std::filesystem::path(Common::FS::ToU8String(game_path));
const auto config_file_name =
@@ -1872,7 +1890,8 @@ void GMainWindow::RemoveCustomConfiguration(u64 program_id, const std::string& g
}
}
-void GMainWindow::OnGameListDumpRomFS(u64 program_id, const std::string& game_path) {
+void GMainWindow::OnGameListDumpRomFS(u64 program_id, const std::string& game_path,
+ DumpRomFSTarget target) {
const auto failed = [this] {
QMessageBox::warning(this, tr("RomFS Extraction Failed!"),
tr("There was an error copying the RomFS files or the user "
@@ -1900,7 +1919,10 @@ void GMainWindow::OnGameListDumpRomFS(u64 program_id, const std::string& game_pa
return;
}
- const auto dump_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::DumpDir);
+ const auto dump_dir =
+ target == DumpRomFSTarget::Normal
+ ? Common::FS::GetYuzuPath(Common::FS::YuzuPath::DumpDir)
+ : Common::FS::GetYuzuPath(Common::FS::YuzuPath::SDMCDir) / "atmosphere" / "contents";
const auto romfs_dir = fmt::format("{:016X}/romfs", *romfs_title_id);
const auto path = Common::FS::PathToUTF8String(dump_dir / romfs_dir);
@@ -1910,7 +1932,8 @@ void GMainWindow::OnGameListDumpRomFS(u64 program_id, const std::string& game_pa
if (*romfs_title_id == program_id) {
const u64 ivfc_offset = loader->ReadRomFSIVFCOffset();
const FileSys::PatchManager pm{program_id, system.GetFileSystemController(), installed};
- romfs = pm.PatchRomFS(file, ivfc_offset, FileSys::ContentRecordType::Program);
+ romfs =
+ pm.PatchRomFS(file, ivfc_offset, FileSys::ContentRecordType::Program, nullptr, false);
} else {
romfs = installed.GetEntry(*romfs_title_id, FileSys::ContentRecordType::Data)->GetRomFS();
}
@@ -2209,8 +2232,8 @@ void GMainWindow::OnMenuInstallToNAND() {
: tr("%n file(s) failed to install\n", "", failed_files.size()));
QMessageBox::information(this, tr("Install Results"), install_results);
- void(Common::FS::RemoveDirRecursively(Common::FS::GetYuzuPath(Common::FS::YuzuPath::CacheDir) /
- "game_list"));
+ Common::FS::RemoveDirRecursively(Common::FS::GetYuzuPath(Common::FS::YuzuPath::CacheDir) /
+ "game_list");
game_list->PopulateAsync(UISettings::values.game_dirs);
ui.action_Install_File_NAND->setEnabled(true);
}
@@ -2426,7 +2449,7 @@ void GMainWindow::OnLoadComplete() {
void GMainWindow::OnExecuteProgram(std::size_t program_index) {
ShutdownGame();
- BootGame(last_filename_booted, program_index);
+ BootGame(last_filename_booted, 0, program_index);
}
void GMainWindow::ErrorDisplayDisplayError(QString error_code, QString error_text) {
@@ -2438,7 +2461,8 @@ void GMainWindow::ErrorDisplayDisplayError(QString error_code, QString error_tex
}
void GMainWindow::OnMenuReportCompatibility() {
- if (!Settings::values.yuzu_token.empty() && !Settings::values.yuzu_username.empty()) {
+ if (!Settings::values.yuzu_token.GetValue().empty() &&
+ !Settings::values.yuzu_username.GetValue().empty()) {
CompatDB compatdb{this};
compatdb.exec();
} else {
@@ -2603,7 +2627,7 @@ void GMainWindow::ResetWindowSize1080() {
void GMainWindow::OnConfigure() {
const auto old_theme = UISettings::values.theme;
- const bool old_discord_presence = UISettings::values.enable_discord_presence;
+ const bool old_discord_presence = UISettings::values.enable_discord_presence.GetValue();
ConfigureDialog configure_dialog(this, hotkey_registry, input_subsystem.get());
connect(&configure_dialog, &ConfigureDialog::LanguageChanged, this,
@@ -2660,8 +2684,8 @@ void GMainWindow::OnConfigure() {
if (UISettings::values.theme != old_theme) {
UpdateUITheme();
}
- if (UISettings::values.enable_discord_presence != old_discord_presence) {
- SetDiscordEnabled(UISettings::values.enable_discord_presence);
+ if (UISettings::values.enable_discord_presence.GetValue() != old_discord_presence) {
+ SetDiscordEnabled(UISettings::values.enable_discord_presence.GetValue());
}
emit UpdateThemedIcons();
@@ -2797,7 +2821,7 @@ void GMainWindow::OnCaptureScreenshot() {
QString::fromStdString(Common::FS::GetYuzuPathString(Common::FS::YuzuPath::ScreenshotsDir));
const auto date =
QDateTime::currentDateTime().toString(QStringLiteral("yyyy-MM-dd_hh-mm-ss-zzz"));
- QString filename = QStringLiteral("%1%2_%3.png")
+ QString filename = QStringLiteral("%1/%2_%3.png")
.arg(screenshot_path)
.arg(title_id, 16, 16, QLatin1Char{'0'})
.arg(date);
@@ -2817,7 +2841,8 @@ void GMainWindow::OnCaptureScreenshot() {
}
}
#endif
- render_window->CaptureScreenshot(UISettings::values.screenshot_resolution_factor, filename);
+ render_window->CaptureScreenshot(UISettings::values.screenshot_resolution_factor.GetValue(),
+ filename);
OnStartGame();
}
@@ -2842,25 +2867,27 @@ void GMainWindow::MigrateConfigFiles() {
LOG_INFO(Frontend, "Migrating config file from {} to {}", origin, destination);
if (!Common::FS::RenameFile(origin, destination)) {
// Delete the old config file if one already exists in the new location.
- void(Common::FS::RemoveFile(origin));
+ Common::FS::RemoveFile(origin);
}
}
}
-void GMainWindow::UpdateWindowTitle(const std::string& title_name,
- const std::string& title_version) {
+void GMainWindow::UpdateWindowTitle(std::string_view title_name, std::string_view title_version,
+ std::string_view gpu_vendor) {
const auto branch_name = std::string(Common::g_scm_branch);
const auto description = std::string(Common::g_scm_desc);
const auto build_id = std::string(Common::g_build_id);
const auto yuzu_title = fmt::format("yuzu | {}-{}", branch_name, description);
- const auto override_title = fmt::format(std::string(Common::g_title_bar_format_idle), build_id);
+ const auto override_title =
+ fmt::format(fmt::runtime(std::string(Common::g_title_bar_format_idle)), build_id);
const auto window_title = override_title.empty() ? yuzu_title : override_title;
if (title_name.empty()) {
setWindowTitle(QString::fromStdString(window_title));
} else {
- const auto run_title = fmt::format("{} | {} | {}", window_title, title_name, title_version);
+ const auto run_title =
+ fmt::format("{} | {} | {} | {}", window_title, title_name, title_version, gpu_vendor);
setWindowTitle(QString::fromStdString(run_title));
}
}
@@ -2871,26 +2898,31 @@ void GMainWindow::UpdateStatusBar() {
return;
}
- auto results = Core::System::GetInstance().GetAndResetPerfStats();
- auto& shader_notify = Core::System::GetInstance().GPU().ShaderNotify();
- const auto shaders_building = shader_notify.GetShadersBuilding();
+ auto& system = Core::System::GetInstance();
+ auto results = system.GetAndResetPerfStats();
+ auto& shader_notify = system.GPU().ShaderNotify();
+ const int shaders_building = shader_notify.ShadersBuilding();
- if (shaders_building != 0) {
- shader_building_label->setText(
- tr("Building: %n shader(s)", "", static_cast<int>(shaders_building)));
+ if (shaders_building > 0) {
+ shader_building_label->setText(tr("Building: %n shader(s)", "", shaders_building));
shader_building_label->setVisible(true);
} else {
shader_building_label->setVisible(false);
}
- if (Settings::values.use_frame_limit.GetValue()) {
+ if (Settings::values.use_speed_limit.GetValue()) {
emu_speed_label->setText(tr("Speed: %1% / %2%")
.arg(results.emulation_speed * 100.0, 0, 'f', 0)
- .arg(Settings::values.frame_limit.GetValue()));
+ .arg(Settings::values.speed_limit.GetValue()));
} else {
emu_speed_label->setText(tr("Speed: %1%").arg(results.emulation_speed * 100.0, 0, 'f', 0));
}
- game_fps_label->setText(tr("Game: %1 FPS").arg(results.average_game_fps, 0, 'f', 0));
+ if (Settings::values.disable_fps_limit) {
+ game_fps_label->setText(
+ tr("Game: %1 FPS (Unlocked)").arg(results.average_game_fps, 0, 'f', 0));
+ } else {
+ game_fps_label->setText(tr("Game: %1 FPS").arg(results.average_game_fps, 0, 'f', 0));
+ }
emu_frametime_label->setText(tr("Frame: %1 ms").arg(results.frametime * 1000.0, 0, 'f', 2));
emu_speed_label->setVisible(!Settings::values.use_multi_core.GetValue());
@@ -2898,12 +2930,35 @@ void GMainWindow::UpdateStatusBar() {
emu_frametime_label->setVisible(true);
}
+void GMainWindow::UpdateGPUAccuracyButton() {
+ switch (Settings::values.gpu_accuracy.GetValue()) {
+ case Settings::GPUAccuracy::Normal: {
+ gpu_accuracy_button->setText(tr("GPU NORMAL"));
+ gpu_accuracy_button->setChecked(false);
+ break;
+ }
+ case Settings::GPUAccuracy::High: {
+ gpu_accuracy_button->setText(tr("GPU HIGH"));
+ gpu_accuracy_button->setChecked(true);
+ break;
+ }
+ case Settings::GPUAccuracy::Extreme: {
+ gpu_accuracy_button->setText(tr("GPU EXTREME"));
+ gpu_accuracy_button->setChecked(true);
+ break;
+ }
+ default: {
+ gpu_accuracy_button->setText(tr("GPU ERROR"));
+ gpu_accuracy_button->setChecked(true);
+ }
+ }
+}
+
void GMainWindow::UpdateStatusButtons() {
dock_status_button->setChecked(Settings::values.use_docked_mode.GetValue());
- multicore_status_button->setChecked(Settings::values.use_multi_core.GetValue());
- async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation.GetValue());
renderer_status_button->setChecked(Settings::values.renderer_backend.GetValue() ==
Settings::RendererBackend::Vulkan);
+ UpdateGPUAccuracyButton();
}
void GMainWindow::UpdateUISettings() {
@@ -3036,9 +3091,9 @@ void GMainWindow::OnReinitializeKeys(ReinitializeKeyBehavior behavior) {
const auto keys_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::KeysDir);
- void(Common::FS::RemoveFile(keys_dir / "prod.keys_autogenerated"));
- void(Common::FS::RemoveFile(keys_dir / "console.keys_autogenerated"));
- void(Common::FS::RemoveFile(keys_dir / "title.keys_autogenerated"));
+ Common::FS::RemoveFile(keys_dir / "prod.keys_autogenerated");
+ Common::FS::RemoveFile(keys_dir / "console.keys_autogenerated");
+ Common::FS::RemoveFile(keys_dir / "title.keys_autogenerated");
}
Core::Crypto::KeyManager& keys = Core::Crypto::KeyManager::Instance();
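A recurring change in this file is wrapping the user-configurable title-bar format string in fmt::runtime(). With fmt 8 and newer, fmt::format() checks its format string at compile time, which is impossible for a value only known at run time; fmt::runtime() opts the string out of that check. A small self-contained example (the format string below is a hypothetical stand-in for Common::g_title_bar_format_idle):

#include <fmt/format.h>
#include <string>

int main() {
    // A format pattern that is only known at run time, e.g. read from configuration.
    const std::string user_format = "yuzu {}"; // hypothetical user-supplied pattern
    const std::string build_id = "0123abcd";
    // Without fmt::runtime(), passing a std::string here fails to compile on fmt 8+.
    fmt::print("{}\n", fmt::format(fmt::runtime(user_format), build_id));
    return 0;
}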
diff --git a/src/yuzu/main.h b/src/yuzu/main.h
index 11f152cbe..38e66ccd0 100644
--- a/src/yuzu/main.h
+++ b/src/yuzu/main.h
@@ -34,6 +34,7 @@ class QProgressDialog;
class WaitTreeWidget;
enum class GameListOpenTarget;
enum class GameListRemoveTarget;
+enum class DumpRomFSTarget;
enum class InstalledEntryType;
class GameListPlaceholder;
@@ -185,8 +186,8 @@ private:
void PreventOSSleep();
void AllowOSSleep();
- bool LoadROM(const QString& filename, std::size_t program_index);
- void BootGame(const QString& filename, std::size_t program_index = 0,
+ bool LoadROM(const QString& filename, u64 program_id, std::size_t program_index);
+ void BootGame(const QString& filename, u64 program_id = 0, std::size_t program_index = 0,
StartGameType with_config = StartGameType::Normal);
void ShutdownGame();
@@ -237,14 +238,14 @@ private slots:
void OnOpenQuickstartGuide();
void OnOpenFAQ();
/// Called whenever a user selects a game in the game list widget.
- void OnGameListLoadFile(QString game_path);
+ void OnGameListLoadFile(QString game_path, u64 program_id);
void OnGameListOpenFolder(u64 program_id, GameListOpenTarget target,
const std::string& game_path);
void OnTransferableShaderCacheOpenFile(u64 program_id);
void OnGameListRemoveInstalledEntry(u64 program_id, InstalledEntryType type);
void OnGameListRemoveFile(u64 program_id, GameListRemoveTarget target,
const std::string& game_path);
- void OnGameListDumpRomFS(u64 program_id, const std::string& game_path);
+ void OnGameListDumpRomFS(u64 program_id, const std::string& game_path, DumpRomFSTarget target);
void OnGameListCopyTID(u64 program_id);
void OnGameListNavigateToGamedbEntry(u64 program_id,
const CompatibilityList& compatibility_list);
@@ -281,15 +282,17 @@ private:
void RemoveBaseContent(u64 program_id, const QString& entry_type);
void RemoveUpdateContent(u64 program_id, const QString& entry_type);
void RemoveAddOnContent(u64 program_id, const QString& entry_type);
- void RemoveTransferableShaderCache(u64 program_id);
+ void RemoveTransferableShaderCache(u64 program_id, GameListRemoveTarget target);
+ void RemoveAllTransferableShaderCaches(u64 program_id);
void RemoveCustomConfiguration(u64 program_id, const std::string& game_path);
std::optional<u64> SelectRomFSDumpTarget(const FileSys::ContentProvider&, u64 program_id);
InstallResult InstallNSPXCI(const QString& filename);
InstallResult InstallNCA(const QString& filename);
void MigrateConfigFiles();
- void UpdateWindowTitle(const std::string& title_name = {},
- const std::string& title_version = {});
+ void UpdateWindowTitle(std::string_view title_name = {}, std::string_view title_version = {},
+ std::string_view gpu_vendor = {});
void UpdateStatusBar();
+ void UpdateGPUAccuracyButton();
void UpdateStatusButtons();
void UpdateUISettings();
void HideMouseCursor();
@@ -315,8 +318,7 @@ private:
QLabel* emu_speed_label = nullptr;
QLabel* game_fps_label = nullptr;
QLabel* emu_frametime_label = nullptr;
- QPushButton* async_status_button = nullptr;
- QPushButton* multicore_status_button = nullptr;
+ QPushButton* gpu_accuracy_button = nullptr;
QPushButton* renderer_status_button = nullptr;
QPushButton* dock_status_button = nullptr;
QTimer status_bar_update_timer;
diff --git a/src/yuzu/uisettings.h b/src/yuzu/uisettings.h
index cdcb83f9f..7b9d2dd53 100644
--- a/src/yuzu/uisettings.h
+++ b/src/yuzu/uisettings.h
@@ -13,6 +13,7 @@
#include <QStringList>
#include <QVector>
#include "common/common_types.h"
+#include "common/settings.h"
namespace UISettings {
@@ -48,26 +49,26 @@ struct Values {
QByteArray gamelist_header_state;
QByteArray microprofile_geometry;
- bool microprofile_visible;
+ Settings::BasicSetting<bool> microprofile_visible{false, "microProfileDialogVisible"};
- bool single_window_mode;
- bool fullscreen;
- bool display_titlebar;
- bool show_filter_bar;
- bool show_status_bar;
+ Settings::BasicSetting<bool> single_window_mode{true, "singleWindowMode"};
+ Settings::BasicSetting<bool> fullscreen{false, "fullscreen"};
+ Settings::BasicSetting<bool> display_titlebar{true, "displayTitleBars"};
+ Settings::BasicSetting<bool> show_filter_bar{true, "showFilterBar"};
+ Settings::BasicSetting<bool> show_status_bar{true, "showStatusBar"};
- bool confirm_before_closing;
- bool first_start;
- bool pause_when_in_background;
- bool hide_mouse;
+ Settings::BasicSetting<bool> confirm_before_closing{true, "confirmClose"};
+ Settings::BasicSetting<bool> first_start{true, "firstStart"};
+ Settings::BasicSetting<bool> pause_when_in_background{false, "pauseWhenInBackground"};
+ Settings::BasicSetting<bool> hide_mouse{false, "hideInactiveMouse"};
- bool select_user_on_boot;
+ Settings::BasicSetting<bool> select_user_on_boot{false, "select_user_on_boot"};
// Discord RPC
- bool enable_discord_presence;
+ Settings::BasicSetting<bool> enable_discord_presence{true, "enable_discord_presence"};
- bool enable_screenshot_save_as;
- u16 screenshot_resolution_factor;
+ Settings::BasicSetting<bool> enable_screenshot_save_as{true, "enable_screenshot_save_as"};
+ Settings::BasicSetting<u16> screenshot_resolution_factor{0, "screenshot_resolution_factor"};
QString roms_path;
QString symbols_path;
@@ -83,18 +84,18 @@ struct Values {
// Shortcut name <Shortcut, context>
std::vector<Shortcut> shortcuts;
- uint32_t callout_flags;
+ Settings::BasicSetting<uint32_t> callout_flags{0, "calloutFlags"};
// logging
- bool show_console;
+ Settings::BasicSetting<bool> show_console{false, "showConsole"};
// Game List
- bool show_add_ons;
- uint32_t icon_size;
- uint8_t row_1_text_id;
- uint8_t row_2_text_id;
+ Settings::BasicSetting<bool> show_add_ons{true, "show_add_ons"};
+ Settings::BasicSetting<uint32_t> icon_size{64, "icon_size"};
+ Settings::BasicSetting<uint8_t> row_1_text_id{3, "row_1_text_id"};
+ Settings::BasicSetting<uint8_t> row_2_text_id{2, "row_2_text_id"};
std::atomic_bool is_game_list_reload_pending{false};
- bool cache_game_list;
+ Settings::BasicSetting<bool> cache_game_list{true, "cache_game_list"};
bool configuration_applied;
bool reset_to_defaults;
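The plain members above become Settings::BasicSetting<T>, which couples each value with its INI label and default. The class itself is defined in src/common/settings.h and is not part of this hunk; the following is only a minimal sketch of the interface the rest of the patch depends on (GetValue/SetValue/GetDefault/GetLabel and assignment), not the real implementation.

#include <string>
#include <utility>

// Minimal sketch only; see src/common/settings.h for the real Settings::BasicSetting.
template <typename Type>
class BasicSetting {
public:
    BasicSetting(Type default_val, std::string label_)
        : value{default_val}, default_value{std::move(default_val)}, label{std::move(label_)} {}

    const Type& GetValue() const { return value; }            // e.g. icon_size.GetValue()
    void SetValue(Type new_value) { value = std::move(new_value); }
    const Type& GetDefault() const { return default_value; }  // used by Config::ReadSetting
    const std::string& GetLabel() const { return label; }     // INI key name
    BasicSetting& operator=(Type new_value) {                 // used by "setting = reader.Get(...)"
        value = std::move(new_value);
        return *this;
    }

private:
    Type value;
    Type default_value;
    std::string label;
};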
diff --git a/src/yuzu_cmd/CMakeLists.txt b/src/yuzu_cmd/CMakeLists.txt
index 4bf25727b..e55a19649 100644
--- a/src/yuzu_cmd/CMakeLists.txt
+++ b/src/yuzu_cmd/CMakeLists.txt
@@ -38,6 +38,11 @@ target_include_directories(yuzu-cmd PRIVATE ${RESOURCES_DIR})
target_include_directories(yuzu-cmd PRIVATE ../../externals/Vulkan-Headers/include)
+if (YUZU_USE_EXTERNAL_SDL2)
+ target_compile_definitions(yuzu-cmd PRIVATE -DYUZU_USE_EXTERNAL_SDL2)
+ target_include_directories(yuzu-cmd PRIVATE ${PROJECT_BINARY_DIR}/externals/SDL/include)
+endif()
+
if(UNIX AND NOT APPLE)
install(TARGETS yuzu-cmd RUNTIME DESTINATION "${CMAKE_INSTALL_PREFIX}/bin")
endif()
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index 621b31571..5af1ee6a8 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -241,6 +241,22 @@ static const std::array<int, 8> keyboard_mods{
SDL_SCANCODE_RCTRL, SDL_SCANCODE_RSHIFT, SDL_SCANCODE_RALT, SDL_SCANCODE_RGUI,
};
+template <>
+void Config::ReadSetting(const std::string& group, Settings::BasicSetting<std::string>& setting) {
+ setting = sdl2_config->Get(group, setting.GetLabel(), setting.GetDefault());
+}
+
+template <>
+void Config::ReadSetting(const std::string& group, Settings::BasicSetting<bool>& setting) {
+ setting = sdl2_config->GetBoolean(group, setting.GetLabel(), setting.GetDefault());
+}
+
+template <typename Type>
+void Config::ReadSetting(const std::string& group, Settings::BasicSetting<Type>& setting) {
+ setting = static_cast<Type>(sdl2_config->GetInteger(group, setting.GetLabel(),
+ static_cast<long>(setting.GetDefault())));
+}
+
void Config::ReadValues() {
// Controls
for (std::size_t p = 0; p < Settings::values.players.GetValue().size(); ++p) {
@@ -264,8 +280,7 @@ void Config::ReadValues() {
}
}
- Settings::values.mouse_enabled =
- sdl2_config->GetBoolean("ControlsGeneral", "mouse_enabled", false);
+ ReadSetting("ControlsGeneral", Settings::values.mouse_enabled);
for (int i = 0; i < Settings::NativeMouseButton::NumMouseButtons; ++i) {
std::string default_param = InputCommon::GenerateKeyboardParam(default_mouse_buttons[i]);
Settings::values.mouse_buttons[i] = sdl2_config->Get(
@@ -275,14 +290,13 @@ void Config::ReadValues() {
Settings::values.mouse_buttons[i] = default_param;
}
- Settings::values.motion_device = sdl2_config->Get(
- "ControlsGeneral", "motion_device", "engine:motion_emu,update_period:100,sensitivity:0.01");
+ ReadSetting("ControlsGeneral", Settings::values.motion_device);
+
+ ReadSetting("ControlsGeneral", Settings::values.touch_device);
- Settings::values.keyboard_enabled =
- sdl2_config->GetBoolean("ControlsGeneral", "keyboard_enabled", false);
+ ReadSetting("ControlsGeneral", Settings::values.keyboard_enabled);
- Settings::values.debug_pad_enabled =
- sdl2_config->GetBoolean("ControlsGeneral", "debug_pad_enabled", false);
+ ReadSetting("ControlsGeneral", Settings::values.debug_pad_enabled);
for (int i = 0; i < Settings::NativeButton::NumButtons; ++i) {
std::string default_param = InputCommon::GenerateKeyboardParam(default_buttons[i]);
Settings::values.debug_pad_buttons[i] = sdl2_config->Get(
@@ -303,12 +317,9 @@ void Config::ReadValues() {
Settings::values.debug_pad_analogs[i] = default_param;
}
- Settings::values.vibration_enabled.SetValue(
- sdl2_config->GetBoolean("ControlsGeneral", "vibration_enabled", true));
- Settings::values.enable_accurate_vibrations.SetValue(
- sdl2_config->GetBoolean("ControlsGeneral", "enable_accurate_vibrations", false));
- Settings::values.motion_enabled.SetValue(
- sdl2_config->GetBoolean("ControlsGeneral", "motion_enabled", true));
+ ReadSetting("ControlsGeneral", Settings::values.vibration_enabled);
+ ReadSetting("ControlsGeneral", Settings::values.enable_accurate_vibrations);
+ ReadSetting("ControlsGeneral", Settings::values.motion_enabled);
Settings::values.touchscreen.enabled =
sdl2_config->GetBoolean("ControlsGeneral", "touch_enabled", true);
Settings::values.touchscreen.rotation_angle =
@@ -349,13 +360,11 @@ void Config::ReadValues() {
Settings::TouchFromButtonMap{"default", {}});
num_touch_from_button_maps = 1;
}
- Settings::values.use_touch_from_button =
- sdl2_config->GetBoolean("ControlsGeneral", "use_touch_from_button", false);
- Settings::values.touch_from_button_map_index =
- std::clamp(Settings::values.touch_from_button_map_index, 0, num_touch_from_button_maps - 1);
+ ReadSetting("ControlsGeneral", Settings::values.use_touch_from_button);
+ Settings::values.touch_from_button_map_index = std::clamp(
+ Settings::values.touch_from_button_map_index.GetValue(), 0, num_touch_from_button_maps - 1);
- Settings::values.udp_input_servers =
- sdl2_config->Get("Controls", "udp_input_address", InputCommon::CemuhookUDP::DEFAULT_SRV);
+ ReadSetting("ControlsGeneral", Settings::values.udp_input_servers);
std::transform(keyboard_keys.begin(), keyboard_keys.end(),
Settings::values.keyboard_keys.begin(), InputCommon::GenerateKeyboardParam);
@@ -367,8 +376,7 @@ void Config::ReadValues() {
Settings::values.keyboard_mods.begin(), InputCommon::GenerateKeyboardParam);
// Data Storage
- Settings::values.use_virtual_sd =
- sdl2_config->GetBoolean("Data Storage", "use_virtual_sd", true);
+ ReadSetting("Data Storage", Settings::values.use_virtual_sd);
FS::SetYuzuPath(FS::YuzuPath::NANDDir,
sdl2_config->Get("Data Storage", "nand_directory",
FS::GetYuzuPathString(FS::YuzuPath::NANDDir)));
@@ -381,18 +389,16 @@ void Config::ReadValues() {
FS::SetYuzuPath(FS::YuzuPath::DumpDir,
sdl2_config->Get("Data Storage", "dump_directory",
FS::GetYuzuPathString(FS::YuzuPath::DumpDir)));
- Settings::values.gamecard_inserted =
- sdl2_config->GetBoolean("Data Storage", "gamecard_inserted", false);
- Settings::values.gamecard_current_game =
- sdl2_config->GetBoolean("Data Storage", "gamecard_current_game", false);
- Settings::values.gamecard_path = sdl2_config->Get("Data Storage", "gamecard_path", "");
+ ReadSetting("Data Storage", Settings::values.gamecard_inserted);
+ ReadSetting("Data Storage", Settings::values.gamecard_current_game);
+ ReadSetting("Data Storage", Settings::values.gamecard_path);
// System
- Settings::values.use_docked_mode.SetValue(
- sdl2_config->GetBoolean("System", "use_docked_mode", true));
+ ReadSetting("System", Settings::values.use_docked_mode);
- Settings::values.current_user = std::clamp<int>(
- sdl2_config->GetInteger("System", "current_user", 0), 0, Service::Account::MAX_USERS - 1);
+ ReadSetting("System", Settings::values.current_user);
+ Settings::values.current_user = std::clamp<int>(Settings::values.current_user.GetValue(), 0,
+ Service::Account::MAX_USERS - 1);
const auto rng_seed_enabled = sdl2_config->GetBoolean("System", "rng_seed_enabled", false);
if (rng_seed_enabled) {
@@ -409,87 +415,84 @@ void Config::ReadValues() {
Settings::values.custom_rtc = std::nullopt;
}
- Settings::values.language_index.SetValue(
- sdl2_config->GetInteger("System", "language_index", 1));
- Settings::values.time_zone_index.SetValue(
- sdl2_config->GetInteger("System", "time_zone_index", 0));
+ ReadSetting("System", Settings::values.language_index);
+ ReadSetting("System", Settings::values.region_index);
+ ReadSetting("System", Settings::values.time_zone_index);
+ ReadSetting("System", Settings::values.sound_index);
// Core
- Settings::values.use_multi_core.SetValue(
- sdl2_config->GetBoolean("Core", "use_multi_core", true));
+ ReadSetting("Core", Settings::values.use_multi_core);
+
+ // Cpu
+ ReadSetting("Cpu", Settings::values.cpu_accuracy);
+ ReadSetting("Cpu", Settings::values.cpu_debug_mode);
+ ReadSetting("Cpu", Settings::values.cpuopt_page_tables);
+ ReadSetting("Cpu", Settings::values.cpuopt_block_linking);
+ ReadSetting("Cpu", Settings::values.cpuopt_return_stack_buffer);
+ ReadSetting("Cpu", Settings::values.cpuopt_fast_dispatcher);
+ ReadSetting("Cpu", Settings::values.cpuopt_context_elimination);
+ ReadSetting("Cpu", Settings::values.cpuopt_const_prop);
+ ReadSetting("Cpu", Settings::values.cpuopt_misc_ir);
+ ReadSetting("Cpu", Settings::values.cpuopt_reduce_misalign_checks);
+ ReadSetting("Cpu", Settings::values.cpuopt_fastmem);
+ ReadSetting("Cpu", Settings::values.cpuopt_unsafe_unfuse_fma);
+ ReadSetting("Cpu", Settings::values.cpuopt_unsafe_reduce_fp_error);
+ ReadSetting("Cpu", Settings::values.cpuopt_unsafe_ignore_standard_fpcr);
+ ReadSetting("Cpu", Settings::values.cpuopt_unsafe_inaccurate_nan);
+ ReadSetting("Cpu", Settings::values.cpuopt_unsafe_fastmem_check);
// Renderer
- const int renderer_backend = sdl2_config->GetInteger(
- "Renderer", "backend", static_cast<int>(Settings::RendererBackend::OpenGL));
- Settings::values.renderer_backend.SetValue(
- static_cast<Settings::RendererBackend>(renderer_backend));
- Settings::values.renderer_debug = sdl2_config->GetBoolean("Renderer", "debug", false);
- Settings::values.vulkan_device.SetValue(
- sdl2_config->GetInteger("Renderer", "vulkan_device", 0));
-
- Settings::values.aspect_ratio.SetValue(
- static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0)));
- Settings::values.max_anisotropy.SetValue(
- static_cast<int>(sdl2_config->GetInteger("Renderer", "max_anisotropy", 0)));
- Settings::values.use_frame_limit.SetValue(
- sdl2_config->GetBoolean("Renderer", "use_frame_limit", true));
- Settings::values.frame_limit.SetValue(
- static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100)));
- Settings::values.use_disk_shader_cache.SetValue(
- sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false));
- const int gpu_accuracy_level = sdl2_config->GetInteger("Renderer", "gpu_accuracy", 1);
- Settings::values.gpu_accuracy.SetValue(static_cast<Settings::GPUAccuracy>(gpu_accuracy_level));
- Settings::values.use_asynchronous_gpu_emulation.SetValue(
- sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", true));
- Settings::values.use_vsync.SetValue(
- static_cast<u16>(sdl2_config->GetInteger("Renderer", "use_vsync", 1)));
- Settings::values.use_assembly_shaders.SetValue(
- sdl2_config->GetBoolean("Renderer", "use_assembly_shaders", true));
- Settings::values.use_asynchronous_shaders.SetValue(
- sdl2_config->GetBoolean("Renderer", "use_asynchronous_shaders", false));
- Settings::values.use_nvdec_emulation.SetValue(
- sdl2_config->GetBoolean("Renderer", "use_nvdec_emulation", true));
- Settings::values.accelerate_astc.SetValue(
- sdl2_config->GetBoolean("Renderer", "accelerate_astc", true));
- Settings::values.use_fast_gpu_time.SetValue(
- sdl2_config->GetBoolean("Renderer", "use_fast_gpu_time", true));
-
- Settings::values.bg_red.SetValue(
- static_cast<float>(sdl2_config->GetReal("Renderer", "bg_red", 0.0)));
- Settings::values.bg_green.SetValue(
- static_cast<float>(sdl2_config->GetReal("Renderer", "bg_green", 0.0)));
- Settings::values.bg_blue.SetValue(
- static_cast<float>(sdl2_config->GetReal("Renderer", "bg_blue", 0.0)));
+ ReadSetting("Renderer", Settings::values.renderer_backend);
+ ReadSetting("Renderer", Settings::values.renderer_debug);
+ ReadSetting("Renderer", Settings::values.enable_nsight_aftermath);
+ ReadSetting("Renderer", Settings::values.disable_shader_loop_safety_checks);
+ ReadSetting("Renderer", Settings::values.vulkan_device);
+
+ ReadSetting("Renderer", Settings::values.fullscreen_mode);
+ ReadSetting("Renderer", Settings::values.aspect_ratio);
+ ReadSetting("Renderer", Settings::values.max_anisotropy);
+ ReadSetting("Renderer", Settings::values.use_speed_limit);
+ ReadSetting("Renderer", Settings::values.speed_limit);
+ ReadSetting("Renderer", Settings::values.use_disk_shader_cache);
+ ReadSetting("Renderer", Settings::values.gpu_accuracy);
+ ReadSetting("Renderer", Settings::values.use_asynchronous_gpu_emulation);
+ ReadSetting("Renderer", Settings::values.use_vsync);
+ ReadSetting("Renderer", Settings::values.fps_cap);
+ ReadSetting("Renderer", Settings::values.disable_fps_limit);
+ ReadSetting("Renderer", Settings::values.shader_backend);
+ ReadSetting("Renderer", Settings::values.use_asynchronous_shaders);
+ ReadSetting("Renderer", Settings::values.use_nvdec_emulation);
+ ReadSetting("Renderer", Settings::values.accelerate_astc);
+ ReadSetting("Renderer", Settings::values.use_fast_gpu_time);
+ ReadSetting("Renderer", Settings::values.use_caches_gc);
+
+ ReadSetting("Renderer", Settings::values.bg_red);
+ ReadSetting("Renderer", Settings::values.bg_green);
+ ReadSetting("Renderer", Settings::values.bg_blue);
// Audio
- Settings::values.sink_id = sdl2_config->Get("Audio", "output_engine", "auto");
- Settings::values.enable_audio_stretching.SetValue(
- sdl2_config->GetBoolean("Audio", "enable_audio_stretching", true));
- Settings::values.audio_device_id = sdl2_config->Get("Audio", "output_device", "auto");
- Settings::values.volume.SetValue(
- static_cast<float>(sdl2_config->GetReal("Audio", "volume", 1)));
+ ReadSetting("Audio", Settings::values.sink_id);
+ ReadSetting("Audio", Settings::values.enable_audio_stretching);
+ ReadSetting("Audio", Settings::values.audio_device_id);
+ ReadSetting("Audio", Settings::values.volume);
// Miscellaneous
- Settings::values.log_filter = sdl2_config->Get("Miscellaneous", "log_filter", "*:Trace");
- Settings::values.use_dev_keys = sdl2_config->GetBoolean("Miscellaneous", "use_dev_keys", false);
+ // log_filter has a different default here than it does in common
+ Settings::values.log_filter =
+ sdl2_config->Get("Miscellaneous", Settings::values.log_filter.GetLabel(), "*:Trace");
+ ReadSetting("Miscellaneous", Settings::values.use_dev_keys);
// Debugging
Settings::values.record_frame_times =
sdl2_config->GetBoolean("Debugging", "record_frame_times", false);
- Settings::values.program_args = sdl2_config->Get("Debugging", "program_args", "");
- Settings::values.dump_exefs = sdl2_config->GetBoolean("Debugging", "dump_exefs", false);
- Settings::values.dump_nso = sdl2_config->GetBoolean("Debugging", "dump_nso", false);
- Settings::values.enable_fs_access_log =
- sdl2_config->GetBoolean("Debugging", "enable_fs_access_log", false);
- Settings::values.reporting_services =
- sdl2_config->GetBoolean("Debugging", "reporting_services", false);
- Settings::values.quest_flag = sdl2_config->GetBoolean("Debugging", "quest_flag", false);
- Settings::values.use_debug_asserts =
- sdl2_config->GetBoolean("Debugging", "use_debug_asserts", false);
- Settings::values.use_auto_stub = sdl2_config->GetBoolean("Debugging", "use_auto_stub", false);
-
- Settings::values.disable_macro_jit =
- sdl2_config->GetBoolean("Debugging", "disable_macro_jit", false);
+ ReadSetting("Debugging", Settings::values.dump_exefs);
+ ReadSetting("Debugging", Settings::values.dump_nso);
+ ReadSetting("Debugging", Settings::values.enable_fs_access_log);
+ ReadSetting("Debugging", Settings::values.reporting_services);
+ ReadSetting("Debugging", Settings::values.quest_flag);
+ ReadSetting("Debugging", Settings::values.use_debug_asserts);
+ ReadSetting("Debugging", Settings::values.use_auto_stub);
+ ReadSetting("Debugging", Settings::values.disable_macro_jit);
const auto title_list = sdl2_config->Get("AddOns", "title_ids", "");
std::stringstream ss(title_list);
@@ -509,17 +512,14 @@ void Config::ReadValues() {
}
// Web Service
- Settings::values.enable_telemetry =
- sdl2_config->GetBoolean("WebService", "enable_telemetry", true);
- Settings::values.web_api_url =
- sdl2_config->Get("WebService", "web_api_url", "https://api.yuzu-emu.org");
- Settings::values.yuzu_username = sdl2_config->Get("WebService", "yuzu_username", "");
- Settings::values.yuzu_token = sdl2_config->Get("WebService", "yuzu_token", "");
+ ReadSetting("WebService", Settings::values.enable_telemetry);
+ ReadSetting("WebService", Settings::values.web_api_url);
+ ReadSetting("WebService", Settings::values.yuzu_username);
+ ReadSetting("WebService", Settings::values.yuzu_token);
// Services
- Settings::values.bcat_backend = sdl2_config->Get("Services", "bcat_backend", "none");
- Settings::values.bcat_boxcat_local =
- sdl2_config->GetBoolean("Services", "bcat_boxcat_local", false);
+ ReadSetting("Services", Settings::values.bcat_backend);
+ ReadSetting("Services", Settings::values.bcat_boxcat_local);
}
void Config::Reload() {
diff --git a/src/yuzu_cmd/config.h b/src/yuzu_cmd/config.h
index 807199278..1ee932be2 100644
--- a/src/yuzu_cmd/config.h
+++ b/src/yuzu_cmd/config.h
@@ -8,6 +8,8 @@
#include <memory>
#include <string>
+#include "common/settings.h"
+
class INIReader;
class Config {
@@ -22,4 +24,14 @@ public:
~Config();
void Reload();
+
+private:
+ /**
+ * Applies a value read from the sdl2_config to a BasicSetting.
+ *
+ * @param group The name of the INI group
+ * @param setting The yuzu setting to modify
+ */
+ template <typename Type>
+ void ReadSetting(const std::string& group, Settings::BasicSetting<Type>& setting);
};
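In config.cpp above, this member template is fully specialized for std::string and bool, while the primary template covers every remaining integral/enum setting through GetInteger(). A stripped-down, self-contained sketch of that dispatch pattern, using hypothetical stand-ins for INIReader and Settings::BasicSetting:

#include <iostream>
#include <string>
#include <utility>

// Hypothetical stand-in for INIReader: always hands back the supplied default.
struct StubReader {
    std::string Get(const std::string&, const std::string&, const std::string& def) const {
        return def;
    }
    bool GetBoolean(const std::string&, const std::string&, bool def) const { return def; }
    long GetInteger(const std::string&, const std::string&, long def) const { return def; }
};

// Hypothetical stand-in for Settings::BasicSetting.
template <typename T>
struct Setting {
    T value;
    std::string label;
    T GetDefault() const { return value; }
    const std::string& GetLabel() const { return label; }
    Setting& operator=(T v) {
        value = std::move(v);
        return *this;
    }
};

struct StubConfig {
    StubReader reader;
    template <typename T>
    void ReadSetting(const std::string& group, Setting<T>& setting);
};

// Full specializations handle the two types that need a dedicated reader call...
template <>
void StubConfig::ReadSetting(const std::string& group, Setting<std::string>& setting) {
    setting = reader.Get(group, setting.GetLabel(), setting.GetDefault());
}

template <>
void StubConfig::ReadSetting(const std::string& group, Setting<bool>& setting) {
    setting = reader.GetBoolean(group, setting.GetLabel(), setting.GetDefault());
}

// ...while every other type (u16, enum classes, ...) funnels through GetInteger().
template <typename T>
void StubConfig::ReadSetting(const std::string& group, Setting<T>& setting) {
    setting = static_cast<T>(
        reader.GetInteger(group, setting.GetLabel(), static_cast<long>(setting.GetDefault())));
}

int main() {
    StubConfig config;
    Setting<bool> use_vsync{true, "use_vsync"};
    Setting<int> speed_limit{100, "speed_limit"};
    config.ReadSetting("Renderer", use_vsync);
    config.ReadSetting("Renderer", speed_limit);
    std::cout << use_vsync.value << ' ' << speed_limit.value << '\n';
    return 0;
}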
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index 37d895ebd..e646e2d2f 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -65,6 +65,13 @@ button_screenshot=
lstick=
rstick=
+# To use the debug_pad, prefix each button setting above with `debug_pad_`.
+# e.g. debug_pad_button_a=
+
+# Enable debug pad inputs to the guest
+# 0 (default): Disabled, 1: Enabled
+debug_pad_enabled =
+
# Whether to enable or disable vibration
# 0: Disabled, 1 (default): Enabled
vibration_enabled=
@@ -73,6 +80,10 @@ vibration_enabled=
# 0 (default): Disabled, 1: Enabled
enable_accurate_vibrations=
+# Enables controller motion inputs
+# 0: Disabled, 1 (default): Enabled
+motion_enabled =
+
# for motion input, the following devices are available:
# - "motion_emu" (default) for emulating motion input from mouse input. Required parameters:
# - "update_period": update period in milliseconds (default to 100)
@@ -98,19 +109,30 @@ use_touch_from_button=
#touch_from_button_maps_0_bind_1=bar
# etc.
-# Most desktop operating systems do not expose a way to poll the motion state of the controllers
-# so as a way around it, cemuhook created a udp client/server protocol to broadcast the data directly
-# from a controller device to the client program. Citra has a client that can connect and read
-# from any cemuhook compatible motion program.
+# List of Cemuhook UDP servers, delimited by ','.
+# Default: 127.0.0.1:26760
+# Example: 127.0.0.1:26760,123.4.5.67:26761
+udp_input_servers =
-# IPv4 address of the udp input server (Default "127.0.0.1")
-udp_input_address=127.0.0.1
+# Enable controlling an axis via mouse input.
+# 0 (default): Off, 1: On
+mouse_panning =
-# Port of the udp input server. (Default 26760)
-udp_input_port=
+# Set mouse sensitivity.
+# Default: 1.0
+mouse_panning_sensitivity =
-# The pad to request data on. Should be between 0 (Pad 1) and 3 (Pad 4). (Default 0)
-udp_pad_index=
+# Emulate an analog control stick from keyboard inputs.
+# 0 (default): Disabled, 1: Enabled
+emulate_analog_keyboard =
+
+# Enable mouse inputs to the guest
+# 0 (default): Disabled, 1: Enabled
+mouse_enabled =
+
+# Enable keyboard inputs to the guest
+# 0 (default): Disabled, 1: Enabled
+keyboard_enabled =
[Core]
# Whether to use multi-core for CPU emulation
@@ -118,6 +140,17 @@ udp_pad_index=
use_multi_core=
[Cpu]
+# Adjusts various optimizations.
+# Auto-select mode automatically enables a recommended set of unsafe optimizations.
+# Accurate enables only safe optimizations.
+# Unsafe allows any unsafe optimizations.
+# 0 (default): Auto-select, 1: Accurate, 2: Enable unsafe optimizations
+cpu_accuracy =
+
+# Allow disabling safe optimizations.
+# 0 (default): Disabled, 1: Enabled
+cpu_debug_mode =
+
# Enable inline page tables optimization (faster guest memory access)
# 0: Disabled, 1 (default): Enabled
cpuopt_page_tables =
@@ -154,6 +187,31 @@ cpuopt_reduce_misalign_checks =
# 0: Disabled, 1 (default): Enabled
cpuopt_fastmem =
+# Enable unfused FMA (improves performance on CPUs without FMA)
+# Only enabled if cpu_accuracy is set to Unsafe. Automatically chosen with cpu_accuracy = Auto-select.
+# 0: Disabled, 1 (default): Enabled
+cpuopt_unsafe_unfuse_fma =
+
+# Enable faster FRSQRTE and FRECPE
+# Only enabled if cpu_accuracy is set to Unsafe.
+# 0: Disabled, 1 (default): Enabled
+cpuopt_unsafe_reduce_fp_error =
+
+# Enable faster ASIMD instructions (32 bits only)
+# Only enabled if cpu_accuracy is set to Unsafe. Automatically chosen with cpu_accuracy = Auto-select.
+# 0: Disabled, 1 (default): Enabled
+cpuopt_unsafe_ignore_standard_fpcr =
+
+# Enable inaccurate NaN handling
+# Only enabled if cpu_accuracy is set to Unsafe. Automatically chosen with cpu_accuracy = Auto-select.
+# 0: Disabled, 1 (default): Enabled
+cpuopt_unsafe_inaccurate_nan =
+
+# Disable address space checks (64 bits only)
+# Only enabled if cpu_accuracy is set to Unsafe. Automatically chosen with cpu_accuracy = Auto-select.
+# 0: Disabled, 1 (default): Enabled
+cpuopt_unsafe_fastmem_check =
+
[Renderer]
# Which backend API to use.
# 0 (default): OpenGL, 1: Vulkan
@@ -163,16 +221,20 @@ backend =
# 0 (default): Disabled, 1: Enabled
debug =
+# Enable Nsight Aftermath crash dumps
+# 0 (default): Disabled, 1: Enabled
+nsight_aftermath =
+
+# Disable shader loop safety checks, executing the shader without loop logic changes
+# 0 (default): Disabled, 1: Enabled
+disable_shader_loop_safety_checks =
+
# Which Vulkan physical device to use (defaults to 0)
vulkan_device =
-# Whether to use software or hardware rendering.
-# 0: Software, 1 (default): Hardware
-use_hw_renderer =
-
-# Whether to use the Just-In-Time (JIT) compiler for shader emulation
-# 0: Interpreter (slow), 1 (default): JIT (fast)
-use_shader_jit =
+# Whether to use fullscreen or borderless window mode
+# 0 (default on Windows): Borderless window, 1 (default on all other platforms): Exclusive fullscreen
+fullscreen_mode =
# Aspect ratio
# 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window
@@ -186,9 +248,10 @@ max_anisotropy =
# 0 (default): Off, 1: On
use_vsync =
-# Whether to use OpenGL assembly shaders or not. NV_gpu_program5 is required.
-# 0: Off, 1 (default): On
-use_assembly_shaders =
+# Selects the OpenGL shader backend. NV_gpu_program5 is required for GLASM. If NV_gpu_program5 is
+# not available and GLASM is selected, GLSL will be used.
+# 0: GLSL, 1 (default): GLASM, 2: SPIR-V
+shader_backend =
# Whether to allow asynchronous shader building.
# 0 (default): Off, 1: On
@@ -202,61 +265,43 @@ use_nvdec_emulation =
# 0: Off, 1 (default): On
accelerate_astc =
-# Turns on the frame limiter, which will limit frames output to the target game speed
+# Turns on the speed limiter, which will limit the emulation speed to the desired speed limit value
# 0: Off, 1: On (default)
-use_frame_limit =
+use_speed_limit =
# Limits the speed of the game to run no faster than this value as a percentage of target speed
# 1 - 9999: Speed limit as a percentage of target game speed. 100 (default)
-frame_limit =
+speed_limit =
# Whether to use disk based shader cache
-# 0 (default): Off, 1 : On
+# 0: Off, 1 (default): On
use_disk_shader_cache =
# Which gpu accuracy level to use
-# 0 (Normal), 1 (High), 2 (Extreme)
+# 0: Normal, 1 (default): High, 2: Extreme (Very slow)
gpu_accuracy =
# Whether to use asynchronous GPU emulation
# 0 : Off (slow), 1 (default): On (fast)
use_asynchronous_gpu_emulation =
-# Forces VSync on the display thread. Usually doesn't impact performance, but on some drivers it can
-# so only turn this off if you notice a speed difference.
+# Inform the guest that GPU operations completed more quickly than they did.
# 0: Off, 1 (default): On
-use_vsync =
+use_fast_gpu_time =
+
+# Whether to use garbage collection for GPU caches.
+# 0 (default): Off, 1: On
+use_caches_gc =
# The clear color for the renderer. What shows up on the sides of the rendered game image.
-# Must be in range of 0.0-1.0. Defaults to 1.0 for all.
+# Must be in range of 0-255. Defaults to 0 for all.
bg_red =
bg_blue =
bg_green =
-[Layout]
-# Layout for the screen inside the render window.
-# 0 (default): Default Top Bottom Screen, 1: Single Screen Only, 2: Large Screen Small Screen
-layout_option =
-
-# Toggle custom layout (using the settings below) on or off.
-# 0 (default): Off, 1: On
-custom_layout =
-
-# Screen placement when using Custom layout option
-# 0x, 0y is the top left corner of the render window.
-custom_top_left =
-custom_top_top =
-custom_top_right =
-custom_top_bottom =
-custom_bottom_left =
-custom_bottom_top =
-custom_bottom_right =
-custom_bottom_bottom =
-
-# Swaps the prominent screen with the other screen.
-# For example, if Single Screen is chosen, setting this to 1 will display the bottom screen instead of the top screen.
-# 0 (default): Top Screen is prominent, 1: Bottom Screen is prominent
-swap_screen =
+# Caps the unlocked framerate to a multiple of the title's target FPS.
+# 1 - 1000: Target FPS multiple cap. 1000 (default)
+fps_cap =
[Audio]
# Which audio output engine to use.
@@ -277,7 +322,7 @@ enable_audio_stretching =
output_device =
# Output volume.
-# 1.0 (default): 100%, 0.0; mute
+# 100 (default): 100%, 0: Mute
volume =
[Data Storage]
@@ -304,10 +349,6 @@ gamecard_path =
# 1 (default): Yes, 0: No
use_docked_mode =
-# Allow the use of NFC in games
-# 1 (default): Yes, 0 : No
-enable_nfc =
-
# Sets the seed for the RNG built into the Switch
# rng_seed will be ignored and randomly generated if rng_seed_enabled is false
rng_seed_enabled =
@@ -319,10 +360,6 @@ rng_seed =
custom_rtc_enabled =
custom_rtc =
-# Sets the account username, max length is 32 characters
-# yuzu (default)
-username = yuzu
-
# Sets the systems language index
# 0: Japanese, 1: English (default), 2: French, 3: German, 4: Italian, 5: Spanish, 6: Chinese,
# 7: Korean, 8: Dutch, 9: Portuguese, 10: Russian, 11: Taiwanese, 12: British English, 13: Canadian French,
@@ -331,17 +368,25 @@ language_index =
# The system region that yuzu will use during emulation
# -1: Auto-select (default), 0: Japan, 1: USA, 2: Europe, 3: Australia, 4: China, 5: Korea, 6: Taiwan
-region_value =
+region_index =
# The system time zone that yuzu will use during emulation
# 0: Auto-select (default), 1: Default (system archive value), Others: Index for specified time zone
time_zone_index =
+# Sets the sound output mode.
+# 0: Mono, 1 (default): Stereo, 2: Surround
+sound_index =
+
[Miscellaneous]
# A filter which removes logs below a certain logging level.
# Examples: *:Debug Kernel.SVC:Trace Service.*:Critical
log_filter = *:Trace
+# Use developer keys
+# 0 (default): Disabled, 1: Enabled
+use_dev_keys =
+
[Debugging]
# Record frame time data, can be found in the log directory. Boolean value
record_frame_times =
@@ -351,6 +396,8 @@ dump_exefs=false
dump_nso=false
# Determines whether or not yuzu will save the filesystem access log.
enable_fs_access_log=false
+# Enables verbose reporting services
+reporting_services =
# Determines whether or not yuzu will report to the game that the emulated console is in Kiosk Mode
# false: Retail/Normal Mode (default), true: Kiosk Mode
quest_flag =
@@ -362,6 +409,9 @@ use_debug_asserts =
use_auto_stub =
# Enables/Disables the macro JIT compiler
disable_macro_jit=false
+# Presents guest frames as they become available. Experimental.
+# false: Disabled (default), true: Enabled
+disable_fps_limit=false
[WebService]
# Whether or not to enable telemetry
@@ -386,4 +436,4 @@ title_ids =
# For each title ID, have a key/value pair called `disabled_<title_id>` equal to the names of the add-ons to disable (sep. by '|')
# e.g. disabled_0100000000010000 = Update|DLC <- disables Updates and DLC on Super Mario Odyssey
)";
-}
+} // namespace DefaultINI
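The shader_backend option above documents an automatic GLASM-to-GLSL fallback when NV_gpu_program5 is missing. On the OpenGL side that amounts to a check along the following lines; this is an illustrative sketch, not the renderer's actual code, and the GLAD_GL_NV_gpu_program5 macro plus the Settings::ShaderBackend enum name are assumptions.

// Sketch only: demote GLASM to GLSL when the driver does not report NV_gpu_program5.
// GLAD_GL_NV_gpu_program5 and ShaderBackend::{GLSL, GLASM, SPIRV} are assumed names.
if (Settings::values.shader_backend.GetValue() == Settings::ShaderBackend::GLASM &&
    GLAD_GL_NV_gpu_program5 == 0) {
    LOG_WARNING(Render_OpenGL, "GLASM selected but NV_gpu_program5 is unavailable, falling back to GLSL");
    Settings::values.shader_backend.SetValue(Settings::ShaderBackend::GLSL);
}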
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
index 06b20c975..353e51ea7 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
@@ -2,18 +2,11 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-// Ignore -Wimplicit-fallthrough due to https://github.com/libsdl-org/SDL/issues/4307
-#ifdef __clang__
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wimplicit-fallthrough"
-#endif
#include <SDL.h>
-#ifdef __clang__
-#pragma clang diagnostic pop
-#endif
#include "common/logging/log.h"
#include "common/scm_rev.h"
+#include "common/settings.h"
#include "core/core.h"
#include "core/perf_stats.h"
#include "input_common/keyboard.h"
@@ -130,24 +123,37 @@ void EmuWindow_SDL2::OnResize() {
}
void EmuWindow_SDL2::Fullscreen() {
- if (SDL_SetWindowFullscreen(render_window, SDL_WINDOW_FULLSCREEN) == 0) {
- return;
- }
-
- LOG_ERROR(Frontend, "Fullscreening failed: {}", SDL_GetError());
+ switch (Settings::values.fullscreen_mode.GetValue()) {
+ case 1: // Exclusive fullscreen
+ // Set window size to render size before entering fullscreen -- SDL does not resize to
+ // display dimensions in this mode.
+ // TODO: Multiply the window size by resolution_factor (for both docked modes)
+ if (Settings::values.use_docked_mode) {
+ SDL_SetWindowSize(render_window, Layout::ScreenDocked::Width,
+ Layout::ScreenDocked::Height);
+ }
- // Try a different fullscreening method
- LOG_INFO(Frontend, "Attempting to use borderless fullscreen...");
- if (SDL_SetWindowFullscreen(render_window, SDL_WINDOW_FULLSCREEN_DESKTOP) == 0) {
- return;
- }
+ if (SDL_SetWindowFullscreen(render_window, SDL_WINDOW_FULLSCREEN) == 0) {
+ return;
+ }
- LOG_ERROR(Frontend, "Borderless fullscreening failed: {}", SDL_GetError());
+ LOG_ERROR(Frontend, "Fullscreening failed: {}", SDL_GetError());
+ LOG_INFO(Frontend, "Attempting to use borderless fullscreen...");
+ [[fallthrough]];
+ case 0: // Borderless window
+ if (SDL_SetWindowFullscreen(render_window, SDL_WINDOW_FULLSCREEN_DESKTOP) == 0) {
+ return;
+ }
- // Fallback algorithm: Maximise window.
- // Works on all systems (unless something is seriously wrong), so no fallback for this one.
- LOG_INFO(Frontend, "Falling back on a maximised window...");
- SDL_MaximizeWindow(render_window);
+ LOG_ERROR(Frontend, "Borderless fullscreening failed: {}", SDL_GetError());
+ [[fallthrough]];
+ default:
+ // Fallback algorithm: Maximise window.
+ // Works on all systems (unless something is seriously wrong), so no fallback for this one.
+ LOG_INFO(Frontend, "Falling back on a maximised window...");
+ SDL_MaximizeWindow(render_window);
+ break;
+ }
}
void EmuWindow_SDL2::WaitEvent() {
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
index 837a44be7..eadb41790 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
@@ -7,15 +7,7 @@
#include <string>
#define SDL_MAIN_HANDLED
-// Ignore -Wimplicit-fallthrough due to https://github.com/libsdl-org/SDL/issues/4307
-#ifdef __clang__
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wimplicit-fallthrough"
-#endif
#include <SDL.h>
-#ifdef __clang__
-#pragma clang diagnostic pop
-#endif
#include <fmt/format.h>
#include <glad/glad.h>
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp
index 3401ad4b4..d1473dbab 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp
@@ -15,19 +15,16 @@
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "yuzu_cmd/emu_window/emu_window_sdl2_vk.h"
-// Include these late to avoid polluting everything with Xlib macros
-// Ignore -Wimplicit-fallthrough due to https://github.com/libsdl-org/SDL/issues/4307
-#ifdef __clang__
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wimplicit-fallthrough"
+#ifdef YUZU_USE_EXTERNAL_SDL2
+// Include this before SDL.h to prevent the external SDL2 from including a dummy config header
+#define USING_GENERATED_CONFIG_H
+#include <SDL_config.h>
#endif
+
#include <SDL.h>
-#ifdef __clang__
-#pragma clang diagnostic pop
-#endif
#include <SDL_syswm.h>
-EmuWindow_SDL2_VK::EmuWindow_SDL2_VK(InputCommon::InputSubsystem* input_subsystem)
+EmuWindow_SDL2_VK::EmuWindow_SDL2_VK(InputCommon::InputSubsystem* input_subsystem, bool fullscreen)
: EmuWindow_SDL2{input_subsystem} {
const std::string window_title = fmt::format("yuzu {} | {}-{} (Vulkan)", Common::g_build_name,
Common::g_scm_branch, Common::g_scm_desc);
@@ -45,12 +42,21 @@ EmuWindow_SDL2_VK::EmuWindow_SDL2_VK(InputCommon::InputSubsystem* input_subsyste
SetWindowIcon();
+ if (fullscreen) {
+ Fullscreen();
+ }
+
switch (wm.subsystem) {
#ifdef SDL_VIDEO_DRIVER_WINDOWS
case SDL_SYSWM_TYPE::SDL_SYSWM_WINDOWS:
window_info.type = Core::Frontend::WindowSystemType::Windows;
window_info.render_surface = reinterpret_cast<void*>(wm.info.win.window);
break;
+#else
+ case SDL_SYSWM_TYPE::SDL_SYSWM_WINDOWS:
+ LOG_CRITICAL(Frontend, "Window manager subsystem Windows not compiled");
+ std::exit(EXIT_FAILURE);
+ break;
#endif
#ifdef SDL_VIDEO_DRIVER_X11
case SDL_SYSWM_TYPE::SDL_SYSWM_X11:
@@ -58,6 +64,11 @@ EmuWindow_SDL2_VK::EmuWindow_SDL2_VK(InputCommon::InputSubsystem* input_subsyste
window_info.display_connection = wm.info.x11.display;
window_info.render_surface = reinterpret_cast<void*>(wm.info.x11.window);
break;
+#else
+ case SDL_SYSWM_TYPE::SDL_SYSWM_X11:
+ LOG_CRITICAL(Frontend, "Window manager subsystem X11 not compiled");
+ std::exit(EXIT_FAILURE);
+ break;
#endif
#ifdef SDL_VIDEO_DRIVER_WAYLAND
case SDL_SYSWM_TYPE::SDL_SYSWM_WAYLAND:
@@ -65,6 +76,11 @@ EmuWindow_SDL2_VK::EmuWindow_SDL2_VK(InputCommon::InputSubsystem* input_subsyste
window_info.display_connection = wm.info.wl.display;
window_info.render_surface = wm.info.wl.surface;
break;
+#else
+ case SDL_SYSWM_TYPE::SDL_SYSWM_WAYLAND:
+ LOG_CRITICAL(Frontend, "Window manager subsystem Wayland not compiled");
+ std::exit(EXIT_FAILURE);
+ break;
#endif
default:
LOG_CRITICAL(Frontend, "Window manager subsystem not implemented");
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h
index bdfdc3c6f..de53844f0 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h
@@ -19,7 +19,7 @@ class InputSubsystem;
class EmuWindow_SDL2_VK final : public EmuWindow_SDL2 {
public:
- explicit EmuWindow_SDL2_VK(InputCommon::InputSubsystem* input_subsystem);
+ explicit EmuWindow_SDL2_VK(InputCommon::InputSubsystem* input_subsystem, bool fullscreen);
~EmuWindow_SDL2_VK() override;
std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override;
diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp
index 584967f5c..35ce23696 100644
--- a/src/yuzu_cmd/yuzu.cpp
+++ b/src/yuzu_cmd/yuzu.cpp
@@ -78,7 +78,7 @@ static void InitializeLogging() {
using namespace Common;
Log::Filter log_filter(Log::Level::Debug);
- log_filter.ParseFilterString(Settings::values.log_filter);
+ log_filter.ParseFilterString(static_cast<std::string>(Settings::values.log_filter));
Log::SetGlobalFilter(log_filter);
Log::AddBackend(std::make_unique<Log::ColorConsoleBackend>());
@@ -175,7 +175,7 @@ int main(int argc, char** argv) {
emu_window = std::make_unique<EmuWindow_SDL2_GL>(&input_subsystem, fullscreen);
break;
case Settings::RendererBackend::Vulkan:
- emu_window = std::make_unique<EmuWindow_SDL2_VK>(&input_subsystem);
+ emu_window = std::make_unique<EmuWindow_SDL2_VK>(&input_subsystem, fullscreen);
break;
}
@@ -218,9 +218,11 @@ int main(int argc, char** argv) {
// Core is loaded, start the GPU (makes the GPU contexts current to this thread)
system.GPU().Start();
- system.Renderer().ReadRasterizer()->LoadDiskResources(
- system.CurrentProcess()->GetTitleID(), false,
- [](VideoCore::LoadCallbackStage, size_t value, size_t total) {});
+ if (Settings::values.use_disk_shader_cache.GetValue()) {
+ system.Renderer().ReadRasterizer()->LoadDiskResources(
+ system.CurrentProcess()->GetTitleID(), std::stop_token{},
+ [](VideoCore::LoadCallbackStage, size_t value, size_t total) {});
+ }
void(system.Run());
while (emu_window->IsOpen()) {