Diffstat (limited to 'src')
425 files changed, 49339 insertions, 27155 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f8ec8fea8..6e66dc1df 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -142,6 +142,7 @@ add_subdirectory(core) add_subdirectory(audio_core) add_subdirectory(video_core) add_subdirectory(input_common) +add_subdirectory(shader_recompiler) add_subdirectory(tests) if (ENABLE_SDL2) diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 7f22ea97a..57922b51c 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -1,8 +1,3 @@ -# Add a custom command to generate a new shader_cache_version hash when any of the following files change -# NOTE: This is an approximation of what files affect shader generation, its possible something else -# could affect the result, but much more unlikely than the following files. Keeping a list of files -# like this allows for much better caching since it doesn't force the user to recompile binary shaders every update -set(VIDEO_CORE "${CMAKE_SOURCE_DIR}/src/video_core") if (DEFINED ENV{AZURECIREPO}) set(BUILD_REPOSITORY $ENV{AZURECIREPO}) endif() @@ -30,64 +25,7 @@ add_custom_command(OUTPUT scm_rev.cpp -DGIT_EXECUTABLE=${GIT_EXECUTABLE} -P ${CMAKE_SOURCE_DIR}/CMakeModules/GenerateSCMRev.cmake DEPENDS - # WARNING! It was too much work to try and make a common location for this list, - # so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well - "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp" - "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h" - "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp" - "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h" - "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp" - "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h" - "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp" - "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h" - "${VIDEO_CORE}/shader/decode/arithmetic.cpp" - "${VIDEO_CORE}/shader/decode/arithmetic_half.cpp" - "${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp" - "${VIDEO_CORE}/shader/decode/arithmetic_immediate.cpp" - "${VIDEO_CORE}/shader/decode/arithmetic_integer.cpp" - "${VIDEO_CORE}/shader/decode/arithmetic_integer_immediate.cpp" - "${VIDEO_CORE}/shader/decode/bfe.cpp" - "${VIDEO_CORE}/shader/decode/bfi.cpp" - "${VIDEO_CORE}/shader/decode/conversion.cpp" - "${VIDEO_CORE}/shader/decode/ffma.cpp" - "${VIDEO_CORE}/shader/decode/float_set.cpp" - "${VIDEO_CORE}/shader/decode/float_set_predicate.cpp" - "${VIDEO_CORE}/shader/decode/half_set.cpp" - "${VIDEO_CORE}/shader/decode/half_set_predicate.cpp" - "${VIDEO_CORE}/shader/decode/hfma2.cpp" - "${VIDEO_CORE}/shader/decode/image.cpp" - "${VIDEO_CORE}/shader/decode/integer_set.cpp" - "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp" - "${VIDEO_CORE}/shader/decode/memory.cpp" - "${VIDEO_CORE}/shader/decode/texture.cpp" - "${VIDEO_CORE}/shader/decode/other.cpp" - "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp" - "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp" - "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp" - "${VIDEO_CORE}/shader/decode/shift.cpp" - "${VIDEO_CORE}/shader/decode/video.cpp" - "${VIDEO_CORE}/shader/decode/warp.cpp" - "${VIDEO_CORE}/shader/decode/xmad.cpp" - "${VIDEO_CORE}/shader/ast.cpp" - "${VIDEO_CORE}/shader/ast.h" - "${VIDEO_CORE}/shader/compiler_settings.cpp" - "${VIDEO_CORE}/shader/compiler_settings.h" - "${VIDEO_CORE}/shader/control_flow.cpp" - "${VIDEO_CORE}/shader/control_flow.h" - "${VIDEO_CORE}/shader/decode.cpp" - "${VIDEO_CORE}/shader/expr.cpp" - 
"${VIDEO_CORE}/shader/expr.h" - "${VIDEO_CORE}/shader/node.h" - "${VIDEO_CORE}/shader/node_helper.cpp" - "${VIDEO_CORE}/shader/node_helper.h" - "${VIDEO_CORE}/shader/registry.cpp" - "${VIDEO_CORE}/shader/registry.h" - "${VIDEO_CORE}/shader/shader_ir.cpp" - "${VIDEO_CORE}/shader/shader_ir.h" - "${VIDEO_CORE}/shader/track.cpp" - "${VIDEO_CORE}/shader/transform_feedback.cpp" - "${VIDEO_CORE}/shader/transform_feedback.h" - # and also check that the scm_rev files haven't changed + # Check that the scm_rev files haven't changed "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in" "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.h" # technically we should regenerate if the git version changed, but its not worth the effort imo diff --git a/src/common/logging/filter.cpp b/src/common/logging/filter.cpp index 4f2cc29e1..f055f0e11 100644 --- a/src/common/logging/filter.cpp +++ b/src/common/logging/filter.cpp @@ -144,6 +144,10 @@ bool ParseFilterRule(Filter& instance, Iterator begin, Iterator end) { SUB(Render, Software) \ SUB(Render, OpenGL) \ SUB(Render, Vulkan) \ + CLS(Shader) \ + SUB(Shader, SPIRV) \ + SUB(Shader, GLASM) \ + SUB(Shader, GLSL) \ CLS(Audio) \ SUB(Audio, DSP) \ SUB(Audio, Sink) \ diff --git a/src/common/logging/types.h b/src/common/logging/types.h index 88b0e9c01..7ad0334fc 100644 --- a/src/common/logging/types.h +++ b/src/common/logging/types.h @@ -114,6 +114,10 @@ enum class Class : u8 { Render_Software, ///< Software renderer backend Render_OpenGL, ///< OpenGL backend Render_Vulkan, ///< Vulkan backend + Shader, ///< Shader recompiler + Shader_SPIRV, ///< Shader SPIR-V code generation + Shader_GLASM, ///< Shader GLASM code generation + Shader_GLSL, ///< Shader GLSL code generation Audio, ///< Audio emulation Audio_DSP, ///< The HLE implementation of the DSP Audio_Sink, ///< Emulator audio output backend diff --git a/src/common/scm_rev.cpp.in b/src/common/scm_rev.cpp.in index 5f126f324..cc88994c6 100644 --- a/src/common/scm_rev.cpp.in +++ b/src/common/scm_rev.cpp.in @@ -14,7 +14,6 @@ #define BUILD_ID "@BUILD_ID@" #define TITLE_BAR_FORMAT_IDLE "@TITLE_BAR_FORMAT_IDLE@" #define TITLE_BAR_FORMAT_RUNNING "@TITLE_BAR_FORMAT_RUNNING@" -#define SHADER_CACHE_VERSION "@SHADER_CACHE_VERSION@" namespace Common { @@ -28,7 +27,6 @@ const char g_build_version[] = BUILD_VERSION; const char g_build_id[] = BUILD_ID; const char g_title_bar_format_idle[] = TITLE_BAR_FORMAT_IDLE; const char g_title_bar_format_running[] = TITLE_BAR_FORMAT_RUNNING; -const char g_shader_cache_version[] = SHADER_CACHE_VERSION; } // namespace diff --git a/src/common/settings.cpp b/src/common/settings.cpp index bf5514386..66268ea0f 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -57,7 +57,7 @@ void LogSettings() { log_setting("Renderer_UseNvdecEmulation", values.use_nvdec_emulation.GetValue()); log_setting("Renderer_AccelerateASTC", values.accelerate_astc.GetValue()); log_setting("Renderer_UseVsync", values.use_vsync.GetValue()); - log_setting("Renderer_UseAssemblyShaders", values.use_assembly_shaders.GetValue()); + log_setting("Renderer_ShaderBackend", values.shader_backend.GetValue()); log_setting("Renderer_UseAsynchronousShaders", values.use_asynchronous_shaders.GetValue()); log_setting("Renderer_UseGarbageCollection", values.use_caches_gc.GetValue()); log_setting("Renderer_AnisotropicFilteringLevel", values.max_anisotropy.GetValue()); @@ -140,7 +140,7 @@ void RestoreGlobalState(bool is_powered_on) { values.use_nvdec_emulation.SetGlobal(true); values.accelerate_astc.SetGlobal(true); values.use_vsync.SetGlobal(true); - 
values.use_assembly_shaders.SetGlobal(true); + values.shader_backend.SetGlobal(true); values.use_asynchronous_shaders.SetGlobal(true); values.use_fast_gpu_time.SetGlobal(true); values.use_caches_gc.SetGlobal(true); diff --git a/src/common/settings.h b/src/common/settings.h index ce1bc647d..32dfb1d9f 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -24,6 +24,12 @@ enum class RendererBackend : u32 { Vulkan = 1, }; +enum class ShaderBackend : u32 { + GLSL = 0, + GLASM = 1, + SPIRV = 2, +}; + enum class GPUAccuracy : u32 { Normal = 0, High = 1, @@ -308,6 +314,9 @@ struct Values { // Renderer Setting<RendererBackend> renderer_backend{RendererBackend::OpenGL, "backend"}; BasicSetting<bool> renderer_debug{false, "debug"}; + BasicSetting<bool> enable_nsight_aftermath{false, "nsight_aftermath"}; + BasicSetting<bool> disable_shader_loop_safety_checks{false, + "disable_shader_loop_safety_checks"}; Setting<int> vulkan_device{0, "vulkan_device"}; Setting<u16> resolution_factor{1, "resolution_factor"}; @@ -331,7 +340,7 @@ struct Values { Setting<bool> accelerate_astc{true, "accelerate_astc"}; Setting<bool> use_vsync{true, "use_vsync"}; BasicSetting<bool> disable_fps_limit{false, "disable_fps_limit"}; - Setting<bool> use_assembly_shaders{false, "use_assembly_shaders"}; + Setting<ShaderBackend> shader_backend{ShaderBackend::GLASM, "shader_backend"}; Setting<bool> use_asynchronous_shaders{false, "use_asynchronous_shaders"}; Setting<bool> use_fast_gpu_time{true, "use_fast_gpu_time"}; Setting<bool> use_caches_gc{false, "use_caches_gc"}; diff --git a/src/common/thread_worker.h b/src/common/thread_worker.h index 8272985ff..cd0017726 100644 --- a/src/common/thread_worker.h +++ b/src/common/thread_worker.h @@ -5,6 +5,7 @@ #pragma once #include <atomic> +#include <condition_variable> #include <functional> #include <mutex> #include <stop_token> @@ -39,7 +40,7 @@ public: const auto lambda = [this, func](std::stop_token stop_token) { Common::SetCurrentThreadName(thread_name.c_str()); { - std::conditional_t<with_state, StateType, int> state{func()}; + [[maybe_unused]] std::conditional_t<with_state, StateType, int> state{func()}; while (!stop_token.stop_requested()) { Task task; { diff --git a/src/core/reporter.cpp b/src/core/reporter.cpp index cfaf50105..365b8f906 100644 --- a/src/core/reporter.cpp +++ b/src/core/reporter.cpp @@ -62,7 +62,6 @@ json GetYuzuVersionData() { {"build_date", std::string(Common::g_build_date)}, {"build_fullname", std::string(Common::g_build_fullname)}, {"build_version", std::string(Common::g_build_version)}, - {"shader_cache_version", std::string(Common::g_shader_cache_version)}, }; } diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp index 066cb23e4..422de3a7d 100644 --- a/src/core/telemetry_session.cpp +++ b/src/core/telemetry_session.cpp @@ -233,8 +233,8 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader, Settings::values.use_nvdec_emulation.GetValue()); AddField(field_type, "Renderer_AccelerateASTC", Settings::values.accelerate_astc.GetValue()); AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync.GetValue()); - AddField(field_type, "Renderer_UseAssemblyShaders", - Settings::values.use_assembly_shaders.GetValue()); + AddField(field_type, "Renderer_ShaderBackend", + static_cast<u32>(Settings::values.shader_backend.GetValue())); AddField(field_type, "Renderer_UseAsynchronousShaders", Settings::values.use_asynchronous_shaders.GetValue()); AddField(field_type, "System_UseDockedMode", 
Settings::values.use_docked_mode.GetValue()); diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt new file mode 100644 index 000000000..b5b7e5e83 --- /dev/null +++ b/src/shader_recompiler/CMakeLists.txt @@ -0,0 +1,268 @@ +add_library(shader_recompiler STATIC + backend/bindings.h + backend/glasm/emit_context.cpp + backend/glasm/emit_context.h + backend/glasm/emit_glasm.cpp + backend/glasm/emit_glasm.h + backend/glasm/emit_glasm_barriers.cpp + backend/glasm/emit_glasm_bitwise_conversion.cpp + backend/glasm/emit_glasm_composite.cpp + backend/glasm/emit_glasm_context_get_set.cpp + backend/glasm/emit_glasm_control_flow.cpp + backend/glasm/emit_glasm_convert.cpp + backend/glasm/emit_glasm_floating_point.cpp + backend/glasm/emit_glasm_image.cpp + backend/glasm/emit_glasm_instructions.h + backend/glasm/emit_glasm_integer.cpp + backend/glasm/emit_glasm_logical.cpp + backend/glasm/emit_glasm_memory.cpp + backend/glasm/emit_glasm_not_implemented.cpp + backend/glasm/emit_glasm_select.cpp + backend/glasm/emit_glasm_shared_memory.cpp + backend/glasm/emit_glasm_special.cpp + backend/glasm/emit_glasm_undefined.cpp + backend/glasm/emit_glasm_warp.cpp + backend/glasm/reg_alloc.cpp + backend/glasm/reg_alloc.h + backend/glsl/emit_context.cpp + backend/glsl/emit_context.h + backend/glsl/emit_glsl.cpp + backend/glsl/emit_glsl.h + backend/glsl/emit_glsl_atomic.cpp + backend/glsl/emit_glsl_barriers.cpp + backend/glsl/emit_glsl_bitwise_conversion.cpp + backend/glsl/emit_glsl_composite.cpp + backend/glsl/emit_glsl_context_get_set.cpp + backend/glsl/emit_glsl_control_flow.cpp + backend/glsl/emit_glsl_convert.cpp + backend/glsl/emit_glsl_floating_point.cpp + backend/glsl/emit_glsl_image.cpp + backend/glsl/emit_glsl_instructions.h + backend/glsl/emit_glsl_integer.cpp + backend/glsl/emit_glsl_logical.cpp + backend/glsl/emit_glsl_memory.cpp + backend/glsl/emit_glsl_not_implemented.cpp + backend/glsl/emit_glsl_select.cpp + backend/glsl/emit_glsl_shared_memory.cpp + backend/glsl/emit_glsl_special.cpp + backend/glsl/emit_glsl_undefined.cpp + backend/glsl/emit_glsl_warp.cpp + backend/glsl/var_alloc.cpp + backend/glsl/var_alloc.h + backend/spirv/emit_context.cpp + backend/spirv/emit_context.h + backend/spirv/emit_spirv.cpp + backend/spirv/emit_spirv.h + backend/spirv/emit_spirv_atomic.cpp + backend/spirv/emit_spirv_barriers.cpp + backend/spirv/emit_spirv_bitwise_conversion.cpp + backend/spirv/emit_spirv_composite.cpp + backend/spirv/emit_spirv_context_get_set.cpp + backend/spirv/emit_spirv_control_flow.cpp + backend/spirv/emit_spirv_convert.cpp + backend/spirv/emit_spirv_floating_point.cpp + backend/spirv/emit_spirv_image.cpp + backend/spirv/emit_spirv_image_atomic.cpp + backend/spirv/emit_spirv_instructions.h + backend/spirv/emit_spirv_integer.cpp + backend/spirv/emit_spirv_logical.cpp + backend/spirv/emit_spirv_memory.cpp + backend/spirv/emit_spirv_select.cpp + backend/spirv/emit_spirv_shared_memory.cpp + backend/spirv/emit_spirv_special.cpp + backend/spirv/emit_spirv_undefined.cpp + backend/spirv/emit_spirv_warp.cpp + environment.h + exception.h + frontend/ir/abstract_syntax_list.h + frontend/ir/attribute.cpp + frontend/ir/attribute.h + frontend/ir/basic_block.cpp + frontend/ir/basic_block.h + frontend/ir/breadth_first_search.h + frontend/ir/condition.cpp + frontend/ir/condition.h + frontend/ir/flow_test.cpp + frontend/ir/flow_test.h + frontend/ir/ir_emitter.cpp + frontend/ir/ir_emitter.h + frontend/ir/microinstruction.cpp + frontend/ir/modifiers.h + frontend/ir/opcodes.cpp + 
frontend/ir/opcodes.h + frontend/ir/opcodes.inc + frontend/ir/patch.cpp + frontend/ir/patch.h + frontend/ir/post_order.cpp + frontend/ir/post_order.h + frontend/ir/pred.h + frontend/ir/program.cpp + frontend/ir/program.h + frontend/ir/reg.h + frontend/ir/type.cpp + frontend/ir/type.h + frontend/ir/value.cpp + frontend/ir/value.h + frontend/maxwell/control_flow.cpp + frontend/maxwell/control_flow.h + frontend/maxwell/decode.cpp + frontend/maxwell/decode.h + frontend/maxwell/indirect_branch_table_track.cpp + frontend/maxwell/indirect_branch_table_track.h + frontend/maxwell/instruction.h + frontend/maxwell/location.h + frontend/maxwell/maxwell.inc + frontend/maxwell/opcodes.cpp + frontend/maxwell/opcodes.h + frontend/maxwell/structured_control_flow.cpp + frontend/maxwell/structured_control_flow.h + frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp + frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp + frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp + frontend/maxwell/translate/impl/barrier_operations.cpp + frontend/maxwell/translate/impl/bitfield_extract.cpp + frontend/maxwell/translate/impl/bitfield_insert.cpp + frontend/maxwell/translate/impl/branch_indirect.cpp + frontend/maxwell/translate/impl/common_encoding.h + frontend/maxwell/translate/impl/common_funcs.cpp + frontend/maxwell/translate/impl/common_funcs.h + frontend/maxwell/translate/impl/condition_code_set.cpp + frontend/maxwell/translate/impl/double_add.cpp + frontend/maxwell/translate/impl/double_compare_and_set.cpp + frontend/maxwell/translate/impl/double_fused_multiply_add.cpp + frontend/maxwell/translate/impl/double_min_max.cpp + frontend/maxwell/translate/impl/double_multiply.cpp + frontend/maxwell/translate/impl/double_set_predicate.cpp + frontend/maxwell/translate/impl/exit_program.cpp + frontend/maxwell/translate/impl/find_leading_one.cpp + frontend/maxwell/translate/impl/floating_point_add.cpp + frontend/maxwell/translate/impl/floating_point_compare.cpp + frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp + frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp + frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp + frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp + frontend/maxwell/translate/impl/floating_point_min_max.cpp + frontend/maxwell/translate/impl/floating_point_multi_function.cpp + frontend/maxwell/translate/impl/floating_point_multiply.cpp + frontend/maxwell/translate/impl/floating_point_range_reduction.cpp + frontend/maxwell/translate/impl/floating_point_set_predicate.cpp + frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp + frontend/maxwell/translate/impl/half_floating_point_add.cpp + frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp + frontend/maxwell/translate/impl/half_floating_point_helper.cpp + frontend/maxwell/translate/impl/half_floating_point_helper.h + frontend/maxwell/translate/impl/half_floating_point_multiply.cpp + frontend/maxwell/translate/impl/half_floating_point_set.cpp + frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp + frontend/maxwell/translate/impl/impl.cpp + frontend/maxwell/translate/impl/impl.h + frontend/maxwell/translate/impl/integer_add.cpp + frontend/maxwell/translate/impl/integer_add_three_input.cpp + frontend/maxwell/translate/impl/integer_compare.cpp + frontend/maxwell/translate/impl/integer_compare_and_set.cpp + frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp + 
frontend/maxwell/translate/impl/integer_funnel_shift.cpp + frontend/maxwell/translate/impl/integer_minimum_maximum.cpp + frontend/maxwell/translate/impl/integer_popcount.cpp + frontend/maxwell/translate/impl/integer_scaled_add.cpp + frontend/maxwell/translate/impl/integer_set_predicate.cpp + frontend/maxwell/translate/impl/integer_shift_left.cpp + frontend/maxwell/translate/impl/integer_shift_right.cpp + frontend/maxwell/translate/impl/integer_short_multiply_add.cpp + frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp + frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp + frontend/maxwell/translate/impl/load_constant.cpp + frontend/maxwell/translate/impl/load_constant.h + frontend/maxwell/translate/impl/load_effective_address.cpp + frontend/maxwell/translate/impl/load_store_attribute.cpp + frontend/maxwell/translate/impl/load_store_local_shared.cpp + frontend/maxwell/translate/impl/load_store_memory.cpp + frontend/maxwell/translate/impl/logic_operation.cpp + frontend/maxwell/translate/impl/logic_operation_three_input.cpp + frontend/maxwell/translate/impl/move_predicate_to_register.cpp + frontend/maxwell/translate/impl/move_register.cpp + frontend/maxwell/translate/impl/move_register_to_predicate.cpp + frontend/maxwell/translate/impl/move_special_register.cpp + frontend/maxwell/translate/impl/not_implemented.cpp + frontend/maxwell/translate/impl/output_geometry.cpp + frontend/maxwell/translate/impl/pixel_load.cpp + frontend/maxwell/translate/impl/predicate_set_predicate.cpp + frontend/maxwell/translate/impl/predicate_set_register.cpp + frontend/maxwell/translate/impl/select_source_with_predicate.cpp + frontend/maxwell/translate/impl/surface_atomic_operations.cpp + frontend/maxwell/translate/impl/surface_load_store.cpp + frontend/maxwell/translate/impl/texture_fetch.cpp + frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp + frontend/maxwell/translate/impl/texture_gather.cpp + frontend/maxwell/translate/impl/texture_gather_swizzled.cpp + frontend/maxwell/translate/impl/texture_gradient.cpp + frontend/maxwell/translate/impl/texture_load.cpp + frontend/maxwell/translate/impl/texture_load_swizzled.cpp + frontend/maxwell/translate/impl/texture_mipmap_level.cpp + frontend/maxwell/translate/impl/texture_query.cpp + frontend/maxwell/translate/impl/video_helper.cpp + frontend/maxwell/translate/impl/video_helper.h + frontend/maxwell/translate/impl/video_minimum_maximum.cpp + frontend/maxwell/translate/impl/video_multiply_add.cpp + frontend/maxwell/translate/impl/video_set_predicate.cpp + frontend/maxwell/translate/impl/vote.cpp + frontend/maxwell/translate/impl/warp_shuffle.cpp + frontend/maxwell/translate/translate.cpp + frontend/maxwell/translate/translate.h + frontend/maxwell/translate_program.cpp + frontend/maxwell/translate_program.h + host_translate_info.h + ir_opt/collect_shader_info_pass.cpp + ir_opt/constant_propagation_pass.cpp + ir_opt/dead_code_elimination_pass.cpp + ir_opt/dual_vertex_pass.cpp + ir_opt/global_memory_to_storage_buffer_pass.cpp + ir_opt/identity_removal_pass.cpp + ir_opt/lower_fp16_to_fp32.cpp + ir_opt/lower_int64_to_int32.cpp + ir_opt/passes.h + ir_opt/ssa_rewrite_pass.cpp + ir_opt/texture_pass.cpp + ir_opt/verification_pass.cpp + object_pool.h + profile.h + program_header.h + runtime_info.h + shader_info.h + varying_state.h +) + +target_link_libraries(shader_recompiler PUBLIC common fmt::fmt sirit) + +if (MSVC) + target_compile_options(shader_recompiler PRIVATE + /W4 + /WX + /we4018 # 'expression' : signed/unsigned mismatch + 
/we4244 # 'argument' : conversion from 'type1' to 'type2', possible loss of data (floating-point) + /we4245 # 'conversion' : conversion from 'type1' to 'type2', signed/unsigned mismatch + /we4254 # 'operator': conversion from 'type1:field_bits' to 'type2:field_bits', possible loss of data + /we4267 # 'var' : conversion from 'size_t' to 'type', possible loss of data + /we4305 # 'context' : truncation from 'type1' to 'type2' + /we4800 # Implicit conversion from 'type' to bool. Possible information loss + /we4826 # Conversion from 'type1' to 'type2' is sign-extended. This may cause unexpected runtime behavior. + ) +else() + target_compile_options(shader_recompiler PRIVATE + -Werror + -Werror=conversion + -Werror=ignored-qualifiers + -Werror=implicit-fallthrough + -Werror=shadow + -Werror=sign-compare + $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-parameter> + $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-variable> + -Werror=unused-variable + + # Bracket depth determines maximum size of a fold expression in Clang since 9c9974c3ccb6. + # And this in turns limits the size of a std::array. + $<$<CXX_COMPILER_ID:Clang>:-fbracket-depth=1024> + ) +endif() + +create_target_directory_groups(shader_recompiler) diff --git a/src/shader_recompiler/backend/bindings.h b/src/shader_recompiler/backend/bindings.h new file mode 100644 index 000000000..35503000c --- /dev/null +++ b/src/shader_recompiler/backend/bindings.h @@ -0,0 +1,19 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" + +namespace Shader::Backend { + +struct Bindings { + u32 unified{}; + u32 uniform_buffer{}; + u32 storage_buffer{}; + u32 texture{}; + u32 image{}; +}; + +} // namespace Shader::Backend diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp new file mode 100644 index 000000000..069c019ad --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_context.cpp @@ -0,0 +1,154 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <string_view> + +#include "shader_recompiler/backend/bindings.h" +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" + +namespace Shader::Backend::GLASM { +namespace { +std::string_view InterpDecorator(Interpolation interp) { + switch (interp) { + case Interpolation::Smooth: + return ""; + case Interpolation::Flat: + return "FLAT "; + case Interpolation::NoPerspective: + return "NOPERSPECTIVE "; + } + throw InvalidArgument("Invalid interpolation {}", interp); +} + +bool IsInputArray(Stage stage) { + return stage == Stage::Geometry || stage == Stage::TessellationControl || + stage == Stage::TessellationEval; +} +} // Anonymous namespace + +EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, + const RuntimeInfo& runtime_info_) + : info{program.info}, profile{profile_}, runtime_info{runtime_info_} { + // FIXME: Temporary partial implementation + u32 cbuf_index{}; + for (const auto& desc : info.constant_buffer_descriptors) { + if (desc.count != 1) { + throw NotImplementedException("Constant buffer descriptor array"); + } + Add("CBUFFER c{}[]={{program.buffer[{}]}};", desc.index, cbuf_index); + ++cbuf_index; + } + u32 ssbo_index{}; + for (const auto& desc : info.storage_buffers_descriptors) { + if (desc.count != 1) { + throw NotImplementedException("Storage buffer descriptor array"); + } + if (runtime_info.glasm_use_storage_buffers) { + Add("STORAGE ssbo{}[]={{program.storage[{}]}};", ssbo_index, bindings.storage_buffer); + ++bindings.storage_buffer; + ++ssbo_index; + } + } + if (!runtime_info.glasm_use_storage_buffers) { + if (const size_t num = info.storage_buffers_descriptors.size(); num > 0) { + Add("PARAM c[{}]={{program.local[0..{}]}};", num, num - 1); + } + } + stage = program.stage; + switch (program.stage) { + case Stage::VertexA: + case Stage::VertexB: + stage_name = "vertex"; + attrib_name = "vertex"; + break; + case Stage::TessellationControl: + case Stage::TessellationEval: + stage_name = "primitive"; + attrib_name = "primitive"; + break; + case Stage::Geometry: + stage_name = "primitive"; + attrib_name = "vertex"; + break; + case Stage::Fragment: + stage_name = "fragment"; + attrib_name = "fragment"; + break; + case Stage::Compute: + stage_name = "invocation"; + break; + } + const std::string_view attr_stage{stage == Stage::Fragment ? 
"fragment" : "vertex"}; + const VaryingState loads{info.loads.mask | info.passthrough.mask}; + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (loads.Generic(index)) { + Add("{}ATTRIB in_attr{}[]={{{}.attrib[{}..{}]}};", + InterpDecorator(info.interpolation[index]), index, attr_stage, index, index); + } + } + if (IsInputArray(stage) && loads.AnyComponent(IR::Attribute::PositionX)) { + Add("ATTRIB vertex_position=vertex.position;"); + } + if (info.uses_invocation_id) { + Add("ATTRIB primitive_invocation=primitive.invocation;"); + } + if (info.stores_tess_level_outer) { + Add("OUTPUT result_patch_tessouter[]={{result.patch.tessouter[0..3]}};"); + } + if (info.stores_tess_level_inner) { + Add("OUTPUT result_patch_tessinner[]={{result.patch.tessinner[0..1]}};"); + } + if (info.stores.ClipDistances()) { + Add("OUTPUT result_clip[]={{result.clip[0..7]}};"); + } + for (size_t index = 0; index < info.uses_patches.size(); ++index) { + if (!info.uses_patches[index]) { + continue; + } + if (stage == Stage::TessellationControl) { + Add("OUTPUT result_patch_attrib{}[]={{result.patch.attrib[{}..{}]}};" + "ATTRIB primitive_out_patch_attrib{}[]={{primitive.out.patch.attrib[{}..{}]}};", + index, index, index, index, index, index); + } else { + Add("ATTRIB primitive_patch_attrib{}[]={{primitive.patch.attrib[{}..{}]}};", index, + index, index); + } + } + if (stage == Stage::Fragment) { + Add("OUTPUT frag_color0=result.color;"); + for (size_t index = 1; index < info.stores_frag_color.size(); ++index) { + Add("OUTPUT frag_color{}=result.color[{}];", index, index); + } + } + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (info.stores.Generic(index)) { + Add("OUTPUT out_attr{}[]={{result.attrib[{}..{}]}};", index, index, index); + } + } + image_buffer_bindings.reserve(info.image_buffer_descriptors.size()); + for (const auto& desc : info.image_buffer_descriptors) { + image_buffer_bindings.push_back(bindings.image); + bindings.image += desc.count; + } + image_bindings.reserve(info.image_descriptors.size()); + for (const auto& desc : info.image_descriptors) { + image_bindings.push_back(bindings.image); + bindings.image += desc.count; + } + texture_buffer_bindings.reserve(info.texture_buffer_descriptors.size()); + for (const auto& desc : info.texture_buffer_descriptors) { + texture_buffer_bindings.push_back(bindings.texture); + bindings.texture += desc.count; + } + texture_bindings.reserve(info.texture_descriptors.size()); + for (const auto& desc : info.texture_descriptors) { + texture_bindings.push_back(bindings.texture); + bindings.texture += desc.count; + } +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_context.h b/src/shader_recompiler/backend/glasm/emit_context.h new file mode 100644 index 000000000..8433e5c00 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_context.h @@ -0,0 +1,80 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <string> +#include <utility> +#include <vector> + +#include <fmt/format.h> + +#include "shader_recompiler/backend/glasm/reg_alloc.h" +#include "shader_recompiler/stage.h" + +namespace Shader { +struct Info; +struct Profile; +struct RuntimeInfo; +} // namespace Shader + +namespace Shader::Backend { +struct Bindings; +} + +namespace Shader::IR { +class Inst; +struct Program; +} // namespace Shader::IR + +namespace Shader::Backend::GLASM { + +class EmitContext { +public: + explicit EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, + const RuntimeInfo& runtime_info_); + + template <typename... Args> + void Add(const char* format_str, IR::Inst& inst, Args&&... args) { + code += fmt::format(fmt::runtime(format_str), reg_alloc.Define(inst), + std::forward<Args>(args)...); + // TODO: Remove this + code += '\n'; + } + + template <typename... Args> + void LongAdd(const char* format_str, IR::Inst& inst, Args&&... args) { + code += fmt::format(fmt::runtime(format_str), reg_alloc.LongDefine(inst), + std::forward<Args>(args)...); + // TODO: Remove this + code += '\n'; + } + + template <typename... Args> + void Add(const char* format_str, Args&&... args) { + code += fmt::format(fmt::runtime(format_str), std::forward<Args>(args)...); + // TODO: Remove this + code += '\n'; + } + + std::string code; + RegAlloc reg_alloc{}; + const Info& info; + const Profile& profile; + const RuntimeInfo& runtime_info; + + std::vector<u32> texture_buffer_bindings; + std::vector<u32> image_buffer_bindings; + std::vector<u32> texture_bindings; + std::vector<u32> image_bindings; + + Stage stage{}; + std::string_view stage_name = "invalid"; + std::string_view attrib_name = "invalid"; + + u32 num_safety_loop_vars{}; + bool uses_y_direction{}; +}; + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp new file mode 100644 index 000000000..a5e8c9b6e --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -0,0 +1,492 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <string> +#include <tuple> + +#include "common/div_ceil.h" +#include "common/settings.h" +#include "shader_recompiler/backend/bindings.h" +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" + +namespace Shader::Backend::GLASM { +namespace { +template <class Func> +struct FuncTraits {}; + +template <class ReturnType_, class... 
Args> +struct FuncTraits<ReturnType_ (*)(Args...)> { + using ReturnType = ReturnType_; + + static constexpr size_t NUM_ARGS = sizeof...(Args); + + template <size_t I> + using ArgType = std::tuple_element_t<I, std::tuple<Args...>>; +}; + +template <typename T> +struct Identity { + Identity(T data_) : data{data_} {} + + T Extract() { + return data; + } + + T data; +}; + +template <bool scalar> +class RegWrapper { +public: + RegWrapper(EmitContext& ctx, const IR::Value& ir_value) : reg_alloc{ctx.reg_alloc} { + const Value value{reg_alloc.Peek(ir_value)}; + if (value.type == Type::Register) { + inst = ir_value.InstRecursive(); + reg = Register{value}; + } else { + reg = value.type == Type::U64 ? reg_alloc.AllocLongReg() : reg_alloc.AllocReg(); + } + switch (value.type) { + case Type::Register: + case Type::Void: + break; + case Type::U32: + ctx.Add("MOV.U {}.x,{};", reg, value.imm_u32); + break; + case Type::U64: + ctx.Add("MOV.U64 {}.x,{};", reg, value.imm_u64); + break; + } + } + + auto Extract() { + if (inst) { + reg_alloc.Unref(*inst); + } else { + reg_alloc.FreeReg(reg); + } + return std::conditional_t<scalar, ScalarRegister, Register>{Value{reg}}; + } + +private: + RegAlloc& reg_alloc; + IR::Inst* inst{}; + Register reg{}; +}; + +template <typename ArgType> +class ValueWrapper { +public: + ValueWrapper(EmitContext& ctx, const IR::Value& ir_value_) + : reg_alloc{ctx.reg_alloc}, ir_value{ir_value_}, value{reg_alloc.Peek(ir_value)} {} + + ArgType Extract() { + if (!ir_value.IsImmediate()) { + reg_alloc.Unref(*ir_value.InstRecursive()); + } + return value; + } + +private: + RegAlloc& reg_alloc; + const IR::Value& ir_value; + ArgType value; +}; + +template <typename ArgType> +auto Arg(EmitContext& ctx, const IR::Value& arg) { + if constexpr (std::is_same_v<ArgType, Register>) { + return RegWrapper<false>{ctx, arg}; + } else if constexpr (std::is_same_v<ArgType, ScalarRegister>) { + return RegWrapper<true>{ctx, arg}; + } else if constexpr (std::is_base_of_v<Value, ArgType>) { + return ValueWrapper<ArgType>{ctx, arg}; + } else if constexpr (std::is_same_v<ArgType, const IR::Value&>) { + return Identity<const IR::Value&>{arg}; + } else if constexpr (std::is_same_v<ArgType, u32>) { + return Identity{arg.U32()}; + } else if constexpr (std::is_same_v<ArgType, IR::Attribute>) { + return Identity{arg.Attribute()}; + } else if constexpr (std::is_same_v<ArgType, IR::Patch>) { + return Identity{arg.Patch()}; + } else if constexpr (std::is_same_v<ArgType, IR::Reg>) { + return Identity{arg.Reg()}; + } +} + +template <auto func, bool is_first_arg_inst> +struct InvokeCall { + template <typename... Args> + InvokeCall(EmitContext& ctx, IR::Inst* inst, Args&&... args) { + if constexpr (is_first_arg_inst) { + func(ctx, *inst, args.Extract()...); + } else { + func(ctx, args.Extract()...); + } + } +}; + +template <auto func, bool is_first_arg_inst, size_t... 
I> +void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) { + using Traits = FuncTraits<decltype(func)>; + if constexpr (is_first_arg_inst) { + InvokeCall<func, is_first_arg_inst>{ + ctx, inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...}; + } else { + InvokeCall<func, is_first_arg_inst>{ + ctx, inst, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...}; + } +} + +template <auto func> +void Invoke(EmitContext& ctx, IR::Inst* inst) { + using Traits = FuncTraits<decltype(func)>; + static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments"); + if constexpr (Traits::NUM_ARGS == 1) { + Invoke<func, false>(ctx, inst, std::make_index_sequence<0>{}); + } else { + using FirstArgType = typename Traits::template ArgType<1>; + static constexpr bool is_first_arg_inst = std::is_same_v<FirstArgType, IR::Inst&>; + using Indices = std::make_index_sequence<Traits::NUM_ARGS - (is_first_arg_inst ? 2 : 1)>; + Invoke<func, is_first_arg_inst>(ctx, inst, Indices{}); + } +} + +void EmitInst(EmitContext& ctx, IR::Inst* inst) { + switch (inst->GetOpcode()) { +#define OPCODE(name, result_type, ...) \ + case IR::Opcode::name: \ + return Invoke<&Emit##name>(ctx, inst); +#include "shader_recompiler/frontend/ir/opcodes.inc" +#undef OPCODE + } + throw LogicError("Invalid opcode {}", inst->GetOpcode()); +} + +bool IsReference(IR::Inst& inst) { + return inst.GetOpcode() == IR::Opcode::Reference; +} + +void PrecolorInst(IR::Inst& phi) { + // Insert phi moves before references to avoid overwritting other phis + const size_t num_args{phi.NumArgs()}; + for (size_t i = 0; i < num_args; ++i) { + IR::Block& phi_block{*phi.PhiBlock(i)}; + auto it{std::find_if_not(phi_block.rbegin(), phi_block.rend(), IsReference).base()}; + IR::IREmitter ir{phi_block, it}; + const IR::Value arg{phi.Arg(i)}; + if (arg.IsImmediate()) { + ir.PhiMove(phi, arg); + } else { + ir.PhiMove(phi, IR::Value{&RegAlloc::AliasInst(*arg.Inst())}); + } + } + for (size_t i = 0; i < num_args; ++i) { + IR::IREmitter{*phi.PhiBlock(i)}.Reference(IR::Value{&phi}); + } +} + +void Precolor(const IR::Program& program) { + for (IR::Block* const block : program.blocks) { + for (IR::Inst& phi : block->Instructions()) { + if (!IR::IsPhi(phi)) { + break; + } + PrecolorInst(phi); + } + } +} + +void EmitCode(EmitContext& ctx, const IR::Program& program) { + const auto eval{ + [&](const IR::U1& cond) { return ScalarS32{ctx.reg_alloc.Consume(IR::Value{cond})}; }}; + for (const IR::AbstractSyntaxNode& node : program.syntax_list) { + switch (node.type) { + case IR::AbstractSyntaxNode::Type::Block: + for (IR::Inst& inst : node.data.block->Instructions()) { + EmitInst(ctx, &inst); + } + break; + case IR::AbstractSyntaxNode::Type::If: + ctx.Add("MOV.S.CC RC,{};" + "IF NE.x;", + eval(node.data.if_node.cond)); + break; + case IR::AbstractSyntaxNode::Type::EndIf: + ctx.Add("ENDIF;"); + break; + case IR::AbstractSyntaxNode::Type::Loop: + ctx.Add("REP;"); + break; + case IR::AbstractSyntaxNode::Type::Repeat: + if (!Settings::values.disable_shader_loop_safety_checks) { + const u32 loop_index{ctx.num_safety_loop_vars++}; + const u32 vector_index{loop_index / 4}; + const char component{"xyzw"[loop_index % 4]}; + ctx.Add("SUB.S.CC loop{}.{},loop{}.{},1;" + "BRK(LT.{});", + vector_index, component, vector_index, component, component); + } + if (node.data.repeat.cond.IsImmediate()) { + if (node.data.repeat.cond.U1()) { + ctx.Add("ENDREP;"); + } else { + ctx.Add("BRK;" + "ENDREP;"); + } + } else { + ctx.Add("MOV.S.CC RC,{};" + 
"BRK(EQ.x);" + "ENDREP;", + eval(node.data.repeat.cond)); + } + break; + case IR::AbstractSyntaxNode::Type::Break: + if (node.data.break_node.cond.IsImmediate()) { + if (node.data.break_node.cond.U1()) { + ctx.Add("BRK;"); + } + } else { + ctx.Add("MOV.S.CC RC,{};" + "BRK (NE.x);", + eval(node.data.break_node.cond)); + } + break; + case IR::AbstractSyntaxNode::Type::Return: + case IR::AbstractSyntaxNode::Type::Unreachable: + ctx.Add("RET;"); + break; + } + } + if (!ctx.reg_alloc.IsEmpty()) { + LOG_WARNING(Shader_GLASM, "Register leak after generating code"); + } +} + +void SetupOptions(const IR::Program& program, const Profile& profile, + const RuntimeInfo& runtime_info, std::string& header) { + const Info& info{program.info}; + const Stage stage{program.stage}; + + // TODO: Track the shared atomic ops + header += "OPTION NV_internal;" + "OPTION NV_shader_storage_buffer;" + "OPTION NV_gpu_program_fp64;"; + if (info.uses_int64_bit_atomics) { + header += "OPTION NV_shader_atomic_int64;"; + } + if (info.uses_atomic_f32_add) { + header += "OPTION NV_shader_atomic_float;"; + } + if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) { + header += "OPTION NV_shader_atomic_fp16_vector;"; + } + if (info.uses_subgroup_invocation_id || info.uses_subgroup_mask || info.uses_subgroup_vote || + info.uses_fswzadd) { + header += "OPTION NV_shader_thread_group;"; + } + if (info.uses_subgroup_shuffles) { + header += "OPTION NV_shader_thread_shuffle;"; + } + if (info.uses_sparse_residency) { + header += "OPTION EXT_sparse_texture2;"; + } + const bool stores_viewport_layer{info.stores[IR::Attribute::ViewportIndex] || + info.stores[IR::Attribute::Layer]}; + if ((stage != Stage::Geometry && stores_viewport_layer) || + info.stores[IR::Attribute::ViewportMask]) { + if (profile.support_viewport_index_layer_non_geometry) { + header += "OPTION NV_viewport_array2;"; + } + } + if (program.is_geometry_passthrough && profile.support_geometry_shader_passthrough) { + header += "OPTION NV_geometry_shader_passthrough;"; + } + if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) { + header += "OPTION EXT_shader_image_load_formatted;"; + } + if (profile.support_derivative_control) { + header += "OPTION ARB_derivative_control;"; + } + if (stage == Stage::Fragment && runtime_info.force_early_z != 0) { + header += "OPTION NV_early_fragment_tests;"; + } + if (stage == Stage::Fragment) { + header += "OPTION ARB_draw_buffers;"; + } +} + +std::string_view StageHeader(Stage stage) { + switch (stage) { + case Stage::VertexA: + case Stage::VertexB: + return "!!NVvp5.0\n"; + case Stage::TessellationControl: + return "!!NVtcp5.0\n"; + case Stage::TessellationEval: + return "!!NVtep5.0\n"; + case Stage::Geometry: + return "!!NVgp5.0\n"; + case Stage::Fragment: + return "!!NVfp5.0\n"; + case Stage::Compute: + return "!!NVcp5.0\n"; + } + throw InvalidArgument("Invalid stage {}", stage); +} + +std::string_view InputPrimitive(InputTopology topology) { + switch (topology) { + case InputTopology::Points: + return "POINTS"; + case InputTopology::Lines: + return "LINES"; + case InputTopology::LinesAdjacency: + return "LINESS_ADJACENCY"; + case InputTopology::Triangles: + return "TRIANGLES"; + case InputTopology::TrianglesAdjacency: + return "TRIANGLES_ADJACENCY"; + } + throw InvalidArgument("Invalid input topology {}", topology); +} + +std::string_view OutputPrimitive(OutputTopology topology) { + switch (topology) { + case OutputTopology::PointList: + return "POINTS"; + case 
OutputTopology::LineStrip: + return "LINE_STRIP"; + case OutputTopology::TriangleStrip: + return "TRIANGLE_STRIP"; + } + throw InvalidArgument("Invalid output topology {}", topology); +} + +std::string_view GetTessMode(TessPrimitive primitive) { + switch (primitive) { + case TessPrimitive::Triangles: + return "TRIANGLES"; + case TessPrimitive::Quads: + return "QUADS"; + case TessPrimitive::Isolines: + return "ISOLINES"; + } + throw InvalidArgument("Invalid tessellation primitive {}", primitive); +} + +std::string_view GetTessSpacing(TessSpacing spacing) { + switch (spacing) { + case TessSpacing::Equal: + return "EQUAL"; + case TessSpacing::FractionalOdd: + return "FRACTIONAL_ODD"; + case TessSpacing::FractionalEven: + return "FRACTIONAL_EVEN"; + } + throw InvalidArgument("Invalid tessellation spacing {}", spacing); +} +} // Anonymous namespace + +std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, IR::Program& program, + Bindings& bindings) { + EmitContext ctx{program, bindings, profile, runtime_info}; + Precolor(program); + EmitCode(ctx, program); + std::string header{StageHeader(program.stage)}; + SetupOptions(program, profile, runtime_info, header); + switch (program.stage) { + case Stage::TessellationControl: + header += fmt::format("VERTICES_OUT {};", program.invocations); + break; + case Stage::TessellationEval: + header += fmt::format("TESS_MODE {};" + "TESS_SPACING {};" + "TESS_VERTEX_ORDER {};", + GetTessMode(runtime_info.tess_primitive), + GetTessSpacing(runtime_info.tess_spacing), + runtime_info.tess_clockwise ? "CW" : "CCW"); + break; + case Stage::Geometry: + header += fmt::format("PRIMITIVE_IN {};", InputPrimitive(runtime_info.input_topology)); + if (program.is_geometry_passthrough) { + if (profile.support_geometry_shader_passthrough) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (program.info.passthrough.Generic(index)) { + header += fmt::format("PASSTHROUGH result.attrib[{}];", index); + } + } + if (program.info.passthrough.AnyComponent(IR::Attribute::PositionX)) { + header += "PASSTHROUGH result.position;"; + } + } else { + LOG_WARNING(Shader_GLASM, "Passthrough geometry program used but not supported"); + } + } else { + header += + fmt::format("VERTICES_OUT {};" + "PRIMITIVE_OUT {};", + program.output_vertices, OutputPrimitive(program.output_topology)); + } + break; + case Stage::Compute: + header += fmt::format("GROUP_SIZE {} {} {};", program.workgroup_size[0], + program.workgroup_size[1], program.workgroup_size[2]); + break; + default: + break; + } + if (program.shared_memory_size > 0) { + header += fmt::format("SHARED_MEMORY {};", program.shared_memory_size); + header += fmt::format("SHARED shared_mem[]={{program.sharedmem}};"); + } + header += "TEMP "; + for (size_t index = 0; index < ctx.reg_alloc.NumUsedRegisters(); ++index) { + header += fmt::format("R{},", index); + } + if (program.local_memory_size > 0) { + header += fmt::format("lmem[{}],", program.local_memory_size); + } + if (program.info.uses_fswzadd) { + header += "FSWZA[4],FSWZB[4],"; + } + const u32 num_safety_loop_vectors{Common::DivCeil(ctx.num_safety_loop_vars, 4u)}; + for (u32 index = 0; index < num_safety_loop_vectors; ++index) { + header += fmt::format("loop{},", index); + } + header += "RC;" + "LONG TEMP "; + for (size_t index = 0; index < ctx.reg_alloc.NumUsedLongRegisters(); ++index) { + header += fmt::format("D{},", index); + } + header += "DC;"; + if (program.info.uses_fswzadd) { + header += "MOV.F FSWZA[0],-1;" + "MOV.F FSWZA[1],1;" + "MOV.F 
FSWZA[2],-1;" + "MOV.F FSWZA[3],0;" + "MOV.F FSWZB[0],-1;" + "MOV.F FSWZB[1],-1;" + "MOV.F FSWZB[2],1;" + "MOV.F FSWZB[3],-1;"; + } + for (u32 index = 0; index < num_safety_loop_vectors; ++index) { + header += fmt::format("MOV.S loop{},{{0x2000,0x2000,0x2000,0x2000}};", index); + } + if (ctx.uses_y_direction) { + header += "PARAM y_direction[1]={state.material.front.ambient};"; + } + ctx.code.insert(0, header); + ctx.code += "END"; + return ctx.code; +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.h b/src/shader_recompiler/backend/glasm/emit_glasm.h new file mode 100644 index 000000000..bcb55f062 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm.h @@ -0,0 +1,25 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <string> + +#include "shader_recompiler/backend/bindings.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" + +namespace Shader::Backend::GLASM { + +[[nodiscard]] std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, + IR::Program& program, Bindings& bindings); + +[[nodiscard]] inline std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, + IR::Program& program) { + Bindings binding; + return EmitGLASM(profile, runtime_info, program, binding); +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_barriers.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_barriers.cpp new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_barriers.cpp diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp new file mode 100644 index 000000000..9201ccd39 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp @@ -0,0 +1,91 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLASM { + +static void Alias(IR::Inst& inst, const IR::Value& value) { + if (value.IsImmediate()) { + return; + } + IR::Inst& value_inst{RegAlloc::AliasInst(*value.Inst())}; + value_inst.DestructiveAddUsage(inst.UseCount()); + value_inst.DestructiveRemoveUsage(); + inst.SetDefinition(value_inst.Definition<Id>()); +} + +void EmitIdentity(EmitContext&, IR::Inst& inst, const IR::Value& value) { + Alias(inst, value); +} + +void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) { + // Fake one usage to get a real register out of the condition + inst.DestructiveAddUsage(1); + const Register ret{ctx.reg_alloc.Define(inst)}; + const ScalarS32 input{ctx.reg_alloc.Consume(value)}; + if (ret != input) { + ctx.Add("MOV.S {},{};", ret, input); + } +} + +void EmitBitCastU16F16(EmitContext&, IR::Inst& inst, const IR::Value& value) { + Alias(inst, value); +} + +void EmitBitCastU32F32(EmitContext&, IR::Inst& inst, const IR::Value& value) { + Alias(inst, value); +} + +void EmitBitCastU64F64(EmitContext&, IR::Inst& inst, const IR::Value& value) { + Alias(inst, value); +} + +void EmitBitCastF16U16(EmitContext&, IR::Inst& inst, const IR::Value& value) { + Alias(inst, value); +} + +void EmitBitCastF32U32(EmitContext&, IR::Inst& inst, const IR::Value& value) { + Alias(inst, value); +} + +void EmitBitCastF64U64(EmitContext&, IR::Inst& inst, const IR::Value& value) { + Alias(inst, value); +} + +void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value) { + ctx.LongAdd("PK64.U {}.x,{};", inst, value); +} + +void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value) { + ctx.Add("UP64.U {}.xy,{}.x;", inst, value); +} + +void EmitPackFloat2x16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitUnpackFloat2x16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value) { + ctx.Add("PK2H {}.x,{};", inst, value); +} + +void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value) { + ctx.Add("UP2H {}.xy,{}.x;", inst, value); +} + +void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value) { + ctx.LongAdd("PK64 {}.x,{};", inst, value); +} + +void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value) { + ctx.Add("UP64 {}.xy,{}.x;", inst, value); +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp new file mode 100644 index 000000000..bff0b7c1c --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp @@ -0,0 +1,244 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLASM { +namespace { +template <auto read_imm, char type, typename... Values> +void CompositeConstruct(EmitContext& ctx, IR::Inst& inst, Values&&... 
elements) { + const Register ret{ctx.reg_alloc.Define(inst)}; + if (std::ranges::any_of(std::array{elements...}, + [](const IR::Value& value) { return value.IsImmediate(); })) { + using Type = std::invoke_result_t<decltype(read_imm), IR::Value>; + const std::array<Type, 4> values{(elements.IsImmediate() ? (elements.*read_imm)() : 0)...}; + ctx.Add("MOV.{} {},{{{},{},{},{}}};", type, ret, fmt::to_string(values[0]), + fmt::to_string(values[1]), fmt::to_string(values[2]), fmt::to_string(values[3])); + } + size_t index{}; + for (const IR::Value& element : {elements...}) { + if (!element.IsImmediate()) { + const ScalarU32 value{ctx.reg_alloc.Consume(element)}; + ctx.Add("MOV.{} {}.{},{};", type, ret, "xyzw"[index], value); + } + ++index; + } +} + +void CompositeExtract(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index, char type) { + const Register ret{ctx.reg_alloc.Define(inst)}; + if (ret == composite && index == 0) { + // No need to do anything here, the source and destination are the same register + return; + } + ctx.Add("MOV.{} {}.x,{}.{};", type, ret, composite, "xyzw"[index]); +} + +template <typename ObjectType> +void CompositeInsert(EmitContext& ctx, IR::Inst& inst, Register composite, ObjectType object, + u32 index, char type) { + const Register ret{ctx.reg_alloc.Define(inst)}; + const char swizzle{"xyzw"[index]}; + if (ret != composite && ret == object) { + // The object is aliased with the return value, so we have to use a temporary to insert + ctx.Add("MOV.{} RC,{};" + "MOV.{} RC.{},{};" + "MOV.{} {},RC;", + type, composite, type, swizzle, object, type, ret); + } else if (ret != composite) { + // The input composite is not aliased with the return value so we have to copy it before + // hand. But the insert object is not aliased with the return value, so we don't have to + // worry about that + ctx.Add("MOV.{} {},{};" + "MOV.{} {}.{},{};", + type, ret, composite, type, ret, swizzle, object); + } else { + // The return value is alised so we can just insert the object, it doesn't matter if it's + // aliased + ctx.Add("MOV.{} {}.{},{};", type, ret, swizzle, object); + } +} +} // Anonymous namespace + +void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, + const IR::Value& e2) { + CompositeConstruct<&IR::Value::U32, 'U'>(ctx, inst, e1, e2); +} + +void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, + const IR::Value& e2, const IR::Value& e3) { + CompositeConstruct<&IR::Value::U32, 'U'>(ctx, inst, e1, e2, e3); +} + +void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, + const IR::Value& e2, const IR::Value& e3, const IR::Value& e4) { + CompositeConstruct<&IR::Value::U32, 'U'>(ctx, inst, e1, e2, e3, e4); +} + +void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) { + CompositeExtract(ctx, inst, composite, index, 'U'); +} + +void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) { + CompositeExtract(ctx, inst, composite, index, 'U'); +} + +void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) { + CompositeExtract(ctx, inst, composite, index, 'U'); +} + +void EmitCompositeInsertU32x2([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, + [[maybe_unused]] ScalarU32 object, [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeInsertU32x3([[maybe_unused]] EmitContext& ctx, + 
[[maybe_unused]] Register composite, + [[maybe_unused]] ScalarU32 object, [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeInsertU32x4([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, + [[maybe_unused]] ScalarU32 object, [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeConstructF16x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register e1, + [[maybe_unused]] Register e2) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeConstructF16x3([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register e1, + [[maybe_unused]] Register e2, [[maybe_unused]] Register e3) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeConstructF16x4([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register e1, + [[maybe_unused]] Register e2, [[maybe_unused]] Register e3, + [[maybe_unused]] Register e4) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeExtractF16x2([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeExtractF16x3([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeExtractF16x4([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeInsertF16x2([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, [[maybe_unused]] Register object, + [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeInsertF16x3([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, [[maybe_unused]] Register object, + [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeInsertF16x4([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, [[maybe_unused]] Register object, + [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, + const IR::Value& e2) { + CompositeConstruct<&IR::Value::F32, 'F'>(ctx, inst, e1, e2); +} + +void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, + const IR::Value& e2, const IR::Value& e3) { + CompositeConstruct<&IR::Value::F32, 'F'>(ctx, inst, e1, e2, e3); +} + +void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, + const IR::Value& e2, const IR::Value& e3, const IR::Value& e4) { + CompositeConstruct<&IR::Value::F32, 'F'>(ctx, inst, e1, e2, e3, e4); +} + +void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) { + CompositeExtract(ctx, inst, composite, index, 'F'); +} + +void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) { + CompositeExtract(ctx, inst, composite, index, 'F'); +} + +void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) { + CompositeExtract(ctx, inst, composite, index, 'F'); +} + +void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, Register composite, + ScalarF32 object, u32 index) { + 
CompositeInsert(ctx, inst, composite, object, index, 'F'); +} + +void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, Register composite, + ScalarF32 object, u32 index) { + CompositeInsert(ctx, inst, composite, object, index, 'F'); +} + +void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, Register composite, + ScalarF32 object, u32 index) { + CompositeInsert(ctx, inst, composite, object, index, 'F'); +} + +void EmitCompositeConstructF64x2([[maybe_unused]] EmitContext& ctx) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeConstructF64x3([[maybe_unused]] EmitContext& ctx) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeConstructF64x4([[maybe_unused]] EmitContext& ctx) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeExtractF64x2([[maybe_unused]] EmitContext& ctx) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeExtractF64x3([[maybe_unused]] EmitContext& ctx) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeExtractF64x4([[maybe_unused]] EmitContext& ctx) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeInsertF64x2([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, [[maybe_unused]] Register object, + [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeInsertF64x3([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, [[maybe_unused]] Register object, + [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeInsertF64x4([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, [[maybe_unused]] Register object, + [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp new file mode 100644 index 000000000..02c9dc6d7 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp @@ -0,0 +1,346 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" +#include "shader_recompiler/shader_info.h" + +namespace Shader::Backend::GLASM { +namespace { +void GetCbuf(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset, + std::string_view size) { + if (!binding.IsImmediate()) { + throw NotImplementedException("Indirect constant buffer loading"); + } + const Register ret{ctx.reg_alloc.Define(inst)}; + if (offset.type == Type::U32) { + // Avoid reading arrays out of bounds, matching hardware's behavior + if (offset.imm_u32 >= 0x10'000) { + ctx.Add("MOV.S {},0;", ret); + return; + } + } + ctx.Add("LDC.{} {},c{}[{}];", size, ret, binding.U32(), offset); +} + +bool IsInputArray(Stage stage) { + return stage == Stage::Geometry || stage == Stage::TessellationControl || + stage == Stage::TessellationEval; +} + +std::string VertexIndex(EmitContext& ctx, ScalarU32 vertex) { + return IsInputArray(ctx.stage) ? 
fmt::format("[{}]", vertex) : ""; +} + +u32 TexCoordIndex(IR::Attribute attr) { + return (static_cast<u32>(attr) - static_cast<u32>(IR::Attribute::FixedFncTexture0S)) / 4; +} +} // Anonymous namespace + +void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) { + GetCbuf(ctx, inst, binding, offset, "U8"); +} + +void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) { + GetCbuf(ctx, inst, binding, offset, "S8"); +} + +void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) { + GetCbuf(ctx, inst, binding, offset, "U16"); +} + +void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) { + GetCbuf(ctx, inst, binding, offset, "S16"); +} + +void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) { + GetCbuf(ctx, inst, binding, offset, "U32"); +} + +void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) { + GetCbuf(ctx, inst, binding, offset, "F32"); +} + +void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset) { + GetCbuf(ctx, inst, binding, offset, "U32X2"); +} + +void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32 vertex) { + const u32 element{static_cast<u32>(attr) % 4}; + const char swizzle{"xyzw"[element]}; + if (IR::IsGeneric(attr)) { + const u32 index{IR::GenericAttributeIndex(attr)}; + ctx.Add("MOV.F {}.x,in_attr{}{}[0].{};", inst, index, VertexIndex(ctx, vertex), swizzle); + return; + } + if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9Q) { + const u32 index{TexCoordIndex(attr)}; + ctx.Add("MOV.F {}.x,{}.texcoord[{}].{};", inst, ctx.attrib_name, index, swizzle); + return; + } + switch (attr) { + case IR::Attribute::PrimitiveId: + ctx.Add("MOV.S {}.x,primitive.id;", inst); + break; + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: + if (IsInputArray(ctx.stage)) { + ctx.Add("MOV.F {}.x,vertex_position{}.{};", inst, VertexIndex(ctx, vertex), swizzle); + } else { + ctx.Add("MOV.F {}.x,{}.position.{};", inst, ctx.attrib_name, swizzle); + } + break; + case IR::Attribute::ColorFrontDiffuseR: + case IR::Attribute::ColorFrontDiffuseG: + case IR::Attribute::ColorFrontDiffuseB: + case IR::Attribute::ColorFrontDiffuseA: + ctx.Add("MOV.F {}.x,{}.color.{};", inst, ctx.attrib_name, swizzle); + break; + case IR::Attribute::PointSpriteS: + case IR::Attribute::PointSpriteT: + ctx.Add("MOV.F {}.x,{}.pointcoord.{};", inst, ctx.attrib_name, swizzle); + break; + case IR::Attribute::TessellationEvaluationPointU: + case IR::Attribute::TessellationEvaluationPointV: + ctx.Add("MOV.F {}.x,vertex.tesscoord.{};", inst, swizzle); + break; + case IR::Attribute::InstanceId: + ctx.Add("MOV.S {}.x,{}.instance;", inst, ctx.attrib_name); + break; + case IR::Attribute::VertexId: + ctx.Add("MOV.S {}.x,{}.id;", inst, ctx.attrib_name); + break; + case IR::Attribute::FrontFace: + ctx.Add("CMP.S {}.x,{}.facing.x,0,-1;", inst, ctx.attrib_name); + break; + default: + throw NotImplementedException("Get attribute {}", attr); + } +} + +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value, + [[maybe_unused]] ScalarU32 vertex) { + const u32 element{static_cast<u32>(attr) % 4}; + const char swizzle{"xyzw"[element]}; + if (IR::IsGeneric(attr)) { + const u32 
index{IR::GenericAttributeIndex(attr)}; + ctx.Add("MOV.F out_attr{}[0].{},{};", index, swizzle, value); + return; + } + if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9R) { + const u32 index{TexCoordIndex(attr)}; + ctx.Add("MOV.F result.texcoord[{}].{},{};", index, swizzle, value); + return; + } + switch (attr) { + case IR::Attribute::Layer: + if (ctx.stage == Stage::Geometry || ctx.profile.support_viewport_index_layer_non_geometry) { + ctx.Add("MOV.F result.layer.x,{};", value); + } else { + LOG_WARNING(Shader_GLASM, + "Layer stored outside of geometry shader not supported by device"); + } + break; + case IR::Attribute::ViewportIndex: + if (ctx.stage == Stage::Geometry || ctx.profile.support_viewport_index_layer_non_geometry) { + ctx.Add("MOV.F result.viewport.x,{};", value); + } else { + LOG_WARNING(Shader_GLASM, + "Viewport stored outside of geometry shader not supported by device"); + } + break; + case IR::Attribute::ViewportMask: + // NV_viewport_array2 is required to access result.viewportmask, regardless of shader stage. + if (ctx.profile.support_viewport_index_layer_non_geometry) { + ctx.Add("MOV.F result.viewportmask[0].x,{};", value); + } else { + LOG_WARNING(Shader_GLASM, "Device does not support storing to ViewportMask"); + } + break; + case IR::Attribute::PointSize: + ctx.Add("MOV.F result.pointsize.x,{};", value); + break; + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: + ctx.Add("MOV.F result.position.{},{};", swizzle, value); + break; + case IR::Attribute::ColorFrontDiffuseR: + case IR::Attribute::ColorFrontDiffuseG: + case IR::Attribute::ColorFrontDiffuseB: + case IR::Attribute::ColorFrontDiffuseA: + ctx.Add("MOV.F result.color.{},{};", swizzle, value); + break; + case IR::Attribute::ColorFrontSpecularR: + case IR::Attribute::ColorFrontSpecularG: + case IR::Attribute::ColorFrontSpecularB: + case IR::Attribute::ColorFrontSpecularA: + ctx.Add("MOV.F result.color.secondary.{},{};", swizzle, value); + break; + case IR::Attribute::ColorBackDiffuseR: + case IR::Attribute::ColorBackDiffuseG: + case IR::Attribute::ColorBackDiffuseB: + case IR::Attribute::ColorBackDiffuseA: + ctx.Add("MOV.F result.color.back.{},{};", swizzle, value); + break; + case IR::Attribute::ColorBackSpecularR: + case IR::Attribute::ColorBackSpecularG: + case IR::Attribute::ColorBackSpecularB: + case IR::Attribute::ColorBackSpecularA: + ctx.Add("MOV.F result.color.back.secondary.{},{};", swizzle, value); + break; + case IR::Attribute::FogCoordinate: + ctx.Add("MOV.F result.fogcoord.x,{};", value); + break; + case IR::Attribute::ClipDistance0: + case IR::Attribute::ClipDistance1: + case IR::Attribute::ClipDistance2: + case IR::Attribute::ClipDistance3: + case IR::Attribute::ClipDistance4: + case IR::Attribute::ClipDistance5: + case IR::Attribute::ClipDistance6: + case IR::Attribute::ClipDistance7: { + const u32 index{static_cast<u32>(attr) - static_cast<u32>(IR::Attribute::ClipDistance0)}; + ctx.Add("MOV.F result.clip[{}].x,{};", index, value); + break; + } + default: + throw NotImplementedException("Set attribute {}", attr); + } +} + +void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, ScalarS32 offset, ScalarU32 vertex) { + // RC.x = base_index + // RC.y = masked_index + // RC.z = compare_index + ctx.Add("SHR.S RC.x,{},2;" + "AND.S RC.y,RC.x,3;" + "SHR.S RC.z,{},4;", + offset, offset); + + const Register ret{ctx.reg_alloc.Define(inst)}; + u32 num_endifs{}; + const auto 
read{[&](u32 compare_index, const std::array<std::string, 4>& values) { + ++num_endifs; + ctx.Add("SEQ.S.CC RC.w,RC.z,{};" // compare_index + "IF NE.w;" + // X + "SEQ.S.CC RC.w,RC.y,0;" + "IF NE.w;" + "MOV {}.x,{};" + "ELSE;" + // Y + "SEQ.S.CC RC.w,RC.y,1;" + "IF NE.w;" + "MOV {}.x,{};" + "ELSE;" + // Z + "SEQ.S.CC RC.w,RC.y,2;" + "IF NE.w;" + "MOV {}.x,{};" + "ELSE;" + // W + "MOV {}.x,{};" + "ENDIF;" + "ENDIF;" + "ENDIF;" + "ELSE;", + compare_index, ret, values[0], ret, values[1], ret, values[2], ret, values[3]); + }}; + const auto read_swizzled{[&](u32 compare_index, std::string_view value) { + const std::array values{fmt::format("{}.x", value), fmt::format("{}.y", value), + fmt::format("{}.z", value), fmt::format("{}.w", value)}; + read(compare_index, values); + }}; + if (ctx.info.loads.AnyComponent(IR::Attribute::PositionX)) { + const u32 index{static_cast<u32>(IR::Attribute::PositionX)}; + if (IsInputArray(ctx.stage)) { + read_swizzled(index, fmt::format("vertex_position{}", VertexIndex(ctx, vertex))); + } else { + read_swizzled(index, fmt::format("{}.position", ctx.attrib_name)); + } + } + for (u32 index = 0; index < static_cast<u32>(IR::NUM_GENERICS); ++index) { + if (!ctx.info.loads.Generic(index)) { + continue; + } + read_swizzled(index, fmt::format("in_attr{}{}[0]", index, VertexIndex(ctx, vertex))); + } + for (u32 i = 0; i < num_endifs; ++i) { + ctx.Add("ENDIF;"); + } +} + +void EmitSetAttributeIndexed([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 offset, + [[maybe_unused]] ScalarF32 value, [[maybe_unused]] ScalarU32 vertex) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch) { + if (!IR::IsGeneric(patch)) { + throw NotImplementedException("Non-generic patch load"); + } + const u32 index{IR::GenericPatchIndex(patch)}; + const u32 element{IR::GenericPatchElement(patch)}; + const char swizzle{"xyzw"[element]}; + const std::string_view out{ctx.stage == Stage::TessellationControl ? 
".out" : ""}; + ctx.Add("MOV.F {},primitive{}.patch.attrib[{}].{};", inst, out, index, swizzle); +} + +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, ScalarF32 value) { + if (IR::IsGeneric(patch)) { + const u32 index{IR::GenericPatchIndex(patch)}; + const u32 element{IR::GenericPatchElement(patch)}; + ctx.Add("MOV.F result.patch.attrib[{}].{},{};", index, "xyzw"[element], value); + return; + } + switch (patch) { + case IR::Patch::TessellationLodLeft: + case IR::Patch::TessellationLodRight: + case IR::Patch::TessellationLodTop: + case IR::Patch::TessellationLodBottom: { + const u32 index{static_cast<u32>(patch) - u32(IR::Patch::TessellationLodLeft)}; + ctx.Add("MOV.F result.patch.tessouter[{}].x,{};", index, value); + break; + } + case IR::Patch::TessellationLodInteriorU: + ctx.Add("MOV.F result.patch.tessinner[0].x,{};", value); + break; + case IR::Patch::TessellationLodInteriorV: + ctx.Add("MOV.F result.patch.tessinner[1].x,{};", value); + break; + default: + throw NotImplementedException("Patch {}", patch); + } +} + +void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, ScalarF32 value) { + ctx.Add("MOV.F frag_color{}.{},{};", index, "xyzw"[component], value); +} + +void EmitSetSampleMask(EmitContext& ctx, ScalarS32 value) { + ctx.Add("MOV.S result.samplemask.x,{};", value); +} + +void EmitSetFragDepth(EmitContext& ctx, ScalarF32 value) { + ctx.Add("MOV.F result.depth.z,{};", value); +} + +void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, ScalarU32 word_offset) { + ctx.Add("MOV.U {},lmem[{}].x;", inst, word_offset); +} + +void EmitWriteLocal(EmitContext& ctx, ScalarU32 word_offset, ScalarU32 value) { + ctx.Add("MOV.U lmem[{}].x,{};", word_offset, value); +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_control_flow.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_control_flow.cpp new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_control_flow.cpp diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_convert.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_convert.cpp new file mode 100644 index 000000000..ccdf1cbc8 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_convert.cpp @@ -0,0 +1,231 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLASM { +namespace { +std::string_view FpRounding(IR::FpRounding fp_rounding) { + switch (fp_rounding) { + case IR::FpRounding::DontCare: + return ""; + case IR::FpRounding::RN: + return ".ROUND"; + case IR::FpRounding::RZ: + return ".TRUNC"; + case IR::FpRounding::RM: + return ".FLR"; + case IR::FpRounding::RP: + return ".CEIL"; + } + throw InvalidArgument("Invalid floating-point rounding {}", fp_rounding); +} + +template <typename InputType> +void Convert(EmitContext& ctx, IR::Inst& inst, InputType value, std::string_view dest, + std::string_view src, bool is_long_result) { + const std::string_view fp_rounding{FpRounding(inst.Flags<IR::FpControl>().rounding)}; + const auto ret{is_long_result ? 
ctx.reg_alloc.LongDefine(inst) : ctx.reg_alloc.Define(inst)}; + ctx.Add("CVT.{}.{}{} {}.x,{};", dest, src, fp_rounding, ret, value); +} +} // Anonymous namespace + +void EmitConvertS16F16(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "S16", "F16", false); +} + +void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + Convert(ctx, inst, value, "S16", "F32", false); +} + +void EmitConvertS16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { + Convert(ctx, inst, value, "S16", "F64", false); +} + +void EmitConvertS32F16(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "S32", "F16", false); +} + +void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + Convert(ctx, inst, value, "S32", "F32", false); +} + +void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { + Convert(ctx, inst, value, "S32", "F64", false); +} + +void EmitConvertS64F16(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "S64", "F16", true); +} + +void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + Convert(ctx, inst, value, "S64", "F32", true); +} + +void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { + Convert(ctx, inst, value, "S64", "F64", true); +} + +void EmitConvertU16F16(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "U16", "F16", false); +} + +void EmitConvertU16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + Convert(ctx, inst, value, "U16", "F32", false); +} + +void EmitConvertU16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { + Convert(ctx, inst, value, "U16", "F64", false); +} + +void EmitConvertU32F16(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "U32", "F16", false); +} + +void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + Convert(ctx, inst, value, "U32", "F32", false); +} + +void EmitConvertU32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { + Convert(ctx, inst, value, "U32", "F64", false); +} + +void EmitConvertU64F16(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "U64", "F16", true); +} + +void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + Convert(ctx, inst, value, "U64", "F32", true); +} + +void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { + Convert(ctx, inst, value, "U64", "F64", true); +} + +void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) { + Convert(ctx, inst, value, "U64", "U32", true); +} + +void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "U32", "U64", false); +} + +void EmitConvertF16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + Convert(ctx, inst, value, "F16", "F32", false); +} + +void EmitConvertF32F16(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F32", "F16", false); +} + +void EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { + Convert(ctx, inst, value, "F32", "F64", false); +} + +void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + Convert(ctx, inst, value, "F64", "F32", true); +} + +void EmitConvertF16S8(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F16", "S8", false); +} + +void EmitConvertF16S16(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, 
"F16", "S16", false); +} + +void EmitConvertF16S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { + Convert(ctx, inst, value, "F16", "S32", false); +} + +void EmitConvertF16S64(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F16", "S64", false); +} + +void EmitConvertF16U8(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F16", "U8", false); +} + +void EmitConvertF16U16(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F16", "U16", false); +} + +void EmitConvertF16U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) { + Convert(ctx, inst, value, "F16", "U32", false); +} + +void EmitConvertF16U64(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F16", "U64", false); +} + +void EmitConvertF32S8(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F32", "S8", false); +} + +void EmitConvertF32S16(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F32", "S16", false); +} + +void EmitConvertF32S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { + Convert(ctx, inst, value, "F32", "S32", false); +} + +void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F32", "S64", false); +} + +void EmitConvertF32U8(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F32", "U8", false); +} + +void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F32", "U16", false); +} + +void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) { + Convert(ctx, inst, value, "F32", "U32", false); +} + +void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F32", "U64", false); +} + +void EmitConvertF64S8(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F64", "S8", true); +} + +void EmitConvertF64S16(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F64", "S16", true); +} + +void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { + Convert(ctx, inst, value, "F64", "S32", true); +} + +void EmitConvertF64S64(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F64", "S64", true); +} + +void EmitConvertF64U8(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F64", "U8", true); +} + +void EmitConvertF64U16(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F64", "U16", true); +} + +void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) { + Convert(ctx, inst, value, "F64", "U32", true); +} + +void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F64", "U64", true); +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp new file mode 100644 index 000000000..4ed58619d --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp @@ -0,0 +1,414 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <string_view> + +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLASM { +namespace { +template <typename InputType> +void Compare(EmitContext& ctx, IR::Inst& inst, InputType lhs, InputType rhs, std::string_view op, + std::string_view type, bool ordered, bool inequality = false) { + const Register ret{ctx.reg_alloc.Define(inst)}; + ctx.Add("{}.{} RC.x,{},{};", op, type, lhs, rhs); + if (ordered && inequality) { + ctx.Add("SEQ.{} RC.y,{},{};" + "SEQ.{} RC.z,{},{};" + "AND.U RC.x,RC.x,RC.y;" + "AND.U RC.x,RC.x,RC.z;" + "SNE.S {}.x,RC.x,0;", + type, lhs, lhs, type, rhs, rhs, ret); + } else if (ordered) { + ctx.Add("SNE.S {}.x,RC.x,0;", ret); + } else { + ctx.Add("SNE.{} RC.y,{},{};" + "SNE.{} RC.z,{},{};" + "OR.U RC.x,RC.x,RC.y;" + "OR.U RC.x,RC.x,RC.z;" + "SNE.S {}.x,RC.x,0;", + type, lhs, lhs, type, rhs, rhs, ret); + } +} + +template <typename InputType> +void Clamp(EmitContext& ctx, Register ret, InputType value, InputType min_value, + InputType max_value, std::string_view type) { + // Call MAX first to properly clamp nan to min_value instead + ctx.Add("MAX.{} RC.x,{},{};" + "MIN.{} {}.x,RC.x,{};", + type, min_value, value, type, ret, max_value); +} + +std::string_view Precise(IR::Inst& inst) { + const bool precise{inst.Flags<IR::FpControl>().no_contraction}; + return precise ? ".PREC" : ""; +} +} // Anonymous namespace + +void EmitFPAbs16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + ctx.Add("MOV.F {}.x,|{}|;", inst, value); +} + +void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { + ctx.LongAdd("MOV.F64 {}.x,|{}|;", inst, value); +} + +void EmitFPAdd16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] Register a, [[maybe_unused]] Register b) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) { + ctx.Add("ADD.F{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.Define(inst), a, b); +} + +void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) { + ctx.Add("ADD.F64{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.LongDefine(inst), a, b); +} + +void EmitFPFma16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] Register a, [[maybe_unused]] Register b, + [[maybe_unused]] Register c) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b, ScalarF32 c) { + ctx.Add("MAD.F{} {}.x,{},{},{};", Precise(inst), ctx.reg_alloc.Define(inst), a, b, c); +} + +void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b, ScalarF64 c) { + ctx.Add("MAD.F64{} {}.x,{},{},{};", Precise(inst), ctx.reg_alloc.LongDefine(inst), a, b, c); +} + +void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) { + ctx.Add("MAX.F {}.x,{},{};", inst, a, b); +} + +void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) { + ctx.LongAdd("MAX.F64 {}.x,{},{};", inst, a, b); +} + +void EmitFPMin32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) { + ctx.Add("MIN.F {}.x,{},{};", inst, a, b); 
+} + +void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) { + ctx.LongAdd("MIN.F64 {}.x,{},{};", inst, a, b); +} + +void EmitFPMul16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] Register a, [[maybe_unused]] Register b) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) { + ctx.Add("MUL.F{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.Define(inst), a, b); +} + +void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) { + ctx.Add("MUL.F64{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.LongDefine(inst), a, b); +} + +void EmitFPNeg16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, ScalarRegister value) { + ctx.Add("MOV.F {}.x,-{};", inst, value); +} + +void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, Register value) { + ctx.LongAdd("MOV.F64 {}.x,-{};", inst, value); +} + +void EmitFPSin(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + ctx.Add("SIN {}.x,{};", inst, value); +} + +void EmitFPCos(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + ctx.Add("COS {}.x,{};", inst, value); +} + +void EmitFPExp2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + ctx.Add("EX2 {}.x,{};", inst, value); +} + +void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + ctx.Add("LG2 {}.x,{};", inst, value); +} + +void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + ctx.Add("RCP {}.x,{};", inst, value); +} + +void EmitFPRecip64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPRecipSqrt32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + ctx.Add("RSQ {}.x,{};", inst, value); +} + +void EmitFPRecipSqrt64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + const Register ret{ctx.reg_alloc.Define(inst)}; + ctx.Add("RSQ RC.x,{};RCP {}.x,RC.x;", value, ret); +} + +void EmitFPSaturate16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + ctx.Add("MOV.F.SAT {}.x,{};", inst, value); +} + +void EmitFPSaturate64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPClamp16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value, + [[maybe_unused]] Register min_value, [[maybe_unused]] Register max_value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value, ScalarF32 min_value, + ScalarF32 max_value) { + Clamp(ctx, ctx.reg_alloc.Define(inst), value, min_value, max_value, "F"); +} + +void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value, ScalarF64 min_value, + ScalarF64 max_value) { + Clamp(ctx, ctx.reg_alloc.LongDefine(inst), value, min_value, max_value, "F64"); +} + +void EmitFPRoundEven16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + 
ctx.Add("ROUND.F {}.x,{};", inst, value); +} + +void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { + ctx.LongAdd("ROUND.F64 {}.x,{};", inst, value); +} + +void EmitFPFloor16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + ctx.Add("FLR.F {}.x,{};", inst, value); +} + +void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { + ctx.LongAdd("FLR.F64 {}.x,{};", inst, value); +} + +void EmitFPCeil16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + ctx.Add("CEIL.F {}.x,{};", inst, value); +} + +void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { + ctx.LongAdd("CEIL.F64 {}.x,{};", inst, value); +} + +void EmitFPTrunc16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + ctx.Add("TRUNC.F {}.x,{};", inst, value); +} + +void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { + ctx.LongAdd("TRUNC.F64 {}.x,{};", inst, value); +} + +void EmitFPOrdEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { + Compare(ctx, inst, lhs, rhs, "SEQ", "F", true); +} + +void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { + Compare(ctx, inst, lhs, rhs, "SEQ", "F64", true); +} + +void EmitFPUnordEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { + Compare(ctx, inst, lhs, rhs, "SEQ", "F", false); +} + +void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { + Compare(ctx, inst, lhs, rhs, "SEQ", "F64", false); +} + +void EmitFPOrdNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { + Compare(ctx, inst, lhs, rhs, "SNE", "F", true, true); +} + +void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { + Compare(ctx, inst, lhs, rhs, "SNE", "F64", true, true); +} + +void EmitFPUnordNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { + Compare(ctx, inst, lhs, rhs, "SNE", "F", false, true); +} + +void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { + Compare(ctx, inst, lhs, rhs, "SNE", "F64", false, true); +} + +void EmitFPOrdLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPOrdLessThan32(EmitContext& ctx, 
IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { + Compare(ctx, inst, lhs, rhs, "SLT", "F", true); +} + +void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { + Compare(ctx, inst, lhs, rhs, "SLT", "F64", true); +} + +void EmitFPUnordLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { + Compare(ctx, inst, lhs, rhs, "SLT", "F", false); +} + +void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { + Compare(ctx, inst, lhs, rhs, "SLT", "F64", false); +} + +void EmitFPOrdGreaterThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { + Compare(ctx, inst, lhs, rhs, "SGT", "F", true); +} + +void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { + Compare(ctx, inst, lhs, rhs, "SGT", "F64", true); +} + +void EmitFPUnordGreaterThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { + Compare(ctx, inst, lhs, rhs, "SGT", "F", false); +} + +void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { + Compare(ctx, inst, lhs, rhs, "SGT", "F64", false); +} + +void EmitFPOrdLessThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { + Compare(ctx, inst, lhs, rhs, "SLE", "F", true); +} + +void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { + Compare(ctx, inst, lhs, rhs, "SLE", "F64", true); +} + +void EmitFPUnordLessThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { + Compare(ctx, inst, lhs, rhs, "SLE", "F", false); +} + +void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { + Compare(ctx, inst, lhs, rhs, "SLE", "F64", false); +} + +void EmitFPOrdGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { + Compare(ctx, inst, lhs, rhs, "SGE", "F", true); +} + +void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { + Compare(ctx, inst, lhs, rhs, "SGE", "F64", true); +} + +void EmitFPUnordGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { + Compare(ctx, 
inst, lhs, rhs, "SGE", "F", false); +} + +void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { + Compare(ctx, inst, lhs, rhs, "SGE", "F64", false); +} + +void EmitFPIsNan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + Compare(ctx, inst, value, value, "SNE", "F", true, false); +} + +void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { + Compare(ctx, inst, value, value, "SNE", "F64", true, false); +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp new file mode 100644 index 000000000..09e3a9b82 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp @@ -0,0 +1,850 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <utility> + +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLASM { +namespace { +struct ScopedRegister { + ScopedRegister() = default; + ScopedRegister(RegAlloc& reg_alloc_) : reg_alloc{®_alloc_}, reg{reg_alloc->AllocReg()} {} + + ~ScopedRegister() { + if (reg_alloc) { + reg_alloc->FreeReg(reg); + } + } + + ScopedRegister& operator=(ScopedRegister&& rhs) noexcept { + if (reg_alloc) { + reg_alloc->FreeReg(reg); + } + reg_alloc = std::exchange(rhs.reg_alloc, nullptr); + reg = rhs.reg; + return *this; + } + + ScopedRegister(ScopedRegister&& rhs) noexcept + : reg_alloc{std::exchange(rhs.reg_alloc, nullptr)}, reg{rhs.reg} {} + + ScopedRegister& operator=(const ScopedRegister&) = delete; + ScopedRegister(const ScopedRegister&) = delete; + + RegAlloc* reg_alloc{}; + Register reg; +}; + +std::string Texture(EmitContext& ctx, IR::TextureInstInfo info, + [[maybe_unused]] const IR::Value& index) { + // FIXME: indexed reads + if (info.type == TextureType::Buffer) { + return fmt::format("texture[{}]", ctx.texture_buffer_bindings.at(info.descriptor_index)); + } else { + return fmt::format("texture[{}]", ctx.texture_bindings.at(info.descriptor_index)); + } +} + +std::string Image(EmitContext& ctx, IR::TextureInstInfo info, + [[maybe_unused]] const IR::Value& index) { + // FIXME: indexed reads + if (info.type == TextureType::Buffer) { + return fmt::format("image[{}]", ctx.image_buffer_bindings.at(info.descriptor_index)); + } else { + return fmt::format("image[{}]", ctx.image_bindings.at(info.descriptor_index)); + } +} + +std::string_view TextureType(IR::TextureInstInfo info) { + if (info.is_depth) { + switch (info.type) { + case TextureType::Color1D: + return "SHADOW1D"; + case TextureType::ColorArray1D: + return "SHADOWARRAY1D"; + case TextureType::Color2D: + return "SHADOW2D"; + case TextureType::ColorArray2D: + return "SHADOWARRAY2D"; + case TextureType::Color3D: + return "SHADOW3D"; + case TextureType::ColorCube: + return "SHADOWCUBE"; + case TextureType::ColorArrayCube: + return "SHADOWARRAYCUBE"; + case TextureType::Buffer: + return "SHADOWBUFFER"; + } + } else { + switch (info.type) { + case TextureType::Color1D: + return "1D"; + case TextureType::ColorArray1D: + return "ARRAY1D"; + case TextureType::Color2D: + return "2D"; 
+ case TextureType::ColorArray2D: + return "ARRAY2D"; + case TextureType::Color3D: + return "3D"; + case TextureType::ColorCube: + return "CUBE"; + case TextureType::ColorArrayCube: + return "ARRAYCUBE"; + case TextureType::Buffer: + return "BUFFER"; + } + } + throw InvalidArgument("Invalid texture type {}", info.type.Value()); +} + +std::string Offset(EmitContext& ctx, const IR::Value& offset) { + if (offset.IsEmpty()) { + return ""; + } + return fmt::format(",offset({})", Register{ctx.reg_alloc.Consume(offset)}); +} + +std::pair<ScopedRegister, ScopedRegister> AllocOffsetsRegs(EmitContext& ctx, + const IR::Value& offset2) { + if (offset2.IsEmpty()) { + return {}; + } else { + return {ctx.reg_alloc, ctx.reg_alloc}; + } +} + +void SwizzleOffsets(EmitContext& ctx, Register off_x, Register off_y, const IR::Value& offset1, + const IR::Value& offset2) { + const Register offsets_a{ctx.reg_alloc.Consume(offset1)}; + const Register offsets_b{ctx.reg_alloc.Consume(offset2)}; + // Input swizzle: [XYXY] [XYXY] + // Output swizzle: [XXXX] [YYYY] + ctx.Add("MOV {}.x,{}.x;" + "MOV {}.y,{}.z;" + "MOV {}.z,{}.x;" + "MOV {}.w,{}.z;" + "MOV {}.x,{}.y;" + "MOV {}.y,{}.w;" + "MOV {}.z,{}.y;" + "MOV {}.w,{}.w;", + off_x, offsets_a, off_x, offsets_a, off_x, offsets_b, off_x, offsets_b, off_y, + offsets_a, off_y, offsets_a, off_y, offsets_b, off_y, offsets_b); +} + +std::string GradOffset(const IR::Value& offset) { + if (offset.IsImmediate()) { + LOG_WARNING(Shader_GLASM, "Gradient offset is a scalar immediate"); + return ""; + } + IR::Inst* const vector{offset.InstRecursive()}; + if (!vector->AreAllArgsImmediates()) { + LOG_WARNING(Shader_GLASM, "Gradient offset vector is not immediate"); + return ""; + } + switch (vector->NumArgs()) { + case 1: + return fmt::format(",({})", static_cast<s32>(vector->Arg(0).U32())); + case 2: + return fmt::format(",({},{})", static_cast<s32>(vector->Arg(0).U32()), + static_cast<s32>(vector->Arg(1).U32())); + default: + throw LogicError("Invalid number of gradient offsets {}", vector->NumArgs()); + } +} + +std::pair<std::string, ScopedRegister> Coord(EmitContext& ctx, const IR::Value& coord) { + if (coord.IsImmediate()) { + ScopedRegister scoped_reg(ctx.reg_alloc); + ctx.Add("MOV.U {}.x,{};", scoped_reg.reg, ScalarU32{ctx.reg_alloc.Consume(coord)}); + return {fmt::to_string(scoped_reg.reg), std::move(scoped_reg)}; + } + std::string coord_vec{fmt::to_string(Register{ctx.reg_alloc.Consume(coord)})}; + if (coord.InstRecursive()->HasUses()) { + // Move non-dead coords to a separate register, although this should never happen because + // vectors are only assembled for immediate texture instructions + ctx.Add("MOV.F RC,{};", coord_vec); + coord_vec = "RC"; + } + return {std::move(coord_vec), ScopedRegister{}}; +} + +void StoreSparse(EmitContext& ctx, IR::Inst* sparse_inst) { + if (!sparse_inst) { + return; + } + const Register sparse_ret{ctx.reg_alloc.Define(*sparse_inst)}; + ctx.Add("MOV.S {},-1;" + "MOV.S {}(NONRESIDENT),0;", + sparse_ret, sparse_ret); +} + +std::string_view FormatStorage(ImageFormat format) { + switch (format) { + case ImageFormat::Typeless: + return "U"; + case ImageFormat::R8_UINT: + return "U8"; + case ImageFormat::R8_SINT: + return "S8"; + case ImageFormat::R16_UINT: + return "U16"; + case ImageFormat::R16_SINT: + return "S16"; + case ImageFormat::R32_UINT: + return "U32"; + case ImageFormat::R32G32_UINT: + return "U32X2"; + case ImageFormat::R32G32B32A32_UINT: + return "U32X4"; + } + throw InvalidArgument("Invalid image format {}", format); +} + +template 
<typename T> +void ImageAtomic(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, T value, + std::string_view op) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const std::string_view type{TextureType(info)}; + const std::string image{Image(ctx, info, index)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + ctx.Add("ATOMIM.{} {},{},{},{},{};", op, ret, value, coord, image, type); +} + +IR::Inst* PrepareSparse(IR::Inst& inst) { + const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; + if (sparse_inst) { + sparse_inst->Invalidate(); + } + return sparse_inst; +} +} // Anonymous namespace + +void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, Register bias_lc, const IR::Value& offset) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto sparse_inst{PrepareSparse(inst)}; + const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; + const std::string_view lod_clamp_mod{info.has_lod_clamp ? ".LODCLAMP" : ""}; + const std::string_view type{TextureType(info)}; + const std::string texture{Texture(ctx, info, index)}; + const std::string offset_vec{Offset(ctx, offset)}; + const auto [coord_vec, coord_alloc]{Coord(ctx, coord)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + if (info.has_bias) { + if (info.type == TextureType::ColorArrayCube) { + ctx.Add("TXB.F{}{} {},{},{},{},ARRAYCUBE{};", lod_clamp_mod, sparse_mod, ret, coord_vec, + bias_lc, texture, offset_vec); + } else { + if (info.has_lod_clamp) { + ctx.Add("MOV.F {}.w,{}.x;" + "TXB.F.LODCLAMP{} {},{},{}.y,{},{}{};", + coord_vec, bias_lc, sparse_mod, ret, coord_vec, bias_lc, texture, type, + offset_vec); + } else { + ctx.Add("MOV.F {}.w,{}.x;" + "TXB.F{} {},{},{},{}{};", + coord_vec, bias_lc, sparse_mod, ret, coord_vec, texture, type, offset_vec); + } + } + } else { + if (info.has_lod_clamp && info.type == TextureType::ColorArrayCube) { + ctx.Add("TEX.F.LODCLAMP{} {},{},{},{},ARRAYCUBE{};", sparse_mod, ret, coord_vec, + bias_lc, texture, offset_vec); + } else { + ctx.Add("TEX.F{}{} {},{},{},{}{};", lod_clamp_mod, sparse_mod, ret, coord_vec, texture, + type, offset_vec); + } + } + StoreSparse(ctx, sparse_inst); +} + +void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, ScalarF32 lod, const IR::Value& offset) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto sparse_inst{PrepareSparse(inst)}; + const std::string_view sparse_mod{sparse_inst ? 
".SPARSE" : ""}; + const std::string_view type{TextureType(info)}; + const std::string texture{Texture(ctx, info, index)}; + const std::string offset_vec{Offset(ctx, offset)}; + const auto [coord_vec, coord_alloc]{Coord(ctx, coord)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + if (info.type == TextureType::ColorArrayCube) { + ctx.Add("TXL.F{} {},{},{},{},ARRAYCUBE{};", sparse_mod, ret, coord_vec, lod, texture, + offset_vec); + } else { + ctx.Add("MOV.F {}.w,{};" + "TXL.F{} {},{},{},{}{};", + coord_vec, lod, sparse_mod, ret, coord_vec, texture, type, offset_vec); + } + StoreSparse(ctx, sparse_inst); +} + +void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& dref, + const IR::Value& bias_lc, const IR::Value& offset) { + // Allocate early to avoid aliases + const auto info{inst.Flags<IR::TextureInstInfo>()}; + ScopedRegister staging; + if (info.type == TextureType::ColorArrayCube) { + staging = ScopedRegister{ctx.reg_alloc}; + } + const ScalarF32 dref_val{ctx.reg_alloc.Consume(dref)}; + const Register bias_lc_vec{ctx.reg_alloc.Consume(bias_lc)}; + const auto sparse_inst{PrepareSparse(inst)}; + const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; + const std::string_view type{TextureType(info)}; + const std::string texture{Texture(ctx, info, index)}; + const std::string offset_vec{Offset(ctx, offset)}; + const auto [coord_vec, coord_alloc]{Coord(ctx, coord)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + if (info.has_bias) { + if (info.has_lod_clamp) { + switch (info.type) { + case TextureType::Color1D: + case TextureType::ColorArray1D: + case TextureType::Color2D: + ctx.Add("MOV.F {}.z,{};" + "MOV.F {}.w,{}.x;" + "TXB.F.LODCLAMP{} {},{},{}.y,{},{}{};", + coord_vec, dref_val, coord_vec, bias_lc_vec, sparse_mod, ret, coord_vec, + bias_lc_vec, texture, type, offset_vec); + break; + case TextureType::ColorArray2D: + case TextureType::ColorCube: + ctx.Add("MOV.F {}.w,{};" + "TXB.F.LODCLAMP{} {},{},{},{},{}{};", + coord_vec, dref_val, sparse_mod, ret, coord_vec, bias_lc_vec, texture, type, + offset_vec); + break; + default: + throw NotImplementedException("Invalid type {} with bias and lod clamp", + info.type.Value()); + } + } else { + switch (info.type) { + case TextureType::Color1D: + case TextureType::ColorArray1D: + case TextureType::Color2D: + ctx.Add("MOV.F {}.z,{};" + "MOV.F {}.w,{}.x;" + "TXB.F{} {},{},{},{}{};", + coord_vec, dref_val, coord_vec, bias_lc_vec, sparse_mod, ret, coord_vec, + texture, type, offset_vec); + break; + case TextureType::ColorArray2D: + case TextureType::ColorCube: + ctx.Add("MOV.F {}.w,{};" + "TXB.F{} {},{},{},{},{}{};", + coord_vec, dref_val, sparse_mod, ret, coord_vec, bias_lc_vec, texture, type, + offset_vec); + break; + case TextureType::ColorArrayCube: + ctx.Add("MOV.F {}.x,{};" + "MOV.F {}.y,{}.x;" + "TXB.F{} {},{},{},{},{}{};", + staging.reg, dref_val, staging.reg, bias_lc_vec, sparse_mod, ret, coord_vec, + staging.reg, texture, type, offset_vec); + break; + default: + throw NotImplementedException("Invalid type {}", info.type.Value()); + } + } + } else { + if (info.has_lod_clamp) { + if (info.type != TextureType::ColorArrayCube) { + const bool w_swizzle{info.type == TextureType::ColorArray2D || + info.type == TextureType::ColorCube}; + const char dref_swizzle{w_swizzle ? 
'w' : 'z'}; + ctx.Add("MOV.F {}.{},{};" + "TEX.F.LODCLAMP{} {},{},{},{},{}{};", + coord_vec, dref_swizzle, dref_val, sparse_mod, ret, coord_vec, bias_lc_vec, + texture, type, offset_vec); + } else { + ctx.Add("MOV.F {}.x,{};" + "MOV.F {}.y,{};" + "TEX.F.LODCLAMP{} {},{},{},{},{}{};", + staging.reg, dref_val, staging.reg, bias_lc_vec, sparse_mod, ret, coord_vec, + staging.reg, texture, type, offset_vec); + } + } else { + if (info.type != TextureType::ColorArrayCube) { + const bool w_swizzle{info.type == TextureType::ColorArray2D || + info.type == TextureType::ColorCube}; + const char dref_swizzle{w_swizzle ? 'w' : 'z'}; + ctx.Add("MOV.F {}.{},{};" + "TEX.F{} {},{},{},{}{};", + coord_vec, dref_swizzle, dref_val, sparse_mod, ret, coord_vec, texture, + type, offset_vec); + } else { + ctx.Add("TEX.F{} {},{},{},{},{}{};", sparse_mod, ret, coord_vec, dref_val, texture, + type, offset_vec); + } + } + } + StoreSparse(ctx, sparse_inst); +} + +void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& dref, + const IR::Value& lod, const IR::Value& offset) { + // Allocate early to avoid aliases + const auto info{inst.Flags<IR::TextureInstInfo>()}; + ScopedRegister staging; + if (info.type == TextureType::ColorArrayCube) { + staging = ScopedRegister{ctx.reg_alloc}; + } + const ScalarF32 dref_val{ctx.reg_alloc.Consume(dref)}; + const ScalarF32 lod_val{ctx.reg_alloc.Consume(lod)}; + const auto sparse_inst{PrepareSparse(inst)}; + const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; + const std::string_view type{TextureType(info)}; + const std::string texture{Texture(ctx, info, index)}; + const std::string offset_vec{Offset(ctx, offset)}; + const auto [coord_vec, coord_alloc]{Coord(ctx, coord)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + switch (info.type) { + case TextureType::Color1D: + case TextureType::ColorArray1D: + case TextureType::Color2D: + ctx.Add("MOV.F {}.z,{};" + "MOV.F {}.w,{};" + "TXL.F{} {},{},{},{}{};", + coord_vec, dref_val, coord_vec, lod_val, sparse_mod, ret, coord_vec, texture, type, + offset_vec); + break; + case TextureType::ColorArray2D: + case TextureType::ColorCube: + ctx.Add("MOV.F {}.w,{};" + "TXL.F{} {},{},{},{},{}{};", + coord_vec, dref_val, sparse_mod, ret, coord_vec, lod_val, texture, type, + offset_vec); + break; + case TextureType::ColorArrayCube: + ctx.Add("MOV.F {}.x,{};" + "MOV.F {}.y,{};" + "TXL.F{} {},{},{},{},{}{};", + staging.reg, dref_val, staging.reg, lod_val, sparse_mod, ret, coord_vec, + staging.reg, texture, type, offset_vec); + break; + default: + throw NotImplementedException("Invalid type {}", info.type.Value()); + } + StoreSparse(ctx, sparse_inst); +} + +void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2) { + // Allocate offsets early so they don't overwrite any consumed register + const auto [off_x, off_y]{AllocOffsetsRegs(ctx, offset2)}; + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const char comp{"xyzw"[info.gather_component]}; + const auto sparse_inst{PrepareSparse(inst)}; + const std::string_view sparse_mod{sparse_inst ? 
".SPARSE" : ""}; + const std::string_view type{TextureType(info)}; + const std::string texture{Texture(ctx, info, index)}; + const Register coord_vec{ctx.reg_alloc.Consume(coord)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + if (offset2.IsEmpty()) { + const std::string offset_vec{Offset(ctx, offset)}; + ctx.Add("TXG.F{} {},{},{}.{},{}{};", sparse_mod, ret, coord_vec, texture, comp, type, + offset_vec); + } else { + SwizzleOffsets(ctx, off_x.reg, off_y.reg, offset, offset2); + ctx.Add("TXGO.F{} {},{},{},{},{}.{},{};", sparse_mod, ret, coord_vec, off_x.reg, off_y.reg, + texture, comp, type); + } + StoreSparse(ctx, sparse_inst); +} + +void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2, + const IR::Value& dref) { + // FIXME: This instruction is not working as expected + + // Allocate offsets early so they don't overwrite any consumed register + const auto [off_x, off_y]{AllocOffsetsRegs(ctx, offset2)}; + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto sparse_inst{PrepareSparse(inst)}; + const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; + const std::string_view type{TextureType(info)}; + const std::string texture{Texture(ctx, info, index)}; + const Register coord_vec{ctx.reg_alloc.Consume(coord)}; + const ScalarF32 dref_value{ctx.reg_alloc.Consume(dref)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + std::string args; + switch (info.type) { + case TextureType::Color2D: + ctx.Add("MOV.F {}.z,{};", coord_vec, dref_value); + args = fmt::to_string(coord_vec); + break; + case TextureType::ColorArray2D: + case TextureType::ColorCube: + ctx.Add("MOV.F {}.w,{};", coord_vec, dref_value); + args = fmt::to_string(coord_vec); + break; + case TextureType::ColorArrayCube: + args = fmt::format("{},{}", coord_vec, dref_value); + break; + default: + throw NotImplementedException("Invalid type {}", info.type.Value()); + } + if (offset2.IsEmpty()) { + const std::string offset_vec{Offset(ctx, offset)}; + ctx.Add("TXG.F{} {},{},{},{}{};", sparse_mod, ret, args, texture, type, offset_vec); + } else { + SwizzleOffsets(ctx, off_x.reg, off_y.reg, offset, offset2); + ctx.Add("TXGO.F{} {},{},{},{},{},{};", sparse_mod, ret, args, off_x.reg, off_y.reg, texture, + type); + } + StoreSparse(ctx, sparse_inst); +} + +void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& offset, ScalarS32 lod, ScalarS32 ms) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto sparse_inst{PrepareSparse(inst)}; + const std::string_view sparse_mod{sparse_inst ? 
".SPARSE" : ""}; + const std::string_view type{TextureType(info)}; + const std::string texture{Texture(ctx, info, index)}; + const std::string offset_vec{Offset(ctx, offset)}; + const auto [coord_vec, coord_alloc]{Coord(ctx, coord)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + if (info.type == TextureType::Buffer) { + ctx.Add("TXF.F{} {},{},{},{}{};", sparse_mod, ret, coord_vec, texture, type, offset_vec); + } else if (ms.type != Type::Void) { + ctx.Add("MOV.S {}.w,{};" + "TXFMS.F{} {},{},{},{}{};", + coord_vec, ms, sparse_mod, ret, coord_vec, texture, type, offset_vec); + } else { + ctx.Add("MOV.S {}.w,{};" + "TXF.F{} {},{},{},{}{};", + coord_vec, lod, sparse_mod, ret, coord_vec, texture, type, offset_vec); + } + StoreSparse(ctx, sparse_inst); +} + +void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + ScalarS32 lod) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const std::string texture{Texture(ctx, info, index)}; + const std::string_view type{TextureType(info)}; + ctx.Add("TXQ {},{},{},{};", inst, lod, texture, type); +} + +void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const std::string texture{Texture(ctx, info, index)}; + const std::string_view type{TextureType(info)}; + ctx.Add("LOD.F {},{},{},{};", inst, coord, texture, type); +} + +void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& derivatives, + const IR::Value& offset, const IR::Value& lod_clamp) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + ScopedRegister dpdx, dpdy; + const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp}; + if (multi_component) { + // Allocate this early to avoid aliasing other registers + dpdx = ScopedRegister{ctx.reg_alloc}; + dpdy = ScopedRegister{ctx.reg_alloc}; + } + const auto sparse_inst{PrepareSparse(inst)}; + const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; + const std::string_view type{TextureType(info)}; + const std::string texture{Texture(ctx, info, index)}; + const std::string offset_vec{GradOffset(offset)}; + const Register coord_vec{ctx.reg_alloc.Consume(coord)}; + const Register derivatives_vec{ctx.reg_alloc.Consume(derivatives)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + if (multi_component) { + ctx.Add("MOV.F {}.x,{}.x;" + "MOV.F {}.y,{}.z;" + "MOV.F {}.x,{}.y;" + "MOV.F {}.y,{}.w;", + dpdx.reg, derivatives_vec, dpdx.reg, derivatives_vec, dpdy.reg, derivatives_vec, + dpdy.reg, derivatives_vec); + if (info.has_lod_clamp) { + const ScalarF32 lod_clamp_value{ctx.reg_alloc.Consume(lod_clamp)}; + ctx.Add("MOV.F {}.w,{};" + "TXD.F.LODCLAMP{} {},{},{},{},{},{}{};", + dpdy.reg, lod_clamp_value, sparse_mod, ret, coord_vec, dpdx.reg, dpdy.reg, + texture, type, offset_vec); + } else { + ctx.Add("TXD.F{} {},{},{},{},{},{}{};", sparse_mod, ret, coord_vec, dpdx.reg, dpdy.reg, + texture, type, offset_vec); + } + } else { + ctx.Add("TXD.F{} {},{},{}.x,{}.y,{},{}{};", sparse_mod, ret, coord_vec, derivatives_vec, + derivatives_vec, texture, type, offset_vec); + } + StoreSparse(ctx, sparse_inst); +} + +void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto sparse_inst{PrepareSparse(inst)}; + const std::string_view format{FormatStorage(info.image_format)}; + const std::string_view sparse_mod{sparse_inst ? 
".SPARSE" : ""}; + const std::string_view type{TextureType(info)}; + const std::string image{Image(ctx, info, index)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + ctx.Add("LOADIM.{}{} {},{},{},{};", format, sparse_mod, ret, coord, image, type); + StoreSparse(ctx, sparse_inst); +} + +void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + Register color) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const std::string_view format{FormatStorage(info.image_format)}; + const std::string_view type{TextureType(info)}; + const std::string image{Image(ctx, info, index)}; + ctx.Add("STOREIM.{} {},{},{},{};", format, image, color, coord, type); +} + +void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value) { + ImageAtomic(ctx, inst, index, coord, value, "ADD.U32"); +} + +void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarS32 value) { + ImageAtomic(ctx, inst, index, coord, value, "MIN.S32"); +} + +void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value) { + ImageAtomic(ctx, inst, index, coord, value, "MIN.U32"); +} + +void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarS32 value) { + ImageAtomic(ctx, inst, index, coord, value, "MAX.S32"); +} + +void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value) { + ImageAtomic(ctx, inst, index, coord, value, "MAX.U32"); +} + +void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value) { + ImageAtomic(ctx, inst, index, coord, value, "IWRAP.U32"); +} + +void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value) { + ImageAtomic(ctx, inst, index, coord, value, "DWRAP.U32"); +} + +void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value) { + ImageAtomic(ctx, inst, index, coord, value, "AND.U32"); +} + +void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value) { + ImageAtomic(ctx, inst, index, coord, value, "OR.U32"); +} + +void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value) { + ImageAtomic(ctx, inst, index, coord, value, "XOR.U32"); +} + +void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + Register coord, ScalarU32 value) { + ImageAtomic(ctx, inst, index, coord, value, "EXCH.U32"); +} + +void EmitBindlessImageSampleImplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageSampleExplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageSampleDrefImplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageSampleDrefExplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageGather(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageGatherDref(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageFetch(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageQueryDimensions(EmitContext&) { + 
throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageQueryLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageGradient(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageRead(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageWrite(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageSampleImplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageSampleExplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageSampleDrefImplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageSampleDrefExplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageGather(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageGatherDref(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageFetch(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageQueryDimensions(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageQueryLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageGradient(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageRead(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageWrite(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicIAdd32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicSMin32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicUMin32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicSMax32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicUMax32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicInc32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicDec32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicAnd32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicOr32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicXor32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicExchange32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicIAdd32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicSMin32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicUMin32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicSMax32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicUMax32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicInc32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicDec32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicAnd32(EmitContext&) { + throw 
LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicOr32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicXor32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicExchange32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h new file mode 100644 index 000000000..12afda43b --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -0,0 +1,625 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" +#include "shader_recompiler/backend/glasm/reg_alloc.h" + +namespace Shader::IR { +enum class Attribute : u64; +enum class Patch : u64; +class Inst; +class Value; +} // namespace Shader::IR + +namespace Shader::Backend::GLASM { + +class EmitContext; + +// Microinstruction emitters +void EmitPhi(EmitContext& ctx, IR::Inst& inst); +void EmitVoid(EmitContext& ctx); +void EmitIdentity(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); +void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); +void EmitReference(EmitContext&, const IR::Value& value); +void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value); +void EmitJoin(EmitContext& ctx); +void EmitDemoteToHelperInvocation(EmitContext& ctx); +void EmitBarrier(EmitContext& ctx); +void EmitWorkgroupMemoryBarrier(EmitContext& ctx); +void EmitDeviceMemoryBarrier(EmitContext& ctx); +void EmitPrologue(EmitContext& ctx); +void EmitEpilogue(EmitContext& ctx); +void EmitEmitVertex(EmitContext& ctx, ScalarS32 stream); +void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream); +void EmitGetRegister(EmitContext& ctx); +void EmitSetRegister(EmitContext& ctx); +void EmitGetPred(EmitContext& ctx); +void EmitSetPred(EmitContext& ctx); +void EmitSetGotoVariable(EmitContext& ctx); +void EmitGetGotoVariable(EmitContext& ctx); +void EmitSetIndirectBranchVariable(EmitContext& ctx); +void EmitGetIndirectBranchVariable(EmitContext& ctx); +void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); +void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); +void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); +void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); +void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); +void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); +void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); +void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32 vertex); +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value, ScalarU32 vertex); +void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, ScalarS32 offset, ScalarU32 vertex); +void EmitSetAttributeIndexed(EmitContext& ctx, ScalarU32 offset, ScalarF32 value, ScalarU32 vertex); +void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch); +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, ScalarF32 value); +void 
EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, ScalarF32 value); +void EmitSetSampleMask(EmitContext& ctx, ScalarS32 value); +void EmitSetFragDepth(EmitContext& ctx, ScalarF32 value); +void EmitGetZFlag(EmitContext& ctx); +void EmitGetSFlag(EmitContext& ctx); +void EmitGetCFlag(EmitContext& ctx); +void EmitGetOFlag(EmitContext& ctx); +void EmitSetZFlag(EmitContext& ctx); +void EmitSetSFlag(EmitContext& ctx); +void EmitSetCFlag(EmitContext& ctx); +void EmitSetOFlag(EmitContext& ctx); +void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst); +void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst); +void EmitInvocationId(EmitContext& ctx, IR::Inst& inst); +void EmitSampleId(EmitContext& ctx, IR::Inst& inst); +void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst); +void EmitYDirection(EmitContext& ctx, IR::Inst& inst); +void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, ScalarU32 word_offset); +void EmitWriteLocal(EmitContext& ctx, ScalarU32 word_offset, ScalarU32 value); +void EmitUndefU1(EmitContext& ctx, IR::Inst& inst); +void EmitUndefU8(EmitContext& ctx, IR::Inst& inst); +void EmitUndefU16(EmitContext& ctx, IR::Inst& inst); +void EmitUndefU32(EmitContext& ctx, IR::Inst& inst); +void EmitUndefU64(EmitContext& ctx, IR::Inst& inst); +void EmitLoadGlobalU8(EmitContext& ctx, IR::Inst& inst, Register address); +void EmitLoadGlobalS8(EmitContext& ctx, IR::Inst& inst, Register address); +void EmitLoadGlobalU16(EmitContext& ctx, IR::Inst& inst, Register address); +void EmitLoadGlobalS16(EmitContext& ctx, IR::Inst& inst, Register address); +void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, Register address); +void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, Register address); +void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, Register address); +void EmitWriteGlobalU8(EmitContext& ctx, Register address, Register value); +void EmitWriteGlobalS8(EmitContext& ctx, Register address, Register value); +void EmitWriteGlobalU16(EmitContext& ctx, Register address, Register value); +void EmitWriteGlobalS16(EmitContext& ctx, Register address, Register value); +void EmitWriteGlobal32(EmitContext& ctx, Register address, ScalarU32 value); +void EmitWriteGlobal64(EmitContext& ctx, Register address, Register value); +void EmitWriteGlobal128(EmitContext& ctx, Register address, Register value); +void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset); +void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset); +void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset); +void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset); +void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset); +void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset); +void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset); +void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + ScalarU32 value); +void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + ScalarS32 value); +void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + ScalarU32 value); +void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + ScalarS32 value); +void 
EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + ScalarU32 value); +void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + Register value); +void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + Register value); +void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); +void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); +void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); +void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); +void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); +void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); +void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); +void EmitWriteSharedU8(EmitContext& ctx, ScalarU32 offset, ScalarU32 value); +void EmitWriteSharedU16(EmitContext& ctx, ScalarU32 offset, ScalarU32 value); +void EmitWriteSharedU32(EmitContext& ctx, ScalarU32 offset, ScalarU32 value); +void EmitWriteSharedU64(EmitContext& ctx, ScalarU32 offset, Register value); +void EmitWriteSharedU128(EmitContext& ctx, ScalarU32 offset, Register value); +void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, + const IR::Value& e2); +void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, + const IR::Value& e2, const IR::Value& e3); +void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, + const IR::Value& e2, const IR::Value& e3, const IR::Value& e4); +void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index); +void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index); +void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index); +void EmitCompositeInsertU32x2(EmitContext& ctx, Register composite, ScalarU32 object, u32 index); +void EmitCompositeInsertU32x3(EmitContext& ctx, Register composite, ScalarU32 object, u32 index); +void EmitCompositeInsertU32x4(EmitContext& ctx, Register composite, ScalarU32 object, u32 index); +void EmitCompositeConstructF16x2(EmitContext& ctx, Register e1, Register e2); +void EmitCompositeConstructF16x3(EmitContext& ctx, Register e1, Register e2, Register e3); +void EmitCompositeConstructF16x4(EmitContext& ctx, Register e1, Register e2, Register e3, + Register e4); +void EmitCompositeExtractF16x2(EmitContext& ctx, Register composite, u32 index); +void EmitCompositeExtractF16x3(EmitContext& ctx, Register composite, u32 index); +void EmitCompositeExtractF16x4(EmitContext& ctx, Register composite, u32 index); +void EmitCompositeInsertF16x2(EmitContext& ctx, Register composite, Register object, u32 index); +void EmitCompositeInsertF16x3(EmitContext& ctx, Register composite, Register object, u32 index); +void EmitCompositeInsertF16x4(EmitContext& ctx, Register composite, Register object, u32 index); +void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, + const IR::Value& e2); +void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, + const IR::Value& e2, const IR::Value& e3); +void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, + const IR::Value& e2, const IR::Value& e3, const IR::Value& e4); +void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 
index); +void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index); +void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index); +void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, Register composite, + ScalarF32 object, u32 index); +void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, Register composite, + ScalarF32 object, u32 index); +void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, Register composite, + ScalarF32 object, u32 index); +void EmitCompositeConstructF64x2(EmitContext& ctx); +void EmitCompositeConstructF64x3(EmitContext& ctx); +void EmitCompositeConstructF64x4(EmitContext& ctx); +void EmitCompositeExtractF64x2(EmitContext& ctx); +void EmitCompositeExtractF64x3(EmitContext& ctx); +void EmitCompositeExtractF64x4(EmitContext& ctx); +void EmitCompositeInsertF64x2(EmitContext& ctx, Register composite, Register object, u32 index); +void EmitCompositeInsertF64x3(EmitContext& ctx, Register composite, Register object, u32 index); +void EmitCompositeInsertF64x4(EmitContext& ctx, Register composite, Register object, u32 index); +void EmitSelectU1(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value, + ScalarS32 false_value); +void EmitSelectU8(EmitContext& ctx, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value); +void EmitSelectU16(EmitContext& ctx, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value); +void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value, + ScalarS32 false_value); +void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, Register true_value, + Register false_value); +void EmitSelectF16(EmitContext& ctx, ScalarS32 cond, Register true_value, Register false_value); +void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value, + ScalarS32 false_value); +void EmitSelectF64(EmitContext& ctx, ScalarS32 cond, Register true_value, Register false_value); +void EmitBitCastU16F16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); +void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); +void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); +void EmitBitCastF16U16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); +void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); +void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); +void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitPackFloat2x16(EmitContext& ctx, Register value); +void EmitUnpackFloat2x16(EmitContext& ctx, Register value); +void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitGetZeroFromOp(EmitContext& ctx); +void EmitGetSignFromOp(EmitContext& ctx); +void EmitGetCarryFromOp(EmitContext& ctx); +void EmitGetOverflowFromOp(EmitContext& ctx); +void EmitGetSparseFromOp(EmitContext& ctx); +void EmitGetInBoundsFromOp(EmitContext& ctx); +void EmitFPAbs16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPAbs64(EmitContext& ctx, IR::Inst& 
inst, ScalarF64 value); +void EmitFPAdd16(EmitContext& ctx, IR::Inst& inst, Register a, Register b); +void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b); +void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b); +void EmitFPFma16(EmitContext& ctx, IR::Inst& inst, Register a, Register b, Register c); +void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b, ScalarF32 c); +void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b, ScalarF64 c); +void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b); +void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b); +void EmitFPMin32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b); +void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b); +void EmitFPMul16(EmitContext& ctx, IR::Inst& inst, Register a, Register b); +void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b); +void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b); +void EmitFPNeg16(EmitContext& ctx, Register value); +void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, ScalarRegister value); +void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitFPSin(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPCos(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPExp2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPRecip64(EmitContext& ctx, Register value); +void EmitFPRecipSqrt32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPRecipSqrt64(EmitContext& ctx, Register value); +void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPSaturate16(EmitContext& ctx, Register value); +void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPSaturate64(EmitContext& ctx, Register value); +void EmitFPClamp16(EmitContext& ctx, Register value, Register min_value, Register max_value); +void EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value, ScalarF32 min_value, + ScalarF32 max_value); +void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value, ScalarF64 min_value, + ScalarF64 max_value); +void EmitFPRoundEven16(EmitContext& ctx, Register value); +void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); +void EmitFPFloor16(EmitContext& ctx, Register value); +void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); +void EmitFPCeil16(EmitContext& ctx, Register value); +void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); +void EmitFPTrunc16(EmitContext& ctx, Register value); +void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); +void EmitFPOrdEqual16(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); +void EmitFPUnordEqual16(EmitContext& ctx, Register lhs, Register rhs); +void 
EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); +void EmitFPOrdNotEqual16(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); +void EmitFPUnordNotEqual16(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); +void EmitFPOrdLessThan16(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); +void EmitFPUnordLessThan16(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); +void EmitFPOrdGreaterThan16(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); +void EmitFPUnordGreaterThan16(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); +void EmitFPOrdLessThanEqual16(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); +void EmitFPUnordLessThanEqual16(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); +void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); +void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); +void EmitFPIsNan16(EmitContext& ctx, Register value); +void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); +void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); +void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, Register a, Register b); +void EmitISub32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); +void EmitISub64(EmitContext& ctx, IR::Inst& inst, Register a, Register b); +void EmitIMul32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); +void EmitINeg32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); +void EmitINeg64(EmitContext& ctx, IR::Inst& inst, 
Register value); +void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); +void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift); +void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, ScalarU32 shift); +void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift); +void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, + ScalarU32 shift); +void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 shift); +void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, + ScalarS32 shift); +void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); +void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); +void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); +void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 insert, + ScalarS32 offset, ScalarS32 count); +void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 offset, + ScalarS32 count); +void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 offset, + ScalarU32 count); +void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); +void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); +void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); +void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); +void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value); +void EmitSMin32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); +void EmitUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b); +void EmitSMax32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); +void EmitUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b); +void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value, ScalarS32 min, ScalarS32 max); +void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 min, ScalarU32 max); +void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs); +void EmitULessThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs); +void EmitIEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs); +void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs); +void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs); +void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs); +void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs); +void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs); +void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs); +void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs); +void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value); +void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarS32 value); +void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value); +void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarS32 value); +void EmitSharedAtomicUMax32(EmitContext& ctx, 
IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value); +void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value); +void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value); +void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value); +void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value); +void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value); +void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value); +void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + Register value); +void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value); +void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarS32 value); +void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value); +void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarS32 value); +void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value); +void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value); +void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value); +void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value); +void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value); +void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value); +void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value); +void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarF32 value); +void EmitStorageAtomicAddF16x2(EmitContext& ctx, 
IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicAddF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicMinF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicMaxF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitGlobalAtomicIAdd32(EmitContext& ctx); +void EmitGlobalAtomicSMin32(EmitContext& ctx); +void EmitGlobalAtomicUMin32(EmitContext& ctx); +void EmitGlobalAtomicSMax32(EmitContext& ctx); +void EmitGlobalAtomicUMax32(EmitContext& ctx); +void EmitGlobalAtomicInc32(EmitContext& ctx); +void EmitGlobalAtomicDec32(EmitContext& ctx); +void EmitGlobalAtomicAnd32(EmitContext& ctx); +void EmitGlobalAtomicOr32(EmitContext& ctx); +void EmitGlobalAtomicXor32(EmitContext& ctx); +void EmitGlobalAtomicExchange32(EmitContext& ctx); +void EmitGlobalAtomicIAdd64(EmitContext& ctx); +void EmitGlobalAtomicSMin64(EmitContext& ctx); +void EmitGlobalAtomicUMin64(EmitContext& ctx); +void EmitGlobalAtomicSMax64(EmitContext& ctx); +void EmitGlobalAtomicUMax64(EmitContext& ctx); +void EmitGlobalAtomicInc64(EmitContext& ctx); +void EmitGlobalAtomicDec64(EmitContext& ctx); +void EmitGlobalAtomicAnd64(EmitContext& ctx); +void EmitGlobalAtomicOr64(EmitContext& ctx); +void EmitGlobalAtomicXor64(EmitContext& ctx); +void EmitGlobalAtomicExchange64(EmitContext& ctx); +void EmitGlobalAtomicAddF32(EmitContext& ctx); +void EmitGlobalAtomicAddF16x2(EmitContext& ctx); +void EmitGlobalAtomicAddF32x2(EmitContext& ctx); +void EmitGlobalAtomicMinF16x2(EmitContext& ctx); +void EmitGlobalAtomicMinF32x2(EmitContext& ctx); +void EmitGlobalAtomicMaxF16x2(EmitContext& ctx); +void EmitGlobalAtomicMaxF32x2(EmitContext& ctx); +void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); +void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); +void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); +void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); +void EmitConvertS16F16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitConvertS16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); +void EmitConvertS32F16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); +void EmitConvertS64F16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); +void EmitConvertU16F16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertU16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitConvertU16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); +void EmitConvertU32F16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitConvertU32F64(EmitContext& ctx, IR::Inst& 
inst, ScalarF64 value); +void EmitConvertU64F16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); +void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value); +void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitConvertF32F16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); +void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitConvertF16S8(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF16S16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF16S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); +void EmitConvertF16S64(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF16U8(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF16U16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF16U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value); +void EmitConvertF16U64(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF32S8(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF32S16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF32S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); +void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF32U8(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value); +void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF64S8(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF64S16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); +void EmitConvertF64S64(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF64U8(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF64U16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value); +void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitBindlessImageSampleImplicitLod(EmitContext&); +void EmitBindlessImageSampleExplicitLod(EmitContext&); +void EmitBindlessImageSampleDrefImplicitLod(EmitContext&); +void EmitBindlessImageSampleDrefExplicitLod(EmitContext&); +void EmitBindlessImageGather(EmitContext&); +void EmitBindlessImageGatherDref(EmitContext&); +void EmitBindlessImageFetch(EmitContext&); +void EmitBindlessImageQueryDimensions(EmitContext&); +void EmitBindlessImageQueryLod(EmitContext&); +void EmitBindlessImageGradient(EmitContext&); +void EmitBindlessImageRead(EmitContext&); +void EmitBindlessImageWrite(EmitContext&); +void EmitBoundImageSampleImplicitLod(EmitContext&); +void EmitBoundImageSampleExplicitLod(EmitContext&); +void EmitBoundImageSampleDrefImplicitLod(EmitContext&); +void EmitBoundImageSampleDrefExplicitLod(EmitContext&); +void EmitBoundImageGather(EmitContext&); +void EmitBoundImageGatherDref(EmitContext&); +void EmitBoundImageFetch(EmitContext&); +void EmitBoundImageQueryDimensions(EmitContext&); +void EmitBoundImageQueryLod(EmitContext&); 
+void EmitBoundImageGradient(EmitContext&); +void EmitBoundImageRead(EmitContext&); +void EmitBoundImageWrite(EmitContext&); +void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, Register bias_lc, const IR::Value& offset); +void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, ScalarF32 lod, const IR::Value& offset); +void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& dref, + const IR::Value& bias_lc, const IR::Value& offset); +void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& dref, + const IR::Value& lod, const IR::Value& offset); +void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2); +void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2, + const IR::Value& dref); +void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& offset, ScalarS32 lod, ScalarS32 ms); +void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + ScalarS32 lod); +void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord); +void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& derivatives, + const IR::Value& offset, const IR::Value& lod_clamp); +void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord); +void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + Register color); +void EmitBindlessImageAtomicIAdd32(EmitContext&); +void EmitBindlessImageAtomicSMin32(EmitContext&); +void EmitBindlessImageAtomicUMin32(EmitContext&); +void EmitBindlessImageAtomicSMax32(EmitContext&); +void EmitBindlessImageAtomicUMax32(EmitContext&); +void EmitBindlessImageAtomicInc32(EmitContext&); +void EmitBindlessImageAtomicDec32(EmitContext&); +void EmitBindlessImageAtomicAnd32(EmitContext&); +void EmitBindlessImageAtomicOr32(EmitContext&); +void EmitBindlessImageAtomicXor32(EmitContext&); +void EmitBindlessImageAtomicExchange32(EmitContext&); +void EmitBoundImageAtomicIAdd32(EmitContext&); +void EmitBoundImageAtomicSMin32(EmitContext&); +void EmitBoundImageAtomicUMin32(EmitContext&); +void EmitBoundImageAtomicSMax32(EmitContext&); +void EmitBoundImageAtomicUMax32(EmitContext&); +void EmitBoundImageAtomicInc32(EmitContext&); +void EmitBoundImageAtomicDec32(EmitContext&); +void EmitBoundImageAtomicAnd32(EmitContext&); +void EmitBoundImageAtomicOr32(EmitContext&); +void EmitBoundImageAtomicXor32(EmitContext&); +void EmitBoundImageAtomicExchange32(EmitContext&); +void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value); +void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarS32 value); +void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value); +void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarS32 value); +void 
EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value); +void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value); +void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value); +void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value); +void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value); +void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value); +void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + Register coord, ScalarU32 value); +void EmitLaneId(EmitContext& ctx, IR::Inst& inst); +void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred); +void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred); +void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred); +void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred); +void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst); +void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst); +void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst); +void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst); +void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst); +void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, + const IR::Value& clamp, const IR::Value& segmentation_mask); +void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, + const IR::Value& clamp, const IR::Value& segmentation_mask); +void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, + const IR::Value& clamp, const IR::Value& segmentation_mask); +void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, + const IR::Value& clamp, const IR::Value& segmentation_mask); +void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a, ScalarF32 op_b, + ScalarU32 swizzle); +void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a); +void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a); +void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a); +void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a); + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp new file mode 100644 index 000000000..f55c26b76 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp @@ -0,0 +1,294 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLASM { +namespace { +void BitwiseLogicalOp(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b, + std::string_view lop) { + const auto zero = inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp); + const auto sign = inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp); + if (zero) { + zero->Invalidate(); + } + if (sign) { + sign->Invalidate(); + } + if (zero || sign) { + ctx.reg_alloc.InvalidateConditionCodes(); + } + const auto ret{ctx.reg_alloc.Define(inst)}; + ctx.Add("{}.S {}.x,{},{};", lop, ret, a, b); + if (zero) { + ctx.Add("SEQ.S {},{},0;", *zero, ret); + } + if (sign) { + ctx.Add("SLT.S {},{},0;", *sign, ret); + } +} +} // Anonymous namespace + +void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { + const std::array flags{ + inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp), + inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp), + inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp), + inst.GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp), + }; + for (IR::Inst* const flag_inst : flags) { + if (flag_inst) { + flag_inst->Invalidate(); + } + } + const bool cc{inst.HasAssociatedPseudoOperation()}; + const std::string_view cc_mod{cc ? ".CC" : ""}; + if (cc) { + ctx.reg_alloc.InvalidateConditionCodes(); + } + const auto ret{ctx.reg_alloc.Define(inst)}; + ctx.Add("ADD.S{} {}.x,{},{};", cc_mod, ret, a, b); + if (!cc) { + return; + } + static constexpr std::array<std::string_view, 4> masks{"", "SF", "CF", "OF"}; + for (size_t flag_index = 0; flag_index < flags.size(); ++flag_index) { + if (!flags[flag_index]) { + continue; + } + const auto flag_ret{ctx.reg_alloc.Define(*flags[flag_index])}; + if (flag_index == 0) { + ctx.Add("SEQ.S {}.x,{}.x,0;", flag_ret, ret); + } else { + // We could use conditional execution here, but it's broken on Nvidia's compiler + ctx.Add("IF {}.x;" + "MOV.S {}.x,-1;" + "ELSE;" + "MOV.S {}.x,0;" + "ENDIF;", + masks[flag_index], flag_ret, flag_ret); + } + } +} + +void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, Register a, Register b) { + ctx.LongAdd("ADD.S64 {}.x,{}.x,{}.x;", inst, a, b); +} + +void EmitISub32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { + ctx.Add("SUB.S {}.x,{},{};", inst, a, b); +} + +void EmitISub64(EmitContext& ctx, IR::Inst& inst, Register a, Register b) { + ctx.LongAdd("SUB.S64 {}.x,{}.x,{}.x;", inst, a, b); +} + +void EmitIMul32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { + ctx.Add("MUL.S {}.x,{},{};", inst, a, b); +} + +void EmitINeg32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { + if (value.type != Type::Register && static_cast<s32>(value.imm_u32) < 0) { + ctx.Add("MOV.S {},{};", inst, -static_cast<s32>(value.imm_u32)); + } else { + ctx.Add("MOV.S {},-{};", inst, value); + } +} + +void EmitINeg64(EmitContext& ctx, IR::Inst& inst, Register value) { + ctx.LongAdd("MOV.S64 {},-{};", inst, value); +} + +void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { + ctx.Add("ABS.S {},{};", inst, value); +} + +void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift) { + ctx.Add("SHL.U {}.x,{},{};", inst, base, shift); +} + +void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, + ScalarU32 shift) { + ctx.LongAdd("SHL.U64 {}.x,{},{};", 
inst, base, shift); +} + +void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift) { + ctx.Add("SHR.U {}.x,{},{};", inst, base, shift); +} + +void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, + ScalarU32 shift) { + ctx.LongAdd("SHR.U64 {}.x,{},{};", inst, base, shift); +} + +void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 shift) { + ctx.Add("SHR.S {}.x,{},{};", inst, base, shift); +} + +void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, + ScalarS32 shift) { + ctx.LongAdd("SHR.S64 {}.x,{},{};", inst, base, shift); +} + +void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { + BitwiseLogicalOp(ctx, inst, a, b, "AND"); +} + +void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { + BitwiseLogicalOp(ctx, inst, a, b, "OR"); +} + +void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { + BitwiseLogicalOp(ctx, inst, a, b, "XOR"); +} + +void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 insert, + ScalarS32 offset, ScalarS32 count) { + const Register ret{ctx.reg_alloc.Define(inst)}; + if (count.type != Type::Register && offset.type != Type::Register) { + ctx.Add("BFI.S {},{{{},{},0,0}},{},{};", ret, count, offset, insert, base); + } else { + ctx.Add("MOV.S RC.x,{};" + "MOV.S RC.y,{};" + "BFI.S {},RC,{},{};", + count, offset, ret, insert, base); + } +} + +void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 offset, + ScalarS32 count) { + const Register ret{ctx.reg_alloc.Define(inst)}; + if (count.type != Type::Register && offset.type != Type::Register) { + ctx.Add("BFE.S {},{{{},{},0,0}},{};", ret, count, offset, base); + } else { + ctx.Add("MOV.S RC.x,{};" + "MOV.S RC.y,{};" + "BFE.S {},RC,{};", + count, offset, ret, base); + } +} + +void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 offset, + ScalarU32 count) { + const auto zero = inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp); + const auto sign = inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp); + if (zero) { + zero->Invalidate(); + } + if (sign) { + sign->Invalidate(); + } + if (zero || sign) { + ctx.reg_alloc.InvalidateConditionCodes(); + } + const Register ret{ctx.reg_alloc.Define(inst)}; + if (count.type != Type::Register && offset.type != Type::Register) { + ctx.Add("BFE.U {},{{{},{},0,0}},{};", ret, count, offset, base); + } else { + ctx.Add("MOV.U RC.x,{};" + "MOV.U RC.y,{};" + "BFE.U {},RC,{};", + count, offset, ret, base); + } + if (zero) { + ctx.Add("SEQ.S {},{},0;", *zero, ret); + } + if (sign) { + ctx.Add("SLT.S {},{},0;", *sign, ret); + } +} + +void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { + ctx.Add("BFR {},{};", inst, value); +} + +void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { + ctx.Add("BTC {},{};", inst, value); +} + +void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { + ctx.Add("NOT.S {},{};", inst, value); +} + +void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { + ctx.Add("BTFM.S {},{};", inst, value); +} + +void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) { + ctx.Add("BTFM.U {},{};", inst, value); +} + +void EmitSMin32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { + ctx.Add("MIN.S {},{},{};", inst, a, b); +} + +void 
EmitUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b) { + ctx.Add("MIN.U {},{},{};", inst, a, b); +} + +void EmitSMax32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { + ctx.Add("MAX.S {},{},{};", inst, a, b); +} + +void EmitUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b) { + ctx.Add("MAX.U {},{},{};", inst, a, b); +} + +void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value, ScalarS32 min, ScalarS32 max) { + const Register ret{ctx.reg_alloc.Define(inst)}; + ctx.Add("MIN.S RC.x,{},{};" + "MAX.S {}.x,RC.x,{};", + max, value, ret, min); +} + +void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 min, ScalarU32 max) { + const Register ret{ctx.reg_alloc.Define(inst)}; + ctx.Add("MIN.U RC.x,{},{};" + "MAX.U {}.x,RC.x,{};", + max, value, ret, min); +} + +void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { + ctx.Add("SLT.S {}.x,{},{};", inst, lhs, rhs); +} + +void EmitULessThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) { + ctx.Add("SLT.U {}.x,{},{};", inst, lhs, rhs); +} + +void EmitIEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { + ctx.Add("SEQ.S {}.x,{},{};", inst, lhs, rhs); +} + +void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { + ctx.Add("SLE.S {}.x,{},{};", inst, lhs, rhs); +} + +void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) { + ctx.Add("SLE.U {}.x,{},{};", inst, lhs, rhs); +} + +void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { + ctx.Add("SGT.S {}.x,{},{};", inst, lhs, rhs); +} + +void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) { + ctx.Add("SGT.U {}.x,{},{};", inst, lhs, rhs); +} + +void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { + ctx.Add("SNE.U {}.x,{},{};", inst, lhs, rhs); +} + +void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { + ctx.Add("SGE.S {}.x,{},{};", inst, lhs, rhs); +} + +void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) { + ctx.Add("SGE.U {}.x,{},{};", inst, lhs, rhs); +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_logical.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_logical.cpp new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_logical.cpp diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp new file mode 100644 index 000000000..af9fac7c1 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp @@ -0,0 +1,568 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <string_view> + +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/runtime_info.h" + +namespace Shader::Backend::GLASM { +namespace { +void StorageOp(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + std::string_view then_expr, std::string_view else_expr = {}) { + // Operate on bindless SSBO, call the expression with bounds checking + // address = c[binding].xy + // length = c[binding].z + const u32 sb_binding{binding.U32()}; + ctx.Add("PK64.U DC,c[{}];" // pointer = address + "CVT.U64.U32 DC.z,{};" // offset = uint64_t(offset) + "ADD.U64 DC.x,DC.x,DC.z;" // pointer += offset + "SLT.U.CC RC.x,{},c[{}].z;", // cc = offset < length + sb_binding, offset, offset, sb_binding); + if (else_expr.empty()) { + ctx.Add("IF NE.x;{}ENDIF;", then_expr); + } else { + ctx.Add("IF NE.x;{}ELSE;{}ENDIF;", then_expr, else_expr); + } +} + +void GlobalStorageOp(EmitContext& ctx, Register address, bool pointer_based, std::string_view expr, + std::string_view else_expr = {}) { + const size_t num_buffers{ctx.info.storage_buffers_descriptors.size()}; + for (size_t index = 0; index < num_buffers; ++index) { + if (!ctx.info.nvn_buffer_used[index]) { + continue; + } + const auto& ssbo{ctx.info.storage_buffers_descriptors[index]}; + ctx.Add("LDC.U64 DC.x,c{}[{}];" // ssbo_addr + "LDC.U32 RC.x,c{}[{}];" // ssbo_size_u32 + "CVT.U64.U32 DC.y,RC.x;" // ssbo_size = ssbo_size_u32 + "ADD.U64 DC.y,DC.y,DC.x;" // ssbo_end = ssbo_addr + ssbo_size + "SGE.U64 RC.x,{}.x,DC.x;" // a = input_addr >= ssbo_addr ? -1 : 0 + "SLT.U64 RC.y,{}.x,DC.y;" // b = input_addr < ssbo_end ? 
-1 : 0 + "AND.U.CC RC.x,RC.x,RC.y;" // cond = a && b + "IF NE.x;" // if cond + "SUB.U64 DC.x,{}.x,DC.x;", // offset = input_addr - ssbo_addr + ssbo.cbuf_index, ssbo.cbuf_offset, ssbo.cbuf_index, ssbo.cbuf_offset + 8, address, + address, address); + if (pointer_based) { + ctx.Add("PK64.U DC.y,c[{}];" // host_ssbo = cbuf + "ADD.U64 DC.x,DC.x,DC.y;" // host_addr = host_ssbo + offset + "{}" + "ELSE;", + index, expr); + } else { + ctx.Add("CVT.U32.U64 RC.x,DC.x;" + "{},ssbo{}[RC.x];" + "ELSE;", + expr, index); + } + } + if (!else_expr.empty()) { + ctx.Add("{}", else_expr); + } + const size_t num_used_buffers{ctx.info.nvn_buffer_used.count()}; + for (size_t index = 0; index < num_used_buffers; ++index) { + ctx.Add("ENDIF;"); + } +} + +template <typename ValueType> +void Write(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, ValueType value, + std::string_view size) { + if (ctx.runtime_info.glasm_use_storage_buffers) { + ctx.Add("STB.{} {},ssbo{}[{}];", size, value, binding.U32(), offset); + } else { + StorageOp(ctx, binding, offset, fmt::format("STORE.{} {},DC.x;", size, value)); + } +} + +void Load(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset, + std::string_view size) { + const Register ret{ctx.reg_alloc.Define(inst)}; + if (ctx.runtime_info.glasm_use_storage_buffers) { + ctx.Add("LDB.{} {},ssbo{}[{}];", size, ret, binding.U32(), offset); + } else { + StorageOp(ctx, binding, offset, fmt::format("LOAD.{} {},DC.x;", size, ret), + fmt::format("MOV.U {},{{0,0,0,0}};", ret)); + } +} + +template <typename ValueType> +void GlobalWrite(EmitContext& ctx, Register address, ValueType value, std::string_view size) { + if (ctx.runtime_info.glasm_use_storage_buffers) { + GlobalStorageOp(ctx, address, false, fmt::format("STB.{} {}", size, value)); + } else { + GlobalStorageOp(ctx, address, true, fmt::format("STORE.{} {},DC.x;", size, value)); + } +} + +void GlobalLoad(EmitContext& ctx, IR::Inst& inst, Register address, std::string_view size) { + const Register ret{ctx.reg_alloc.Define(inst)}; + if (ctx.runtime_info.glasm_use_storage_buffers) { + GlobalStorageOp(ctx, address, false, fmt::format("LDB.{} {}", size, ret)); + } else { + GlobalStorageOp(ctx, address, true, fmt::format("LOAD.{} {},DC.x;", size, ret), + fmt::format("MOV.S {},0;", ret)); + } +} + +template <typename ValueType> +void Atom(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset, + ValueType value, std::string_view operation, std::string_view size) { + const Register ret{ctx.reg_alloc.Define(inst)}; + if (ctx.runtime_info.glasm_use_storage_buffers) { + ctx.Add("ATOMB.{}.{} {},{},ssbo{}[{}];", operation, size, ret, value, binding.U32(), + offset); + } else { + StorageOp(ctx, binding, offset, + fmt::format("ATOM.{}.{} {},{},DC.x;", operation, size, ret, value)); + } +} +} // Anonymous namespace + +void EmitLoadGlobalU8(EmitContext& ctx, IR::Inst& inst, Register address) { + GlobalLoad(ctx, inst, address, "U8"); +} + +void EmitLoadGlobalS8(EmitContext& ctx, IR::Inst& inst, Register address) { + GlobalLoad(ctx, inst, address, "S8"); +} + +void EmitLoadGlobalU16(EmitContext& ctx, IR::Inst& inst, Register address) { + GlobalLoad(ctx, inst, address, "U16"); +} + +void EmitLoadGlobalS16(EmitContext& ctx, IR::Inst& inst, Register address) { + GlobalLoad(ctx, inst, address, "S16"); +} + +void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, Register address) { + GlobalLoad(ctx, inst, address, "U32"); +} + +void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, Register address) { 
+ GlobalLoad(ctx, inst, address, "U32X2"); +} + +void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, Register address) { + GlobalLoad(ctx, inst, address, "U32X4"); +} + +void EmitWriteGlobalU8(EmitContext& ctx, Register address, Register value) { + GlobalWrite(ctx, address, value, "U8"); +} + +void EmitWriteGlobalS8(EmitContext& ctx, Register address, Register value) { + GlobalWrite(ctx, address, value, "S8"); +} + +void EmitWriteGlobalU16(EmitContext& ctx, Register address, Register value) { + GlobalWrite(ctx, address, value, "U16"); +} + +void EmitWriteGlobalS16(EmitContext& ctx, Register address, Register value) { + GlobalWrite(ctx, address, value, "S16"); +} + +void EmitWriteGlobal32(EmitContext& ctx, Register address, ScalarU32 value) { + GlobalWrite(ctx, address, value, "U32"); +} + +void EmitWriteGlobal64(EmitContext& ctx, Register address, Register value) { + GlobalWrite(ctx, address, value, "U32X2"); +} + +void EmitWriteGlobal128(EmitContext& ctx, Register address, Register value) { + GlobalWrite(ctx, address, value, "U32X4"); +} + +void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset) { + Load(ctx, inst, binding, offset, "U8"); +} + +void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset) { + Load(ctx, inst, binding, offset, "S8"); +} + +void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset) { + Load(ctx, inst, binding, offset, "U16"); +} + +void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset) { + Load(ctx, inst, binding, offset, "S16"); +} + +void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset) { + Load(ctx, inst, binding, offset, "U32"); +} + +void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset) { + Load(ctx, inst, binding, offset, "U32X2"); +} + +void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset) { + Load(ctx, inst, binding, offset, "U32X4"); +} + +void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + ScalarU32 value) { + Write(ctx, binding, offset, value, "U8"); +} + +void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + ScalarS32 value) { + Write(ctx, binding, offset, value, "S8"); +} + +void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + ScalarU32 value) { + Write(ctx, binding, offset, value, "U16"); +} + +void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + ScalarS32 value) { + Write(ctx, binding, offset, value, "S16"); +} + +void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + ScalarU32 value) { + Write(ctx, binding, offset, value, "U32"); +} + +void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + Register value) { + Write(ctx, binding, offset, value, "U32X2"); +} + +void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + Register value) { + Write(ctx, binding, offset, value, "U32X4"); +} + +void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.ADD.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarS32 
value) { + ctx.Add("ATOMS.MIN.S32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.MIN.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarS32 value) { + ctx.Add("ATOMS.MAX.S32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.MAX.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.IWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.DWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.AND.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.OR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.XOR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.EXCH.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + Register value) { + ctx.LongAdd("ATOMS.EXCH.U64 {}.x,{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "ADD", "U32"); +} + +void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarS32 value) { + Atom(ctx, inst, binding, offset, value, "MIN", "S32"); +} + +void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "MIN", "U32"); +} + +void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarS32 value) { + Atom(ctx, inst, binding, offset, value, "MAX", "S32"); +} + +void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "MAX", "U32"); +} + +void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "IWRAP", "U32"); +} + +void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "DWRAP", "U32"); +} + +void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, 
"AND", "U32"); +} + +void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "OR", "U32"); +} + +void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "XOR", "U32"); +} + +void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "EXCH", "U32"); +} + +void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "ADD", "U64"); +} + +void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "MIN", "S64"); +} + +void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "MIN", "U64"); +} + +void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "MAX", "S64"); +} + +void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "MAX", "U64"); +} + +void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "AND", "U64"); +} + +void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "OR", "U64"); +} + +void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "XOR", "U64"); +} + +void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "EXCH", "U64"); +} + +void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarF32 value) { + Atom(ctx, inst, binding, offset, value, "ADD", "F32"); +} + +void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "ADD", "F16x2"); +} + +void EmitStorageAtomicAddF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "MIN", "F16x2"); +} + +void EmitStorageAtomicMinF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, 
Register value) { + Atom(ctx, inst, binding, offset, value, "MAX", "F16x2"); +} + +void EmitStorageAtomicMaxF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicIAdd32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicSMin32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicUMin32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicSMax32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicUMax32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicInc32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicDec32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicAnd32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicOr32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicXor32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicExchange32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicIAdd64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicSMin64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicUMin64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicSMax64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicUMax64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicInc64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicDec64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicAnd64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicOr64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicXor64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicExchange64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicAddF32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicAddF16x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicAddF32x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicMinF16x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicMinF32x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicMaxF16x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicMaxF32x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp 
b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp new file mode 100644 index 000000000..ff64c6924 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -0,0 +1,273 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/frontend/ir/value.h" + +#ifdef _MSC_VER +#pragma warning(disable : 4100) +#endif + +namespace Shader::Backend::GLASM { + +#define NotImplemented() throw NotImplementedException("GLASM instruction {}", __LINE__) + +static void DefinePhi(EmitContext& ctx, IR::Inst& phi) { + switch (phi.Arg(0).Type()) { + case IR::Type::U1: + case IR::Type::U32: + case IR::Type::F32: + ctx.reg_alloc.Define(phi); + break; + case IR::Type::U64: + case IR::Type::F64: + ctx.reg_alloc.LongDefine(phi); + break; + default: + throw NotImplementedException("Phi node type {}", phi.Type()); + } +} + +void EmitPhi(EmitContext& ctx, IR::Inst& phi) { + const size_t num_args{phi.NumArgs()}; + for (size_t i = 0; i < num_args; ++i) { + ctx.reg_alloc.Consume(phi.Arg(i)); + } + if (!phi.Definition<Id>().is_valid) { + // The phi node wasn't forward defined + DefinePhi(ctx, phi); + } +} + +void EmitVoid(EmitContext&) {} + +void EmitReference(EmitContext& ctx, const IR::Value& value) { + ctx.reg_alloc.Consume(value); +} + +void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value& value) { + IR::Inst& phi{RegAlloc::AliasInst(*phi_value.Inst())}; + if (!phi.Definition<Id>().is_valid) { + // The phi node wasn't forward defined + DefinePhi(ctx, phi); + } + const Register phi_reg{ctx.reg_alloc.Consume(IR::Value{&phi})}; + const Value eval_value{ctx.reg_alloc.Consume(value)}; + + if (phi_reg == eval_value) { + return; + } + switch (phi.Flags<IR::Type>()) { + case IR::Type::U1: + case IR::Type::U32: + case IR::Type::F32: + ctx.Add("MOV.S {}.x,{};", phi_reg, ScalarS32{eval_value}); + break; + case IR::Type::U64: + case IR::Type::F64: + ctx.Add("MOV.U64 {}.x,{};", phi_reg, ScalarRegister{eval_value}); + break; + default: + throw NotImplementedException("Phi node type {}", phi.Type()); + } +} + +void EmitJoin(EmitContext& ctx) { + NotImplemented(); +} + +void EmitDemoteToHelperInvocation(EmitContext& ctx) { + ctx.Add("KIL TR.x;"); +} + +void EmitBarrier(EmitContext& ctx) { + ctx.Add("BAR;"); +} + +void EmitWorkgroupMemoryBarrier(EmitContext& ctx) { + ctx.Add("MEMBAR.CTA;"); +} + +void EmitDeviceMemoryBarrier(EmitContext& ctx) { + ctx.Add("MEMBAR;"); +} + +void EmitPrologue(EmitContext& ctx) { + // TODO +} + +void EmitEpilogue(EmitContext& ctx) { + // TODO +} + +void EmitEmitVertex(EmitContext& ctx, ScalarS32 stream) { + if (stream.type == Type::U32 && stream.imm_u32 == 0) { + ctx.Add("EMIT;"); + } else { + ctx.Add("EMITS {};", stream); + } +} + +void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { + if (!stream.IsImmediate()) { + LOG_WARNING(Shader_GLASM, "Stream is not immediate"); + } + ctx.reg_alloc.Consume(stream); + ctx.Add("ENDPRIM;"); +} + +void EmitGetRegister(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetRegister(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetPred(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetPred(EmitContext& ctx) { + NotImplemented(); +} + +void 
EmitSetGotoVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetGotoVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetIndirectBranchVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetIndirectBranchVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetZFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetSFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetCFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetOFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetZFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetSFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetCFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetOFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.S {},invocation.groupid;", inst); +} + +void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.S {},invocation.localid;", inst); +} + +void EmitInvocationId(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.S {}.x,primitive_invocation.x;", inst); +} + +void EmitSampleId(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.S {}.x,fragment.sampleid.x;", inst); +} + +void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.S {}.x,fragment.helperthread.x;", inst); +} + +void EmitYDirection(EmitContext& ctx, IR::Inst& inst) { + ctx.uses_y_direction = true; + ctx.Add("MOV.F {}.x,y_direction[0].w;", inst); +} + +void EmitUndefU1(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.S {}.x,0;", inst); +} + +void EmitUndefU8(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.S {}.x,0;", inst); +} + +void EmitUndefU16(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.S {}.x,0;", inst); +} + +void EmitUndefU32(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.S {}.x,0;", inst); +} + +void EmitUndefU64(EmitContext& ctx, IR::Inst& inst) { + ctx.LongAdd("MOV.S64 {}.x,0;", inst); +} + +void EmitGetZeroFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetSignFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetCarryFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetOverflowFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetSparseFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetInBoundsFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { + ctx.Add("OR.S {},{},{};", inst, a, b); +} + +void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { + ctx.Add("AND.S {},{},{};", inst, a, b); +} + +void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { + ctx.Add("XOR.S {},{},{};", inst, a, b); +} + +void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { + ctx.Add("SEQ.S {},{},0;", inst, value); +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp new file mode 100644 index 000000000..68fff613c --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp @@ -0,0 +1,67 @@ + +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
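[Editor's note] One convention worth keeping in mind for the logical emitters above and the select emitters below: GLASM predicates are integer masks, -1 (all bits set) for true and 0 for false. That is why EmitLogicalNot is an SEQ.S against zero and why CMP.S can branch on the condition value directly. A trivial sketch of that mapping follows; the helper name is invented for illustration.

#include <cstdint>

// Matches how RegAlloc::MakeImm (later in this diff) materializes IR::Type::U1
// immediates: true becomes 0xFFFFFFFF, false becomes 0.
constexpr uint32_t BoolToGlasmMask(bool value) {
    return value ? 0xFFFF'FFFFu : 0u;
}
static_assert(BoolToGlasmMask(true) == 0xFFFF'FFFFu);
static_assert(BoolToGlasmMask(false) == 0u);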
+ +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLASM { + +void EmitSelectU1(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value, + ScalarS32 false_value) { + ctx.Add("CMP.S {},{},{},{};", inst, cond, true_value, false_value); +} + +void EmitSelectU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond, + [[maybe_unused]] ScalarS32 true_value, [[maybe_unused]] ScalarS32 false_value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitSelectU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond, + [[maybe_unused]] ScalarS32 true_value, [[maybe_unused]] ScalarS32 false_value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value, + ScalarS32 false_value) { + ctx.Add("CMP.S {},{},{},{};", inst, cond, true_value, false_value); +} + +void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, Register true_value, + Register false_value) { + ctx.reg_alloc.InvalidateConditionCodes(); + const Register ret{ctx.reg_alloc.LongDefine(inst)}; + if (ret == true_value) { + ctx.Add("MOV.S.CC RC.x,{};" + "MOV.U64 {}.x(EQ.x),{};", + cond, ret, false_value); + } else if (ret == false_value) { + ctx.Add("MOV.S.CC RC.x,{};" + "MOV.U64 {}.x(NE.x),{};", + cond, ret, true_value); + } else { + ctx.Add("MOV.S.CC RC.x,{};" + "MOV.U64 {}.x,{};" + "MOV.U64 {}.x(NE.x),{};", + cond, ret, false_value, ret, true_value); + } +} + +void EmitSelectF16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond, + [[maybe_unused]] Register true_value, [[maybe_unused]] Register false_value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value, + ScalarS32 false_value) { + ctx.Add("CMP.S {},{},{},{};", inst, cond, true_value, false_value); +} + +void EmitSelectF64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond, + [[maybe_unused]] Register true_value, [[maybe_unused]] Register false_value) { + throw NotImplementedException("GLASM instruction"); +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp new file mode 100644 index 000000000..c1498f449 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp @@ -0,0 +1,58 @@ + +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLASM { +void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { + ctx.Add("LDS.U8 {},shared_mem[{}];", inst, offset); +} + +void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { + ctx.Add("LDS.S8 {},shared_mem[{}];", inst, offset); +} + +void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { + ctx.Add("LDS.U16 {},shared_mem[{}];", inst, offset); +} + +void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { + ctx.Add("LDS.S16 {},shared_mem[{}];", inst, offset); +} + +void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { + ctx.Add("LDS.U32 {},shared_mem[{}];", inst, offset); +} + +void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { + ctx.Add("LDS.U32X2 {},shared_mem[{}];", inst, offset); +} + +void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { + ctx.Add("LDS.U32X4 {},shared_mem[{}];", inst, offset); +} + +void EmitWriteSharedU8(EmitContext& ctx, ScalarU32 offset, ScalarU32 value) { + ctx.Add("STS.U8 {},shared_mem[{}];", value, offset); +} + +void EmitWriteSharedU16(EmitContext& ctx, ScalarU32 offset, ScalarU32 value) { + ctx.Add("STS.U16 {},shared_mem[{}];", value, offset); +} + +void EmitWriteSharedU32(EmitContext& ctx, ScalarU32 offset, ScalarU32 value) { + ctx.Add("STS.U32 {},shared_mem[{}];", value, offset); +} + +void EmitWriteSharedU64(EmitContext& ctx, ScalarU32 offset, Register value) { + ctx.Add("STS.U32X2 {},shared_mem[{}];", value, offset); +} + +void EmitWriteSharedU128(EmitContext& ctx, ScalarU32 offset, Register value) { + ctx.Add("STS.U32X4 {},shared_mem[{}];", value, offset); +} +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_special.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_special.cpp new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_special.cpp diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_undefined.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_undefined.cpp new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_undefined.cpp diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp new file mode 100644 index 000000000..544d475b4 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp @@ -0,0 +1,150 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" + +namespace Shader::Backend::GLASM { + +void EmitLaneId(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.S {}.x,{}.threadid;", inst, ctx.stage_name); +} + +void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) { + ctx.Add("TGALL.S {}.x,{};", inst, pred); +} + +void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) { + ctx.Add("TGANY.S {}.x,{};", inst, pred); +} + +void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) { + ctx.Add("TGEQ.S {}.x,{};", inst, pred); +} + +void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) { + ctx.Add("TGBALLOT {}.x,{};", inst, pred); +} + +void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.U {},{}.threadeqmask;", inst, ctx.stage_name); +} + +void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.U {},{}.threadltmask;", inst, ctx.stage_name); +} + +void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.U {},{}.threadlemask;", inst, ctx.stage_name); +} + +void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.U {},{}.threadgtmask;", inst, ctx.stage_name); +} + +void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.U {},{}.threadgemask;", inst, ctx.stage_name); +} + +static void Shuffle(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, + const IR::Value& clamp, const IR::Value& segmentation_mask, + std::string_view op) { + IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)}; + if (in_bounds) { + in_bounds->Invalidate(); + } + std::string mask; + if (clamp.IsImmediate() && segmentation_mask.IsImmediate()) { + mask = fmt::to_string(clamp.U32() | (segmentation_mask.U32() << 8)); + } else { + mask = "RC"; + ctx.Add("BFI.U RC.x,{{5,8,0,0}},{},{};", + ScalarU32{ctx.reg_alloc.Consume(segmentation_mask)}, + ScalarU32{ctx.reg_alloc.Consume(clamp)}); + } + const Register value_ret{ctx.reg_alloc.Define(inst)}; + if (in_bounds) { + const Register bounds_ret{ctx.reg_alloc.Define(*in_bounds)}; + ctx.Add("SHF{}.U {},{},{},{};" + "MOV.U {}.x,{}.y;", + op, bounds_ret, value, index, mask, value_ret, bounds_ret); + } else { + ctx.Add("SHF{}.U {},{},{},{};" + "MOV.U {}.x,{}.y;", + op, value_ret, value, index, mask, value_ret, value_ret); + } +} + +void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, + const IR::Value& clamp, const IR::Value& segmentation_mask) { + Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "IDX"); +} + +void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, + const IR::Value& clamp, const IR::Value& segmentation_mask) { + Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "UP"); +} + +void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, + const IR::Value& clamp, const IR::Value& segmentation_mask) { + Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "DOWN"); +} + +void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, + const IR::Value& clamp, const IR::Value& segmentation_mask) { + Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "XOR"); +} + +void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a, ScalarF32 op_b, + ScalarU32 
swizzle) { + const auto ret{ctx.reg_alloc.Define(inst)}; + ctx.Add("AND.U RC.z,{}.threadid,3;" + "SHL.U RC.z,RC.z,1;" + "SHR.U RC.z,{},RC.z;" + "AND.U RC.z,RC.z,3;" + "MUL.F RC.x,{},FSWZA[RC.z];" + "MUL.F RC.y,{},FSWZB[RC.z];" + "ADD.F {}.x,RC.x,RC.y;", + ctx.stage_name, swizzle, op_a, op_b, ret); +} + +void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { + if (ctx.profile.support_derivative_control) { + ctx.Add("DDX.FINE {}.x,{};", inst, p); + } else { + LOG_WARNING(Shader_GLASM, "Fine derivatives not supported by device"); + ctx.Add("DDX {}.x,{};", inst, p); + } +} + +void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { + if (ctx.profile.support_derivative_control) { + ctx.Add("DDY.FINE {}.x,{};", inst, p); + } else { + LOG_WARNING(Shader_GLASM, "Fine derivatives not supported by device"); + ctx.Add("DDY {}.x,{};", inst, p); + } +} + +void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { + if (ctx.profile.support_derivative_control) { + ctx.Add("DDX.COARSE {}.x,{};", inst, p); + } else { + LOG_WARNING(Shader_GLASM, "Coarse derivatives not supported by device"); + ctx.Add("DDX {}.x,{};", inst, p); + } +} + +void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { + if (ctx.profile.support_derivative_control) { + ctx.Add("DDY.COARSE {}.x,{};", inst, p); + } else { + LOG_WARNING(Shader_GLASM, "Coarse derivatives not supported by device"); + ctx.Add("DDY {}.x,{};", inst, p); + } +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.cpp b/src/shader_recompiler/backend/glasm/reg_alloc.cpp new file mode 100644 index 000000000..4c046db6e --- /dev/null +++ b/src/shader_recompiler/backend/glasm/reg_alloc.cpp @@ -0,0 +1,186 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string> + +#include <fmt/format.h> + +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/reg_alloc.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLASM { + +Register RegAlloc::Define(IR::Inst& inst) { + return Define(inst, false); +} + +Register RegAlloc::LongDefine(IR::Inst& inst) { + return Define(inst, true); +} + +Value RegAlloc::Peek(const IR::Value& value) { + if (value.IsImmediate()) { + return MakeImm(value); + } else { + return PeekInst(*value.Inst()); + } +} + +Value RegAlloc::Consume(const IR::Value& value) { + if (value.IsImmediate()) { + return MakeImm(value); + } else { + return ConsumeInst(*value.Inst()); + } +} + +void RegAlloc::Unref(IR::Inst& inst) { + IR::Inst& value_inst{AliasInst(inst)}; + value_inst.DestructiveRemoveUsage(); + if (!value_inst.HasUses()) { + Free(value_inst.Definition<Id>()); + } +} + +Register RegAlloc::AllocReg() { + Register ret; + ret.type = Type::Register; + ret.id = Alloc(false); + return ret; +} + +Register RegAlloc::AllocLongReg() { + Register ret; + ret.type = Type::Register; + ret.id = Alloc(true); + return ret; +} + +void RegAlloc::FreeReg(Register reg) { + Free(reg.id); +} + +Value RegAlloc::MakeImm(const IR::Value& value) { + Value ret; + switch (value.Type()) { + case IR::Type::Void: + ret.type = Type::Void; + break; + case IR::Type::U1: + ret.type = Type::U32; + ret.imm_u32 = value.U1() ? 
0xffffffff : 0; + break; + case IR::Type::U32: + ret.type = Type::U32; + ret.imm_u32 = value.U32(); + break; + case IR::Type::F32: + ret.type = Type::U32; + ret.imm_u32 = Common::BitCast<u32>(value.F32()); + break; + case IR::Type::U64: + ret.type = Type::U64; + ret.imm_u64 = value.U64(); + break; + case IR::Type::F64: + ret.type = Type::U64; + ret.imm_u64 = Common::BitCast<u64>(value.F64()); + break; + default: + throw NotImplementedException("Immediate type {}", value.Type()); + } + return ret; +} + +Register RegAlloc::Define(IR::Inst& inst, bool is_long) { + if (inst.HasUses()) { + inst.SetDefinition<Id>(Alloc(is_long)); + } else { + Id id{}; + id.is_long.Assign(is_long ? 1 : 0); + id.is_null.Assign(1); + inst.SetDefinition<Id>(id); + } + return Register{PeekInst(inst)}; +} + +Value RegAlloc::PeekInst(IR::Inst& inst) { + Value ret; + ret.type = Type::Register; + ret.id = inst.Definition<Id>(); + return ret; +} + +Value RegAlloc::ConsumeInst(IR::Inst& inst) { + Unref(inst); + return PeekInst(inst); +} + +Id RegAlloc::Alloc(bool is_long) { + size_t& num_regs{is_long ? num_used_long_registers : num_used_registers}; + std::bitset<NUM_REGS>& use{is_long ? long_register_use : register_use}; + if (num_used_registers + num_used_long_registers < NUM_REGS) { + for (size_t reg = 0; reg < NUM_REGS; ++reg) { + if (use[reg]) { + continue; + } + num_regs = std::max(num_regs, reg + 1); + use[reg] = true; + Id ret{}; + ret.is_valid.Assign(1); + ret.is_long.Assign(is_long ? 1 : 0); + ret.is_spill.Assign(0); + ret.is_condition_code.Assign(0); + ret.is_null.Assign(0); + ret.index.Assign(static_cast<u32>(reg)); + return ret; + } + } + throw NotImplementedException("Register spilling"); +} + +void RegAlloc::Free(Id id) { + if (id.is_valid == 0) { + throw LogicError("Freeing invalid register"); + } + if (id.is_spill != 0) { + throw NotImplementedException("Free spill"); + } + if (id.is_long != 0) { + long_register_use[id.index] = false; + } else { + register_use[id.index] = false; + } +} + +/*static*/ bool RegAlloc::IsAliased(const IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::Identity: + case IR::Opcode::BitCastU16F16: + case IR::Opcode::BitCastU32F32: + case IR::Opcode::BitCastU64F64: + case IR::Opcode::BitCastF16U16: + case IR::Opcode::BitCastF32U32: + case IR::Opcode::BitCastF64U64: + return true; + default: + return false; + } +} + +/*static*/ IR::Inst& RegAlloc::AliasInst(IR::Inst& inst) { + IR::Inst* it{&inst}; + while (IsAliased(*it)) { + const IR::Value arg{it->Arg(0)}; + if (arg.IsImmediate()) { + break; + } + it = arg.InstRecursive(); + } + return *it; +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.h b/src/shader_recompiler/backend/glasm/reg_alloc.h new file mode 100644 index 000000000..82aec66c6 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/reg_alloc.h @@ -0,0 +1,303 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
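[Editor's note] As a usage sketch only (hypothetical driver code, not part of the change): the allocator implemented just above, and declared in the header that follows, hands out the lowest free index and tracks a high-water mark rather than a live count, so freed registers are reused immediately.

#include "shader_recompiler/backend/glasm/reg_alloc.h"

// Hypothetical illustration of RegAlloc's allocation behaviour.
void RegAllocSketch() {
    using namespace Shader::Backend::GLASM;
    RegAlloc reg_alloc;
    const Register r0{reg_alloc.AllocReg()}; // formats as "R0"
    const Register r1{reg_alloc.AllocReg()}; // formats as "R1"
    reg_alloc.FreeReg(r0);
    const Register r2{reg_alloc.AllocReg()}; // lowest free slot again: "R0"
    // NumUsedRegisters() is now 2: the high-water mark, not the number of live registers.
    reg_alloc.FreeReg(r1);
    reg_alloc.FreeReg(r2);
}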
+ +#pragma once + +#include <bitset> + +#include <fmt/format.h> + +#include "common/bit_cast.h" +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/exception.h" + +namespace Shader::IR { +class Inst; +class Value; +} // namespace Shader::IR + +namespace Shader::Backend::GLASM { + +class EmitContext; + +enum class Type : u32 { + Void, + Register, + U32, + U64, +}; + +struct Id { + union { + u32 raw; + BitField<0, 1, u32> is_valid; + BitField<1, 1, u32> is_long; + BitField<2, 1, u32> is_spill; + BitField<3, 1, u32> is_condition_code; + BitField<4, 1, u32> is_null; + BitField<5, 27, u32> index; + }; + + bool operator==(Id rhs) const noexcept { + return raw == rhs.raw; + } + bool operator!=(Id rhs) const noexcept { + return !operator==(rhs); + } +}; +static_assert(sizeof(Id) == sizeof(u32)); + +struct Value { + Type type; + union { + Id id; + u32 imm_u32; + u64 imm_u64; + }; + + bool operator==(const Value& rhs) const noexcept { + if (type != rhs.type) { + return false; + } + switch (type) { + case Type::Void: + return true; + case Type::Register: + return id == rhs.id; + case Type::U32: + return imm_u32 == rhs.imm_u32; + case Type::U64: + return imm_u64 == rhs.imm_u64; + } + return false; + } + bool operator!=(const Value& rhs) const noexcept { + return !operator==(rhs); + } +}; +struct Register : Value {}; +struct ScalarRegister : Value {}; +struct ScalarU32 : Value {}; +struct ScalarS32 : Value {}; +struct ScalarF32 : Value {}; +struct ScalarF64 : Value {}; + +class RegAlloc { +public: + RegAlloc() = default; + + Register Define(IR::Inst& inst); + + Register LongDefine(IR::Inst& inst); + + [[nodiscard]] Value Peek(const IR::Value& value); + + Value Consume(const IR::Value& value); + + void Unref(IR::Inst& inst); + + [[nodiscard]] Register AllocReg(); + + [[nodiscard]] Register AllocLongReg(); + + void FreeReg(Register reg); + + void InvalidateConditionCodes() { + // This does nothing for now + } + + [[nodiscard]] size_t NumUsedRegisters() const noexcept { + return num_used_registers; + } + + [[nodiscard]] size_t NumUsedLongRegisters() const noexcept { + return num_used_long_registers; + } + + [[nodiscard]] bool IsEmpty() const noexcept { + return register_use.none() && long_register_use.none(); + } + + /// Returns true if the instruction is expected to be aliased to another + static bool IsAliased(const IR::Inst& inst); + + /// Returns the underlying value out of an alias sequence + static IR::Inst& AliasInst(IR::Inst& inst); + +private: + static constexpr size_t NUM_REGS = 4096; + static constexpr size_t NUM_ELEMENTS = 4; + + Value MakeImm(const IR::Value& value); + + Register Define(IR::Inst& inst, bool is_long); + + Value PeekInst(IR::Inst& inst); + + Value ConsumeInst(IR::Inst& inst); + + Id Alloc(bool is_long); + + void Free(Id id); + + size_t num_used_registers{}; + size_t num_used_long_registers{}; + std::bitset<NUM_REGS> register_use{}; + std::bitset<NUM_REGS> long_register_use{}; +}; + +template <bool scalar, typename FormatContext> +auto FormatTo(FormatContext& ctx, Id id) { + if (id.is_condition_code != 0) { + throw NotImplementedException("Condition code emission"); + } + if (id.is_spill != 0) { + throw NotImplementedException("Spill emission"); + } + if constexpr (scalar) { + if (id.is_null != 0) { + return fmt::format_to(ctx.out(), "{}", id.is_long != 0 ? 
"DC.x" : "RC.x"); + } + if (id.is_long != 0) { + return fmt::format_to(ctx.out(), "D{}.x", id.index.Value()); + } else { + return fmt::format_to(ctx.out(), "R{}.x", id.index.Value()); + } + } else { + if (id.is_null != 0) { + return fmt::format_to(ctx.out(), "{}", id.is_long != 0 ? "DC" : "RC"); + } + if (id.is_long != 0) { + return fmt::format_to(ctx.out(), "D{}", id.index.Value()); + } else { + return fmt::format_to(ctx.out(), "R{}", id.index.Value()); + } + } +} + +} // namespace Shader::Backend::GLASM + +template <> +struct fmt::formatter<Shader::Backend::GLASM::Id> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(Shader::Backend::GLASM::Id id, FormatContext& ctx) { + return Shader::Backend::GLASM::FormatTo<true>(ctx, id); + } +}; + +template <> +struct fmt::formatter<Shader::Backend::GLASM::Register> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(const Shader::Backend::GLASM::Register& value, FormatContext& ctx) { + if (value.type != Shader::Backend::GLASM::Type::Register) { + throw Shader::InvalidArgument("Register value type is not register"); + } + return Shader::Backend::GLASM::FormatTo<false>(ctx, value.id); + } +}; + +template <> +struct fmt::formatter<Shader::Backend::GLASM::ScalarRegister> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(const Shader::Backend::GLASM::ScalarRegister& value, FormatContext& ctx) { + if (value.type != Shader::Backend::GLASM::Type::Register) { + throw Shader::InvalidArgument("Register value type is not register"); + } + return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id); + } +}; + +template <> +struct fmt::formatter<Shader::Backend::GLASM::ScalarU32> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(const Shader::Backend::GLASM::ScalarU32& value, FormatContext& ctx) { + switch (value.type) { + case Shader::Backend::GLASM::Type::Void: + break; + case Shader::Backend::GLASM::Type::Register: + return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id); + case Shader::Backend::GLASM::Type::U32: + return fmt::format_to(ctx.out(), "{}", value.imm_u32); + case Shader::Backend::GLASM::Type::U64: + break; + } + throw Shader::InvalidArgument("Invalid value type {}", value.type); + } +}; + +template <> +struct fmt::formatter<Shader::Backend::GLASM::ScalarS32> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(const Shader::Backend::GLASM::ScalarS32& value, FormatContext& ctx) { + switch (value.type) { + case Shader::Backend::GLASM::Type::Void: + break; + case Shader::Backend::GLASM::Type::Register: + return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id); + case Shader::Backend::GLASM::Type::U32: + return fmt::format_to(ctx.out(), "{}", static_cast<s32>(value.imm_u32)); + case Shader::Backend::GLASM::Type::U64: + break; + } + throw Shader::InvalidArgument("Invalid value type {}", value.type); + } +}; + +template <> +struct fmt::formatter<Shader::Backend::GLASM::ScalarF32> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(const Shader::Backend::GLASM::ScalarF32& value, FormatContext& ctx) { + switch (value.type) { + case Shader::Backend::GLASM::Type::Void: + break; + 
case Shader::Backend::GLASM::Type::Register: + return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id); + case Shader::Backend::GLASM::Type::U32: + return fmt::format_to(ctx.out(), "{}", Common::BitCast<f32>(value.imm_u32)); + case Shader::Backend::GLASM::Type::U64: + break; + } + throw Shader::InvalidArgument("Invalid value type {}", value.type); + } +}; + +template <> +struct fmt::formatter<Shader::Backend::GLASM::ScalarF64> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(const Shader::Backend::GLASM::ScalarF64& value, FormatContext& ctx) { + switch (value.type) { + case Shader::Backend::GLASM::Type::Void: + break; + case Shader::Backend::GLASM::Type::Register: + return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id); + case Shader::Backend::GLASM::Type::U32: + break; + case Shader::Backend::GLASM::Type::U64: + return fmt::format_to(ctx.out(), "{}", Common::BitCast<f64>(value.imm_u64)); + } + throw Shader::InvalidArgument("Invalid value type {}", value.type); + } +}; diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp new file mode 100644 index 000000000..4e6f2c0fe --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -0,0 +1,715 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/bindings.h" +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" + +namespace Shader::Backend::GLSL { +namespace { +u32 CbufIndex(size_t offset) { + return (offset / 4) % 4; +} + +char Swizzle(size_t offset) { + return "xyzw"[CbufIndex(offset)]; +} + +std::string_view InterpDecorator(Interpolation interp) { + switch (interp) { + case Interpolation::Smooth: + return ""; + case Interpolation::Flat: + return "flat "; + case Interpolation::NoPerspective: + return "noperspective "; + } + throw InvalidArgument("Invalid interpolation {}", interp); +} + +std::string_view InputArrayDecorator(Stage stage) { + switch (stage) { + case Stage::Geometry: + case Stage::TessellationControl: + case Stage::TessellationEval: + return "[]"; + default: + return ""; + } +} + +bool StoresPerVertexAttributes(Stage stage) { + switch (stage) { + case Stage::VertexA: + case Stage::VertexB: + case Stage::Geometry: + case Stage::TessellationEval: + return true; + default: + return false; + } +} + +std::string OutputDecorator(Stage stage, u32 size) { + switch (stage) { + case Stage::TessellationControl: + return fmt::format("[{}]", size); + default: + return ""; + } +} + +std::string_view SamplerType(TextureType type, bool is_depth) { + if (is_depth) { + switch (type) { + case TextureType::Color1D: + return "sampler1DShadow"; + case TextureType::ColorArray1D: + return "sampler1DArrayShadow"; + case TextureType::Color2D: + return "sampler2DShadow"; + case TextureType::ColorArray2D: + return "sampler2DArrayShadow"; + case TextureType::ColorCube: + return "samplerCubeShadow"; + case TextureType::ColorArrayCube: + return "samplerCubeArrayShadow"; + default: + throw NotImplementedException("Texture type: {}", type); + } + } + switch (type) { + case TextureType::Color1D: + return "sampler1D"; + case TextureType::ColorArray1D: + return "sampler1DArray"; + case TextureType::Color2D: + return "sampler2D"; + case 
TextureType::ColorArray2D: + return "sampler2DArray"; + case TextureType::Color3D: + return "sampler3D"; + case TextureType::ColorCube: + return "samplerCube"; + case TextureType::ColorArrayCube: + return "samplerCubeArray"; + case TextureType::Buffer: + return "samplerBuffer"; + default: + throw NotImplementedException("Texture type: {}", type); + } +} + +std::string_view ImageType(TextureType type) { + switch (type) { + case TextureType::Color1D: + return "uimage1D"; + case TextureType::ColorArray1D: + return "uimage1DArray"; + case TextureType::Color2D: + return "uimage2D"; + case TextureType::ColorArray2D: + return "uimage2DArray"; + case TextureType::Color3D: + return "uimage3D"; + case TextureType::ColorCube: + return "uimageCube"; + case TextureType::ColorArrayCube: + return "uimageCubeArray"; + case TextureType::Buffer: + return "uimageBuffer"; + default: + throw NotImplementedException("Image type: {}", type); + } +} + +std::string_view ImageFormatString(ImageFormat format) { + switch (format) { + case ImageFormat::Typeless: + return ""; + case ImageFormat::R8_UINT: + return ",r8ui"; + case ImageFormat::R8_SINT: + return ",r8i"; + case ImageFormat::R16_UINT: + return ",r16ui"; + case ImageFormat::R16_SINT: + return ",r16i"; + case ImageFormat::R32_UINT: + return ",r32ui"; + case ImageFormat::R32G32_UINT: + return ",rg32ui"; + case ImageFormat::R32G32B32A32_UINT: + return ",rgba32ui"; + default: + throw NotImplementedException("Image format: {}", format); + } +} + +std::string_view ImageAccessQualifier(bool is_written, bool is_read) { + if (is_written && !is_read) { + return "writeonly "; + } + if (is_read && !is_written) { + return "readonly "; + } + return ""; +} + +std::string_view GetTessMode(TessPrimitive primitive) { + switch (primitive) { + case TessPrimitive::Triangles: + return "triangles"; + case TessPrimitive::Quads: + return "quads"; + case TessPrimitive::Isolines: + return "isolines"; + } + throw InvalidArgument("Invalid tessellation primitive {}", primitive); +} + +std::string_view GetTessSpacing(TessSpacing spacing) { + switch (spacing) { + case TessSpacing::Equal: + return "equal_spacing"; + case TessSpacing::FractionalOdd: + return "fractional_odd_spacing"; + case TessSpacing::FractionalEven: + return "fractional_even_spacing"; + } + throw InvalidArgument("Invalid tessellation spacing {}", spacing); +} + +std::string_view InputPrimitive(InputTopology topology) { + switch (topology) { + case InputTopology::Points: + return "points"; + case InputTopology::Lines: + return "lines"; + case InputTopology::LinesAdjacency: + return "lines_adjacency"; + case InputTopology::Triangles: + return "triangles"; + case InputTopology::TrianglesAdjacency: + return "triangles_adjacency"; + } + throw InvalidArgument("Invalid input topology {}", topology); +} + +std::string_view OutputPrimitive(OutputTopology topology) { + switch (topology) { + case OutputTopology::PointList: + return "points"; + case OutputTopology::LineStrip: + return "line_strip"; + case OutputTopology::TriangleStrip: + return "triangle_strip"; + } + throw InvalidArgument("Invalid output topology {}", topology); +} + +void SetupLegacyOutPerVertex(EmitContext& ctx, std::string& header) { + if (!ctx.info.stores.Legacy()) { + return; + } + if (ctx.info.stores.FixedFunctionTexture()) { + header += "vec4 gl_TexCoord[8];"; + } + if (ctx.info.stores.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) { + header += "vec4 gl_FrontColor;"; + } + if (ctx.info.stores.AnyComponent(IR::Attribute::ColorFrontSpecularR)) { + header += 
"vec4 gl_FrontSecondaryColor;"; + } + if (ctx.info.stores.AnyComponent(IR::Attribute::ColorBackDiffuseR)) { + header += "vec4 gl_BackColor;"; + } + if (ctx.info.stores.AnyComponent(IR::Attribute::ColorBackSpecularR)) { + header += "vec4 gl_BackSecondaryColor;"; + } +} + +void SetupOutPerVertex(EmitContext& ctx, std::string& header) { + if (!StoresPerVertexAttributes(ctx.stage)) { + return; + } + if (ctx.uses_geometry_passthrough) { + return; + } + header += "out gl_PerVertex{vec4 gl_Position;"; + if (ctx.info.stores[IR::Attribute::PointSize]) { + header += "float gl_PointSize;"; + } + if (ctx.info.stores.ClipDistances()) { + header += "float gl_ClipDistance[];"; + } + if (ctx.info.stores[IR::Attribute::ViewportIndex] && + ctx.profile.support_viewport_index_layer_non_geometry && ctx.stage != Stage::Geometry) { + header += "int gl_ViewportIndex;"; + } + SetupLegacyOutPerVertex(ctx, header); + header += "};"; + if (ctx.info.stores[IR::Attribute::ViewportIndex] && ctx.stage == Stage::Geometry) { + header += "out int gl_ViewportIndex;"; + } +} + +void SetupInPerVertex(EmitContext& ctx, std::string& header) { + // Currently only required for TessellationControl to adhere to + // ARB_separate_shader_objects requirements + if (ctx.stage != Stage::TessellationControl) { + return; + } + const bool loads_position{ctx.info.loads.AnyComponent(IR::Attribute::PositionX)}; + const bool loads_point_size{ctx.info.loads[IR::Attribute::PointSize]}; + const bool loads_clip_distance{ctx.info.loads.ClipDistances()}; + const bool loads_per_vertex{loads_position || loads_point_size || loads_clip_distance}; + if (!loads_per_vertex) { + return; + } + header += "in gl_PerVertex{"; + if (loads_position) { + header += "vec4 gl_Position;"; + } + if (loads_point_size) { + header += "float gl_PointSize;"; + } + if (loads_clip_distance) { + header += "float gl_ClipDistance[];"; + } + header += "}gl_in[gl_MaxPatchVertices];"; +} + +void SetupLegacyInPerFragment(EmitContext& ctx, std::string& header) { + if (!ctx.info.loads.Legacy()) { + return; + } + header += "in gl_PerFragment{"; + if (ctx.info.loads.FixedFunctionTexture()) { + header += "vec4 gl_TexCoord[8];"; + } + if (ctx.info.loads.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) { + header += "vec4 gl_Color;"; + } + header += "};"; +} + +} // Anonymous namespace + +EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, + const RuntimeInfo& runtime_info_) + : info{program.info}, profile{profile_}, runtime_info{runtime_info_}, stage{program.stage}, + uses_geometry_passthrough{program.is_geometry_passthrough && + profile.support_geometry_shader_passthrough} { + if (profile.need_fastmath_off) { + header += "#pragma optionNV(fastmath off)\n"; + } + SetupExtensions(); + switch (program.stage) { + case Stage::VertexA: + case Stage::VertexB: + stage_name = "vs"; + break; + case Stage::TessellationControl: + stage_name = "tcs"; + header += fmt::format("layout(vertices={})out;", program.invocations); + break; + case Stage::TessellationEval: + stage_name = "tes"; + header += fmt::format("layout({},{},{})in;", GetTessMode(runtime_info.tess_primitive), + GetTessSpacing(runtime_info.tess_spacing), + runtime_info.tess_clockwise ? 
"cw" : "ccw"); + break; + case Stage::Geometry: + stage_name = "gs"; + header += fmt::format("layout({})in;", InputPrimitive(runtime_info.input_topology)); + if (uses_geometry_passthrough) { + header += "layout(passthrough)in gl_PerVertex{vec4 gl_Position;};"; + break; + } else if (program.is_geometry_passthrough && + !profile.support_geometry_shader_passthrough) { + LOG_WARNING(Shader_GLSL, "Passthrough geometry program used but not supported"); + } + header += fmt::format( + "layout({},max_vertices={})out;in gl_PerVertex{{vec4 gl_Position;}}gl_in[];", + OutputPrimitive(program.output_topology), program.output_vertices); + break; + case Stage::Fragment: + stage_name = "fs"; + position_name = "gl_FragCoord"; + if (runtime_info.force_early_z) { + header += "layout(early_fragment_tests)in;"; + } + if (info.uses_sample_id) { + header += "in int gl_SampleID;"; + } + if (info.stores_sample_mask) { + header += "out int gl_SampleMask[];"; + } + break; + case Stage::Compute: + stage_name = "cs"; + const u32 local_x{std::max(program.workgroup_size[0], 1u)}; + const u32 local_y{std::max(program.workgroup_size[1], 1u)}; + const u32 local_z{std::max(program.workgroup_size[2], 1u)}; + header += fmt::format("layout(local_size_x={},local_size_y={},local_size_z={}) in;", + local_x, local_y, local_z); + break; + } + SetupOutPerVertex(*this, header); + SetupInPerVertex(*this, header); + SetupLegacyInPerFragment(*this, header); + + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (!info.loads.Generic(index) || !runtime_info.previous_stage_stores.Generic(index)) { + continue; + } + const auto qualifier{uses_geometry_passthrough ? "passthrough" + : fmt::format("location={}", index)}; + header += fmt::format("layout({}){}in vec4 in_attr{}{};", qualifier, + InterpDecorator(info.interpolation[index]), index, + InputArrayDecorator(stage)); + } + for (size_t index = 0; index < info.uses_patches.size(); ++index) { + if (!info.uses_patches[index]) { + continue; + } + const auto qualifier{stage == Stage::TessellationControl ? 
"out" : "in"}; + header += fmt::format("layout(location={})patch {} vec4 patch{};", index, qualifier, index); + } + if (stage == Stage::Fragment) { + for (size_t index = 0; index < info.stores_frag_color.size(); ++index) { + if (!info.stores_frag_color[index] && !profile.need_declared_frag_colors) { + continue; + } + header += fmt::format("layout(location={})out vec4 frag_color{};", index, index); + } + } + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (info.stores.Generic(index)) { + DefineGenericOutput(index, program.invocations); + } + } + DefineConstantBuffers(bindings); + DefineStorageBuffers(bindings); + SetupImages(bindings); + SetupTextures(bindings); + DefineHelperFunctions(); + DefineConstants(); +} + +void EmitContext::SetupExtensions() { + header += "#extension GL_ARB_separate_shader_objects : enable\n"; + if (info.uses_shadow_lod && profile.support_gl_texture_shadow_lod) { + header += "#extension GL_EXT_texture_shadow_lod : enable\n"; + } + if (info.uses_int64 && profile.support_int64) { + header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; + } + if (info.uses_int64_bit_atomics) { + header += "#extension GL_NV_shader_atomic_int64 : enable\n"; + } + if (info.uses_atomic_f32_add) { + header += "#extension GL_NV_shader_atomic_float : enable\n"; + } + if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) { + header += "#extension GL_NV_shader_atomic_fp16_vector : enable\n"; + } + if (info.uses_fp16) { + if (profile.support_gl_nv_gpu_shader_5) { + header += "#extension GL_NV_gpu_shader5 : enable\n"; + } + if (profile.support_gl_amd_gpu_shader_half_float) { + header += "#extension GL_AMD_gpu_shader_half_float : enable\n"; + } + } + if (info.uses_subgroup_invocation_id || info.uses_subgroup_mask || info.uses_subgroup_vote || + info.uses_subgroup_shuffles || info.uses_fswzadd) { + header += "#extension GL_ARB_shader_ballot : enable\n" + "#extension GL_ARB_shader_group_vote : enable\n"; + if (!info.uses_int64 && profile.support_int64) { + header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; + } + if (profile.support_gl_warp_intrinsics) { + header += "#extension GL_NV_shader_thread_shuffle : enable\n"; + } + } + if ((info.stores[IR::Attribute::ViewportIndex] || info.stores[IR::Attribute::Layer]) && + profile.support_viewport_index_layer_non_geometry && stage != Stage::Geometry) { + header += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; + } + if (info.uses_sparse_residency && profile.support_gl_sparse_textures) { + header += "#extension GL_ARB_sparse_texture2 : enable\n"; + } + if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) { + header += "#extension GL_NV_viewport_array2 : enable\n"; + } + if (info.uses_typeless_image_reads) { + header += "#extension GL_EXT_shader_image_load_formatted : enable\n"; + } + if (info.uses_derivatives && profile.support_gl_derivative_control) { + header += "#extension GL_ARB_derivative_control : enable\n"; + } + if (uses_geometry_passthrough) { + header += "#extension GL_NV_geometry_shader_passthrough : enable\n"; + } +} + +void EmitContext::DefineConstantBuffers(Bindings& bindings) { + if (info.constant_buffer_descriptors.empty()) { + return; + } + for (const auto& desc : info.constant_buffer_descriptors) { + header += fmt::format( + "layout(std140,binding={}) uniform {}_cbuf_{}{{vec4 {}_cbuf{}[{}];}};", + bindings.uniform_buffer, stage_name, desc.index, stage_name, desc.index, 4 * 1024); + bindings.uniform_buffer += desc.count; + } +} + +void 
EmitContext::DefineStorageBuffers(Bindings& bindings) { + if (info.storage_buffers_descriptors.empty()) { + return; + } + u32 index{}; + for (const auto& desc : info.storage_buffers_descriptors) { + header += fmt::format("layout(std430,binding={}) buffer {}_ssbo_{}{{uint {}_ssbo{}[];}};", + bindings.storage_buffer, stage_name, bindings.storage_buffer, + stage_name, index); + bindings.storage_buffer += desc.count; + index += desc.count; + } +} + +void EmitContext::DefineGenericOutput(size_t index, u32 invocations) { + static constexpr std::string_view swizzle{"xyzw"}; + const size_t base_index{static_cast<size_t>(IR::Attribute::Generic0X) + index * 4}; + u32 element{0}; + while (element < 4) { + std::string definition{fmt::format("layout(location={}", index)}; + const u32 remainder{4 - element}; + const TransformFeedbackVarying* xfb_varying{}; + if (!runtime_info.xfb_varyings.empty()) { + xfb_varying = &runtime_info.xfb_varyings[base_index + element]; + xfb_varying = xfb_varying && xfb_varying->components > 0 ? xfb_varying : nullptr; + } + const u32 num_components{xfb_varying ? xfb_varying->components : remainder}; + if (element > 0) { + definition += fmt::format(",component={}", element); + } + if (xfb_varying) { + definition += + fmt::format(",xfb_buffer={},xfb_stride={},xfb_offset={}", xfb_varying->buffer, + xfb_varying->stride, xfb_varying->offset); + } + std::string name{fmt::format("out_attr{}", index)}; + if (num_components < 4 || element > 0) { + name += fmt::format("_{}", swizzle.substr(element, num_components)); + } + const auto type{num_components == 1 ? "float" : fmt::format("vec{}", num_components)}; + definition += fmt::format(")out {} {}{};", type, name, OutputDecorator(stage, invocations)); + header += definition; + + const GenericElementInfo element_info{ + .name = name, + .first_element = element, + .num_components = num_components, + }; + std::fill_n(output_generics[index].begin() + element, num_components, element_info); + element += num_components; + } +} + +void EmitContext::DefineHelperFunctions() { + header += "\n#define ftoi floatBitsToInt\n#define ftou floatBitsToUint\n" + "#define itof intBitsToFloat\n#define utof uintBitsToFloat\n"; + if (info.uses_global_increment || info.uses_shared_increment) { + header += "uint CasIncrement(uint op_a,uint op_b){return op_a>=op_b?0u:(op_a+1u);}"; + } + if (info.uses_global_decrement || info.uses_shared_decrement) { + header += "uint CasDecrement(uint op_a,uint op_b){" + "return op_a==0||op_a>op_b?op_b:(op_a-1u);}"; + } + if (info.uses_atomic_f32_add) { + header += "uint CasFloatAdd(uint op_a,float op_b){" + "return ftou(utof(op_a)+op_b);}"; + } + if (info.uses_atomic_f32x2_add) { + header += "uint CasFloatAdd32x2(uint op_a,vec2 op_b){" + "return packHalf2x16(unpackHalf2x16(op_a)+op_b);}"; + } + if (info.uses_atomic_f32x2_min) { + header += "uint CasFloatMin32x2(uint op_a,vec2 op_b){return " + "packHalf2x16(min(unpackHalf2x16(op_a),op_b));}"; + } + if (info.uses_atomic_f32x2_max) { + header += "uint CasFloatMax32x2(uint op_a,vec2 op_b){return " + "packHalf2x16(max(unpackHalf2x16(op_a),op_b));}"; + } + if (info.uses_atomic_f16x2_add) { + header += "uint CasFloatAdd16x2(uint op_a,f16vec2 op_b){return " + "packFloat2x16(unpackFloat2x16(op_a)+op_b);}"; + } + if (info.uses_atomic_f16x2_min) { + header += "uint CasFloatMin16x2(uint op_a,f16vec2 op_b){return " + "packFloat2x16(min(unpackFloat2x16(op_a),op_b));}"; + } + if (info.uses_atomic_f16x2_max) { + header += "uint CasFloatMax16x2(uint op_a,f16vec2 op_b){return " + 
"packFloat2x16(max(unpackFloat2x16(op_a),op_b));}"; + } + if (info.uses_atomic_s32_min) { + header += "uint CasMinS32(uint op_a,uint op_b){return uint(min(int(op_a),int(op_b)));}"; + } + if (info.uses_atomic_s32_max) { + header += "uint CasMaxS32(uint op_a,uint op_b){return uint(max(int(op_a),int(op_b)));}"; + } + if (info.uses_global_memory && profile.support_int64) { + header += DefineGlobalMemoryFunctions(); + } + if (info.loads_indexed_attributes) { + const bool is_array{stage == Stage::Geometry}; + const auto vertex_arg{is_array ? ",uint vertex" : ""}; + std::string func{ + fmt::format("float IndexedAttrLoad(int offset{}){{int base_index=offset>>2;uint " + "masked_index=uint(base_index)&3u;switch(base_index>>2){{", + vertex_arg)}; + if (info.loads.AnyComponent(IR::Attribute::PositionX)) { + const auto position_idx{is_array ? "gl_in[vertex]." : ""}; + func += fmt::format("case {}:return {}{}[masked_index];", + static_cast<u32>(IR::Attribute::PositionX) >> 2, position_idx, + position_name); + } + const u32 base_attribute_value = static_cast<u32>(IR::Attribute::Generic0X) >> 2; + for (u32 index = 0; index < IR::NUM_GENERICS; ++index) { + if (!info.loads.Generic(index)) { + continue; + } + const auto vertex_idx{is_array ? "[vertex]" : ""}; + func += fmt::format("case {}:return in_attr{}{}[masked_index];", + base_attribute_value + index, index, vertex_idx); + } + func += "default: return 0.0;}}"; + header += func; + } + if (info.stores_indexed_attributes) { + // TODO + } +} + +std::string EmitContext::DefineGlobalMemoryFunctions() { + const auto define_body{[&](std::string& func, size_t index, std::string_view return_statement) { + const auto& ssbo{info.storage_buffers_descriptors[index]}; + const u32 size_cbuf_offset{ssbo.cbuf_offset + 8}; + const auto ssbo_addr{fmt::format("ssbo_addr{}", index)}; + const auto cbuf{fmt::format("{}_cbuf{}", stage_name, ssbo.cbuf_index)}; + std::array<std::string, 2> addr_xy; + std::array<std::string, 2> size_xy; + for (size_t i = 0; i < addr_xy.size(); ++i) { + const auto addr_loc{ssbo.cbuf_offset + 4 * i}; + const auto size_loc{size_cbuf_offset + 4 * i}; + addr_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, addr_loc / 16, Swizzle(addr_loc)); + size_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, size_loc / 16, Swizzle(size_loc)); + } + const auto addr_pack{fmt::format("packUint2x32(uvec2({},{}))", addr_xy[0], addr_xy[1])}; + const auto addr_statment{fmt::format("uint64_t {}={};", ssbo_addr, addr_pack)}; + func += addr_statment; + + const auto size_vec{fmt::format("uvec2({},{})", size_xy[0], size_xy[1])}; + const auto comp_lhs{fmt::format("(addr>={})", ssbo_addr)}; + const auto comp_rhs{fmt::format("(addr<({}+uint64_t({})))", ssbo_addr, size_vec)}; + const auto comparison{fmt::format("if({}&&{}){{", comp_lhs, comp_rhs)}; + func += comparison; + + const auto ssbo_name{fmt::format("{}_ssbo{}", stage_name, index)}; + func += fmt::format(fmt::runtime(return_statement), ssbo_name, ssbo_addr); + }}; + std::string write_func{"void WriteGlobal32(uint64_t addr,uint data){"}; + std::string write_func_64{"void WriteGlobal64(uint64_t addr,uvec2 data){"}; + std::string write_func_128{"void WriteGlobal128(uint64_t addr,uvec4 data){"}; + std::string load_func{"uint LoadGlobal32(uint64_t addr){"}; + std::string load_func_64{"uvec2 LoadGlobal64(uint64_t addr){"}; + std::string load_func_128{"uvec4 LoadGlobal128(uint64_t addr){"}; + const size_t num_buffers{info.storage_buffers_descriptors.size()}; + for (size_t index = 0; index < num_buffers; ++index) { + if 
(!info.nvn_buffer_used[index]) { + continue; + } + define_body(write_func, index, "{0}[uint(addr-{1})>>2]=data;return;}}"); + define_body(write_func_64, index, + "{0}[uint(addr-{1})>>2]=data.x;{0}[uint(addr-{1}+4)>>2]=data.y;return;}}"); + define_body(write_func_128, index, + "{0}[uint(addr-{1})>>2]=data.x;{0}[uint(addr-{1}+4)>>2]=data.y;{0}[uint(" + "addr-{1}+8)>>2]=data.z;{0}[uint(addr-{1}+12)>>2]=data.w;return;}}"); + define_body(load_func, index, "return {0}[uint(addr-{1})>>2];}}"); + define_body(load_func_64, index, + "return uvec2({0}[uint(addr-{1})>>2],{0}[uint(addr-{1}+4)>>2]);}}"); + define_body(load_func_128, index, + "return uvec4({0}[uint(addr-{1})>>2],{0}[uint(addr-{1}+4)>>2],{0}[" + "uint(addr-{1}+8)>>2],{0}[uint(addr-{1}+12)>>2]);}}"); + } + write_func += '}'; + write_func_64 += '}'; + write_func_128 += '}'; + load_func += "return 0u;}"; + load_func_64 += "return uvec2(0);}"; + load_func_128 += "return uvec4(0);}"; + return write_func + write_func_64 + write_func_128 + load_func + load_func_64 + load_func_128; +} + +void EmitContext::SetupImages(Bindings& bindings) { + image_buffers.reserve(info.image_buffer_descriptors.size()); + for (const auto& desc : info.image_buffer_descriptors) { + image_buffers.push_back({bindings.image, desc.count}); + const auto format{ImageFormatString(desc.format)}; + const auto qualifier{ImageAccessQualifier(desc.is_written, desc.is_read)}; + const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; + header += fmt::format("layout(binding={}{}) uniform {}uimageBuffer img{}{};", + bindings.image, format, qualifier, bindings.image, array_decorator); + bindings.image += desc.count; + } + images.reserve(info.image_descriptors.size()); + for (const auto& desc : info.image_descriptors) { + images.push_back({bindings.image, desc.count}); + const auto format{ImageFormatString(desc.format)}; + const auto image_type{ImageType(desc.type)}; + const auto qualifier{ImageAccessQualifier(desc.is_written, desc.is_read)}; + const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; + header += fmt::format("layout(binding={}{})uniform {}{} img{}{};", bindings.image, format, + qualifier, image_type, bindings.image, array_decorator); + bindings.image += desc.count; + } +} + +void EmitContext::SetupTextures(Bindings& bindings) { + texture_buffers.reserve(info.texture_buffer_descriptors.size()); + for (const auto& desc : info.texture_buffer_descriptors) { + texture_buffers.push_back({bindings.texture, desc.count}); + const auto sampler_type{SamplerType(TextureType::Buffer, false)}; + const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; + header += fmt::format("layout(binding={}) uniform {} tex{}{};", bindings.texture, + sampler_type, bindings.texture, array_decorator); + bindings.texture += desc.count; + } + textures.reserve(info.texture_descriptors.size()); + for (const auto& desc : info.texture_descriptors) { + textures.push_back({bindings.texture, desc.count}); + const auto sampler_type{SamplerType(desc.type, desc.is_depth)}; + const auto array_decorator{desc.count > 1 ? 
fmt::format("[{}]", desc.count) : ""}; + header += fmt::format("layout(binding={}) uniform {} tex{}{};", bindings.texture, + sampler_type, bindings.texture, array_decorator); + bindings.texture += desc.count; + } +} + +void EmitContext::DefineConstants() { + if (info.uses_fswzadd) { + header += "const float FSWZ_A[]=float[4](-1.f,1.f,-1.f,0.f);" + "const float FSWZ_B[]=float[4](-1.f,-1.f,1.f,-1.f);"; + } +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h new file mode 100644 index 000000000..d9b639d29 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -0,0 +1,174 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <string> +#include <utility> +#include <vector> + +#include <fmt/format.h> + +#include "shader_recompiler/backend/glsl/var_alloc.h" +#include "shader_recompiler/stage.h" + +namespace Shader { +struct Info; +struct Profile; +struct RuntimeInfo; +} // namespace Shader + +namespace Shader::Backend { +struct Bindings; +} + +namespace Shader::IR { +class Inst; +struct Program; +} // namespace Shader::IR + +namespace Shader::Backend::GLSL { + +struct GenericElementInfo { + std::string name; + u32 first_element{}; + u32 num_components{}; +}; + +struct TextureImageDefinition { + u32 binding; + u32 count; +}; + +class EmitContext { +public: + explicit EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, + const RuntimeInfo& runtime_info_); + + template <GlslVarType type, typename... Args> + void Add(const char* format_str, IR::Inst& inst, Args&&... args) { + const auto var_def{var_alloc.AddDefine(inst, type)}; + if (var_def.empty()) { + // skip assigment. + code += fmt::format(fmt::runtime(format_str + 3), std::forward<Args>(args)...); + } else { + code += fmt::format(fmt::runtime(format_str), var_def, std::forward<Args>(args)...); + } + // TODO: Remove this + code += '\n'; + } + + template <typename... Args> + void AddU1(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<GlslVarType::U1>(format_str, inst, args...); + } + + template <typename... Args> + void AddF16x2(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<GlslVarType::F16x2>(format_str, inst, args...); + } + + template <typename... Args> + void AddU32(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<GlslVarType::U32>(format_str, inst, args...); + } + + template <typename... Args> + void AddF32(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<GlslVarType::F32>(format_str, inst, args...); + } + + template <typename... Args> + void AddU64(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<GlslVarType::U64>(format_str, inst, args...); + } + + template <typename... Args> + void AddF64(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<GlslVarType::F64>(format_str, inst, args...); + } + + template <typename... Args> + void AddU32x2(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<GlslVarType::U32x2>(format_str, inst, args...); + } + + template <typename... Args> + void AddF32x2(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<GlslVarType::F32x2>(format_str, inst, args...); + } + + template <typename... Args> + void AddU32x3(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<GlslVarType::U32x3>(format_str, inst, args...); + } + + template <typename... 
Args> + void AddF32x3(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<GlslVarType::F32x3>(format_str, inst, args...); + } + + template <typename... Args> + void AddU32x4(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<GlslVarType::U32x4>(format_str, inst, args...); + } + + template <typename... Args> + void AddF32x4(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<GlslVarType::F32x4>(format_str, inst, args...); + } + + template <typename... Args> + void AddPrecF32(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<GlslVarType::PrecF32>(format_str, inst, args...); + } + + template <typename... Args> + void AddPrecF64(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<GlslVarType::PrecF64>(format_str, inst, args...); + } + + template <typename... Args> + void Add(const char* format_str, Args&&... args) { + code += fmt::format(fmt::runtime(format_str), std::forward<Args>(args)...); + // TODO: Remove this + code += '\n'; + } + + std::string header; + std::string code; + VarAlloc var_alloc; + const Info& info; + const Profile& profile; + const RuntimeInfo& runtime_info; + + Stage stage{}; + std::string_view stage_name = "invalid"; + std::string_view position_name = "gl_Position"; + + std::vector<TextureImageDefinition> texture_buffers; + std::vector<TextureImageDefinition> image_buffers; + std::vector<TextureImageDefinition> textures; + std::vector<TextureImageDefinition> images; + std::array<std::array<GenericElementInfo, 4>, 32> output_generics{}; + + u32 num_safety_loop_vars{}; + + bool uses_y_direction{}; + bool uses_cc_carry{}; + bool uses_geometry_passthrough{}; + +private: + void SetupExtensions(); + void DefineConstantBuffers(Bindings& bindings); + void DefineStorageBuffers(Bindings& bindings); + void DefineGenericOutput(size_t index, u32 invocations); + void DefineHelperFunctions(); + void DefineConstants(); + std::string DefineGlobalMemoryFunctions(); + void SetupImages(Bindings& bindings); + void SetupTextures(Bindings& bindings); +}; + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp new file mode 100644 index 000000000..8a430d573 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -0,0 +1,252 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <string> +#include <tuple> +#include <type_traits> + +#include "common/div_ceil.h" +#include "common/settings.h" +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" + +namespace Shader::Backend::GLSL { +namespace { +template <class Func> +struct FuncTraits {}; + +template <class ReturnType_, class... Args> +struct FuncTraits<ReturnType_ (*)(Args...)> { + using ReturnType = ReturnType_; + + static constexpr size_t NUM_ARGS = sizeof...(Args); + + template <size_t I> + using ArgType = std::tuple_element_t<I, std::tuple<Args...>>; +}; + +template <auto func, typename... Args> +void SetDefinition(EmitContext& ctx, IR::Inst* inst, Args... 
args) { + inst->SetDefinition<Id>(func(ctx, std::forward<Args>(args)...)); +} + +template <typename ArgType> +auto Arg(EmitContext& ctx, const IR::Value& arg) { + if constexpr (std::is_same_v<ArgType, std::string_view>) { + return ctx.var_alloc.Consume(arg); + } else if constexpr (std::is_same_v<ArgType, const IR::Value&>) { + return arg; + } else if constexpr (std::is_same_v<ArgType, u32>) { + return arg.U32(); + } else if constexpr (std::is_same_v<ArgType, IR::Attribute>) { + return arg.Attribute(); + } else if constexpr (std::is_same_v<ArgType, IR::Patch>) { + return arg.Patch(); + } else if constexpr (std::is_same_v<ArgType, IR::Reg>) { + return arg.Reg(); + } +} + +template <auto func, bool is_first_arg_inst, size_t... I> +void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) { + using Traits = FuncTraits<decltype(func)>; + if constexpr (std::is_same_v<typename Traits::ReturnType, Id>) { + if constexpr (is_first_arg_inst) { + SetDefinition<func>( + ctx, inst, *inst, + Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...); + } else { + SetDefinition<func>( + ctx, inst, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...); + } + } else { + if constexpr (is_first_arg_inst) { + func(ctx, *inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...); + } else { + func(ctx, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...); + } + } +} + +template <auto func> +void Invoke(EmitContext& ctx, IR::Inst* inst) { + using Traits = FuncTraits<decltype(func)>; + static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments"); + if constexpr (Traits::NUM_ARGS == 1) { + Invoke<func, false>(ctx, inst, std::make_index_sequence<0>{}); + } else { + using FirstArgType = typename Traits::template ArgType<1>; + static constexpr bool is_first_arg_inst = std::is_same_v<FirstArgType, IR::Inst&>; + using Indices = std::make_index_sequence<Traits::NUM_ARGS - (is_first_arg_inst ? 2 : 1)>; + Invoke<func, is_first_arg_inst>(ctx, inst, Indices{}); + } +} + +void EmitInst(EmitContext& ctx, IR::Inst* inst) { + switch (inst->GetOpcode()) { +#define OPCODE(name, result_type, ...) 
\ + case IR::Opcode::name: \ + return Invoke<&Emit##name>(ctx, inst); +#include "shader_recompiler/frontend/ir/opcodes.inc" +#undef OPCODE + } + throw LogicError("Invalid opcode {}", inst->GetOpcode()); +} + +bool IsReference(IR::Inst& inst) { + return inst.GetOpcode() == IR::Opcode::Reference; +} + +void PrecolorInst(IR::Inst& phi) { + // Insert phi moves before references to avoid overwritting other phis + const size_t num_args{phi.NumArgs()}; + for (size_t i = 0; i < num_args; ++i) { + IR::Block& phi_block{*phi.PhiBlock(i)}; + auto it{std::find_if_not(phi_block.rbegin(), phi_block.rend(), IsReference).base()}; + IR::IREmitter ir{phi_block, it}; + const IR::Value arg{phi.Arg(i)}; + if (arg.IsImmediate()) { + ir.PhiMove(phi, arg); + } else { + ir.PhiMove(phi, IR::Value{arg.InstRecursive()}); + } + } + for (size_t i = 0; i < num_args; ++i) { + IR::IREmitter{*phi.PhiBlock(i)}.Reference(IR::Value{&phi}); + } +} + +void Precolor(const IR::Program& program) { + for (IR::Block* const block : program.blocks) { + for (IR::Inst& phi : block->Instructions()) { + if (!IR::IsPhi(phi)) { + break; + } + PrecolorInst(phi); + } + } +} + +void EmitCode(EmitContext& ctx, const IR::Program& program) { + for (const IR::AbstractSyntaxNode& node : program.syntax_list) { + switch (node.type) { + case IR::AbstractSyntaxNode::Type::Block: + for (IR::Inst& inst : node.data.block->Instructions()) { + EmitInst(ctx, &inst); + } + break; + case IR::AbstractSyntaxNode::Type::If: + ctx.Add("if({}){{", ctx.var_alloc.Consume(node.data.if_node.cond)); + break; + case IR::AbstractSyntaxNode::Type::EndIf: + ctx.Add("}}"); + break; + case IR::AbstractSyntaxNode::Type::Break: + if (node.data.break_node.cond.IsImmediate()) { + if (node.data.break_node.cond.U1()) { + ctx.Add("break;"); + } + } else { + ctx.Add("if({}){{break;}}", ctx.var_alloc.Consume(node.data.break_node.cond)); + } + break; + case IR::AbstractSyntaxNode::Type::Return: + case IR::AbstractSyntaxNode::Type::Unreachable: + ctx.Add("return;"); + break; + case IR::AbstractSyntaxNode::Type::Loop: + ctx.Add("for(;;){{"); + break; + case IR::AbstractSyntaxNode::Type::Repeat: + if (Settings::values.disable_shader_loop_safety_checks) { + ctx.Add("if(!{}){{break;}}}}", ctx.var_alloc.Consume(node.data.repeat.cond)); + } else { + ctx.Add("if(--loop{}<0 || !{}){{break;}}}}", ctx.num_safety_loop_vars++, + ctx.var_alloc.Consume(node.data.repeat.cond)); + } + break; + default: + throw NotImplementedException("AbstractSyntaxNode Type {}", node.type); + } + } +} + +std::string GlslVersionSpecifier(const EmitContext& ctx) { + if (ctx.uses_y_direction || ctx.info.stores.Legacy() || ctx.info.loads.Legacy()) { + return " compatibility"; + } + return ""; +} + +bool IsPreciseType(GlslVarType type) { + switch (type) { + case GlslVarType::PrecF32: + case GlslVarType::PrecF64: + return true; + default: + return false; + } +} + +void DefineVariables(const EmitContext& ctx, std::string& header) { + for (u32 i = 0; i < static_cast<u32>(GlslVarType::Void); ++i) { + const auto type{static_cast<GlslVarType>(i)}; + const auto& tracker{ctx.var_alloc.GetUseTracker(type)}; + const auto type_name{ctx.var_alloc.GetGlslType(type)}; + const bool has_precise_bug{ctx.stage == Stage::Fragment && ctx.profile.has_gl_precise_bug}; + const auto precise{!has_precise_bug && IsPreciseType(type) ? 
"precise " : ""}; + // Temps/return types that are never used are stored at index 0 + if (tracker.uses_temp) { + header += fmt::format("{}{} t{}={}(0);", precise, type_name, + ctx.var_alloc.Representation(0, type), type_name); + } + for (u32 index = 0; index < tracker.num_used; ++index) { + header += fmt::format("{}{} {}={}(0);", precise, type_name, + ctx.var_alloc.Representation(index, type), type_name); + } + } + for (u32 i = 0; i < ctx.num_safety_loop_vars; ++i) { + header += fmt::format("int loop{}=0x2000;", i); + } +} +} // Anonymous namespace + +std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, IR::Program& program, + Bindings& bindings) { + EmitContext ctx{program, bindings, profile, runtime_info}; + Precolor(program); + EmitCode(ctx, program); + const std::string version{fmt::format("#version 450{}\n", GlslVersionSpecifier(ctx))}; + ctx.header.insert(0, version); + if (program.shared_memory_size > 0) { + const auto requested_size{program.shared_memory_size}; + const auto max_size{profile.gl_max_compute_smem_size}; + const bool needs_clamp{requested_size > max_size}; + if (needs_clamp) { + LOG_WARNING(Shader_GLSL, "Requested shared memory size ({}) exceeds device limit ({})", + requested_size, max_size); + } + const auto smem_size{needs_clamp ? max_size : requested_size}; + ctx.header += fmt::format("shared uint smem[{}];", Common::DivCeil(smem_size, 4U)); + } + ctx.header += "void main(){\n"; + if (program.local_memory_size > 0) { + ctx.header += fmt::format("uint lmem[{}];", Common::DivCeil(program.local_memory_size, 4U)); + } + DefineVariables(ctx, ctx.header); + if (ctx.uses_cc_carry) { + ctx.header += "uint carry;"; + } + if (program.info.uses_subgroup_shuffles) { + ctx.header += "bool shfl_in_bounds;"; + } + ctx.code.insert(0, ctx.header); + ctx.code += '}'; + return ctx.code; +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.h b/src/shader_recompiler/backend/glsl/emit_glsl.h new file mode 100644 index 000000000..20e5719e6 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl.h @@ -0,0 +1,24 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <string> + +#include "shader_recompiler/backend/bindings.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" + +namespace Shader::Backend::GLSL { + +[[nodiscard]] std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, + IR::Program& program, Bindings& bindings); + +[[nodiscard]] inline std::string EmitGLSL(const Profile& profile, IR::Program& program) { + Bindings binding; + return EmitGLSL(profile, {}, program, binding); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp new file mode 100644 index 000000000..772acc5a4 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp @@ -0,0 +1,418 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLSL { +namespace { +constexpr char cas_loop[]{ + "for (;;){{uint old={};{}=atomicCompSwap({},old,{}({},{}));if({}==old){{break;}}}}"}; + +void SharedCasFunction(EmitContext& ctx, IR::Inst& inst, std::string_view offset, + std::string_view value, std::string_view function) { + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; + const std::string smem{fmt::format("smem[{}>>2]", offset)}; + ctx.Add(cas_loop, smem, ret, smem, function, smem, value, ret); +} + +void SsboCasFunction(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value, std::string_view function) { + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; + const std::string ssbo{fmt::format("{}_ssbo{}[{}>>2]", ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset))}; + ctx.Add(cas_loop, ssbo, ret, ssbo, function, ssbo, value, ret); +} + +void SsboCasFunctionF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value, + std::string_view function) { + const std::string ssbo{fmt::format("{}_ssbo{}[{}>>2]", ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset))}; + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; + ctx.Add(cas_loop, ssbo, ret, ssbo, function, ssbo, value, ret); + ctx.AddF32("{}=utof({});", inst, ret); +} +} // Anonymous namespace + +void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + ctx.AddU32("{}=atomicAdd(smem[{}>>2],{});", inst, pointer_offset, value); +} + +void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + const std::string u32_value{fmt::format("uint({})", value)}; + SharedCasFunction(ctx, inst, pointer_offset, u32_value, "CasMinS32"); +} + +void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + ctx.AddU32("{}=atomicMin(smem[{}>>2],{});", inst, pointer_offset, value); +} + +void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + const std::string u32_value{fmt::format("uint({})", value)}; + SharedCasFunction(ctx, inst, pointer_offset, u32_value, "CasMaxS32"); +} + +void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + ctx.AddU32("{}=atomicMax(smem[{}>>2],{});", inst, pointer_offset, value); +} + +void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + SharedCasFunction(ctx, inst, pointer_offset, value, "CasIncrement"); +} + +void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + SharedCasFunction(ctx, inst, pointer_offset, value, "CasDecrement"); +} + +void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + ctx.AddU32("{}=atomicAnd(smem[{}>>2],{});", inst, pointer_offset, value); +} + +void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + ctx.AddU32("{}=atomicOr(smem[{}>>2],{});", inst, pointer_offset, 
value); +} + +void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + ctx.AddU32("{}=atomicXor(smem[{}>>2],{});", inst, pointer_offset, value); +} + +void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + ctx.AddU32("{}=atomicExchange(smem[{}>>2],{});", inst, pointer_offset, value); +} + +void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); + ctx.AddU64("{}=packUint2x32(uvec2(smem[{}>>2],smem[({}+4)>>2]));", inst, pointer_offset, + pointer_offset); + ctx.Add("smem[{}>>2]=unpackUint2x32({}).x;smem[({}+4)>>2]=unpackUint2x32({}).y;", + pointer_offset, value, pointer_offset, value); +} + +void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU32("{}=atomicAdd({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + const std::string u32_value{fmt::format("uint({})", value)}; + SsboCasFunction(ctx, inst, binding, offset, u32_value, "CasMinS32"); +} + +void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU32("{}=atomicMin({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + const std::string u32_value{fmt::format("uint({})", value)}; + SsboCasFunction(ctx, inst, binding, offset, u32_value, "CasMaxS32"); +} + +void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU32("{}=atomicMax({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + SsboCasFunction(ctx, inst, binding, offset, value, "CasIncrement"); +} + +void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + SsboCasFunction(ctx, inst, binding, offset, value, "CasDecrement"); +} + +void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU32("{}=atomicAnd({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU32("{}=atomicOr({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU32("{}=atomicXor({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset), value); +} + +void 
EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU32("{}=atomicExchange({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); + ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset)); + ctx.Add("{}_ssbo{}[{}>>2]+=unpackUint2x32({}).x;{}_ssbo{}[({}>>2)+1]+=unpackUint2x32({}).y;", + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); + ctx.AddU64("{}=packInt2x32(ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset)); + ctx.Add("for(int i=0;i<2;++i){{ " + "{}_ssbo{}[({}>>2)+i]=uint(min(int({}_ssbo{}[({}>>2)+i]),unpackInt2x32(int64_t({}))[i])" + ");}}", + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); + ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset)); + ctx.Add("for(int i=0;i<2;++i){{ " + "{}_ssbo{}[({}>>2)+i]=min({}_ssbo{}[({}>>2)+i],unpackUint2x32(uint64_t({}))[i]);}}", + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); + ctx.AddU64("{}=packInt2x32(ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset)); + ctx.Add("for(int i=0;i<2;++i){{ " + "{}_ssbo{}[({}>>2)+i]=uint(max(int({}_ssbo{}[({}>>2)+i]),unpackInt2x32(int64_t({}))[i])" + ");}}", + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); + ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset)); + 
ctx.Add("for(int " + "i=0;i<2;++i){{{}_ssbo{}[({}>>2)+i]=max({}_ssbo{}[({}>>2)+i],unpackUint2x32(uint64_t({}" + "))[i]);}}", + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU64( + "{}=packUint2x32(uvec2(atomicAnd({}_ssbo{}[{}>>2],unpackUint2x32({}).x),atomicAnd({}_" + "ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));", + inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU64("{}=packUint2x32(uvec2(atomicOr({}_ssbo{}[{}>>2],unpackUint2x32({}).x),atomicOr({}_" + "ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));", + inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU64( + "{}=packUint2x32(uvec2(atomicXor({}_ssbo{}[{}>>2],unpackUint2x32({}).x),atomicXor({}_" + "ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));", + inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU64("{}=packUint2x32(uvec2(atomicExchange({}_ssbo{}[{}>>2],unpackUint2x32({}).x)," + "atomicExchange({}_ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));", + inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + SsboCasFunctionF32(ctx, inst, binding, offset, value, "CasFloatAdd"); +} + +void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatAdd16x2"); +} + +void EmitStorageAtomicAddF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatAdd32x2"); +} + +void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatMin16x2"); +} + +void EmitStorageAtomicMinF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatMin32x2"); +} + +void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatMax16x2"); +} + +void EmitStorageAtomicMaxF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + 
SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatMax32x2");
+}
+
+void EmitGlobalAtomicIAdd32(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicSMin32(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicUMin32(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicSMax32(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicUMax32(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicInc32(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicDec32(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicAnd32(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicOr32(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicXor32(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicExchange32(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicIAdd64(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicSMin64(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicUMin64(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicSMax64(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicUMax64(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicInc64(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicDec64(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicAnd64(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicOr64(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicXor64(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicExchange64(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicAddF32(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicAddF16x2(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicAddF32x2(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicMinF16x2(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicMinF32x2(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicMaxF16x2(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitGlobalAtomicMaxF32x2(EmitContext&) {
+    throw NotImplementedException("GLSL Instruction");
+}
+} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_barriers.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_barriers.cpp
new file mode 100644
index 000000000..e1d1b558e
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_barriers.cpp
@@ -0,0 +1,21 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
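One note before the barrier emitters: the 64-bit storage atomics in emit_glsl_atomic.cpp above have no native GLSL path here, so helpers such as EmitStorageAtomicIAdd64 log a warning and emit a plain, non-atomic read-modify-write over two 32-bit SSBO words, packing the previous value with packUint2x32. A minimal sketch of that expansion, assuming a compute stage, binding 0, and hypothetical VarAlloc names t1 (offset), t2 (value) and t3 (result):

// Sketch only (not part of the diff): formats the two statements that
// EmitStorageAtomicIAdd64 adds; cs_ssbo0, t1, t2 and t3 are placeholders.
#include <cstdio>
#include <string>
#include <fmt/format.h>

int main() {
    const std::string stage{"cs"}, offset{"t1"}, value{"t2"}, result{"t3"};
    const unsigned binding{0};
    // Old value: pack the low and high 32-bit words into the 64-bit result.
    std::puts(fmt::format("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", result,
                          stage, binding, offset, stage, binding, offset)
                  .c_str());
    // Non-atomic add of the two halves of the incoming value.
    std::puts(fmt::format("{}_ssbo{}[{}>>2]+=unpackUint2x32({}).x;"
                          "{}_ssbo{}[({}>>2)+1]+=unpackUint2x32({}).y;",
                          stage, binding, offset, value, stage, binding, offset, value)
                  .c_str());
}

EmitSharedAtomicExchange64 above takes the same non-atomic approach against smem.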
+ +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLSL { +void EmitBarrier(EmitContext& ctx) { + ctx.Add("barrier();"); +} + +void EmitWorkgroupMemoryBarrier(EmitContext& ctx) { + ctx.Add("groupMemoryBarrier();"); +} + +void EmitDeviceMemoryBarrier(EmitContext& ctx) { + ctx.Add("memoryBarrier();"); +} +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp new file mode 100644 index 000000000..3c1714e89 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp @@ -0,0 +1,94 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLSL { +namespace { +void Alias(IR::Inst& inst, const IR::Value& value) { + if (value.IsImmediate()) { + return; + } + IR::Inst& value_inst{*value.InstRecursive()}; + value_inst.DestructiveAddUsage(inst.UseCount()); + value_inst.DestructiveRemoveUsage(); + inst.SetDefinition(value_inst.Definition<Id>()); +} +} // Anonymous namespace + +void EmitIdentity(EmitContext&, IR::Inst& inst, const IR::Value& value) { + Alias(inst, value); +} + +void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) { + // Fake one usage to get a real variable out of the condition + inst.DestructiveAddUsage(1); + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U1)}; + const auto input{ctx.var_alloc.Consume(value)}; + if (ret != input) { + ctx.Add("{}={};", ret, input); + } +} + +void EmitBitCastU16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst) { + NotImplemented(); +} + +void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=ftou({});", inst, value); +} + +void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU64("{}=doubleBitsToUint64({});", inst, value); +} + +void EmitBitCastF16U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst) { + NotImplemented(); +} + +void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=utof({});", inst, value); +} + +void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=uint64BitsToDouble({});", inst, value); +} + +void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU64("{}=packUint2x32({});", inst, value); +} + +void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32x2("{}=unpackUint2x32({});", inst, value); +} + +void EmitPackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=packFloat2x16({});", inst, value); +} + +void EmitUnpackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF16x2("{}=unpackFloat2x16({});", inst, value); +} + +void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=packHalf2x16({});", inst, value); +} + +void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + 
ctx.AddF32x2("{}=unpackHalf2x16({});", inst, value); +} + +void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=packDouble2x32({});", inst, value); +} + +void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32x2("{}=unpackDouble2x32({});", inst, value); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp new file mode 100644 index 000000000..49a66e3ec --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp @@ -0,0 +1,219 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLSL { +namespace { +constexpr std::string_view SWIZZLE{"xyzw"}; +void CompositeInsert(EmitContext& ctx, std::string_view result, std::string_view composite, + std::string_view object, u32 index) { + if (result == composite) { + // The result is aliased with the composite + ctx.Add("{}.{}={};", composite, SWIZZLE[index], object); + } else { + ctx.Add("{}={};{}.{}={};", result, composite, result, SWIZZLE[index], object); + } +} +} // Anonymous namespace + +void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2) { + ctx.AddU32x2("{}=uvec2({},{});", inst, e1, e2); +} + +void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2, std::string_view e3) { + ctx.AddU32x3("{}=uvec3({},{},{});", inst, e1, e2, e3); +} + +void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2, std::string_view e3, std::string_view e4) { + ctx.AddU32x4("{}=uvec4({},{},{},{});", inst, e1, e2, e3, e4); +} + +void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index) { + ctx.AddU32("{}={}.{};", inst, composite, SWIZZLE[index]); +} + +void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index) { + ctx.AddU32("{}={}.{};", inst, composite, SWIZZLE[index]); +} + +void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index) { + ctx.AddU32("{}={}.{};", inst, composite, SWIZZLE[index]); +} + +void EmitCompositeInsertU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index) { + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x2)}; + CompositeInsert(ctx, ret, composite, object, index); +} + +void EmitCompositeInsertU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index) { + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x3)}; + CompositeInsert(ctx, ret, composite, object, index); +} + +void EmitCompositeInsertU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index) { + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x4)}; + CompositeInsert(ctx, ret, composite, object, index); +} + +void EmitCompositeConstructF16x2([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view e1, + [[maybe_unused]] std::string_view e2) { + NotImplemented(); +} 
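The CompositeInsert helper near the top of this file chooses between two emissions depending on whether the destination variable aliases the composite operand, skipping the copy when they are the same. A small sketch of the two shapes, using hypothetical VarAlloc names t1, t2 and t3:

// Sketch only (not part of the diff): the two GLSL forms CompositeInsert can
// produce; t1, t2 and t3 are placeholder names, not real allocator output.
#include <cstdio>
#include <string>
#include <string_view>
#include <fmt/format.h>

int main() {
    constexpr std::string_view SWIZZLE{"xyzw"};
    const std::string composite{"t1"}, object{"t2"}, result{"t3"};
    const unsigned index{2};
    // Result aliases the composite: overwrite the selected element in place.
    std::puts(fmt::format("{}.{}={};", composite, SWIZZLE[index], object).c_str());
    // Distinct result: copy the whole composite, then overwrite one element.
    std::puts(
        fmt::format("{}={};{}.{}={};", result, composite, result, SWIZZLE[index], object).c_str());
}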
+ +void EmitCompositeConstructF16x3([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view e1, + [[maybe_unused]] std::string_view e2, + [[maybe_unused]] std::string_view e3) { + NotImplemented(); +} + +void EmitCompositeConstructF16x4([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view e1, + [[maybe_unused]] std::string_view e2, + [[maybe_unused]] std::string_view e3, + [[maybe_unused]] std::string_view e4) { + NotImplemented(); +} + +void EmitCompositeExtractF16x2([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view composite, + [[maybe_unused]] u32 index) { + NotImplemented(); +} + +void EmitCompositeExtractF16x3([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view composite, + [[maybe_unused]] u32 index) { + NotImplemented(); +} + +void EmitCompositeExtractF16x4([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view composite, + [[maybe_unused]] u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertF16x2([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view composite, + [[maybe_unused]] std::string_view object, + [[maybe_unused]] u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertF16x3([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view composite, + [[maybe_unused]] std::string_view object, + [[maybe_unused]] u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertF16x4([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view composite, + [[maybe_unused]] std::string_view object, + [[maybe_unused]] u32 index) { + NotImplemented(); +} + +void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2) { + ctx.AddF32x2("{}=vec2({},{});", inst, e1, e2); +} + +void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2, std::string_view e3) { + ctx.AddF32x3("{}=vec3({},{},{});", inst, e1, e2, e3); +} + +void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2, std::string_view e3, std::string_view e4) { + ctx.AddF32x4("{}=vec4({},{},{},{});", inst, e1, e2, e3, e4); +} + +void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index) { + ctx.AddF32("{}={}.{};", inst, composite, SWIZZLE[index]); +} + +void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index) { + ctx.AddF32("{}={}.{};", inst, composite, SWIZZLE[index]); +} + +void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index) { + ctx.AddF32("{}={}.{};", inst, composite, SWIZZLE[index]); +} + +void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index) { + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32x2)}; + CompositeInsert(ctx, ret, composite, object, index); +} + +void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index) { + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32x3)}; + CompositeInsert(ctx, ret, composite, object, index); +} + +void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index) { + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; + CompositeInsert(ctx, ret, composite, object, index); +} + +void 
EmitCompositeConstructF64x2([[maybe_unused]] EmitContext& ctx) { + NotImplemented(); +} + +void EmitCompositeConstructF64x3([[maybe_unused]] EmitContext& ctx) { + NotImplemented(); +} + +void EmitCompositeConstructF64x4([[maybe_unused]] EmitContext& ctx) { + NotImplemented(); +} + +void EmitCompositeExtractF64x2([[maybe_unused]] EmitContext& ctx) { + NotImplemented(); +} + +void EmitCompositeExtractF64x3([[maybe_unused]] EmitContext& ctx) { + NotImplemented(); +} + +void EmitCompositeExtractF64x4([[maybe_unused]] EmitContext& ctx) { + NotImplemented(); +} + +void EmitCompositeInsertF64x2(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + ctx.Add("{}.{}={};", composite, SWIZZLE[index], object); +} + +void EmitCompositeInsertF64x3(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + ctx.Add("{}.{}={};", composite, SWIZZLE[index], object); +} + +void EmitCompositeInsertF64x4(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + ctx.Add("{}.{}={};", composite, SWIZZLE[index], object); +} +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp new file mode 100644 index 000000000..580063fa9 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -0,0 +1,456 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" + +namespace Shader::Backend::GLSL { +namespace { +constexpr char SWIZZLE[]{"xyzw"}; + +u32 CbufIndex(u32 offset) { + return (offset / 4) % 4; +} + +char OffsetSwizzle(u32 offset) { + return SWIZZLE[CbufIndex(offset)]; +} + +bool IsInputArray(Stage stage) { + return stage == Stage::Geometry || stage == Stage::TessellationControl || + stage == Stage::TessellationEval; +} + +std::string InputVertexIndex(EmitContext& ctx, std::string_view vertex) { + return IsInputArray(ctx.stage) ? fmt::format("[{}]", vertex) : ""; +} + +std::string_view OutputVertexIndex(EmitContext& ctx) { + return ctx.stage == Stage::TessellationControl ? "[gl_InvocationID]" : ""; +} + +void GetCbuf(EmitContext& ctx, std::string_view ret, const IR::Value& binding, + const IR::Value& offset, u32 num_bits, std::string_view cast = {}, + std::string_view bit_offset = {}) { + const bool is_immediate{offset.IsImmediate()}; + const bool component_indexing_bug{!is_immediate && ctx.profile.has_gl_component_indexing_bug}; + if (is_immediate) { + const s32 signed_offset{static_cast<s32>(offset.U32())}; + static constexpr u32 cbuf_size{0x10000}; + if (signed_offset < 0 || offset.U32() > cbuf_size) { + LOG_WARNING(Shader_GLSL, "Immediate constant buffer offset is out of bounds"); + ctx.Add("{}=0u;", ret); + return; + } + } + const auto offset_var{ctx.var_alloc.Consume(offset)}; + const auto index{is_immediate ? fmt::format("{}", offset.U32() / 16) + : fmt::format("{}>>4", offset_var)}; + const auto swizzle{is_immediate ? 
fmt::format(".{}", OffsetSwizzle(offset.U32())) + : fmt::format("[({}>>2)%4]", offset_var)}; + + const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; + const auto cbuf_cast{fmt::format("{}({}[{}]{{}})", cast, cbuf, index)}; + const auto extraction{num_bits == 32 ? cbuf_cast + : fmt ::format("bitfieldExtract({},int({}),{})", cbuf_cast, + bit_offset, num_bits)}; + if (!component_indexing_bug) { + const auto result{fmt::format(fmt::runtime(extraction), swizzle)}; + ctx.Add("{}={};", ret, result); + return; + } + const auto cbuf_offset{fmt::format("{}>>2", offset_var)}; + for (u32 i = 0; i < 4; ++i) { + const auto swizzle_string{fmt::format(".{}", "xyzw"[i])}; + const auto result{fmt::format(fmt::runtime(extraction), swizzle_string)}; + ctx.Add("if(({}&3)=={}){}={};", cbuf_offset, i, ret, result); + } +} + +void GetCbuf8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, + std::string_view cast) { + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; + if (offset.IsImmediate()) { + const auto bit_offset{fmt::format("{}", (offset.U32() % 4) * 8)}; + GetCbuf(ctx, ret, binding, offset, 8, cast, bit_offset); + } else { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + const auto bit_offset{fmt::format("({}%4)*8", offset_var)}; + GetCbuf(ctx, ret, binding, offset, 8, cast, bit_offset); + } +} + +void GetCbuf16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, + std::string_view cast) { + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; + if (offset.IsImmediate()) { + const auto bit_offset{fmt::format("{}", ((offset.U32() / 2) % 2) * 16)}; + GetCbuf(ctx, ret, binding, offset, 16, cast, bit_offset); + } else { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + const auto bit_offset{fmt::format("(({}>>1)%2)*16", offset_var)}; + GetCbuf(ctx, ret, binding, offset, 16, cast, bit_offset); + } +} + +u32 TexCoordIndex(IR::Attribute attr) { + return (static_cast<u32>(attr) - static_cast<u32>(IR::Attribute::FixedFncTexture0S)) / 4; +} +} // Anonymous namespace + +void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + GetCbuf8(ctx, inst, binding, offset, "ftou"); +} + +void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + GetCbuf8(ctx, inst, binding, offset, "ftoi"); +} + +void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + GetCbuf16(ctx, inst, binding, offset, "ftou"); +} + +void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + GetCbuf16(ctx, inst, binding, offset, "ftoi"); +} + +void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; + GetCbuf(ctx, ret, binding, offset, 32, "ftou"); +} + +void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32)}; + GetCbuf(ctx, ret, binding, offset, 32); +} + +void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; + if (offset.IsImmediate()) { + static constexpr u32 cbuf_size{0x10000}; + const u32 u32_offset{offset.U32()}; + const s32 
signed_offset{static_cast<s32>(offset.U32())}; + if (signed_offset < 0 || u32_offset > cbuf_size) { + LOG_WARNING(Shader_GLSL, "Immediate constant buffer offset is out of bounds"); + ctx.AddU32x2("{}=uvec2(0u);", inst); + return; + } + if (u32_offset % 2 == 0) { + ctx.AddU32x2("{}=ftou({}[{}].{}{});", inst, cbuf, u32_offset / 16, + OffsetSwizzle(u32_offset), OffsetSwizzle(u32_offset + 4)); + } else { + ctx.AddU32x2("{}=uvec2(ftou({}[{}].{}),ftou({}[{}].{}));", inst, cbuf, u32_offset / 16, + OffsetSwizzle(u32_offset), cbuf, (u32_offset + 4) / 16, + OffsetSwizzle(u32_offset + 4)); + } + return; + } + const auto offset_var{ctx.var_alloc.Consume(offset)}; + if (!ctx.profile.has_gl_component_indexing_bug) { + ctx.AddU32x2("{}=uvec2(ftou({}[{}>>4][({}>>2)%4]),ftou({}[({}+4)>>4][(({}+4)>>2)%4]));", + inst, cbuf, offset_var, offset_var, cbuf, offset_var, offset_var); + return; + } + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x2)}; + const auto cbuf_offset{fmt::format("{}>>2", offset_var)}; + for (u32 swizzle = 0; swizzle < 4; ++swizzle) { + ctx.Add("if(({}&3)=={}){}=uvec2(ftou({}[{}>>4].{}),ftou({}[({}+4)>>4].{}));", cbuf_offset, + swizzle, ret, cbuf, offset_var, "xyzw"[swizzle], cbuf, offset_var, + "xyzw"[(swizzle + 1) % 4]); + } +} + +void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, + std::string_view vertex) { + const u32 element{static_cast<u32>(attr) % 4}; + const char swizzle{"xyzw"[element]}; + if (IR::IsGeneric(attr)) { + const u32 index{IR::GenericAttributeIndex(attr)}; + if (!ctx.runtime_info.previous_stage_stores.Generic(index, element)) { + if (element == 3) { + ctx.AddF32("{}=1.f;", inst, attr); + } else { + ctx.AddF32("{}=0.f;", inst, attr); + } + return; + } + ctx.AddF32("{}=in_attr{}{}.{};", inst, index, InputVertexIndex(ctx, vertex), swizzle); + return; + } + // GLSL only exposes 8 legacy texcoords + if (attr >= IR::Attribute::FixedFncTexture8S && attr <= IR::Attribute::FixedFncTexture9Q) { + LOG_WARNING(Shader_GLSL, "GLSL does not allow access to gl_TexCoord[{}]", + TexCoordIndex(attr)); + ctx.AddF32("{}=0.f;", inst); + return; + } + if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture7Q) { + const u32 index{TexCoordIndex(attr)}; + ctx.AddF32("{}=gl_TexCoord[{}].{};", inst, index, swizzle); + return; + } + switch (attr) { + case IR::Attribute::PrimitiveId: + ctx.AddF32("{}=itof(gl_PrimitiveID);", inst); + break; + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: { + const bool is_array{IsInputArray(ctx.stage)}; + const auto input_decorator{is_array ? 
fmt::format("gl_in[{}].", vertex) : ""}; + ctx.AddF32("{}={}{}.{};", inst, input_decorator, ctx.position_name, swizzle); + break; + } + case IR::Attribute::ColorFrontDiffuseR: + case IR::Attribute::ColorFrontDiffuseG: + case IR::Attribute::ColorFrontDiffuseB: + case IR::Attribute::ColorFrontDiffuseA: + if (ctx.stage == Stage::Fragment) { + ctx.AddF32("{}=gl_Color.{};", inst, swizzle); + } else { + ctx.AddF32("{}=gl_FrontColor.{};", inst, swizzle); + } + break; + case IR::Attribute::PointSpriteS: + case IR::Attribute::PointSpriteT: + ctx.AddF32("{}=gl_PointCoord.{};", inst, swizzle); + break; + case IR::Attribute::TessellationEvaluationPointU: + case IR::Attribute::TessellationEvaluationPointV: + ctx.AddF32("{}=gl_TessCoord.{};", inst, swizzle); + break; + case IR::Attribute::InstanceId: + ctx.AddF32("{}=itof(gl_InstanceID);", inst); + break; + case IR::Attribute::VertexId: + ctx.AddF32("{}=itof(gl_VertexID);", inst); + break; + case IR::Attribute::FrontFace: + ctx.AddF32("{}=itof(gl_FrontFacing?-1:0);", inst); + break; + default: + throw NotImplementedException("Get attribute {}", attr); + } +} + +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value, + [[maybe_unused]] std::string_view vertex) { + if (IR::IsGeneric(attr)) { + const u32 index{IR::GenericAttributeIndex(attr)}; + const u32 attr_element{IR::GenericAttributeElement(attr)}; + const GenericElementInfo& info{ctx.output_generics.at(index).at(attr_element)}; + const auto output_decorator{OutputVertexIndex(ctx)}; + if (info.num_components == 1) { + ctx.Add("{}{}={};", info.name, output_decorator, value); + } else { + const u32 index_element{attr_element - info.first_element}; + ctx.Add("{}{}.{}={};", info.name, output_decorator, "xyzw"[index_element], value); + } + return; + } + const u32 element{static_cast<u32>(attr) % 4}; + const char swizzle{"xyzw"[element]}; + // GLSL only exposes 8 legacy texcoords + if (attr >= IR::Attribute::FixedFncTexture8S && attr <= IR::Attribute::FixedFncTexture9Q) { + LOG_WARNING(Shader_GLSL, "GLSL does not allow access to gl_TexCoord[{}]", + TexCoordIndex(attr)); + return; + } + if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture7Q) { + const u32 index{TexCoordIndex(attr)}; + ctx.Add("gl_TexCoord[{}].{}={};", index, swizzle, value); + return; + } + switch (attr) { + case IR::Attribute::Layer: + if (ctx.stage != Stage::Geometry && + !ctx.profile.support_viewport_index_layer_non_geometry) { + LOG_WARNING(Shader_GLSL, "Shader stores viewport layer but device does not support " + "viewport layer extension"); + break; + } + ctx.Add("gl_Layer=ftoi({});", value); + break; + case IR::Attribute::ViewportIndex: + if (ctx.stage != Stage::Geometry && + !ctx.profile.support_viewport_index_layer_non_geometry) { + LOG_WARNING(Shader_GLSL, "Shader stores viewport index but device does not support " + "viewport layer extension"); + break; + } + ctx.Add("gl_ViewportIndex=ftoi({});", value); + break; + case IR::Attribute::ViewportMask: + if (ctx.stage != Stage::Geometry && !ctx.profile.support_viewport_mask) { + LOG_WARNING( + Shader_GLSL, + "Shader stores viewport mask but device does not support viewport mask extension"); + break; + } + ctx.Add("gl_ViewportMask[0]=ftoi({});", value); + break; + case IR::Attribute::PointSize: + ctx.Add("gl_PointSize={};", value); + break; + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: + ctx.Add("gl_Position.{}={};", swizzle, value); + 
break; + case IR::Attribute::ColorFrontDiffuseR: + case IR::Attribute::ColorFrontDiffuseG: + case IR::Attribute::ColorFrontDiffuseB: + case IR::Attribute::ColorFrontDiffuseA: + ctx.Add("gl_FrontColor.{}={};", swizzle, value); + break; + case IR::Attribute::ColorFrontSpecularR: + case IR::Attribute::ColorFrontSpecularG: + case IR::Attribute::ColorFrontSpecularB: + case IR::Attribute::ColorFrontSpecularA: + ctx.Add("gl_FrontSecondaryColor.{}={};", swizzle, value); + break; + case IR::Attribute::ColorBackDiffuseR: + case IR::Attribute::ColorBackDiffuseG: + case IR::Attribute::ColorBackDiffuseB: + case IR::Attribute::ColorBackDiffuseA: + ctx.Add("gl_BackColor.{}={};", swizzle, value); + break; + case IR::Attribute::ColorBackSpecularR: + case IR::Attribute::ColorBackSpecularG: + case IR::Attribute::ColorBackSpecularB: + case IR::Attribute::ColorBackSpecularA: + ctx.Add("gl_BackSecondaryColor.{}={};", swizzle, value); + break; + case IR::Attribute::FogCoordinate: + ctx.Add("gl_FogFragCoord={};", value); + break; + case IR::Attribute::ClipDistance0: + case IR::Attribute::ClipDistance1: + case IR::Attribute::ClipDistance2: + case IR::Attribute::ClipDistance3: + case IR::Attribute::ClipDistance4: + case IR::Attribute::ClipDistance5: + case IR::Attribute::ClipDistance6: + case IR::Attribute::ClipDistance7: { + const u32 index{static_cast<u32>(attr) - static_cast<u32>(IR::Attribute::ClipDistance0)}; + ctx.Add("gl_ClipDistance[{}]={};", index, value); + break; + } + default: + throw NotImplementedException("Set attribute {}", attr); + } +} + +void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, std::string_view offset, + std::string_view vertex) { + const bool is_array{ctx.stage == Stage::Geometry}; + const auto vertex_arg{is_array ? fmt::format(",{}", vertex) : ""}; + ctx.AddF32("{}=IndexedAttrLoad(int({}){});", inst, offset, vertex_arg); +} + +void EmitSetAttributeIndexed([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view offset, + [[maybe_unused]] std::string_view value, + [[maybe_unused]] std::string_view vertex) { + NotImplemented(); +} + +void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch) { + if (!IR::IsGeneric(patch)) { + throw NotImplementedException("Non-generic patch load"); + } + const u32 index{IR::GenericPatchIndex(patch)}; + const u32 element{IR::GenericPatchElement(patch)}; + const char swizzle{"xyzw"[element]}; + ctx.AddF32("{}=patch{}.{};", inst, index, swizzle); +} + +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, std::string_view value) { + if (IR::IsGeneric(patch)) { + const u32 index{IR::GenericPatchIndex(patch)}; + const u32 element{IR::GenericPatchElement(patch)}; + ctx.Add("patch{}.{}={};", index, "xyzw"[element], value); + return; + } + switch (patch) { + case IR::Patch::TessellationLodLeft: + case IR::Patch::TessellationLodRight: + case IR::Patch::TessellationLodTop: + case IR::Patch::TessellationLodBottom: { + const u32 index{static_cast<u32>(patch) - u32(IR::Patch::TessellationLodLeft)}; + ctx.Add("gl_TessLevelOuter[{}]={};", index, value); + break; + } + case IR::Patch::TessellationLodInteriorU: + ctx.Add("gl_TessLevelInner[0]={};", value); + break; + case IR::Patch::TessellationLodInteriorV: + ctx.Add("gl_TessLevelInner[1]={};", value); + break; + default: + throw NotImplementedException("Patch {}", patch); + } +} + +void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string_view value) { + const char swizzle{"xyzw"[component]}; + ctx.Add("frag_color{}.{}={};", index, swizzle, value); +} + +void 
EmitSetSampleMask(EmitContext& ctx, std::string_view value) { + ctx.Add("gl_SampleMask[0]=int({});", value); +} + +void EmitSetFragDepth(EmitContext& ctx, std::string_view value) { + ctx.Add("gl_FragDepth={};", value); +} + +void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32x3("{}=gl_LocalInvocationID;", inst); +} + +void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32x3("{}=gl_WorkGroupID;", inst); +} + +void EmitInvocationId(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=uint(gl_InvocationID);", inst); +} + +void EmitSampleId(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=uint(gl_SampleID);", inst); +} + +void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU1("{}=gl_HelperInvocation;", inst); +} + +void EmitYDirection(EmitContext& ctx, IR::Inst& inst) { + ctx.uses_y_direction = true; + ctx.AddF32("{}=gl_FrontMaterial.ambient.a;", inst); +} + +void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, std::string_view word_offset) { + ctx.AddU32("{}=lmem[{}];", inst, word_offset); +} + +void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value) { + ctx.Add("lmem[{}]={};", word_offset, value); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_control_flow.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_control_flow.cpp new file mode 100644 index 000000000..53f8896be --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_control_flow.cpp @@ -0,0 +1,21 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/exception.h" + +namespace Shader::Backend::GLSL { + +void EmitJoin(EmitContext&) { + throw NotImplementedException("Join shouldn't be emitted"); +} + +void EmitDemoteToHelperInvocation(EmitContext& ctx) { + ctx.Add("discard;"); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp new file mode 100644 index 000000000..eeae6562c --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp @@ -0,0 +1,230 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLSL { +void EmitConvertS16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=(int({})&0xffff)|(bitfieldExtract(int({}),31,1)<<15);", inst, value, value); +} + +void EmitConvertS16F64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertS32F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=int({});", inst, value); +} + +void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=int({});", inst, value); +} + +void EmitConvertS64F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU64("{}=int64_t({});", inst, value); +} + +void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU64("{}=int64_t({});", inst, value); +} + +void EmitConvertU16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertU16F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertU16F64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertU32F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=uint({});", inst, value); +} + +void EmitConvertU32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=uint({});", inst, value); +} + +void EmitConvertU64F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU64("{}=uint64_t({});", inst, value); +} + +void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU64("{}=uint64_t({});", inst, value); +} + +void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU64("{}=uint64_t({});", inst, value); +} + +void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=uint({});", inst, value); +} + +void EmitConvertF16F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void 
EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=float({});", inst, value); +} + +void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=double({});", inst, value); +} + +void EmitConvertF16S8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16S16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16S32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16S64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16U8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16U32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16U64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32S8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32S16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32S32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=float(int({}));", inst, value); +} + +void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=float(int64_t({}));", inst, value); +} + +void EmitConvertF32U8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=float({}&0xffff);", inst, value); +} + +void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=float({});", inst, value); +} + +void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=float({});", inst, value); +} + +void EmitConvertF64S8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64S16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=double(int({}));", inst, value); +} + +void EmitConvertF64S64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=double(int64_t({}));", inst, value); +} + +void EmitConvertF64U8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64U16([[maybe_unused]] EmitContext& ctx, 
[[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=double({});", inst, value); +} + +void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=double({});", inst, value); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp new file mode 100644 index 000000000..d423bfb1b --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp @@ -0,0 +1,456 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLSL { +namespace { +void Compare(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs, + std::string_view op, bool ordered) { + const auto nan_op{ordered ? "&&!" : "||"}; + ctx.AddU1("{}={}{}{}" + "{}isnan({}){}isnan({});", + inst, lhs, op, rhs, nan_op, lhs, nan_op, rhs); +} + +bool IsPrecise(const IR::Inst& inst) { + return inst.Flags<IR::FpControl>().no_contraction; +} +} // Anonymous namespace + +void EmitFPAbs16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=abs({});", inst, value); +} + +void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=abs({});", inst, value); +} + +void EmitFPAdd16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { + NotImplemented(); +} + +void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + if (IsPrecise(inst)) { + ctx.AddPrecF32("{}={}+{};", inst, a, b); + } else { + ctx.AddF32("{}={}+{};", inst, a, b); + } +} + +void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + if (IsPrecise(inst)) { + ctx.AddPrecF64("{}={}+{};", inst, a, b); + } else { + ctx.AddF64("{}={}+{};", inst, a, b); + } +} + +void EmitFPFma16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b, + [[maybe_unused]] std::string_view c) { + NotImplemented(); +} + +void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, + std::string_view c) { + if (IsPrecise(inst)) { + ctx.AddPrecF32("{}=fma({},{},{});", inst, a, b, c); + } else { + ctx.AddF32("{}=fma({},{},{});", inst, a, b, c); + } +} + +void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, + std::string_view c) { + if (IsPrecise(inst)) { + ctx.AddPrecF64("{}=fma({},{},{});", inst, a, b, c); + } else { + ctx.AddF64("{}=fma({},{},{});", inst, a, b, c); + } +} + +void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddF32("{}=max({},{});", inst, a, b); +} + +void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, std::string_view 
a, std::string_view b) { + ctx.AddF64("{}=max({},{});", inst, a, b); +} + +void EmitFPMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddF32("{}=min({},{});", inst, a, b); +} + +void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddF64("{}=min({},{});", inst, a, b); +} + +void EmitFPMul16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { + NotImplemented(); +} + +void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + if (IsPrecise(inst)) { + ctx.AddPrecF32("{}={}*{};", inst, a, b); + } else { + ctx.AddF32("{}={}*{};", inst, a, b); + } +} + +void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + if (IsPrecise(inst)) { + ctx.AddPrecF64("{}={}*{};", inst, a, b); + } else { + ctx.AddF64("{}={}*{};", inst, a, b); + } +} + +void EmitFPNeg16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=-({});", inst, value); +} + +void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=-({});", inst, value); +} + +void EmitFPSin(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=sin({});", inst, value); +} + +void EmitFPCos(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=cos({});", inst, value); +} + +void EmitFPExp2(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=exp2({});", inst, value); +} + +void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=log2({});", inst, value); +} + +void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=(1.0f)/{};", inst, value); +} + +void EmitFPRecip64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=1.0/{};", inst, value); +} + +void EmitFPRecipSqrt32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + ctx.AddF32("{}=inversesqrt({});", inst, value); +} + +void EmitFPRecipSqrt64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=sqrt({});", inst, value); +} + +void EmitFPSaturate16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=min(max({},0.0),1.0);", inst, value); +} + +void EmitFPSaturate64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=min(max({},0.0),1.0);", inst, value); +} + +void EmitFPClamp16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value, + [[maybe_unused]] std::string_view min_value, + [[maybe_unused]] std::string_view max_value) { + NotImplemented(); +} + +void EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view min_value, std::string_view max_value) { + // GLSL's clamp does not produce desirable results + 
ctx.AddF32("{}=min(max({},float({})),float({}));", inst, value, min_value, max_value); +} + +void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view min_value, std::string_view max_value) { + // GLSL's clamp does not produce desirable results + ctx.AddF64("{}=min(max({},double({})),double({}));", inst, value, min_value, max_value); +} + +void EmitFPRoundEven16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=roundEven({});", inst, value); +} + +void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=roundEven({});", inst, value); +} + +void EmitFPFloor16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=floor({});", inst, value); +} + +void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=floor({});", inst, value); +} + +void EmitFPCeil16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=ceil({});", inst, value); +} + +void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=ceil({});", inst, value); +} + +void EmitFPTrunc16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=trunc({});", inst, value); +} + +void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=trunc({});", inst, value); +} + +void EmitFPOrdEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "==", true); +} + +void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "==", true); +} + +void EmitFPUnordEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "==", false); +} + +void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "==", false); +} + +void EmitFPOrdNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "!=", true); +} + +void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "!=", true); +} + +void EmitFPUnordNotEqual16([[maybe_unused]] EmitContext& 
ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "!=", false); +} + +void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "!=", false); +} + +void EmitFPOrdLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "<", true); +} + +void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "<", true); +} + +void EmitFPUnordLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "<", false); +} + +void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "<", false); +} + +void EmitFPOrdGreaterThan16([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, ">", true); +} + +void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, ">", true); +} + +void EmitFPUnordGreaterThan16([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, ">", false); +} + +void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, ">", false); +} + +void EmitFPOrdLessThanEqual16([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "<=", true); +} + +void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "<=", true); +} + +void EmitFPUnordLessThanEqual16([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "<=", false); +} + +void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "<=", false); +} + +void EmitFPOrdGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + 
NotImplemented(); +} + +void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, ">=", true); +} + +void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, ">=", true); +} + +void EmitFPUnordGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, ">=", false); +} + +void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, ">=", false); +} + +void EmitFPIsNan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + NotImplemented(); +} + +void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU1("{}=isnan({});", inst, value); +} + +void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU1("{}=isnan({});", inst, value); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp new file mode 100644 index 000000000..447eb8e0a --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -0,0 +1,799 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" + +namespace Shader::Backend::GLSL { +namespace { +std::string Texture(EmitContext& ctx, const IR::TextureInstInfo& info, const IR::Value& index) { + const auto def{info.type == TextureType::Buffer ? ctx.texture_buffers.at(info.descriptor_index) + : ctx.textures.at(info.descriptor_index)}; + const auto index_offset{def.count > 1 ? fmt::format("[{}]", ctx.var_alloc.Consume(index)) : ""}; + return fmt::format("tex{}{}", def.binding, index_offset); +} + +std::string Image(EmitContext& ctx, const IR::TextureInstInfo& info, const IR::Value& index) { + const auto def{info.type == TextureType::Buffer ? ctx.image_buffers.at(info.descriptor_index) + : ctx.images.at(info.descriptor_index)}; + const auto index_offset{def.count > 1 ? 
fmt::format("[{}]", ctx.var_alloc.Consume(index)) : ""}; + return fmt::format("img{}{}", def.binding, index_offset); +} + +std::string CastToIntVec(std::string_view value, const IR::TextureInstInfo& info) { + switch (info.type) { + case TextureType::Color1D: + case TextureType::Buffer: + return fmt::format("int({})", value); + case TextureType::ColorArray1D: + case TextureType::Color2D: + case TextureType::ColorArray2D: + return fmt::format("ivec2({})", value); + case TextureType::Color3D: + case TextureType::ColorCube: + return fmt::format("ivec3({})", value); + case TextureType::ColorArrayCube: + return fmt::format("ivec4({})", value); + default: + throw NotImplementedException("Integer cast for TextureType {}", info.type.Value()); + } +} + +std::string CoordsCastToInt(std::string_view value, const IR::TextureInstInfo& info) { + switch (info.type) { + case TextureType::Color1D: + case TextureType::Buffer: + return fmt::format("int({})", value); + case TextureType::ColorArray1D: + case TextureType::Color2D: + return fmt::format("ivec2({})", value); + case TextureType::ColorArray2D: + case TextureType::Color3D: + case TextureType::ColorCube: + return fmt::format("ivec3({})", value); + case TextureType::ColorArrayCube: + return fmt::format("ivec4({})", value); + default: + throw NotImplementedException("TexelFetchCast type {}", info.type.Value()); + } +} + +bool NeedsShadowLodExt(TextureType type) { + switch (type) { + case TextureType::ColorArray2D: + case TextureType::ColorCube: + case TextureType::ColorArrayCube: + return true; + default: + return false; + } +} + +std::string GetOffsetVec(EmitContext& ctx, const IR::Value& offset) { + if (offset.IsImmediate()) { + return fmt::format("int({})", offset.U32()); + } + IR::Inst* const inst{offset.InstRecursive()}; + if (inst->AreAllArgsImmediates()) { + switch (inst->GetOpcode()) { + case IR::Opcode::CompositeConstructU32x2: + return fmt::format("ivec2({},{})", inst->Arg(0).U32(), inst->Arg(1).U32()); + case IR::Opcode::CompositeConstructU32x3: + return fmt::format("ivec3({},{},{})", inst->Arg(0).U32(), inst->Arg(1).U32(), + inst->Arg(2).U32()); + case IR::Opcode::CompositeConstructU32x4: + return fmt::format("ivec4({},{},{},{})", inst->Arg(0).U32(), inst->Arg(1).U32(), + inst->Arg(2).U32(), inst->Arg(3).U32()); + default: + break; + } + } + const bool has_var_aoffi{ctx.profile.support_gl_variable_aoffi}; + if (!has_var_aoffi) { + LOG_WARNING(Shader_GLSL, "Device does not support variable texture offsets, STUBBING"); + } + const auto offset_str{has_var_aoffi ? 
ctx.var_alloc.Consume(offset) : "0"}; + switch (offset.Type()) { + case IR::Type::U32: + return fmt::format("int({})", offset_str); + case IR::Type::U32x2: + return fmt::format("ivec2({})", offset_str); + case IR::Type::U32x3: + return fmt::format("ivec3({})", offset_str); + case IR::Type::U32x4: + return fmt::format("ivec4({})", offset_str); + default: + throw NotImplementedException("Offset type {}", offset.Type()); + } +} + +std::string PtpOffsets(const IR::Value& offset, const IR::Value& offset2) { + const std::array values{offset.InstRecursive(), offset2.InstRecursive()}; + if (!values[0]->AreAllArgsImmediates() || !values[1]->AreAllArgsImmediates()) { + LOG_WARNING(Shader_GLSL, "Not all arguments in PTP are immediate, STUBBING"); + return "ivec2[](ivec2(0), ivec2(1), ivec2(2), ivec2(3))"; + } + const IR::Opcode opcode{values[0]->GetOpcode()}; + if (opcode != values[1]->GetOpcode() || opcode != IR::Opcode::CompositeConstructU32x4) { + throw LogicError("Invalid PTP arguments"); + } + auto read{[&](unsigned int a, unsigned int b) { return values[a]->Arg(b).U32(); }}; + + return fmt::format("ivec2[](ivec2({},{}),ivec2({},{}),ivec2({},{}),ivec2({},{}))", read(0, 0), + read(0, 1), read(0, 2), read(0, 3), read(1, 0), read(1, 1), read(1, 2), + read(1, 3)); +} + +IR::Inst* PrepareSparse(IR::Inst& inst) { + const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; + if (sparse_inst) { + sparse_inst->Invalidate(); + } + return sparse_inst; +} +} // Anonymous namespace + +void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view bias_lc, + const IR::Value& offset) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + if (info.has_lod_clamp) { + throw NotImplementedException("EmitImageSampleImplicitLod Lod clamp samples"); + } + const auto texture{Texture(ctx, info, index)}; + const auto bias{info.has_bias ? fmt::format(",{}", bias_lc) : ""}; + const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; + const auto sparse_inst{PrepareSparse(inst)}; + const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; + if (sparse_inst && !supports_sparse) { + LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. 
STUBBING"); + ctx.AddU1("{}=true;", *sparse_inst); + } + if (!sparse_inst || !supports_sparse) { + if (!offset.IsEmpty()) { + const auto offset_str{GetOffsetVec(ctx, offset)}; + if (ctx.stage == Stage::Fragment) { + ctx.Add("{}=textureOffset({},{},{}{});", texel, texture, coords, offset_str, bias); + } else { + ctx.Add("{}=textureLodOffset({},{},0.0,{});", texel, texture, coords, offset_str); + } + } else { + if (ctx.stage == Stage::Fragment) { + ctx.Add("{}=texture({},{}{});", texel, texture, coords, bias); + } else { + ctx.Add("{}=textureLod({},{},0.0);", texel, texture, coords); + } + } + return; + } + if (!offset.IsEmpty()) { + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureOffsetARB({},{},{},{}{}));", + *sparse_inst, texture, coords, GetOffsetVec(ctx, offset), texel, bias); + } else { + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureARB({},{},{}{}));", *sparse_inst, + texture, coords, texel, bias); + } +} + +void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view lod_lc, + const IR::Value& offset) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + if (info.has_bias) { + throw NotImplementedException("EmitImageSampleExplicitLod Bias texture samples"); + } + if (info.has_lod_clamp) { + throw NotImplementedException("EmitImageSampleExplicitLod Lod clamp samples"); + } + const auto texture{Texture(ctx, info, index)}; + const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; + const auto sparse_inst{PrepareSparse(inst)}; + const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; + if (sparse_inst && !supports_sparse) { + LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING"); + ctx.AddU1("{}=true;", *sparse_inst); + } + if (!sparse_inst || !supports_sparse) { + if (!offset.IsEmpty()) { + ctx.Add("{}=textureLodOffset({},{},{},{});", texel, texture, coords, lod_lc, + GetOffsetVec(ctx, offset)); + } else { + ctx.Add("{}=textureLod({},{},{});", texel, texture, coords, lod_lc); + } + return; + } + if (!offset.IsEmpty()) { + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchOffsetARB({},{},int({}),{},{}));", + *sparse_inst, texture, CastToIntVec(coords, info), lod_lc, + GetOffsetVec(ctx, offset), texel); + } else { + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureLodARB({},{},{},{}));", *sparse_inst, + texture, coords, lod_lc, texel); + } +} + +void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view dref, + std::string_view bias_lc, const IR::Value& offset) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto sparse_inst{PrepareSparse(inst)}; + if (sparse_inst) { + throw NotImplementedException("EmitImageSampleDrefImplicitLod Sparse texture samples"); + } + if (info.has_bias) { + throw NotImplementedException("EmitImageSampleDrefImplicitLod Bias texture samples"); + } + if (info.has_lod_clamp) { + throw NotImplementedException("EmitImageSampleDrefImplicitLod Lod clamp samples"); + } + const auto texture{Texture(ctx, info, index)}; + const auto bias{info.has_bias ? fmt::format(",{}", bias_lc) : ""}; + const bool needs_shadow_ext{NeedsShadowLodExt(info.type)}; + const auto cast{needs_shadow_ext ? "vec4" : "vec3"}; + const bool use_grad{!ctx.profile.support_gl_texture_shadow_lod && + ctx.stage != Stage::Fragment && needs_shadow_ext}; + if (use_grad) { + LOG_WARNING(Shader_GLSL, + "Device lacks GL_EXT_texture_shadow_lod. 
Using textureGrad fallback"); + if (info.type == TextureType::ColorArrayCube) { + LOG_WARNING(Shader_GLSL, "textureGrad does not support ColorArrayCube. Stubbing"); + ctx.AddF32("{}=0.0f;", inst); + return; + } + const auto d_cast{info.type == TextureType::ColorArray2D ? "vec2" : "vec3"}; + ctx.AddF32("{}=textureGrad({},{}({},{}),{}(0),{}(0));", inst, texture, cast, coords, dref, + d_cast, d_cast); + return; + } + if (!offset.IsEmpty()) { + const auto offset_str{GetOffsetVec(ctx, offset)}; + if (ctx.stage == Stage::Fragment) { + ctx.AddF32("{}=textureOffset({},{}({},{}),{}{});", inst, texture, cast, coords, dref, + offset_str, bias); + } else { + ctx.AddF32("{}=textureLodOffset({},{}({},{}),0.0,{});", inst, texture, cast, coords, + dref, offset_str); + } + } else { + if (ctx.stage == Stage::Fragment) { + if (info.type == TextureType::ColorArrayCube) { + ctx.AddF32("{}=texture({},vec4({}),{});", inst, texture, coords, dref); + } else { + ctx.AddF32("{}=texture({},{}({},{}){});", inst, texture, cast, coords, dref, bias); + } + } else { + ctx.AddF32("{}=textureLod({},{}({},{}),0.0);", inst, texture, cast, coords, dref); + } + } +} + +void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view dref, + std::string_view lod_lc, const IR::Value& offset) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto sparse_inst{PrepareSparse(inst)}; + if (sparse_inst) { + throw NotImplementedException("EmitImageSampleDrefExplicitLod Sparse texture samples"); + } + if (info.has_bias) { + throw NotImplementedException("EmitImageSampleDrefExplicitLod Bias texture samples"); + } + if (info.has_lod_clamp) { + throw NotImplementedException("EmitImageSampleDrefExplicitLod Lod clamp samples"); + } + const auto texture{Texture(ctx, info, index)}; + const bool needs_shadow_ext{NeedsShadowLodExt(info.type)}; + const bool use_grad{!ctx.profile.support_gl_texture_shadow_lod && needs_shadow_ext}; + const auto cast{needs_shadow_ext ? "vec4" : "vec3"}; + if (use_grad) { + LOG_WARNING(Shader_GLSL, + "Device lacks GL_EXT_texture_shadow_lod. Using textureGrad fallback"); + if (info.type == TextureType::ColorArrayCube) { + LOG_WARNING(Shader_GLSL, "textureGrad does not support ColorArrayCube. Stubbing"); + ctx.AddF32("{}=0.0f;", inst); + return; + } + const auto d_cast{info.type == TextureType::ColorArray2D ? 
"vec2" : "vec3"}; + ctx.AddF32("{}=textureGrad({},{}({},{}),{}(0),{}(0));", inst, texture, cast, coords, dref, + d_cast, d_cast); + return; + } + if (!offset.IsEmpty()) { + const auto offset_str{GetOffsetVec(ctx, offset)}; + if (info.type == TextureType::ColorArrayCube) { + ctx.AddF32("{}=textureLodOffset({},{},{},{},{});", inst, texture, coords, dref, lod_lc, + offset_str); + } else { + ctx.AddF32("{}=textureLodOffset({},{}({},{}),{},{});", inst, texture, cast, coords, + dref, lod_lc, offset_str); + } + } else { + if (info.type == TextureType::ColorArrayCube) { + ctx.AddF32("{}=textureLod({},{},{},{});", inst, texture, coords, dref, lod_lc); + } else { + ctx.AddF32("{}=textureLod({},{}({},{}),{});", inst, texture, cast, coords, dref, + lod_lc); + } + } +} + +void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, const IR::Value& offset, const IR::Value& offset2) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto texture{Texture(ctx, info, index)}; + const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; + const auto sparse_inst{PrepareSparse(inst)}; + const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; + if (sparse_inst && !supports_sparse) { + LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING"); + ctx.AddU1("{}=true;", *sparse_inst); + } + if (!sparse_inst || !supports_sparse) { + if (offset.IsEmpty()) { + ctx.Add("{}=textureGather({},{},int({}));", texel, texture, coords, + info.gather_component); + return; + } + if (offset2.IsEmpty()) { + ctx.Add("{}=textureGatherOffset({},{},{},int({}));", texel, texture, coords, + GetOffsetVec(ctx, offset), info.gather_component); + return; + } + // PTP + const auto offsets{PtpOffsets(offset, offset2)}; + ctx.Add("{}=textureGatherOffsets({},{},{},int({}));", texel, texture, coords, offsets, + info.gather_component); + return; + } + if (offset.IsEmpty()) { + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherARB({},{},{},int({})));", + *sparse_inst, texture, coords, texel, info.gather_component); + return; + } + if (offset2.IsEmpty()) { + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetARB({},{},{},{},int({})));", + *sparse_inst, texture, CastToIntVec(coords, info), GetOffsetVec(ctx, offset), + texel, info.gather_component); + return; + } + // PTP + const auto offsets{PtpOffsets(offset, offset2)}; + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetARB({},{},{},{},int({})));", + *sparse_inst, texture, CastToIntVec(coords, info), offsets, texel, + info.gather_component); +} + +void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, const IR::Value& offset, const IR::Value& offset2, + std::string_view dref) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto texture{Texture(ctx, info, index)}; + const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; + const auto sparse_inst{PrepareSparse(inst)}; + const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; + if (sparse_inst && !supports_sparse) { + LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. 
STUBBING"); + ctx.AddU1("{}=true;", *sparse_inst); + } + if (!sparse_inst || !supports_sparse) { + if (offset.IsEmpty()) { + ctx.Add("{}=textureGather({},{},{});", texel, texture, coords, dref); + return; + } + if (offset2.IsEmpty()) { + ctx.Add("{}=textureGatherOffset({},{},{},{});", texel, texture, coords, dref, + GetOffsetVec(ctx, offset)); + return; + } + // PTP + const auto offsets{PtpOffsets(offset, offset2)}; + ctx.Add("{}=textureGatherOffsets({},{},{},{});", texel, texture, coords, dref, offsets); + return; + } + if (offset.IsEmpty()) { + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherARB({},{},{},{}));", *sparse_inst, + texture, coords, dref, texel); + return; + } + if (offset2.IsEmpty()) { + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetARB({},{},{},,{},{}));", + *sparse_inst, texture, CastToIntVec(coords, info), dref, + GetOffsetVec(ctx, offset), texel); + return; + } + // PTP + const auto offsets{PtpOffsets(offset, offset2)}; + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetARB({},{},{},,{},{}));", + *sparse_inst, texture, CastToIntVec(coords, info), dref, offsets, texel); +} + +void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view offset, std::string_view lod, + [[maybe_unused]] std::string_view ms) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + if (info.has_bias) { + throw NotImplementedException("EmitImageFetch Bias texture samples"); + } + if (info.has_lod_clamp) { + throw NotImplementedException("EmitImageFetch Lod clamp samples"); + } + const auto texture{Texture(ctx, info, index)}; + const auto sparse_inst{PrepareSparse(inst)}; + const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; + const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; + if (sparse_inst && !supports_sparse) { + LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. 
STUBBING"); + ctx.AddU1("{}=true;", *sparse_inst); + } + if (!sparse_inst || !supports_sparse) { + if (!offset.empty()) { + ctx.Add("{}=texelFetchOffset({},{},int({}),{});", texel, texture, + CoordsCastToInt(coords, info), lod, CoordsCastToInt(offset, info)); + } else { + if (info.type == TextureType::Buffer) { + ctx.Add("{}=texelFetch({},int({}));", texel, texture, coords); + } else { + ctx.Add("{}=texelFetch({},{},int({}));", texel, texture, + CoordsCastToInt(coords, info), lod); + } + } + return; + } + if (!offset.empty()) { + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchOffsetARB({},{},int({}),{},{}));", + *sparse_inst, texture, CastToIntVec(coords, info), lod, + CastToIntVec(offset, info), texel); + } else { + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchARB({},{},int({}),{}));", + *sparse_inst, texture, CastToIntVec(coords, info), lod, texel); + } +} + +void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view lod) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto texture{Texture(ctx, info, index)}; + switch (info.type) { + case TextureType::Color1D: + return ctx.AddU32x4( + "{}=uvec4(uint(textureSize({},int({}))),0u,0u,uint(textureQueryLevels({})));", inst, + texture, lod, texture); + case TextureType::ColorArray1D: + case TextureType::Color2D: + case TextureType::ColorCube: + return ctx.AddU32x4( + "{}=uvec4(uvec2(textureSize({},int({}))),0u,uint(textureQueryLevels({})));", inst, + texture, lod, texture); + case TextureType::ColorArray2D: + case TextureType::Color3D: + case TextureType::ColorArrayCube: + return ctx.AddU32x4( + "{}=uvec4(uvec3(textureSize({},int({}))),uint(textureQueryLevels({})));", inst, texture, + lod, texture); + case TextureType::Buffer: + throw NotImplementedException("EmitImageQueryDimensions Texture buffers"); + } + throw LogicError("Unspecified image type {}", info.type.Value()); +} + +void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto texture{Texture(ctx, info, index)}; + return ctx.AddF32x4("{}=vec4(textureQueryLod({},{}),0.0,0.0);", inst, texture, coords); +} + +void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, const IR::Value& derivatives, + const IR::Value& offset, [[maybe_unused]] const IR::Value& lod_clamp) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + if (info.has_lod_clamp) { + throw NotImplementedException("EmitImageGradient Lod clamp samples"); + } + const auto sparse_inst{PrepareSparse(inst)}; + if (sparse_inst) { + throw NotImplementedException("EmitImageGradient Sparse"); + } + if (!offset.IsEmpty()) { + throw NotImplementedException("EmitImageGradient offset"); + } + const auto texture{Texture(ctx, info, index)}; + const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; + const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp}; + const auto derivatives_vec{ctx.var_alloc.Consume(derivatives)}; + if (multi_component) { + ctx.Add("{}=textureGrad({},{},vec2({}.xz),vec2({}.yz));", texel, texture, coords, + derivatives_vec, derivatives_vec); + } else { + ctx.Add("{}=textureGrad({},{},float({}.x),float({}.y));", texel, texture, coords, + derivatives_vec, derivatives_vec); + } +} + +void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const 
auto sparse_inst{PrepareSparse(inst)}; + if (sparse_inst) { + throw NotImplementedException("EmitImageRead Sparse"); + } + const auto image{Image(ctx, info, index)}; + ctx.AddU32x4("{}=uvec4(imageLoad({},{}));", inst, image, CoordsCastToInt(coords, info)); +} + +void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view color) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto image{Image(ctx, info, index)}; + ctx.Add("imageStore({},{},{});", image, CoordsCastToInt(coords, info), color); +} + +void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto image{Image(ctx, info, index)}; + ctx.AddU32("{}=imageAtomicAdd({},{},{});", inst, image, CoordsCastToInt(coords, info), value); +} + +void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto image{Image(ctx, info, index)}; + ctx.AddU32("{}=imageAtomicMin({},{},int({}));", inst, image, CoordsCastToInt(coords, info), + value); +} + +void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto image{Image(ctx, info, index)}; + ctx.AddU32("{}=imageAtomicMin({},{},uint({}));", inst, image, CoordsCastToInt(coords, info), + value); +} + +void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto image{Image(ctx, info, index)}; + ctx.AddU32("{}=imageAtomicMax({},{},int({}));", inst, image, CoordsCastToInt(coords, info), + value); +} + +void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto image{Image(ctx, info, index)}; + ctx.AddU32("{}=imageAtomicMax({},{},uint({}));", inst, image, CoordsCastToInt(coords, info), + value); +} + +void EmitImageAtomicInc32(EmitContext&, IR::Inst&, const IR::Value&, std::string_view, + std::string_view) { + NotImplemented(); +} + +void EmitImageAtomicDec32(EmitContext&, IR::Inst&, const IR::Value&, std::string_view, + std::string_view) { + NotImplemented(); +} + +void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto image{Image(ctx, info, index)}; + ctx.AddU32("{}=imageAtomicAnd({},{},{});", inst, image, CoordsCastToInt(coords, info), value); +} + +void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto image{Image(ctx, info, index)}; + ctx.AddU32("{}=imageAtomicOr({},{},{});", inst, image, CoordsCastToInt(coords, info), value); +} + +void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto image{Image(ctx, info, index)}; + ctx.AddU32("{}=imageAtomicXor({},{},{});", inst, image, 
CoordsCastToInt(coords, info), value); +} + +void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto image{Image(ctx, info, index)}; + ctx.AddU32("{}=imageAtomicExchange({},{},{});", inst, image, CoordsCastToInt(coords, info), + value); +} + +void EmitBindlessImageSampleImplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageSampleExplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageSampleDrefImplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageSampleDrefExplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageGather(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageGatherDref(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageFetch(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageQueryDimensions(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageQueryLod(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageGradient(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageRead(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageWrite(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageSampleImplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageSampleExplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageSampleDrefImplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageSampleDrefExplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageGather(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageGatherDref(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageFetch(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageQueryDimensions(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageQueryLod(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageGradient(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageRead(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageWrite(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicIAdd32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicSMin32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicUMin32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicSMax32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicUMax32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicInc32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicDec32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicAnd32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicOr32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicXor32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicExchange32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicIAdd32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicSMin32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicUMin32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicSMax32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicUMax32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicInc32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicDec32(EmitContext&) { + NotImplemented(); +} + 
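Illustrative aside, not part of the patch: the EmitImageAtomic* emitters defined a little earlier in this file each append one formatted GLSL statement, and they pick the signed or unsigned imageAtomic overload by casting the value operand. A minimal standalone sketch of how those format strings expand, with invented binding and SSA names standing in for what the variable allocator and CoordsCastToInt would produce:

// Standalone sketch of the GLSL these emitters append; assumes only {fmt}.
#include <fmt/format.h>

int main() {
    const char* inst = "ssa_9";            // result variable (invented name)
    const char* image = "img2";            // image binding (invented name)
    const char* coords = "ivec2(coords)";  // stand-in for CoordsCastToInt(...) on a 2D image
    const char* value = "ssa_8";

    // EmitImageAtomicIAdd32 -> ssa_9=imageAtomicAdd(img2,ivec2(coords),ssa_8);
    fmt::print("{}=imageAtomicAdd({},{},{});\n", inst, image, coords, value);
    // EmitImageAtomicSMin32 casts the value to int so GLSL selects the signed overload:
    fmt::print("{}=imageAtomicMin({},{},int({}));\n", inst, image, coords, value);
    // EmitImageAtomicUMax32 casts to uint for the unsigned overload:
    fmt::print("{}=imageAtomicMax({},{},uint({}));\n", inst, image, coords, value);
    return 0;
}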
+void EmitBoundImageAtomicAnd32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicOr32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicXor32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicExchange32(EmitContext&) { + NotImplemented(); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h new file mode 100644 index 000000000..5936d086f --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -0,0 +1,702 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <string_view> + +#include "common/common_types.h" + +namespace Shader::IR { +enum class Attribute : u64; +enum class Patch : u64; +class Inst; +class Value; +} // namespace Shader::IR + +namespace Shader::Backend::GLSL { +class EmitContext; + +#define NotImplemented() throw NotImplementedException("GLSL instruction {}", __func__) + +// Microinstruction emitters +void EmitPhi(EmitContext& ctx, IR::Inst& inst); +void EmitVoid(EmitContext& ctx); +void EmitIdentity(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); +void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); +void EmitReference(EmitContext& ctx, const IR::Value& value); +void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value); +void EmitJoin(EmitContext& ctx); +void EmitDemoteToHelperInvocation(EmitContext& ctx); +void EmitBarrier(EmitContext& ctx); +void EmitWorkgroupMemoryBarrier(EmitContext& ctx); +void EmitDeviceMemoryBarrier(EmitContext& ctx); +void EmitPrologue(EmitContext& ctx); +void EmitEpilogue(EmitContext& ctx); +void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream); +void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream); +void EmitGetRegister(EmitContext& ctx); +void EmitSetRegister(EmitContext& ctx); +void EmitGetPred(EmitContext& ctx); +void EmitSetPred(EmitContext& ctx); +void EmitSetGotoVariable(EmitContext& ctx); +void EmitGetGotoVariable(EmitContext& ctx); +void EmitSetIndirectBranchVariable(EmitContext& ctx); +void EmitGetIndirectBranchVariable(EmitContext& ctx); +void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, + std::string_view vertex); +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value, + std::string_view vertex); +void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, std::string_view offset, + std::string_view vertex); +void EmitSetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view value, + std::string_view vertex); +void 
EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch); +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, std::string_view value); +void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string_view value); +void EmitSetSampleMask(EmitContext& ctx, std::string_view value); +void EmitSetFragDepth(EmitContext& ctx, std::string_view value); +void EmitGetZFlag(EmitContext& ctx); +void EmitGetSFlag(EmitContext& ctx); +void EmitGetCFlag(EmitContext& ctx); +void EmitGetOFlag(EmitContext& ctx); +void EmitSetZFlag(EmitContext& ctx); +void EmitSetSFlag(EmitContext& ctx); +void EmitSetCFlag(EmitContext& ctx); +void EmitSetOFlag(EmitContext& ctx); +void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst); +void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst); +void EmitInvocationId(EmitContext& ctx, IR::Inst& inst); +void EmitSampleId(EmitContext& ctx, IR::Inst& inst); +void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst); +void EmitYDirection(EmitContext& ctx, IR::Inst& inst); +void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, std::string_view word_offset); +void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value); +void EmitUndefU1(EmitContext& ctx, IR::Inst& inst); +void EmitUndefU8(EmitContext& ctx, IR::Inst& inst); +void EmitUndefU16(EmitContext& ctx, IR::Inst& inst); +void EmitUndefU32(EmitContext& ctx, IR::Inst& inst); +void EmitUndefU64(EmitContext& ctx, IR::Inst& inst); +void EmitLoadGlobalU8(EmitContext& ctx); +void EmitLoadGlobalS8(EmitContext& ctx); +void EmitLoadGlobalU16(EmitContext& ctx); +void EmitLoadGlobalS16(EmitContext& ctx); +void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, std::string_view address); +void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, std::string_view address); +void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, std::string_view address); +void EmitWriteGlobalU8(EmitContext& ctx); +void EmitWriteGlobalS8(EmitContext& ctx); +void EmitWriteGlobalU16(EmitContext& ctx); +void EmitWriteGlobalS16(EmitContext& ctx); +void EmitWriteGlobal32(EmitContext& ctx, std::string_view address, std::string_view value); +void EmitWriteGlobal64(EmitContext& ctx, std::string_view address, std::string_view value); +void EmitWriteGlobal128(EmitContext& ctx, std::string_view address, std::string_view value); +void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitWriteStorageS16(EmitContext& ctx, const 
IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, std::string_view offset); +void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, std::string_view offset); +void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, std::string_view offset); +void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, std::string_view offset); +void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, std::string_view offset); +void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, std::string_view offset); +void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, std::string_view offset); +void EmitWriteSharedU8(EmitContext& ctx, std::string_view offset, std::string_view value); +void EmitWriteSharedU16(EmitContext& ctx, std::string_view offset, std::string_view value); +void EmitWriteSharedU32(EmitContext& ctx, std::string_view offset, std::string_view value); +void EmitWriteSharedU64(EmitContext& ctx, std::string_view offset, std::string_view value); +void EmitWriteSharedU128(EmitContext& ctx, std::string_view offset, std::string_view value); +void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2); +void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2, std::string_view e3); +void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2, std::string_view e3, std::string_view e4); +void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index); +void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index); +void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index); +void EmitCompositeInsertU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index); +void EmitCompositeInsertU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index); +void EmitCompositeInsertU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index); +void EmitCompositeConstructF16x2(EmitContext& ctx, std::string_view e1, std::string_view e2); +void EmitCompositeConstructF16x3(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3); +void EmitCompositeConstructF16x4(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3, std::string_view e4); +void EmitCompositeExtractF16x2(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeExtractF16x3(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeExtractF16x4(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeInsertF16x2(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeInsertF16x3(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeInsertF16x4(EmitContext& ctx, std::string_view composite, 
std::string_view object, + u32 index); +void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2); +void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2, std::string_view e3); +void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2, std::string_view e3, std::string_view e4); +void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index); +void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index); +void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index); +void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index); +void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index); +void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index); +void EmitCompositeConstructF64x2(EmitContext& ctx); +void EmitCompositeConstructF64x3(EmitContext& ctx); +void EmitCompositeConstructF64x4(EmitContext& ctx); +void EmitCompositeExtractF64x2(EmitContext& ctx); +void EmitCompositeExtractF64x3(EmitContext& ctx); +void EmitCompositeExtractF64x4(EmitContext& ctx); +void EmitCompositeInsertF64x2(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeInsertF64x3(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeInsertF64x4(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitSelectU1(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value); +void EmitSelectU8(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitSelectU16(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value); +void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value); +void EmitSelectF16(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value); +void EmitSelectF64(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value); +void EmitBitCastU16F16(EmitContext& ctx, IR::Inst& inst); +void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitBitCastF16U16(EmitContext& ctx, IR::Inst& inst); +void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitPackFloat2x16(EmitContext& ctx, IR::Inst& 
inst, std::string_view value); +void EmitUnpackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitGetZeroFromOp(EmitContext& ctx); +void EmitGetSignFromOp(EmitContext& ctx); +void EmitGetCarryFromOp(EmitContext& ctx); +void EmitGetOverflowFromOp(EmitContext& ctx); +void EmitGetSparseFromOp(EmitContext& ctx); +void EmitGetInBoundsFromOp(EmitContext& ctx); +void EmitFPAbs16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPAdd16(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPFma16(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, + std::string_view c); +void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, + std::string_view c); +void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, + std::string_view c); +void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPMul16(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPNeg16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPSin(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPCos(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPExp2(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPRecip64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPRecipSqrt32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPRecipSqrt64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPSaturate16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPSaturate64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPClamp16(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view min_value, std::string_view max_value); +void 
EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view min_value, std::string_view max_value); +void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view min_value, std::string_view max_value); +void EmitFPRoundEven16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPFloor16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPCeil16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPTrunc16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPOrdEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); +void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); +void EmitFPUnordEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view 
rhs); +void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPIsNan16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitISub32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitISub64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitIMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitINeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift); +void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift); +void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift); +void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift); +void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift); +void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift); +void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view insert, std::string_view offset, std::string_view count); +void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, 
std::string_view base, + std::string_view offset, std::string_view count); +void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view offset, std::string_view count); +void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min, + std::string_view max); +void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min, + std::string_view max); +void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); +void EmitULessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); +void EmitIEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); +void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); +void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); +void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); +void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, 
std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); +void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicAddF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const 
IR::Value& offset, std::string_view value); +void EmitStorageAtomicMinF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicMaxF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitGlobalAtomicIAdd32(EmitContext& ctx); +void EmitGlobalAtomicSMin32(EmitContext& ctx); +void EmitGlobalAtomicUMin32(EmitContext& ctx); +void EmitGlobalAtomicSMax32(EmitContext& ctx); +void EmitGlobalAtomicUMax32(EmitContext& ctx); +void EmitGlobalAtomicInc32(EmitContext& ctx); +void EmitGlobalAtomicDec32(EmitContext& ctx); +void EmitGlobalAtomicAnd32(EmitContext& ctx); +void EmitGlobalAtomicOr32(EmitContext& ctx); +void EmitGlobalAtomicXor32(EmitContext& ctx); +void EmitGlobalAtomicExchange32(EmitContext& ctx); +void EmitGlobalAtomicIAdd64(EmitContext& ctx); +void EmitGlobalAtomicSMin64(EmitContext& ctx); +void EmitGlobalAtomicUMin64(EmitContext& ctx); +void EmitGlobalAtomicSMax64(EmitContext& ctx); +void EmitGlobalAtomicUMax64(EmitContext& ctx); +void EmitGlobalAtomicInc64(EmitContext& ctx); +void EmitGlobalAtomicDec64(EmitContext& ctx); +void EmitGlobalAtomicAnd64(EmitContext& ctx); +void EmitGlobalAtomicOr64(EmitContext& ctx); +void EmitGlobalAtomicXor64(EmitContext& ctx); +void EmitGlobalAtomicExchange64(EmitContext& ctx); +void EmitGlobalAtomicAddF32(EmitContext& ctx); +void EmitGlobalAtomicAddF16x2(EmitContext& ctx); +void EmitGlobalAtomicAddF32x2(EmitContext& ctx); +void EmitGlobalAtomicMinF16x2(EmitContext& ctx); +void EmitGlobalAtomicMinF32x2(EmitContext& ctx); +void EmitGlobalAtomicMaxF16x2(EmitContext& ctx); +void EmitGlobalAtomicMaxF32x2(EmitContext& ctx); +void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertS16F16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertS16F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertS32F16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertS64F16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU16F16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU16F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU16F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU32F16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU64F16(EmitContext& ctx, IR::Inst& inst, 
std::string_view value); +void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF16F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32F16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF16S8(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF16S16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF16S32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF16S64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF16U8(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF16U16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF16U32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF16U64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32S8(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32S16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32S32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32U8(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF64S8(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF64S16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF64S64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF64U8(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF64U16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitBindlessImageSampleImplicitLod(EmitContext&); +void EmitBindlessImageSampleExplicitLod(EmitContext&); +void EmitBindlessImageSampleDrefImplicitLod(EmitContext&); +void EmitBindlessImageSampleDrefExplicitLod(EmitContext&); +void EmitBindlessImageGather(EmitContext&); +void EmitBindlessImageGatherDref(EmitContext&); +void EmitBindlessImageFetch(EmitContext&); +void EmitBindlessImageQueryDimensions(EmitContext&); +void EmitBindlessImageQueryLod(EmitContext&); +void EmitBindlessImageGradient(EmitContext&); +void EmitBindlessImageRead(EmitContext&); +void EmitBindlessImageWrite(EmitContext&); +void EmitBoundImageSampleImplicitLod(EmitContext&); +void EmitBoundImageSampleExplicitLod(EmitContext&); +void EmitBoundImageSampleDrefImplicitLod(EmitContext&); +void EmitBoundImageSampleDrefExplicitLod(EmitContext&); +void EmitBoundImageGather(EmitContext&); +void 
EmitBoundImageGatherDref(EmitContext&); +void EmitBoundImageFetch(EmitContext&); +void EmitBoundImageQueryDimensions(EmitContext&); +void EmitBoundImageQueryLod(EmitContext&); +void EmitBoundImageGradient(EmitContext&); +void EmitBoundImageRead(EmitContext&); +void EmitBoundImageWrite(EmitContext&); +void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view bias_lc, + const IR::Value& offset); +void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view lod_lc, + const IR::Value& offset); +void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view dref, + std::string_view bias_lc, const IR::Value& offset); +void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view dref, + std::string_view lod_lc, const IR::Value& offset); +void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, const IR::Value& offset, const IR::Value& offset2); +void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, const IR::Value& offset, const IR::Value& offset2, + std::string_view dref); +void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view offset, std::string_view lod, + std::string_view ms); +void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view lod); +void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords); +void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, const IR::Value& derivatives, + const IR::Value& offset, const IR::Value& lod_clamp); +void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords); +void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view color); +void EmitBindlessImageAtomicIAdd32(EmitContext&); +void EmitBindlessImageAtomicSMin32(EmitContext&); +void EmitBindlessImageAtomicUMin32(EmitContext&); +void EmitBindlessImageAtomicSMax32(EmitContext&); +void EmitBindlessImageAtomicUMax32(EmitContext&); +void EmitBindlessImageAtomicInc32(EmitContext&); +void EmitBindlessImageAtomicDec32(EmitContext&); +void EmitBindlessImageAtomicAnd32(EmitContext&); +void EmitBindlessImageAtomicOr32(EmitContext&); +void EmitBindlessImageAtomicXor32(EmitContext&); +void EmitBindlessImageAtomicExchange32(EmitContext&); +void EmitBoundImageAtomicIAdd32(EmitContext&); +void EmitBoundImageAtomicSMin32(EmitContext&); +void EmitBoundImageAtomicUMin32(EmitContext&); +void EmitBoundImageAtomicSMax32(EmitContext&); +void EmitBoundImageAtomicUMax32(EmitContext&); +void EmitBoundImageAtomicInc32(EmitContext&); +void EmitBoundImageAtomicDec32(EmitContext&); +void EmitBoundImageAtomicAnd32(EmitContext&); +void EmitBoundImageAtomicOr32(EmitContext&); +void EmitBoundImageAtomicXor32(EmitContext&); +void EmitBoundImageAtomicExchange32(EmitContext&); +void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view 
coords, std::string_view value); +void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitLaneId(EmitContext& ctx, IR::Inst& inst); +void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred); +void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred); +void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred); +void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred); +void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst); +void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst); +void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst); +void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst); +void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst); +void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask); +void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, + std::string_view clamp, std::string_view segmentation_mask); +void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask); +void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask); +void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, std::string_view op_a, std::string_view op_b, + std::string_view swizzle); +void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a); +void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a); +void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a); +void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a); + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp new file mode 100644 index 000000000..38419f88f --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp @@ -0,0 +1,253 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
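Illustrative aside, not part of the patch: in the integer emitters that follow, SetZeroFlag and SetSignFlag only add comparisons when the instruction carries a GetZeroFromOp or GetSignFromOp pseudo-op, so a flag-producing bitwise op expands into the operation plus two extra statements; the helpers then invalidate the pseudo-ops. A minimal standalone sketch with invented SSA names:

// Standalone sketch of the GLSL produced by BitwiseLogicalOp plus the flag helpers
// defined below; assumes only {fmt}.
#include <fmt/format.h>

int main() {
    const char* result = "ssa_4"; // what ctx.var_alloc.Define(inst, GlslVarType::U32) might return
    const char* a = "ssa_2";
    const char* b = "ssa_3";
    const char* zero = "ssa_5";   // GetZeroFromOp pseudo-op, if present
    const char* sign = "ssa_6";   // GetSignFromOp pseudo-op, if present

    fmt::print("{}={}{}{};\n", result, a, '&', b);  // the AND itself
    fmt::print("{}={}==0;\n", zero, result);        // SetZeroFlag comparison
    fmt::print("{}=int({})<0;\n", sign, result);    // SetSignFlag comparison
    return 0;
}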
+ +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLSL { +namespace { +void SetZeroFlag(EmitContext& ctx, IR::Inst& inst, std::string_view result) { + IR::Inst* const zero{inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp)}; + if (!zero) { + return; + } + ctx.AddU1("{}={}==0;", *zero, result); + zero->Invalidate(); +} + +void SetSignFlag(EmitContext& ctx, IR::Inst& inst, std::string_view result) { + IR::Inst* const sign{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp)}; + if (!sign) { + return; + } + ctx.AddU1("{}=int({})<0;", *sign, result); + sign->Invalidate(); +} + +void BitwiseLogicalOp(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, + char lop) { + const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)}; + ctx.Add("{}={}{}{};", result, a, lop, b); + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); +} +} // Anonymous namespace + +void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + // Compute the overflow CC first as it requires the original operand values, + // which may be overwritten by the result of the addition + if (IR::Inst * overflow{inst.GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp)}) { + // https://stackoverflow.com/questions/55468823/how-to-detect-integer-overflow-in-c + constexpr u32 s32_max{static_cast<u32>(std::numeric_limits<s32>::max())}; + const auto sub_a{fmt::format("{}u-{}", s32_max, a)}; + const auto positive_result{fmt::format("int({})>int({})", b, sub_a)}; + const auto negative_result{fmt::format("int({})<int({})", b, sub_a)}; + ctx.AddU1("{}=int({})>=0?{}:{};", *overflow, a, positive_result, negative_result); + overflow->Invalidate(); + } + const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)}; + if (IR::Inst* const carry{inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp)}) { + ctx.uses_cc_carry = true; + ctx.Add("{}=uaddCarry({},{},carry);", result, a, b); + ctx.AddU1("{}=carry!=0;", *carry); + carry->Invalidate(); + } else { + ctx.Add("{}={}+{};", result, a, b); + } + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); +} + +void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU64("{}={}+{};", inst, a, b); +} + +void EmitISub32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU32("{}={}-{};", inst, a, b); +} + +void EmitISub64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU64("{}={}-{};", inst, a, b); +} + +void EmitIMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU32("{}=uint({}*{});", inst, a, b); +} + +void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=uint(-({}));", inst, value); +} + +void EmitINeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU64("{}=-({});", inst, value); +} + +void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=abs(int({}));", inst, value); +} + +void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift) { + ctx.AddU32("{}={}<<{};", inst, base, shift); +} + +void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift) { + 
ctx.AddU64("{}={}<<{};", inst, base, shift); +} + +void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift) { + ctx.AddU32("{}={}>>{};", inst, base, shift); +} + +void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift) { + ctx.AddU64("{}={}>>{};", inst, base, shift); +} + +void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift) { + ctx.AddU32("{}=int({})>>{};", inst, base, shift); +} + +void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift) { + ctx.AddU64("{}=int64_t({})>>{};", inst, base, shift); +} + +void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + BitwiseLogicalOp(ctx, inst, a, b, '&'); +} + +void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + BitwiseLogicalOp(ctx, inst, a, b, '|'); +} + +void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + BitwiseLogicalOp(ctx, inst, a, b, '^'); +} + +void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view insert, std::string_view offset, std::string_view count) { + ctx.AddU32("{}=bitfieldInsert({},{},int({}),int({}));", inst, base, insert, offset, count); +} + +void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view offset, std::string_view count) { + const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)}; + ctx.Add("{}=uint(bitfieldExtract(int({}),int({}),int({})));", result, base, offset, count); + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); +} + +void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view offset, std::string_view count) { + const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)}; + ctx.Add("{}=uint(bitfieldExtract(uint({}),int({}),int({})));", result, base, offset, count); + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); +} + +void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=bitfieldReverse({});", inst, value); +} + +void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=bitCount({});", inst, value); +} + +void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=~{};", inst, value); +} + +void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=findMSB(int({}));", inst, value); +} + +void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=findMSB(uint({}));", inst, value); +} + +void EmitSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU32("{}=min(int({}),int({}));", inst, a, b); +} + +void EmitUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU32("{}=min(uint({}),uint({}));", inst, a, b); +} + +void EmitSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU32("{}=max(int({}),int({}));", inst, a, b); +} + +void EmitUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU32("{}=max(uint({}),uint({}));", inst, a, b); +} + +void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, 
std::string_view min, + std::string_view max) { + const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)}; + ctx.Add("{}=clamp(int({}),int({}),int({}));", result, value, min, max); + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); +} + +void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min, + std::string_view max) { + const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)}; + ctx.Add("{}=clamp(uint({}),uint({}),uint({}));", result, value, min, max); + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); +} + +void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { + ctx.AddU1("{}=int({})<int({});", inst, lhs, rhs); +} + +void EmitULessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { + ctx.AddU1("{}=uint({})<uint({});", inst, lhs, rhs); +} + +void EmitIEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { + ctx.AddU1("{}={}=={};", inst, lhs, rhs); +} + +void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + ctx.AddU1("{}=int({})<=int({});", inst, lhs, rhs); +} + +void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + ctx.AddU1("{}=uint({})<=uint({});", inst, lhs, rhs); +} + +void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + ctx.AddU1("{}=int({})>int({});", inst, lhs, rhs); +} + +void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + ctx.AddU1("{}=uint({})>uint({});", inst, lhs, rhs); +} + +void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { + ctx.AddU1("{}={}!={};", inst, lhs, rhs); +} + +void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + ctx.AddU1("{}=int({})>=int({});", inst, lhs, rhs); +} + +void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + ctx.AddU1("{}=uint({})>=uint({});", inst, lhs, rhs); +} +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp new file mode 100644 index 000000000..338ff4bd6 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp @@ -0,0 +1,28 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLSL { + +void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU1("{}={}||{};", inst, a, b); +} + +void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU1("{}={}&&{};", inst, a, b); +} + +void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU1("{}={}^^{};", inst, a, b); +} + +void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU1("{}=!{};", inst, value); +} +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp new file mode 100644 index 000000000..e3957491f --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp @@ -0,0 +1,202 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" + +namespace Shader::Backend::GLSL { +namespace { +constexpr char cas_loop[]{"for(;;){{uint old_value={};uint " + "cas_result=atomicCompSwap({},old_value,bitfieldInsert({},{},{},{}));" + "if(cas_result==old_value){{break;}}}}"}; + +void SsboWriteCas(EmitContext& ctx, const IR::Value& binding, std::string_view offset_var, + std::string_view value, std::string_view bit_offset, u32 num_bits) { + const auto ssbo{fmt::format("{}_ssbo{}[{}>>2]", ctx.stage_name, binding.U32(), offset_var)}; + ctx.Add(cas_loop, ssbo, ssbo, ssbo, value, bit_offset, num_bits); +} +} // Anonymous namespace + +void EmitLoadGlobalU8(EmitContext&) { + NotImplemented(); +} + +void EmitLoadGlobalS8(EmitContext&) { + NotImplemented(); +} + +void EmitLoadGlobalU16(EmitContext&) { + NotImplemented(); +} + +void EmitLoadGlobalS16(EmitContext&) { + NotImplemented(); +} + +void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, std::string_view address) { + if (ctx.profile.support_int64) { + return ctx.AddU32("{}=LoadGlobal32({});", inst, address); + } + LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation"); + ctx.AddU32("{}=0u;", inst); +} + +void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, std::string_view address) { + if (ctx.profile.support_int64) { + return ctx.AddU32x2("{}=LoadGlobal64({});", inst, address); + } + LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation"); + ctx.AddU32x2("{}=uvec2(0);", inst); +} + +void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, std::string_view address) { + if (ctx.profile.support_int64) { + return ctx.AddU32x4("{}=LoadGlobal128({});", inst, address); + } + LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation"); + ctx.AddU32x4("{}=uvec4(0);", inst); +} + +void EmitWriteGlobalU8(EmitContext&) { + NotImplemented(); +} + +void EmitWriteGlobalS8(EmitContext&) { + NotImplemented(); +} + +void EmitWriteGlobalU16(EmitContext&) { + NotImplemented(); +} + +void EmitWriteGlobalS16(EmitContext&) { + NotImplemented(); +} + +void EmitWriteGlobal32(EmitContext& ctx, std::string_view 
address, std::string_view value) { + if (ctx.profile.support_int64) { + return ctx.Add("WriteGlobal32({},{});", address, value); + } + LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation"); +} + +void EmitWriteGlobal64(EmitContext& ctx, std::string_view address, std::string_view value) { + if (ctx.profile.support_int64) { + return ctx.Add("WriteGlobal64({},{});", address, value); + } + LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation"); +} + +void EmitWriteGlobal128(EmitContext& ctx, std::string_view address, std::string_view value) { + if (ctx.profile.support_int64) { + return ctx.Add("WriteGlobal128({},{});", address, value); + } + LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation"); +} + +void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + ctx.AddU32("{}=bitfieldExtract({}_ssbo{}[{}>>2],int({}%4)*8,8);", inst, ctx.stage_name, + binding.U32(), offset_var, offset_var); +} + +void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + ctx.AddU32("{}=bitfieldExtract(int({}_ssbo{}[{}>>2]),int({}%4)*8,8);", inst, ctx.stage_name, + binding.U32(), offset_var, offset_var); +} + +void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + ctx.AddU32("{}=bitfieldExtract({}_ssbo{}[{}>>2],int(({}>>1)%2)*16,16);", inst, ctx.stage_name, + binding.U32(), offset_var, offset_var); +} + +void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + ctx.AddU32("{}=bitfieldExtract(int({}_ssbo{}[{}>>2]),int(({}>>1)%2)*16,16);", inst, + ctx.stage_name, binding.U32(), offset_var, offset_var); +} + +void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + ctx.AddU32("{}={}_ssbo{}[{}>>2];", inst, ctx.stage_name, binding.U32(), offset_var); +} + +void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + ctx.AddU32x2("{}=uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}+4)>>2]);", inst, ctx.stage_name, + binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var); +} + +void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + ctx.AddU32x4("{}=uvec4({}_ssbo{}[{}>>2],{}_ssbo{}[({}+4)>>2],{}_ssbo{}[({}+8)>>2],{}_ssbo{}[({}" + "+12)>>2]);", + inst, ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(), + offset_var, ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, + binding.U32(), offset_var); +} + +void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + const auto bit_offset{fmt::format("int({}%4)*8", offset_var)}; + SsboWriteCas(ctx, binding, offset_var, value, bit_offset, 8); +} + +void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + const auto 
offset_var{ctx.var_alloc.Consume(offset)}; + const auto bit_offset{fmt::format("int({}%4)*8", offset_var)}; + SsboWriteCas(ctx, binding, offset_var, value, bit_offset, 8); +} + +void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + const auto bit_offset{fmt::format("int(({}>>1)%2)*16", offset_var)}; + SsboWriteCas(ctx, binding, offset_var, value, bit_offset, 16); +} + +void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + const auto bit_offset{fmt::format("int(({}>>1)%2)*16", offset_var)}; + SsboWriteCas(ctx, binding, offset_var, value, bit_offset, 16); +} + +void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + ctx.Add("{}_ssbo{}[{}>>2]={};", ctx.stage_name, binding.U32(), offset_var, value); +} + +void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + ctx.Add("{}_ssbo{}[{}>>2]={}.x;", ctx.stage_name, binding.U32(), offset_var, value); + ctx.Add("{}_ssbo{}[({}+4)>>2]={}.y;", ctx.stage_name, binding.U32(), offset_var, value); +} + +void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + ctx.Add("{}_ssbo{}[{}>>2]={}.x;", ctx.stage_name, binding.U32(), offset_var, value); + ctx.Add("{}_ssbo{}[({}+4)>>2]={}.y;", ctx.stage_name, binding.U32(), offset_var, value); + ctx.Add("{}_ssbo{}[({}+8)>>2]={}.z;", ctx.stage_name, binding.U32(), offset_var, value); + ctx.Add("{}_ssbo{}[({}+12)>>2]={}.w;", ctx.stage_name, binding.U32(), offset_var, value); +} +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp new file mode 100644 index 000000000..f420fe388 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp @@ -0,0 +1,105 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
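The 8- and 16-bit storage writes above cannot address the SSBO element directly, because each buffer is exposed as an array of 32-bit uints. SsboWriteCas therefore merges the new bits into the containing word with bitfieldInsert and retries with atomicCompSwap until no other invocation has raced the store. A host-side analogue of one such store (function name illustrative, not part of the patch):

    #include <atomic>
    #include <cstdint>

    // Splice `num_bits` bits of `value` into a shared 32-bit word at
    // `bit_offset`, retrying until the compare-and-swap succeeds.
    // num_bits is 8 or 16 here, so the shifts are well defined.
    void WriteSubWord(std::atomic<std::uint32_t>& word, std::uint32_t value,
                      std::uint32_t bit_offset, std::uint32_t num_bits) {
        for (;;) {
            std::uint32_t old_value = word.load();
            const std::uint32_t mask = ((1u << num_bits) - 1u) << bit_offset; // bitfieldInsert mask
            const std::uint32_t merged = (old_value & ~mask) | ((value << bit_offset) & mask);
            if (word.compare_exchange_weak(old_value, merged)) {
                break;
            }
        }
    }

A byte at byte offset off uses bit_offset int(off%4)*8 with num_bits 8, which is exactly what EmitWriteStorageU8 passes to SsboWriteCas.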
+ +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +#ifdef _MSC_VER +#pragma warning(disable : 4100) +#endif + +namespace Shader::Backend::GLSL { + +void EmitGetRegister(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetRegister(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetPred(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetPred(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetGotoVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetGotoVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetIndirectBranchVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetIndirectBranchVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetZFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetSFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetCFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetOFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetZFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetSFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetCFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetOFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetZeroFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetSignFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetCarryFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetOverflowFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetSparseFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetInBoundsFromOp(EmitContext& ctx) { + NotImplemented(); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp new file mode 100644 index 000000000..49fba9073 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp @@ -0,0 +1,55 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLSL { +void EmitSelectU1(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value) { + ctx.AddU1("{}={}?{}:{};", inst, cond, true_value, false_value); +} + +void EmitSelectU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond, + [[maybe_unused]] std::string_view true_value, + [[maybe_unused]] std::string_view false_value) { + NotImplemented(); +} + +void EmitSelectU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond, + [[maybe_unused]] std::string_view true_value, + [[maybe_unused]] std::string_view false_value) { + NotImplemented(); +} + +void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value) { + ctx.AddU32("{}={}?{}:{};", inst, cond, true_value, false_value); +} + +void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value) { + ctx.AddU64("{}={}?{}:{};", inst, cond, true_value, false_value); +} + +void EmitSelectF16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond, + [[maybe_unused]] std::string_view true_value, + [[maybe_unused]] std::string_view false_value) { + NotImplemented(); +} + +void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value) { + ctx.AddF32("{}={}?{}:{};", inst, cond, true_value, false_value); +} + +void EmitSelectF64(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value) { + ctx.AddF64("{}={}?{}:{};", inst, cond, true_value, false_value); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp new file mode 100644 index 000000000..518b78f06 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp @@ -0,0 +1,79 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
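Sub-word loads follow the same addressing scheme as the storage loads earlier: the smem array addressed below is indexed in 32-bit words, so a byte at offset off lives in word off>>2 at bit (off%4)*8 and a halfword at bit ((off>>1)%2)*16, extracted with bitfieldExtract (sign-extending for the signed variants). The 8-bit case as a host-side sketch (function names illustrative):

    #include <cstdint>

    std::uint32_t ExtractU8(std::uint32_t word, std::uint32_t byte_offset) {
        return (word >> ((byte_offset % 4) * 8)) & 0xffu;
    }

    std::int32_t ExtractS8(std::uint32_t word, std::uint32_t byte_offset) {
        // Narrowing to int8_t and widening back sign-extends the byte.
        return static_cast<std::int8_t>(ExtractU8(word, byte_offset));
    }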
+ +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLSL { +namespace { +constexpr char cas_loop[]{"for(;;){{uint old_value={};uint " + "cas_result=atomicCompSwap({},old_value,bitfieldInsert({},{},{},{}));" + "if(cas_result==old_value){{break;}}}}"}; + +void SharedWriteCas(EmitContext& ctx, std::string_view offset, std::string_view value, + std::string_view bit_offset, u32 num_bits) { + const auto smem{fmt::format("smem[{}>>2]", offset)}; + ctx.Add(cas_loop, smem, smem, smem, value, bit_offset, num_bits); +} +} // Anonymous namespace + +void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { + ctx.AddU32("{}=bitfieldExtract(smem[{}>>2],int({}%4)*8,8);", inst, offset, offset); +} + +void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { + ctx.AddU32("{}=bitfieldExtract(int(smem[{}>>2]),int({}%4)*8,8);", inst, offset, offset); +} + +void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { + ctx.AddU32("{}=bitfieldExtract(smem[{}>>2],int(({}>>1)%2)*16,16);", inst, offset, offset); +} + +void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { + ctx.AddU32("{}=bitfieldExtract(int(smem[{}>>2]),int(({}>>1)%2)*16,16);", inst, offset, offset); +} + +void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { + ctx.AddU32("{}=smem[{}>>2];", inst, offset); +} + +void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { + ctx.AddU32x2("{}=uvec2(smem[{}>>2],smem[({}+4)>>2]);", inst, offset, offset); +} + +void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { + ctx.AddU32x4("{}=uvec4(smem[{}>>2],smem[({}+4)>>2],smem[({}+8)>>2],smem[({}+12)>>2]);", inst, + offset, offset, offset, offset); +} + +void EmitWriteSharedU8(EmitContext& ctx, std::string_view offset, std::string_view value) { + const auto bit_offset{fmt::format("int({}%4)*8", offset)}; + SharedWriteCas(ctx, offset, value, bit_offset, 8); +} + +void EmitWriteSharedU16(EmitContext& ctx, std::string_view offset, std::string_view value) { + const auto bit_offset{fmt::format("int(({}>>1)%2)*16", offset)}; + SharedWriteCas(ctx, offset, value, bit_offset, 16); +} + +void EmitWriteSharedU32(EmitContext& ctx, std::string_view offset, std::string_view value) { + ctx.Add("smem[{}>>2]={};", offset, value); +} + +void EmitWriteSharedU64(EmitContext& ctx, std::string_view offset, std::string_view value) { + ctx.Add("smem[{}>>2]={}.x;", offset, value); + ctx.Add("smem[({}+4)>>2]={}.y;", offset, value); +} + +void EmitWriteSharedU128(EmitContext& ctx, std::string_view offset, std::string_view value) { + ctx.Add("smem[{}>>2]={}.x;", offset, value); + ctx.Add("smem[({}+4)>>2]={}.y;", offset, value); + ctx.Add("smem[({}+8)>>2]={}.z;", offset, value); + ctx.Add("smem[({}+12)>>2]={}.w;", offset, value); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp new file mode 100644 index 000000000..9b866f889 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp @@ -0,0 +1,111 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
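InitializeOutputVaryings below zero-fills every generic output the shader stores to, except that whichever element lands on the w component is initialized to 1, so an attribute the guest never writes still reads back as (0, 0, 0, 1). Collected into a single helper, the rule is (a sketch; this helper does not exist in the patch):

    #include <cstddef>
    #include <string>

    #include <fmt/format.h>

    std::string DefaultInitializer(std::size_t element, std::size_t num_components) {
        if (num_components == 1) {
            return element == 3 ? "1.f" : "0.f";
        }
        if (element + num_components < 4) {
            return fmt::format("vec{}(0)", num_components); // slice never reaches .w
        }
        // The slice ends on the w component, which must default to 1.
        const char* zeros{num_components == 4 ? "0,0,0," : num_components == 3 ? "0,0," : "0,"};
        return fmt::format("vec{}({}1)", num_components, zeros);
    }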
+ +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" + +namespace Shader::Backend::GLSL { +namespace { +std::string_view OutputVertexIndex(EmitContext& ctx) { + return ctx.stage == Stage::TessellationControl ? "[gl_InvocationID]" : ""; +} + +void InitializeOutputVaryings(EmitContext& ctx) { + if (ctx.uses_geometry_passthrough) { + return; + } + if (ctx.stage == Stage::VertexB || ctx.stage == Stage::Geometry) { + ctx.Add("gl_Position=vec4(0,0,0,1);"); + } + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (!ctx.info.stores.Generic(index)) { + continue; + } + const auto& info_array{ctx.output_generics.at(index)}; + const auto output_decorator{OutputVertexIndex(ctx)}; + size_t element{}; + while (element < info_array.size()) { + const auto& info{info_array.at(element)}; + const auto varying_name{fmt::format("{}{}", info.name, output_decorator)}; + switch (info.num_components) { + case 1: { + const char value{element == 3 ? '1' : '0'}; + ctx.Add("{}={}.f;", varying_name, value); + break; + } + case 2: + case 3: + if (element + info.num_components < 4) { + ctx.Add("{}=vec{}(0);", varying_name, info.num_components); + } else { + // last element is the w component, must be initialized to 1 + const auto zeros{info.num_components == 3 ? "0,0," : "0,"}; + ctx.Add("{}=vec{}({}1);", varying_name, info.num_components, zeros); + } + break; + case 4: + ctx.Add("{}=vec4(0,0,0,1);", varying_name); + break; + default: + break; + } + element += info.num_components; + } + } +} +} // Anonymous namespace + +void EmitPhi(EmitContext& ctx, IR::Inst& phi) { + const size_t num_args{phi.NumArgs()}; + for (size_t i = 0; i < num_args; ++i) { + ctx.var_alloc.Consume(phi.Arg(i)); + } + if (!phi.Definition<Id>().is_valid) { + // The phi node wasn't forward defined + ctx.var_alloc.PhiDefine(phi, phi.Arg(0).Type()); + } +} + +void EmitVoid(EmitContext&) {} + +void EmitReference(EmitContext& ctx, const IR::Value& value) { + ctx.var_alloc.Consume(value); +} + +void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value& value) { + IR::Inst& phi{*phi_value.InstRecursive()}; + const auto phi_type{phi.Arg(0).Type()}; + if (!phi.Definition<Id>().is_valid) { + // The phi node wasn't forward defined + ctx.var_alloc.PhiDefine(phi, phi_type); + } + const auto phi_reg{ctx.var_alloc.Consume(IR::Value{&phi})}; + const auto val_reg{ctx.var_alloc.Consume(value)}; + if (phi_reg == val_reg) { + return; + } + ctx.Add("{}={};", phi_reg, val_reg); +} + +void EmitPrologue(EmitContext& ctx) { + InitializeOutputVaryings(ctx); +} + +void EmitEpilogue(EmitContext&) {} + +void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { + ctx.Add("EmitStreamVertex(int({}));", ctx.var_alloc.Consume(stream)); + InitializeOutputVaryings(ctx); +} + +void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { + ctx.Add("EndStreamPrimitive(int({}));", ctx.var_alloc.Consume(stream)); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_undefined.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_undefined.cpp new file mode 100644 index 000000000..15bf02dd6 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_undefined.cpp @@ -0,0 +1,32 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later 
version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" + +namespace Shader::Backend::GLSL { + +void EmitUndefU1(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU1("{}=false;", inst); +} + +void EmitUndefU8(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=0u;", inst); +} + +void EmitUndefU16(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=0u;", inst); +} + +void EmitUndefU32(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=0u;", inst); +} + +void EmitUndefU64(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU64("{}=0u;", inst); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp new file mode 100644 index 000000000..a982dd8a2 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp @@ -0,0 +1,217 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" + +namespace Shader::Backend::GLSL { +namespace { +void SetInBoundsFlag(EmitContext& ctx, IR::Inst& inst) { + IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)}; + if (!in_bounds) { + return; + } + ctx.AddU1("{}=shfl_in_bounds;", *in_bounds); + in_bounds->Invalidate(); +} + +std::string ComputeMinThreadId(std::string_view thread_id, std::string_view segmentation_mask) { + return fmt::format("({}&{})", thread_id, segmentation_mask); +} + +std::string ComputeMaxThreadId(std::string_view min_thread_id, std::string_view clamp, + std::string_view not_seg_mask) { + return fmt::format("({})|({}&{})", min_thread_id, clamp, not_seg_mask); +} + +std::string GetMaxThreadId(std::string_view thread_id, std::string_view clamp, + std::string_view segmentation_mask) { + const auto not_seg_mask{fmt::format("(~{})", segmentation_mask)}; + const auto min_thread_id{ComputeMinThreadId(thread_id, segmentation_mask)}; + return ComputeMaxThreadId(min_thread_id, clamp, not_seg_mask); +} + +void UseShuffleNv(EmitContext& ctx, IR::Inst& inst, std::string_view shfl_op, + std::string_view value, std::string_view index, + [[maybe_unused]] std::string_view clamp, std::string_view segmentation_mask) { + const auto width{fmt::format("32u>>(bitCount({}&31u))", segmentation_mask)}; + ctx.AddU32("{}={}({},{},{},shfl_in_bounds);", inst, shfl_op, value, index, width); + SetInBoundsFlag(ctx, inst); +} +} // Anonymous namespace + +void EmitLaneId(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=gl_SubGroupInvocationARB&31u;", inst); +} + +void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); + } else { + const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")}; + const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)}; + ctx.AddU1("{}=({}&{})=={};", inst, ballot, active_mask, active_mask); + } +} + +void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + 
ctx.AddU1("{}=anyInvocationARB({});", inst, pred); + } else { + const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")}; + const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)}; + ctx.AddU1("{}=({}&{})!=0u;", inst, ballot, active_mask, active_mask); + } +} + +void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); + } else { + const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")}; + const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)}; + const auto value{fmt::format("({}^{})", ballot, active_mask)}; + ctx.AddU1("{}=({}==0)||({}=={});", inst, value, value, active_mask); + } +} + +void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + ctx.AddU32("{}=uvec2(ballotARB({})).x;", inst, pred); + } else { + ctx.AddU32("{}=uvec2(ballotARB({}))[gl_SubGroupInvocationARB];", inst, pred); + } +} + +void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=uint(gl_SubGroupEqMaskARB.x);", inst); +} + +void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=uint(gl_SubGroupLtMaskARB.x);", inst); +} + +void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=uint(gl_SubGroupLeMaskARB.x);", inst); +} + +void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=uint(gl_SubGroupGtMaskARB.x);", inst); +} + +void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=uint(gl_SubGroupGeMaskARB.x);", inst); +} + +void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask) { + if (ctx.profile.support_gl_warp_intrinsics) { + UseShuffleNv(ctx, inst, "shuffleNV", value, index, clamp, segmentation_mask); + return; + } + const auto not_seg_mask{fmt::format("(~{})", segmentation_mask)}; + const auto thread_id{"gl_SubGroupInvocationARB"}; + const auto min_thread_id{ComputeMinThreadId(thread_id, segmentation_mask)}; + const auto max_thread_id{ComputeMaxThreadId(min_thread_id, clamp, not_seg_mask)}; + + const auto lhs{fmt::format("({}&{})", index, not_seg_mask)}; + const auto src_thread_id{fmt::format("({})|({})", lhs, min_thread_id)}; + ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); + SetInBoundsFlag(ctx, inst); + ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); +} + +void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, + std::string_view clamp, std::string_view segmentation_mask) { + if (ctx.profile.support_gl_warp_intrinsics) { + UseShuffleNv(ctx, inst, "shuffleUpNV", value, index, clamp, segmentation_mask); + return; + } + const auto thread_id{"gl_SubGroupInvocationARB"}; + const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; + const auto src_thread_id{fmt::format("({}-{})", thread_id, index)}; + ctx.Add("shfl_in_bounds=int({})>=int({});", src_thread_id, max_thread_id); + SetInBoundsFlag(ctx, inst); + ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); +} + +void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view index, 
std::string_view clamp, + std::string_view segmentation_mask) { + if (ctx.profile.support_gl_warp_intrinsics) { + UseShuffleNv(ctx, inst, "shuffleDownNV", value, index, clamp, segmentation_mask); + return; + } + const auto thread_id{"gl_SubGroupInvocationARB"}; + const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; + const auto src_thread_id{fmt::format("({}+{})", thread_id, index)}; + ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); + SetInBoundsFlag(ctx, inst); + ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); +} + +void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask) { + if (ctx.profile.support_gl_warp_intrinsics) { + UseShuffleNv(ctx, inst, "shuffleXorNV", value, index, clamp, segmentation_mask); + return; + } + const auto thread_id{"gl_SubGroupInvocationARB"}; + const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; + const auto src_thread_id{fmt::format("({}^{})", thread_id, index)}; + ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); + SetInBoundsFlag(ctx, inst); + ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); +} + +void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, std::string_view op_a, std::string_view op_b, + std::string_view swizzle) { + const auto mask{fmt::format("({}>>((gl_SubGroupInvocationARB&3)<<1))&3", swizzle)}; + const std::string modifier_a = fmt::format("FSWZ_A[{}]", mask); + const std::string modifier_b = fmt::format("FSWZ_B[{}]", mask); + ctx.AddF32("{}=({}*{})+({}*{});", inst, op_a, modifier_a, op_b, modifier_b); +} + +void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { + if (ctx.profile.support_gl_derivative_control) { + ctx.AddF32("{}=dFdxFine({});", inst, op_a); + } else { + LOG_WARNING(Shader_GLSL, "Device does not support dFdxFine, fallback to dFdx"); + ctx.AddF32("{}=dFdx({});", inst, op_a); + } +} + +void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { + if (ctx.profile.support_gl_derivative_control) { + ctx.AddF32("{}=dFdyFine({});", inst, op_a); + } else { + LOG_WARNING(Shader_GLSL, "Device does not support dFdyFine, fallback to dFdy"); + ctx.AddF32("{}=dFdy({});", inst, op_a); + } +} + +void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { + if (ctx.profile.support_gl_derivative_control) { + ctx.AddF32("{}=dFdxCoarse({});", inst, op_a); + } else { + LOG_WARNING(Shader_GLSL, "Device does not support dFdxCoarse, fallback to dFdx"); + ctx.AddF32("{}=dFdx({});", inst, op_a); + } +} + +void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { + if (ctx.profile.support_gl_derivative_control) { + ctx.AddF32("{}=dFdyCoarse({});", inst, op_a); + } else { + LOG_WARNING(Shader_GLSL, "Device does not support dFdyCoarse, fallback to dFdy"); + ctx.AddF32("{}=dFdy({});", inst, op_a); + } +} +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/var_alloc.cpp b/src/shader_recompiler/backend/glsl/var_alloc.cpp new file mode 100644 index 000000000..194f926ca --- /dev/null +++ b/src/shader_recompiler/backend/glsl/var_alloc.cpp @@ -0,0 +1,308 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
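When shuffleNV and friends are unavailable, the shuffle emitters above rebuild the source lane by hand from the segmentation mask and clamp fields, then guard the readInvocationARB with the shfl_in_bounds flag. The lane arithmetic used by EmitShuffleIndex, pulled out into host code (struct and function names illustrative):

    #include <cstdint>

    struct ShuffleSource {
        std::uint32_t src_lane;
        bool in_bounds;
    };

    ShuffleSource ShuffleIndexSource(std::uint32_t lane, std::uint32_t index, std::uint32_t clamp,
                                     std::uint32_t seg_mask) {
        const std::uint32_t min_lane{lane & seg_mask};                // first lane of this segment
        const std::uint32_t max_lane{min_lane | (clamp & ~seg_mask)}; // last readable lane
        const std::uint32_t src_lane{(index & ~seg_mask) | min_lane}; // requested lane, kept in-segment
        const bool in_bounds{static_cast<std::int32_t>(src_lane) <=
                             static_cast<std::int32_t>(max_lane)};
        return {src_lane, in_bounds};
    }

EmitShuffleUp and EmitShuffleDown instead derive the source lane by subtracting or adding the index to the caller's own lane, checked against the same clamped bound.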
+ +#include <string> +#include <string_view> + +#include <fmt/format.h> + +#include "shader_recompiler/backend/glsl/var_alloc.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLSL { +namespace { +std::string TypePrefix(GlslVarType type) { + switch (type) { + case GlslVarType::U1: + return "b_"; + case GlslVarType::F16x2: + return "f16x2_"; + case GlslVarType::U32: + return "u_"; + case GlslVarType::F32: + return "f_"; + case GlslVarType::U64: + return "u64_"; + case GlslVarType::F64: + return "d_"; + case GlslVarType::U32x2: + return "u2_"; + case GlslVarType::F32x2: + return "f2_"; + case GlslVarType::U32x3: + return "u3_"; + case GlslVarType::F32x3: + return "f3_"; + case GlslVarType::U32x4: + return "u4_"; + case GlslVarType::F32x4: + return "f4_"; + case GlslVarType::PrecF32: + return "pf_"; + case GlslVarType::PrecF64: + return "pd_"; + case GlslVarType::Void: + return ""; + default: + throw NotImplementedException("Type {}", type); + } +} + +std::string FormatFloat(std::string_view value, IR::Type type) { + // TODO: Confirm FP64 nan/inf + if (type == IR::Type::F32) { + if (value == "nan") { + return "utof(0x7fc00000)"; + } + if (value == "inf") { + return "utof(0x7f800000)"; + } + if (value == "-inf") { + return "utof(0xff800000)"; + } + } + if (value.find_first_of('e') != std::string_view::npos) { + // scientific notation + const auto cast{type == IR::Type::F32 ? "float" : "double"}; + return fmt::format("{}({})", cast, value); + } + const bool needs_dot{value.find_first_of('.') == std::string_view::npos}; + const bool needs_suffix{!value.ends_with('f')}; + const auto suffix{type == IR::Type::F32 ? "f" : "lf"}; + return fmt::format("{}{}{}", value, needs_dot ? "." : "", needs_suffix ? suffix : ""); +} + +std::string MakeImm(const IR::Value& value) { + switch (value.Type()) { + case IR::Type::U1: + return fmt::format("{}", value.U1() ? 
"true" : "false"); + case IR::Type::U32: + return fmt::format("{}u", value.U32()); + case IR::Type::F32: + return FormatFloat(fmt::format("{}", value.F32()), IR::Type::F32); + case IR::Type::U64: + return fmt::format("{}ul", value.U64()); + case IR::Type::F64: + return FormatFloat(fmt::format("{}", value.F64()), IR::Type::F64); + case IR::Type::Void: + return ""; + default: + throw NotImplementedException("Immediate type {}", value.Type()); + } +} +} // Anonymous namespace + +std::string VarAlloc::Representation(u32 index, GlslVarType type) const { + const auto prefix{TypePrefix(type)}; + return fmt::format("{}{}", prefix, index); +} + +std::string VarAlloc::Representation(Id id) const { + return Representation(id.index, id.type); +} + +std::string VarAlloc::Define(IR::Inst& inst, GlslVarType type) { + if (inst.HasUses()) { + inst.SetDefinition<Id>(Alloc(type)); + return Representation(inst.Definition<Id>()); + } else { + Id id{}; + id.type.Assign(type); + GetUseTracker(type).uses_temp = true; + inst.SetDefinition<Id>(id); + return 't' + Representation(inst.Definition<Id>()); + } +} + +std::string VarAlloc::Define(IR::Inst& inst, IR::Type type) { + return Define(inst, RegType(type)); +} + +std::string VarAlloc::PhiDefine(IR::Inst& inst, IR::Type type) { + return AddDefine(inst, RegType(type)); +} + +std::string VarAlloc::AddDefine(IR::Inst& inst, GlslVarType type) { + if (inst.HasUses()) { + inst.SetDefinition<Id>(Alloc(type)); + return Representation(inst.Definition<Id>()); + } else { + return ""; + } + return Representation(inst.Definition<Id>()); +} + +std::string VarAlloc::Consume(const IR::Value& value) { + return value.IsImmediate() ? MakeImm(value) : ConsumeInst(*value.InstRecursive()); +} + +std::string VarAlloc::ConsumeInst(IR::Inst& inst) { + inst.DestructiveRemoveUsage(); + if (!inst.HasUses()) { + Free(inst.Definition<Id>()); + } + return Representation(inst.Definition<Id>()); +} + +std::string VarAlloc::GetGlslType(IR::Type type) const { + return GetGlslType(RegType(type)); +} + +Id VarAlloc::Alloc(GlslVarType type) { + auto& use_tracker{GetUseTracker(type)}; + const auto num_vars{use_tracker.var_use.size()}; + for (size_t var = 0; var < num_vars; ++var) { + if (use_tracker.var_use[var]) { + continue; + } + use_tracker.num_used = std::max(use_tracker.num_used, var + 1); + use_tracker.var_use[var] = true; + Id ret{}; + ret.is_valid.Assign(1); + ret.type.Assign(type); + ret.index.Assign(static_cast<u32>(var)); + return ret; + } + // Allocate a new variable + use_tracker.var_use.push_back(true); + Id ret{}; + ret.is_valid.Assign(1); + ret.type.Assign(type); + ret.index.Assign(static_cast<u32>(use_tracker.num_used)); + ++use_tracker.num_used; + return ret; +} + +void VarAlloc::Free(Id id) { + if (id.is_valid == 0) { + throw LogicError("Freeing invalid variable"); + } + auto& use_tracker{GetUseTracker(id.type)}; + use_tracker.var_use[id.index] = false; +} + +GlslVarType VarAlloc::RegType(IR::Type type) const { + switch (type) { + case IR::Type::U1: + return GlslVarType::U1; + case IR::Type::U32: + return GlslVarType::U32; + case IR::Type::F32: + return GlslVarType::F32; + case IR::Type::U64: + return GlslVarType::U64; + case IR::Type::F64: + return GlslVarType::F64; + default: + throw NotImplementedException("IR type {}", type); + } +} + +std::string VarAlloc::GetGlslType(GlslVarType type) const { + switch (type) { + case GlslVarType::U1: + return "bool"; + case GlslVarType::F16x2: + return "f16vec2"; + case GlslVarType::U32: + return "uint"; + case GlslVarType::F32: + case 
GlslVarType::PrecF32: + return "float"; + case GlslVarType::U64: + return "uint64_t"; + case GlslVarType::F64: + case GlslVarType::PrecF64: + return "double"; + case GlslVarType::U32x2: + return "uvec2"; + case GlslVarType::F32x2: + return "vec2"; + case GlslVarType::U32x3: + return "uvec3"; + case GlslVarType::F32x3: + return "vec3"; + case GlslVarType::U32x4: + return "uvec4"; + case GlslVarType::F32x4: + return "vec4"; + case GlslVarType::Void: + return ""; + default: + throw NotImplementedException("Type {}", type); + } +} + +VarAlloc::UseTracker& VarAlloc::GetUseTracker(GlslVarType type) { + switch (type) { + case GlslVarType::U1: + return var_bool; + case GlslVarType::F16x2: + return var_f16x2; + case GlslVarType::U32: + return var_u32; + case GlslVarType::F32: + return var_f32; + case GlslVarType::U64: + return var_u64; + case GlslVarType::F64: + return var_f64; + case GlslVarType::U32x2: + return var_u32x2; + case GlslVarType::F32x2: + return var_f32x2; + case GlslVarType::U32x3: + return var_u32x3; + case GlslVarType::F32x3: + return var_f32x3; + case GlslVarType::U32x4: + return var_u32x4; + case GlslVarType::F32x4: + return var_f32x4; + case GlslVarType::PrecF32: + return var_precf32; + case GlslVarType::PrecF64: + return var_precf64; + default: + throw NotImplementedException("Type {}", type); + } +} + +const VarAlloc::UseTracker& VarAlloc::GetUseTracker(GlslVarType type) const { + switch (type) { + case GlslVarType::U1: + return var_bool; + case GlslVarType::F16x2: + return var_f16x2; + case GlslVarType::U32: + return var_u32; + case GlslVarType::F32: + return var_f32; + case GlslVarType::U64: + return var_u64; + case GlslVarType::F64: + return var_f64; + case GlslVarType::U32x2: + return var_u32x2; + case GlslVarType::F32x2: + return var_f32x2; + case GlslVarType::U32x3: + return var_u32x3; + case GlslVarType::F32x3: + return var_f32x3; + case GlslVarType::U32x4: + return var_u32x4; + case GlslVarType::F32x4: + return var_f32x4; + case GlslVarType::PrecF32: + return var_precf32; + case GlslVarType::PrecF64: + return var_precf64; + default: + throw NotImplementedException("Type {}", type); + } +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/var_alloc.h b/src/shader_recompiler/backend/glsl/var_alloc.h new file mode 100644 index 000000000..8b49f32a6 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/var_alloc.h @@ -0,0 +1,105 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
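The magic constants in FormatFloat above are the IEEE-754 single-precision bit patterns for the values GLSL cannot spell as literals; the emitted utof() call reinterprets them inside the shader. They can be checked on the host (function name illustrative; requires C++20 for std::bit_cast):

    #include <bit>
    #include <cassert>
    #include <cmath>
    #include <cstdint>

    void CheckSpecialFloatBits() {
        assert(std::isnan(std::bit_cast<float>(0x7fc00000u)));  // quiet NaN
        assert(std::bit_cast<float>(0x7f800000u) == INFINITY);  // +infinity
        assert(std::bit_cast<float>(0xff800000u) == -INFINITY); // -infinity
    }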
+ +#pragma once + +#include <bitset> +#include <string> +#include <vector> + +#include "common/bit_field.h" +#include "common/common_types.h" + +namespace Shader::IR { +class Inst; +class Value; +enum class Type; +} // namespace Shader::IR + +namespace Shader::Backend::GLSL { +enum class GlslVarType : u32 { + U1, + F16x2, + U32, + F32, + U64, + F64, + U32x2, + F32x2, + U32x3, + F32x3, + U32x4, + F32x4, + PrecF32, + PrecF64, + Void, +}; + +struct Id { + union { + u32 raw; + BitField<0, 1, u32> is_valid; + BitField<1, 4, GlslVarType> type; + BitField<6, 26, u32> index; + }; + + bool operator==(Id rhs) const noexcept { + return raw == rhs.raw; + } + bool operator!=(Id rhs) const noexcept { + return !operator==(rhs); + } +}; +static_assert(sizeof(Id) == sizeof(u32)); + +class VarAlloc { +public: + struct UseTracker { + bool uses_temp{}; + size_t num_used{}; + std::vector<bool> var_use; + }; + + /// Used for explicit usages of variables, may revert to temporaries + std::string Define(IR::Inst& inst, GlslVarType type); + std::string Define(IR::Inst& inst, IR::Type type); + + /// Used to assign variables used by the IR. May return a blank string if + /// the instruction's result is unused in the IR. + std::string AddDefine(IR::Inst& inst, GlslVarType type); + std::string PhiDefine(IR::Inst& inst, IR::Type type); + + std::string Consume(const IR::Value& value); + std::string ConsumeInst(IR::Inst& inst); + + std::string GetGlslType(GlslVarType type) const; + std::string GetGlslType(IR::Type type) const; + + const UseTracker& GetUseTracker(GlslVarType type) const; + std::string Representation(u32 index, GlslVarType type) const; + +private: + GlslVarType RegType(IR::Type type) const; + Id Alloc(GlslVarType type); + void Free(Id id); + UseTracker& GetUseTracker(GlslVarType type); + std::string Representation(Id id) const; + + UseTracker var_bool{}; + UseTracker var_f16x2{}; + UseTracker var_u32{}; + UseTracker var_u32x2{}; + UseTracker var_u32x3{}; + UseTracker var_u32x4{}; + UseTracker var_f32{}; + UseTracker var_f32x2{}; + UseTracker var_f32x3{}; + UseTracker var_f32x4{}; + UseTracker var_u64{}; + UseTracker var_f64{}; + UseTracker var_precf32{}; + UseTracker var_precf64{}; +}; + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp new file mode 100644 index 000000000..2d29d8c14 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -0,0 +1,1368 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
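Id in the header above packs an allocation into a single 32-bit word via BitField: bit 0 holds is_valid, bits 1-4 the GlslVarType, and bits 6-31 the variable index (bit 5 is unused), all overlaying the raw u32 member that the static_assert checks. The same layout written with plain shifts and masks (function names illustrative):

    #include <cstdint>

    std::uint32_t PackId(bool is_valid, std::uint32_t type, std::uint32_t index) {
        return (is_valid ? 1u : 0u) | ((type & 0xfu) << 1) | ((index & 0x3ffffffu) << 6);
    }

    std::uint32_t UnpackIndex(std::uint32_t raw) {
        return (raw >> 6) & 0x3ffffffu; // 26-bit index field
    }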
+ +#include <algorithm> +#include <array> +#include <climits> +#include <string_view> + +#include <fmt/format.h> + +#include "common/common_types.h" +#include "common/div_ceil.h" +#include "shader_recompiler/backend/spirv/emit_context.h" + +namespace Shader::Backend::SPIRV { +namespace { +enum class Operation { + Increment, + Decrement, + FPAdd, + FPMin, + FPMax, +}; + +struct AttrInfo { + Id pointer; + Id id; + bool needs_cast; +}; + +Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) { + const spv::ImageFormat format{spv::ImageFormat::Unknown}; + const Id type{ctx.F32[1]}; + const bool depth{desc.is_depth}; + switch (desc.type) { + case TextureType::Color1D: + return ctx.TypeImage(type, spv::Dim::Dim1D, depth, false, false, 1, format); + case TextureType::ColorArray1D: + return ctx.TypeImage(type, spv::Dim::Dim1D, depth, true, false, 1, format); + case TextureType::Color2D: + return ctx.TypeImage(type, spv::Dim::Dim2D, depth, false, false, 1, format); + case TextureType::ColorArray2D: + return ctx.TypeImage(type, spv::Dim::Dim2D, depth, true, false, 1, format); + case TextureType::Color3D: + return ctx.TypeImage(type, spv::Dim::Dim3D, depth, false, false, 1, format); + case TextureType::ColorCube: + return ctx.TypeImage(type, spv::Dim::Cube, depth, false, false, 1, format); + case TextureType::ColorArrayCube: + return ctx.TypeImage(type, spv::Dim::Cube, depth, true, false, 1, format); + case TextureType::Buffer: + break; + } + throw InvalidArgument("Invalid texture type {}", desc.type); +} + +spv::ImageFormat GetImageFormat(ImageFormat format) { + switch (format) { + case ImageFormat::Typeless: + return spv::ImageFormat::Unknown; + case ImageFormat::R8_UINT: + return spv::ImageFormat::R8ui; + case ImageFormat::R8_SINT: + return spv::ImageFormat::R8i; + case ImageFormat::R16_UINT: + return spv::ImageFormat::R16ui; + case ImageFormat::R16_SINT: + return spv::ImageFormat::R16i; + case ImageFormat::R32_UINT: + return spv::ImageFormat::R32ui; + case ImageFormat::R32G32_UINT: + return spv::ImageFormat::Rg32ui; + case ImageFormat::R32G32B32A32_UINT: + return spv::ImageFormat::Rgba32ui; + } + throw InvalidArgument("Invalid image format {}", format); +} + +Id ImageType(EmitContext& ctx, const ImageDescriptor& desc) { + const spv::ImageFormat format{GetImageFormat(desc.format)}; + const Id type{ctx.U32[1]}; + switch (desc.type) { + case TextureType::Color1D: + return ctx.TypeImage(type, spv::Dim::Dim1D, false, false, false, 2, format); + case TextureType::ColorArray1D: + return ctx.TypeImage(type, spv::Dim::Dim1D, false, true, false, 2, format); + case TextureType::Color2D: + return ctx.TypeImage(type, spv::Dim::Dim2D, false, false, false, 2, format); + case TextureType::ColorArray2D: + return ctx.TypeImage(type, spv::Dim::Dim2D, false, true, false, 2, format); + case TextureType::Color3D: + return ctx.TypeImage(type, spv::Dim::Dim3D, false, false, false, 2, format); + case TextureType::Buffer: + throw NotImplementedException("Image buffer"); + default: + break; + } + throw InvalidArgument("Invalid texture type {}", desc.type); +} + +Id DefineVariable(EmitContext& ctx, Id type, std::optional<spv::BuiltIn> builtin, + spv::StorageClass storage_class) { + const Id pointer_type{ctx.TypePointer(storage_class, type)}; + const Id id{ctx.AddGlobalVariable(pointer_type, storage_class)}; + if (builtin) { + ctx.Decorate(id, spv::Decoration::BuiltIn, *builtin); + } + ctx.interfaces.push_back(id); + return id; +} + +u32 NumVertices(InputTopology input_topology) { + switch (input_topology) { + case 
InputTopology::Points: + return 1; + case InputTopology::Lines: + return 2; + case InputTopology::LinesAdjacency: + return 4; + case InputTopology::Triangles: + return 3; + case InputTopology::TrianglesAdjacency: + return 6; + } + throw InvalidArgument("Invalid input topology {}", input_topology); +} + +Id DefineInput(EmitContext& ctx, Id type, bool per_invocation, + std::optional<spv::BuiltIn> builtin = std::nullopt) { + switch (ctx.stage) { + case Stage::TessellationControl: + case Stage::TessellationEval: + if (per_invocation) { + type = ctx.TypeArray(type, ctx.Const(32u)); + } + break; + case Stage::Geometry: + if (per_invocation) { + const u32 num_vertices{NumVertices(ctx.runtime_info.input_topology)}; + type = ctx.TypeArray(type, ctx.Const(num_vertices)); + } + break; + default: + break; + } + return DefineVariable(ctx, type, builtin, spv::StorageClass::Input); +} + +Id DefineOutput(EmitContext& ctx, Id type, std::optional<u32> invocations, + std::optional<spv::BuiltIn> builtin = std::nullopt) { + if (invocations && ctx.stage == Stage::TessellationControl) { + type = ctx.TypeArray(type, ctx.Const(*invocations)); + } + return DefineVariable(ctx, type, builtin, spv::StorageClass::Output); +} + +void DefineGenericOutput(EmitContext& ctx, size_t index, std::optional<u32> invocations) { + static constexpr std::string_view swizzle{"xyzw"}; + const size_t base_attr_index{static_cast<size_t>(IR::Attribute::Generic0X) + index * 4}; + u32 element{0}; + while (element < 4) { + const u32 remainder{4 - element}; + const TransformFeedbackVarying* xfb_varying{}; + if (!ctx.runtime_info.xfb_varyings.empty()) { + xfb_varying = &ctx.runtime_info.xfb_varyings[base_attr_index + element]; + xfb_varying = xfb_varying && xfb_varying->components > 0 ? xfb_varying : nullptr; + } + const u32 num_components{xfb_varying ? 
xfb_varying->components : remainder}; + + const Id id{DefineOutput(ctx, ctx.F32[num_components], invocations)}; + ctx.Decorate(id, spv::Decoration::Location, static_cast<u32>(index)); + if (element > 0) { + ctx.Decorate(id, spv::Decoration::Component, element); + } + if (xfb_varying) { + ctx.Decorate(id, spv::Decoration::XfbBuffer, xfb_varying->buffer); + ctx.Decorate(id, spv::Decoration::XfbStride, xfb_varying->stride); + ctx.Decorate(id, spv::Decoration::Offset, xfb_varying->offset); + } + if (num_components < 4 || element > 0) { + const std::string_view subswizzle{swizzle.substr(element, num_components)}; + ctx.Name(id, fmt::format("out_attr{}_{}", index, subswizzle)); + } else { + ctx.Name(id, fmt::format("out_attr{}", index)); + } + const GenericElementInfo info{ + .id = id, + .first_element = element, + .num_components = num_components, + }; + std::fill_n(ctx.output_generics[index].begin() + element, num_components, info); + element += num_components; + } +} + +Id GetAttributeType(EmitContext& ctx, AttributeType type) { + switch (type) { + case AttributeType::Float: + return ctx.F32[4]; + case AttributeType::SignedInt: + return ctx.TypeVector(ctx.TypeInt(32, true), 4); + case AttributeType::UnsignedInt: + return ctx.U32[4]; + case AttributeType::Disabled: + break; + } + throw InvalidArgument("Invalid attribute type {}", type); +} + +std::optional<AttrInfo> AttrTypes(EmitContext& ctx, u32 index) { + const AttributeType type{ctx.runtime_info.generic_input_types.at(index)}; + switch (type) { + case AttributeType::Float: + return AttrInfo{ctx.input_f32, ctx.F32[1], false}; + case AttributeType::UnsignedInt: + return AttrInfo{ctx.input_u32, ctx.U32[1], true}; + case AttributeType::SignedInt: + return AttrInfo{ctx.input_s32, ctx.TypeInt(32, true), true}; + case AttributeType::Disabled: + return std::nullopt; + } + throw InvalidArgument("Invalid attribute type {}", type); +} + +std::string_view StageName(Stage stage) { + switch (stage) { + case Stage::VertexA: + return "vs_a"; + case Stage::VertexB: + return "vs"; + case Stage::TessellationControl: + return "tcs"; + case Stage::TessellationEval: + return "tes"; + case Stage::Geometry: + return "gs"; + case Stage::Fragment: + return "fs"; + case Stage::Compute: + return "cs"; + } + throw InvalidArgument("Invalid stage {}", stage); +} + +template <typename... Args> +void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... args) { + ctx.Name(object, fmt::format(fmt::runtime(format_str), StageName(ctx.stage), + std::forward<Args>(args)...) 
+ .c_str()); +} + +void DefineConstBuffers(EmitContext& ctx, const Info& info, Id UniformDefinitions::*member_type, + u32 binding, Id type, char type_char, u32 element_size) { + const Id array_type{ctx.TypeArray(type, ctx.Const(65536U / element_size))}; + ctx.Decorate(array_type, spv::Decoration::ArrayStride, element_size); + + const Id struct_type{ctx.TypeStruct(array_type)}; + Name(ctx, struct_type, "{}_cbuf_block_{}{}", ctx.stage, type_char, element_size * CHAR_BIT); + ctx.Decorate(struct_type, spv::Decoration::Block); + ctx.MemberName(struct_type, 0, "data"); + ctx.MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); + + const Id struct_pointer_type{ctx.TypePointer(spv::StorageClass::Uniform, struct_type)}; + const Id uniform_type{ctx.TypePointer(spv::StorageClass::Uniform, type)}; + ctx.uniform_types.*member_type = uniform_type; + + for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { + const Id id{ctx.AddGlobalVariable(struct_pointer_type, spv::StorageClass::Uniform)}; + ctx.Decorate(id, spv::Decoration::Binding, binding); + ctx.Decorate(id, spv::Decoration::DescriptorSet, 0U); + ctx.Name(id, fmt::format("c{}", desc.index)); + for (size_t i = 0; i < desc.count; ++i) { + ctx.cbufs[desc.index + i].*member_type = id; + } + if (ctx.profile.supported_spirv >= 0x00010400) { + ctx.interfaces.push_back(id); + } + binding += desc.count; + } +} + +void DefineSsbos(EmitContext& ctx, StorageTypeDefinition& type_def, + Id StorageDefinitions::*member_type, const Info& info, u32 binding, Id type, + u32 stride) { + const Id array_type{ctx.TypeRuntimeArray(type)}; + ctx.Decorate(array_type, spv::Decoration::ArrayStride, stride); + + const Id struct_type{ctx.TypeStruct(array_type)}; + ctx.Decorate(struct_type, spv::Decoration::Block); + ctx.MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); + + const Id struct_pointer{ctx.TypePointer(spv::StorageClass::StorageBuffer, struct_type)}; + type_def.array = struct_pointer; + type_def.element = ctx.TypePointer(spv::StorageClass::StorageBuffer, type); + + u32 index{}; + for (const StorageBufferDescriptor& desc : info.storage_buffers_descriptors) { + const Id id{ctx.AddGlobalVariable(struct_pointer, spv::StorageClass::StorageBuffer)}; + ctx.Decorate(id, spv::Decoration::Binding, binding); + ctx.Decorate(id, spv::Decoration::DescriptorSet, 0U); + ctx.Name(id, fmt::format("ssbo{}", index)); + if (ctx.profile.supported_spirv >= 0x00010400) { + ctx.interfaces.push_back(id); + } + for (size_t i = 0; i < desc.count; ++i) { + ctx.ssbos[index + i].*member_type = id; + } + index += desc.count; + binding += desc.count; + } +} + +Id CasFunction(EmitContext& ctx, Operation operation, Id value_type) { + const Id func_type{ctx.TypeFunction(value_type, value_type, value_type)}; + const Id func{ctx.OpFunction(value_type, spv::FunctionControlMask::MaskNone, func_type)}; + const Id op_a{ctx.OpFunctionParameter(value_type)}; + const Id op_b{ctx.OpFunctionParameter(value_type)}; + ctx.AddLabel(); + Id result{}; + switch (operation) { + case Operation::Increment: { + const Id pred{ctx.OpUGreaterThanEqual(ctx.U1, op_a, op_b)}; + const Id incr{ctx.OpIAdd(value_type, op_a, ctx.Constant(value_type, 1))}; + result = ctx.OpSelect(value_type, pred, ctx.u32_zero_value, incr); + break; + } + case Operation::Decrement: { + const Id lhs{ctx.OpIEqual(ctx.U1, op_a, ctx.Constant(value_type, 0u))}; + const Id rhs{ctx.OpUGreaterThan(ctx.U1, op_a, op_b)}; + const Id pred{ctx.OpLogicalOr(ctx.U1, lhs, rhs)}; + const Id decr{ctx.OpISub(value_type, op_a, 
ctx.Constant(value_type, 1))}; + result = ctx.OpSelect(value_type, pred, op_b, decr); + break; + } + case Operation::FPAdd: + result = ctx.OpFAdd(value_type, op_a, op_b); + break; + case Operation::FPMin: + result = ctx.OpFMin(value_type, op_a, op_b); + break; + case Operation::FPMax: + result = ctx.OpFMax(value_type, op_a, op_b); + break; + default: + break; + } + ctx.OpReturnValue(result); + ctx.OpFunctionEnd(); + return func; +} + +Id CasLoop(EmitContext& ctx, Operation operation, Id array_pointer, Id element_pointer, + Id value_type, Id memory_type, spv::Scope scope) { + const bool is_shared{scope == spv::Scope::Workgroup}; + const bool is_struct{!is_shared || ctx.profile.support_explicit_workgroup_layout}; + const Id cas_func{CasFunction(ctx, operation, value_type)}; + const Id zero{ctx.u32_zero_value}; + const Id scope_id{ctx.Const(static_cast<u32>(scope))}; + + const Id loop_header{ctx.OpLabel()}; + const Id continue_block{ctx.OpLabel()}; + const Id merge_block{ctx.OpLabel()}; + const Id func_type{is_shared + ? ctx.TypeFunction(value_type, ctx.U32[1], value_type) + : ctx.TypeFunction(value_type, ctx.U32[1], value_type, array_pointer)}; + + const Id func{ctx.OpFunction(value_type, spv::FunctionControlMask::MaskNone, func_type)}; + const Id index{ctx.OpFunctionParameter(ctx.U32[1])}; + const Id op_b{ctx.OpFunctionParameter(value_type)}; + const Id base{is_shared ? ctx.shared_memory_u32 : ctx.OpFunctionParameter(array_pointer)}; + ctx.AddLabel(); + ctx.OpBranch(loop_header); + ctx.AddLabel(loop_header); + + ctx.OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone); + ctx.OpBranch(continue_block); + + ctx.AddLabel(continue_block); + const Id word_pointer{is_struct ? ctx.OpAccessChain(element_pointer, base, zero, index) + : ctx.OpAccessChain(element_pointer, base, index)}; + if (value_type.value == ctx.F32[2].value) { + const Id u32_value{ctx.OpLoad(ctx.U32[1], word_pointer)}; + const Id value{ctx.OpUnpackHalf2x16(ctx.F32[2], u32_value)}; + const Id new_value{ctx.OpFunctionCall(value_type, cas_func, value, op_b)}; + const Id u32_new_value{ctx.OpPackHalf2x16(ctx.U32[1], new_value)}; + const Id atomic_res{ctx.OpAtomicCompareExchange(ctx.U32[1], word_pointer, scope_id, zero, + zero, u32_new_value, u32_value)}; + const Id success{ctx.OpIEqual(ctx.U1, atomic_res, u32_value)}; + ctx.OpBranchConditional(success, merge_block, loop_header); + + ctx.AddLabel(merge_block); + ctx.OpReturnValue(ctx.OpUnpackHalf2x16(ctx.F32[2], atomic_res)); + } else { + const Id value{ctx.OpLoad(memory_type, word_pointer)}; + const bool matching_type{value_type.value == memory_type.value}; + const Id bitcast_value{matching_type ? value : ctx.OpBitcast(value_type, value)}; + const Id cal_res{ctx.OpFunctionCall(value_type, cas_func, bitcast_value, op_b)}; + const Id new_value{matching_type ? 
cal_res : ctx.OpBitcast(memory_type, cal_res)}; + const Id atomic_res{ctx.OpAtomicCompareExchange(ctx.U32[1], word_pointer, scope_id, zero, + zero, new_value, value)}; + const Id success{ctx.OpIEqual(ctx.U1, atomic_res, value)}; + ctx.OpBranchConditional(success, merge_block, loop_header); + + ctx.AddLabel(merge_block); + ctx.OpReturnValue(ctx.OpBitcast(value_type, atomic_res)); + } + ctx.OpFunctionEnd(); + return func; +} + +template <typename Desc> +std::string NameOf(Stage stage, const Desc& desc, std::string_view prefix) { + if (desc.count > 1) { + return fmt::format("{}_{}{}_{:02x}x{}", StageName(stage), prefix, desc.cbuf_index, + desc.cbuf_offset, desc.count); + } else { + return fmt::format("{}_{}{}_{:02x}", StageName(stage), prefix, desc.cbuf_index, + desc.cbuf_offset); + } +} + +Id DescType(EmitContext& ctx, Id sampled_type, Id pointer_type, u32 count) { + if (count > 1) { + const Id array_type{ctx.TypeArray(sampled_type, ctx.Const(count))}; + return ctx.TypePointer(spv::StorageClass::UniformConstant, array_type); + } else { + return pointer_type; + } +} +} // Anonymous namespace + +void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) { + defs[0] = sirit_ctx.Name(base_type, name); + + std::array<char, 6> def_name; + for (int i = 1; i < 4; ++i) { + const std::string_view def_name_view( + def_name.data(), + fmt::format_to_n(def_name.data(), def_name.size(), "{}x{}", name, i + 1).size); + defs[static_cast<size_t>(i)] = + sirit_ctx.Name(sirit_ctx.TypeVector(base_type, i + 1), def_name_view); + } +} + +EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_, + IR::Program& program, Bindings& bindings) + : Sirit::Module(profile_.supported_spirv), profile{profile_}, + runtime_info{runtime_info_}, stage{program.stage} { + const bool is_unified{profile.unified_descriptor_binding}; + u32& uniform_binding{is_unified ? bindings.unified : bindings.uniform_buffer}; + u32& storage_binding{is_unified ? bindings.unified : bindings.storage_buffer}; + u32& texture_binding{is_unified ? bindings.unified : bindings.texture}; + u32& image_binding{is_unified ? bindings.unified : bindings.image}; + AddCapability(spv::Capability::Shader); + DefineCommonTypes(program.info); + DefineCommonConstants(); + DefineInterfaces(program); + DefineLocalMemory(program); + DefineSharedMemory(program); + DefineSharedMemoryFunctions(program); + DefineConstantBuffers(program.info, uniform_binding); + DefineStorageBuffers(program.info, storage_binding); + DefineTextureBuffers(program.info, texture_binding); + DefineImageBuffers(program.info, image_binding); + DefineTextures(program.info, texture_binding); + DefineImages(program.info, image_binding); + DefineAttributeMemAccess(program.info); + DefineGlobalMemoryFunctions(program.info); +} + +EmitContext::~EmitContext() = default; + +Id EmitContext::Def(const IR::Value& value) { + if (!value.IsImmediate()) { + return value.InstRecursive()->Definition<Id>(); + } + switch (value.Type()) { + case IR::Type::Void: + // Void instructions are used for optional arguments (e.g. texture offsets) + // They are not meant to be used in the SPIR-V module + return Id{}; + case IR::Type::U1: + return value.U1() ? 
true_value : false_value; + case IR::Type::U32: + return Const(value.U32()); + case IR::Type::U64: + return Constant(U64, value.U64()); + case IR::Type::F32: + return Const(value.F32()); + case IR::Type::F64: + return Constant(F64[1], value.F64()); + default: + throw NotImplementedException("Immediate type {}", value.Type()); + } +} + +Id EmitContext::BitOffset8(const IR::Value& offset) { + if (offset.IsImmediate()) { + return Const((offset.U32() % 4) * 8); + } + return OpBitwiseAnd(U32[1], OpShiftLeftLogical(U32[1], Def(offset), Const(3u)), Const(24u)); +} + +Id EmitContext::BitOffset16(const IR::Value& offset) { + if (offset.IsImmediate()) { + return Const(((offset.U32() / 2) % 2) * 16); + } + return OpBitwiseAnd(U32[1], OpShiftLeftLogical(U32[1], Def(offset), Const(3u)), Const(16u)); +} + +void EmitContext::DefineCommonTypes(const Info& info) { + void_id = TypeVoid(); + + U1 = Name(TypeBool(), "u1"); + + F32.Define(*this, TypeFloat(32), "f32"); + U32.Define(*this, TypeInt(32, false), "u32"); + S32.Define(*this, TypeInt(32, true), "s32"); + + private_u32 = Name(TypePointer(spv::StorageClass::Private, U32[1]), "private_u32"); + + input_f32 = Name(TypePointer(spv::StorageClass::Input, F32[1]), "input_f32"); + input_u32 = Name(TypePointer(spv::StorageClass::Input, U32[1]), "input_u32"); + input_s32 = Name(TypePointer(spv::StorageClass::Input, TypeInt(32, true)), "input_s32"); + + output_f32 = Name(TypePointer(spv::StorageClass::Output, F32[1]), "output_f32"); + output_u32 = Name(TypePointer(spv::StorageClass::Output, U32[1]), "output_u32"); + + if (info.uses_int8 && profile.support_int8) { + AddCapability(spv::Capability::Int8); + U8 = Name(TypeInt(8, false), "u8"); + S8 = Name(TypeInt(8, true), "s8"); + } + if (info.uses_int16 && profile.support_int16) { + AddCapability(spv::Capability::Int16); + U16 = Name(TypeInt(16, false), "u16"); + S16 = Name(TypeInt(16, true), "s16"); + } + if (info.uses_int64) { + AddCapability(spv::Capability::Int64); + U64 = Name(TypeInt(64, false), "u64"); + } + if (info.uses_fp16) { + AddCapability(spv::Capability::Float16); + F16.Define(*this, TypeFloat(16), "f16"); + } + if (info.uses_fp64) { + AddCapability(spv::Capability::Float64); + F64.Define(*this, TypeFloat(64), "f64"); + } +} + +void EmitContext::DefineCommonConstants() { + true_value = ConstantTrue(U1); + false_value = ConstantFalse(U1); + u32_zero_value = Const(0U); + f32_zero_value = Const(0.0f); +} + +void EmitContext::DefineInterfaces(const IR::Program& program) { + DefineInputs(program); + DefineOutputs(program); +} + +void EmitContext::DefineLocalMemory(const IR::Program& program) { + if (program.local_memory_size == 0) { + return; + } + const u32 num_elements{Common::DivCeil(program.local_memory_size, 4U)}; + const Id type{TypeArray(U32[1], Const(num_elements))}; + const Id pointer{TypePointer(spv::StorageClass::Private, type)}; + local_memory = AddGlobalVariable(pointer, spv::StorageClass::Private); + if (profile.supported_spirv >= 0x00010400) { + interfaces.push_back(local_memory); + } +} + +void EmitContext::DefineSharedMemory(const IR::Program& program) { + if (program.shared_memory_size == 0) { + return; + } + const auto make{[&](Id element_type, u32 element_size) { + const u32 num_elements{Common::DivCeil(program.shared_memory_size, element_size)}; + const Id array_type{TypeArray(element_type, Const(num_elements))}; + Decorate(array_type, spv::Decoration::ArrayStride, element_size); + + const Id struct_type{TypeStruct(array_type)}; + MemberDecorate(struct_type, 0U, 
spv::Decoration::Offset, 0U); + Decorate(struct_type, spv::Decoration::Block); + + const Id pointer{TypePointer(spv::StorageClass::Workgroup, struct_type)}; + const Id element_pointer{TypePointer(spv::StorageClass::Workgroup, element_type)}; + const Id variable{AddGlobalVariable(pointer, spv::StorageClass::Workgroup)}; + Decorate(variable, spv::Decoration::Aliased); + interfaces.push_back(variable); + + return std::make_tuple(variable, element_pointer, pointer); + }}; + if (profile.support_explicit_workgroup_layout) { + AddExtension("SPV_KHR_workgroup_memory_explicit_layout"); + AddCapability(spv::Capability::WorkgroupMemoryExplicitLayoutKHR); + if (program.info.uses_int8) { + AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout8BitAccessKHR); + std::tie(shared_memory_u8, shared_u8, std::ignore) = make(U8, 1); + } + if (program.info.uses_int16) { + AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout16BitAccessKHR); + std::tie(shared_memory_u16, shared_u16, std::ignore) = make(U16, 2); + } + if (program.info.uses_int64) { + std::tie(shared_memory_u64, shared_u64, std::ignore) = make(U64, 8); + } + std::tie(shared_memory_u32, shared_u32, shared_memory_u32_type) = make(U32[1], 4); + std::tie(shared_memory_u32x2, shared_u32x2, std::ignore) = make(U32[2], 8); + std::tie(shared_memory_u32x4, shared_u32x4, std::ignore) = make(U32[4], 16); + return; + } + const u32 num_elements{Common::DivCeil(program.shared_memory_size, 4U)}; + const Id type{TypeArray(U32[1], Const(num_elements))}; + shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type); + + shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]); + shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup); + interfaces.push_back(shared_memory_u32); + + const Id func_type{TypeFunction(void_id, U32[1], U32[1])}; + const auto make_function{[&](u32 mask, u32 size) { + const Id loop_header{OpLabel()}; + const Id continue_block{OpLabel()}; + const Id merge_block{OpLabel()}; + + const Id func{OpFunction(void_id, spv::FunctionControlMask::MaskNone, func_type)}; + const Id offset{OpFunctionParameter(U32[1])}; + const Id insert_value{OpFunctionParameter(U32[1])}; + AddLabel(); + OpBranch(loop_header); + + AddLabel(loop_header); + const Id word_offset{OpShiftRightArithmetic(U32[1], offset, Const(2U))}; + const Id shift_offset{OpShiftLeftLogical(U32[1], offset, Const(3U))}; + const Id bit_offset{OpBitwiseAnd(U32[1], shift_offset, Const(mask))}; + const Id count{Const(size)}; + OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone); + OpBranch(continue_block); + + AddLabel(continue_block); + const Id word_pointer{OpAccessChain(shared_u32, shared_memory_u32, word_offset)}; + const Id old_value{OpLoad(U32[1], word_pointer)}; + const Id new_value{OpBitFieldInsert(U32[1], old_value, insert_value, bit_offset, count)}; + const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, Const(1U), u32_zero_value, + u32_zero_value, new_value, old_value)}; + const Id success{OpIEqual(U1, atomic_res, old_value)}; + OpBranchConditional(success, merge_block, loop_header); + + AddLabel(merge_block); + OpReturn(); + OpFunctionEnd(); + return func; + }}; + if (program.info.uses_int8) { + shared_store_u8_func = make_function(24, 8); + } + if (program.info.uses_int16) { + shared_store_u16_func = make_function(16, 16); + } +} + +void EmitContext::DefineSharedMemoryFunctions(const IR::Program& program) { + if (program.info.uses_shared_increment) { + increment_cas_shared = 
CasLoop(*this, Operation::Increment, shared_memory_u32_type, + shared_u32, U32[1], U32[1], spv::Scope::Workgroup); + } + if (program.info.uses_shared_decrement) { + decrement_cas_shared = CasLoop(*this, Operation::Decrement, shared_memory_u32_type, + shared_u32, U32[1], U32[1], spv::Scope::Workgroup); + } +} + +void EmitContext::DefineAttributeMemAccess(const Info& info) { + const auto make_load{[&] { + const bool is_array{stage == Stage::Geometry}; + const Id end_block{OpLabel()}; + const Id default_label{OpLabel()}; + + const Id func_type_load{is_array ? TypeFunction(F32[1], U32[1], U32[1]) + : TypeFunction(F32[1], U32[1])}; + const Id func{OpFunction(F32[1], spv::FunctionControlMask::MaskNone, func_type_load)}; + const Id offset{OpFunctionParameter(U32[1])}; + const Id vertex{is_array ? OpFunctionParameter(U32[1]) : Id{}}; + + AddLabel(); + const Id base_index{OpShiftRightArithmetic(U32[1], offset, Const(2U))}; + const Id masked_index{OpBitwiseAnd(U32[1], base_index, Const(3U))}; + const Id compare_index{OpShiftRightArithmetic(U32[1], base_index, Const(2U))}; + std::vector<Sirit::Literal> literals; + std::vector<Id> labels; + if (info.loads.AnyComponent(IR::Attribute::PositionX)) { + literals.push_back(static_cast<u32>(IR::Attribute::PositionX) >> 2); + labels.push_back(OpLabel()); + } + const u32 base_attribute_value = static_cast<u32>(IR::Attribute::Generic0X) >> 2; + for (u32 index = 0; index < static_cast<u32>(IR::NUM_GENERICS); ++index) { + if (!info.loads.Generic(index)) { + continue; + } + literals.push_back(base_attribute_value + index); + labels.push_back(OpLabel()); + } + OpSelectionMerge(end_block, spv::SelectionControlMask::MaskNone); + OpSwitch(compare_index, default_label, literals, labels); + AddLabel(default_label); + OpReturnValue(Const(0.0f)); + size_t label_index{0}; + if (info.loads.AnyComponent(IR::Attribute::PositionX)) { + AddLabel(labels[label_index]); + const Id pointer{is_array + ? OpAccessChain(input_f32, input_position, vertex, masked_index) + : OpAccessChain(input_f32, input_position, masked_index)}; + const Id result{OpLoad(F32[1], pointer)}; + OpReturnValue(result); + ++label_index; + } + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (!info.loads.Generic(index)) { + continue; + } + AddLabel(labels[label_index]); + const auto type{AttrTypes(*this, static_cast<u32>(index))}; + if (!type) { + OpReturnValue(Const(0.0f)); + ++label_index; + continue; + } + const Id generic_id{input_generics.at(index)}; + const Id pointer{is_array + ? OpAccessChain(type->pointer, generic_id, vertex, masked_index) + : OpAccessChain(type->pointer, generic_id, masked_index)}; + const Id value{OpLoad(type->id, pointer)}; + const Id result{type->needs_cast ? 
OpBitcast(F32[1], value) : value}; + OpReturnValue(result); + ++label_index; + } + AddLabel(end_block); + OpUnreachable(); + OpFunctionEnd(); + return func; + }}; + const auto make_store{[&] { + const Id end_block{OpLabel()}; + const Id default_label{OpLabel()}; + + const Id func_type_store{TypeFunction(void_id, U32[1], F32[1])}; + const Id func{OpFunction(void_id, spv::FunctionControlMask::MaskNone, func_type_store)}; + const Id offset{OpFunctionParameter(U32[1])}; + const Id store_value{OpFunctionParameter(F32[1])}; + AddLabel(); + const Id base_index{OpShiftRightArithmetic(U32[1], offset, Const(2U))}; + const Id masked_index{OpBitwiseAnd(U32[1], base_index, Const(3U))}; + const Id compare_index{OpShiftRightArithmetic(U32[1], base_index, Const(2U))}; + std::vector<Sirit::Literal> literals; + std::vector<Id> labels; + if (info.stores.AnyComponent(IR::Attribute::PositionX)) { + literals.push_back(static_cast<u32>(IR::Attribute::PositionX) >> 2); + labels.push_back(OpLabel()); + } + const u32 base_attribute_value = static_cast<u32>(IR::Attribute::Generic0X) >> 2; + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (!info.stores.Generic(index)) { + continue; + } + literals.push_back(base_attribute_value + static_cast<u32>(index)); + labels.push_back(OpLabel()); + } + if (info.stores.ClipDistances()) { + literals.push_back(static_cast<u32>(IR::Attribute::ClipDistance0) >> 2); + labels.push_back(OpLabel()); + literals.push_back(static_cast<u32>(IR::Attribute::ClipDistance4) >> 2); + labels.push_back(OpLabel()); + } + OpSelectionMerge(end_block, spv::SelectionControlMask::MaskNone); + OpSwitch(compare_index, default_label, literals, labels); + AddLabel(default_label); + OpReturn(); + size_t label_index{0}; + if (info.stores.AnyComponent(IR::Attribute::PositionX)) { + AddLabel(labels[label_index]); + const Id pointer{OpAccessChain(output_f32, output_position, masked_index)}; + OpStore(pointer, store_value); + OpReturn(); + ++label_index; + } + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (!info.stores.Generic(index)) { + continue; + } + if (output_generics[index][0].num_components != 4) { + throw NotImplementedException("Physical stores and transform feedbacks"); + } + AddLabel(labels[label_index]); + const Id generic_id{output_generics[index][0].id}; + const Id pointer{OpAccessChain(output_f32, generic_id, masked_index)}; + OpStore(pointer, store_value); + OpReturn(); + ++label_index; + } + if (info.stores.ClipDistances()) { + AddLabel(labels[label_index]); + const Id pointer{OpAccessChain(output_f32, clip_distances, masked_index)}; + OpStore(pointer, store_value); + OpReturn(); + ++label_index; + AddLabel(labels[label_index]); + const Id fixed_index{OpIAdd(U32[1], masked_index, Const(4U))}; + const Id pointer2{OpAccessChain(output_f32, clip_distances, fixed_index)}; + OpStore(pointer2, store_value); + OpReturn(); + ++label_index; + } + AddLabel(end_block); + OpUnreachable(); + OpFunctionEnd(); + return func; + }}; + if (info.loads_indexed_attributes) { + indexed_load_func = make_load(); + } + if (info.stores_indexed_attributes) { + indexed_store_func = make_store(); + } +} + +void EmitContext::DefineGlobalMemoryFunctions(const Info& info) { + if (!info.uses_global_memory || !profile.support_int64) { + return; + } + using DefPtr = Id StorageDefinitions::*; + const Id zero{u32_zero_value}; + const auto define_body{[&](DefPtr ssbo_member, Id addr, Id element_pointer, u32 shift, + auto&& callback) { + AddLabel(); + const size_t 
num_buffers{info.storage_buffers_descriptors.size()}; + for (size_t index = 0; index < num_buffers; ++index) { + if (!info.nvn_buffer_used[index]) { + continue; + } + const auto& ssbo{info.storage_buffers_descriptors[index]}; + const Id ssbo_addr_cbuf_offset{Const(ssbo.cbuf_offset / 8)}; + const Id ssbo_size_cbuf_offset{Const(ssbo.cbuf_offset / 4 + 2)}; + const Id ssbo_addr_pointer{OpAccessChain( + uniform_types.U32x2, cbufs[ssbo.cbuf_index].U32x2, zero, ssbo_addr_cbuf_offset)}; + const Id ssbo_size_pointer{OpAccessChain(uniform_types.U32, cbufs[ssbo.cbuf_index].U32, + zero, ssbo_size_cbuf_offset)}; + + const Id ssbo_addr{OpBitcast(U64, OpLoad(U32[2], ssbo_addr_pointer))}; + const Id ssbo_size{OpUConvert(U64, OpLoad(U32[1], ssbo_size_pointer))}; + const Id ssbo_end{OpIAdd(U64, ssbo_addr, ssbo_size)}; + const Id cond{OpLogicalAnd(U1, OpUGreaterThanEqual(U1, addr, ssbo_addr), + OpULessThan(U1, addr, ssbo_end))}; + const Id then_label{OpLabel()}; + const Id else_label{OpLabel()}; + OpSelectionMerge(else_label, spv::SelectionControlMask::MaskNone); + OpBranchConditional(cond, then_label, else_label); + AddLabel(then_label); + const Id ssbo_id{ssbos[index].*ssbo_member}; + const Id ssbo_offset{OpUConvert(U32[1], OpISub(U64, addr, ssbo_addr))}; + const Id ssbo_index{OpShiftRightLogical(U32[1], ssbo_offset, Const(shift))}; + const Id ssbo_pointer{OpAccessChain(element_pointer, ssbo_id, zero, ssbo_index)}; + callback(ssbo_pointer); + AddLabel(else_label); + } + }}; + const auto define_load{[&](DefPtr ssbo_member, Id element_pointer, Id type, u32 shift) { + const Id function_type{TypeFunction(type, U64)}; + const Id func_id{OpFunction(type, spv::FunctionControlMask::MaskNone, function_type)}; + const Id addr{OpFunctionParameter(U64)}; + define_body(ssbo_member, addr, element_pointer, shift, + [&](Id ssbo_pointer) { OpReturnValue(OpLoad(type, ssbo_pointer)); }); + OpReturnValue(ConstantNull(type)); + OpFunctionEnd(); + return func_id; + }}; + const auto define_write{[&](DefPtr ssbo_member, Id element_pointer, Id type, u32 shift) { + const Id function_type{TypeFunction(void_id, U64, type)}; + const Id func_id{OpFunction(void_id, spv::FunctionControlMask::MaskNone, function_type)}; + const Id addr{OpFunctionParameter(U64)}; + const Id data{OpFunctionParameter(type)}; + define_body(ssbo_member, addr, element_pointer, shift, [&](Id ssbo_pointer) { + OpStore(ssbo_pointer, data); + OpReturn(); + }); + OpReturn(); + OpFunctionEnd(); + return func_id; + }}; + const auto define{ + [&](DefPtr ssbo_member, const StorageTypeDefinition& type_def, Id type, size_t size) { + const Id element_type{type_def.element}; + const u32 shift{static_cast<u32>(std::countr_zero(size))}; + const Id load_func{define_load(ssbo_member, element_type, type, shift)}; + const Id write_func{define_write(ssbo_member, element_type, type, shift)}; + return std::make_pair(load_func, write_func); + }}; + std::tie(load_global_func_u32, write_global_func_u32) = + define(&StorageDefinitions::U32, storage_types.U32, U32[1], sizeof(u32)); + std::tie(load_global_func_u32x2, write_global_func_u32x2) = + define(&StorageDefinitions::U32x2, storage_types.U32x2, U32[2], sizeof(u32[2])); + std::tie(load_global_func_u32x4, write_global_func_u32x4) = + define(&StorageDefinitions::U32x4, storage_types.U32x4, U32[4], sizeof(u32[4])); +} + +void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { + if (info.constant_buffer_descriptors.empty()) { + return; + } + if (!profile.support_descriptor_aliasing) { + DefineConstBuffers(*this, info, 
&UniformDefinitions::U32x4, binding, U32[4], 'u', + sizeof(u32[4])); + for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { + binding += desc.count; + } + return; + } + IR::Type types{info.used_constant_buffer_types}; + if (True(types & IR::Type::U8)) { + if (profile.support_int8) { + DefineConstBuffers(*this, info, &UniformDefinitions::U8, binding, U8, 'u', sizeof(u8)); + DefineConstBuffers(*this, info, &UniformDefinitions::S8, binding, S8, 's', sizeof(s8)); + } else { + types |= IR::Type::U32; + } + } + if (True(types & IR::Type::U16)) { + if (profile.support_int16) { + DefineConstBuffers(*this, info, &UniformDefinitions::U16, binding, U16, 'u', + sizeof(u16)); + DefineConstBuffers(*this, info, &UniformDefinitions::S16, binding, S16, 's', + sizeof(s16)); + } else { + types |= IR::Type::U32; + } + } + if (True(types & IR::Type::U32)) { + DefineConstBuffers(*this, info, &UniformDefinitions::U32, binding, U32[1], 'u', + sizeof(u32)); + } + if (True(types & IR::Type::F32)) { + DefineConstBuffers(*this, info, &UniformDefinitions::F32, binding, F32[1], 'f', + sizeof(f32)); + } + if (True(types & IR::Type::U32x2)) { + DefineConstBuffers(*this, info, &UniformDefinitions::U32x2, binding, U32[2], 'u', + sizeof(u32[2])); + } + binding += static_cast<u32>(info.constant_buffer_descriptors.size()); +} + +void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) { + if (info.storage_buffers_descriptors.empty()) { + return; + } + AddExtension("SPV_KHR_storage_buffer_storage_class"); + + const IR::Type used_types{profile.support_descriptor_aliasing ? info.used_storage_buffer_types + : IR::Type::U32}; + if (profile.support_int8 && True(used_types & IR::Type::U8)) { + DefineSsbos(*this, storage_types.U8, &StorageDefinitions::U8, info, binding, U8, + sizeof(u8)); + DefineSsbos(*this, storage_types.S8, &StorageDefinitions::S8, info, binding, S8, + sizeof(u8)); + } + if (profile.support_int16 && True(used_types & IR::Type::U16)) { + DefineSsbos(*this, storage_types.U16, &StorageDefinitions::U16, info, binding, U16, + sizeof(u16)); + DefineSsbos(*this, storage_types.S16, &StorageDefinitions::S16, info, binding, S16, + sizeof(u16)); + } + if (True(used_types & IR::Type::U32)) { + DefineSsbos(*this, storage_types.U32, &StorageDefinitions::U32, info, binding, U32[1], + sizeof(u32)); + } + if (True(used_types & IR::Type::F32)) { + DefineSsbos(*this, storage_types.F32, &StorageDefinitions::F32, info, binding, F32[1], + sizeof(f32)); + } + if (True(used_types & IR::Type::U64)) { + DefineSsbos(*this, storage_types.U64, &StorageDefinitions::U64, info, binding, U64, + sizeof(u64)); + } + if (True(used_types & IR::Type::U32x2)) { + DefineSsbos(*this, storage_types.U32x2, &StorageDefinitions::U32x2, info, binding, U32[2], + sizeof(u32[2])); + } + if (True(used_types & IR::Type::U32x4)) { + DefineSsbos(*this, storage_types.U32x4, &StorageDefinitions::U32x4, info, binding, U32[4], + sizeof(u32[4])); + } + for (const StorageBufferDescriptor& desc : info.storage_buffers_descriptors) { + binding += desc.count; + } + const bool needs_function{ + info.uses_global_increment || info.uses_global_decrement || info.uses_atomic_f32_add || + info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max || + info.uses_atomic_f32x2_add || info.uses_atomic_f32x2_min || info.uses_atomic_f32x2_max}; + if (needs_function) { + AddCapability(spv::Capability::VariablePointersStorageBuffer); + } + if (info.uses_global_increment) { + increment_cas_ssbo = CasLoop(*this, 
Operation::Increment, storage_types.U32.array, + storage_types.U32.element, U32[1], U32[1], spv::Scope::Device); + } + if (info.uses_global_decrement) { + decrement_cas_ssbo = CasLoop(*this, Operation::Decrement, storage_types.U32.array, + storage_types.U32.element, U32[1], U32[1], spv::Scope::Device); + } + if (info.uses_atomic_f32_add) { + f32_add_cas = CasLoop(*this, Operation::FPAdd, storage_types.U32.array, + storage_types.U32.element, F32[1], U32[1], spv::Scope::Device); + } + if (info.uses_atomic_f16x2_add) { + f16x2_add_cas = CasLoop(*this, Operation::FPAdd, storage_types.U32.array, + storage_types.U32.element, F16[2], F16[2], spv::Scope::Device); + } + if (info.uses_atomic_f16x2_min) { + f16x2_min_cas = CasLoop(*this, Operation::FPMin, storage_types.U32.array, + storage_types.U32.element, F16[2], F16[2], spv::Scope::Device); + } + if (info.uses_atomic_f16x2_max) { + f16x2_max_cas = CasLoop(*this, Operation::FPMax, storage_types.U32.array, + storage_types.U32.element, F16[2], F16[2], spv::Scope::Device); + } + if (info.uses_atomic_f32x2_add) { + f32x2_add_cas = CasLoop(*this, Operation::FPAdd, storage_types.U32.array, + storage_types.U32.element, F32[2], F32[2], spv::Scope::Device); + } + if (info.uses_atomic_f32x2_min) { + f32x2_min_cas = CasLoop(*this, Operation::FPMin, storage_types.U32.array, + storage_types.U32.element, F32[2], F32[2], spv::Scope::Device); + } + if (info.uses_atomic_f32x2_max) { + f32x2_max_cas = CasLoop(*this, Operation::FPMax, storage_types.U32.array, + storage_types.U32.element, F32[2], F32[2], spv::Scope::Device); + } +} + +void EmitContext::DefineTextureBuffers(const Info& info, u32& binding) { + if (info.texture_buffer_descriptors.empty()) { + return; + } + const spv::ImageFormat format{spv::ImageFormat::Unknown}; + image_buffer_type = TypeImage(F32[1], spv::Dim::Buffer, 0U, false, false, 1, format); + sampled_texture_buffer_type = TypeSampledImage(image_buffer_type); + + const Id type{TypePointer(spv::StorageClass::UniformConstant, sampled_texture_buffer_type)}; + texture_buffers.reserve(info.texture_buffer_descriptors.size()); + for (const TextureBufferDescriptor& desc : info.texture_buffer_descriptors) { + if (desc.count != 1) { + throw NotImplementedException("Array of texture buffers"); + } + const Id id{AddGlobalVariable(type, spv::StorageClass::UniformConstant)}; + Decorate(id, spv::Decoration::Binding, binding); + Decorate(id, spv::Decoration::DescriptorSet, 0U); + Name(id, NameOf(stage, desc, "texbuf")); + texture_buffers.push_back({ + .id = id, + .count = desc.count, + }); + if (profile.supported_spirv >= 0x00010400) { + interfaces.push_back(id); + } + ++binding; + } +} + +void EmitContext::DefineImageBuffers(const Info& info, u32& binding) { + image_buffers.reserve(info.image_buffer_descriptors.size()); + for (const ImageBufferDescriptor& desc : info.image_buffer_descriptors) { + if (desc.count != 1) { + throw NotImplementedException("Array of image buffers"); + } + const spv::ImageFormat format{GetImageFormat(desc.format)}; + const Id image_type{TypeImage(U32[1], spv::Dim::Buffer, false, false, false, 2, format)}; + const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)}; + const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)}; + Decorate(id, spv::Decoration::Binding, binding); + Decorate(id, spv::Decoration::DescriptorSet, 0U); + Name(id, NameOf(stage, desc, "imgbuf")); + image_buffers.push_back({ + .id = id, + .image_type = image_type, + .count = desc.count, + }); + if 
(profile.supported_spirv >= 0x00010400) { + interfaces.push_back(id); + } + ++binding; + } +} + +void EmitContext::DefineTextures(const Info& info, u32& binding) { + textures.reserve(info.texture_descriptors.size()); + for (const TextureDescriptor& desc : info.texture_descriptors) { + const Id image_type{ImageType(*this, desc)}; + const Id sampled_type{TypeSampledImage(image_type)}; + const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, sampled_type)}; + const Id desc_type{DescType(*this, sampled_type, pointer_type, desc.count)}; + const Id id{AddGlobalVariable(desc_type, spv::StorageClass::UniformConstant)}; + Decorate(id, spv::Decoration::Binding, binding); + Decorate(id, spv::Decoration::DescriptorSet, 0U); + Name(id, NameOf(stage, desc, "tex")); + textures.push_back({ + .id = id, + .sampled_type = sampled_type, + .pointer_type = pointer_type, + .image_type = image_type, + .count = desc.count, + }); + if (profile.supported_spirv >= 0x00010400) { + interfaces.push_back(id); + } + ++binding; + } + if (info.uses_atomic_image_u32) { + image_u32 = TypePointer(spv::StorageClass::Image, U32[1]); + } +} + +void EmitContext::DefineImages(const Info& info, u32& binding) { + images.reserve(info.image_descriptors.size()); + for (const ImageDescriptor& desc : info.image_descriptors) { + if (desc.count != 1) { + throw NotImplementedException("Array of images"); + } + const Id image_type{ImageType(*this, desc)}; + const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)}; + const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)}; + Decorate(id, spv::Decoration::Binding, binding); + Decorate(id, spv::Decoration::DescriptorSet, 0U); + Name(id, NameOf(stage, desc, "img")); + images.push_back({ + .id = id, + .image_type = image_type, + .count = desc.count, + }); + if (profile.supported_spirv >= 0x00010400) { + interfaces.push_back(id); + } + ++binding; + } +} + +void EmitContext::DefineInputs(const IR::Program& program) { + const Info& info{program.info}; + const VaryingState loads{info.loads.mask | info.passthrough.mask}; + + if (info.uses_workgroup_id) { + workgroup_id = DefineInput(*this, U32[3], false, spv::BuiltIn::WorkgroupId); + } + if (info.uses_local_invocation_id) { + local_invocation_id = DefineInput(*this, U32[3], false, spv::BuiltIn::LocalInvocationId); + } + if (info.uses_invocation_id) { + invocation_id = DefineInput(*this, U32[1], false, spv::BuiltIn::InvocationId); + } + if (info.uses_sample_id) { + sample_id = DefineInput(*this, U32[1], false, spv::BuiltIn::SampleId); + } + if (info.uses_is_helper_invocation) { + is_helper_invocation = DefineInput(*this, U1, false, spv::BuiltIn::HelperInvocation); + } + if (info.uses_subgroup_mask) { + subgroup_mask_eq = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupEqMaskKHR); + subgroup_mask_lt = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupLtMaskKHR); + subgroup_mask_le = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupLeMaskKHR); + subgroup_mask_gt = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupGtMaskKHR); + subgroup_mask_ge = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupGeMaskKHR); + } + if (info.uses_subgroup_invocation_id || info.uses_subgroup_shuffles || + (profile.warp_size_potentially_larger_than_guest && + (info.uses_subgroup_vote || info.uses_subgroup_mask))) { + subgroup_local_invocation_id = + DefineInput(*this, U32[1], false, spv::BuiltIn::SubgroupLocalInvocationId); + } + if (info.uses_fswzadd) { + const Id 
f32_one{Const(1.0f)}; + const Id f32_minus_one{Const(-1.0f)}; + const Id f32_zero{Const(0.0f)}; + fswzadd_lut_a = ConstantComposite(F32[4], f32_minus_one, f32_one, f32_minus_one, f32_zero); + fswzadd_lut_b = + ConstantComposite(F32[4], f32_minus_one, f32_minus_one, f32_one, f32_minus_one); + } + if (loads[IR::Attribute::PrimitiveId]) { + primitive_id = DefineInput(*this, U32[1], false, spv::BuiltIn::PrimitiveId); + } + if (loads.AnyComponent(IR::Attribute::PositionX)) { + const bool is_fragment{stage != Stage::Fragment}; + const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::Position : spv::BuiltIn::FragCoord}; + input_position = DefineInput(*this, F32[4], true, built_in); + if (profile.support_geometry_shader_passthrough) { + if (info.passthrough.AnyComponent(IR::Attribute::PositionX)) { + Decorate(input_position, spv::Decoration::PassthroughNV); + } + } + } + if (loads[IR::Attribute::InstanceId]) { + if (profile.support_vertex_instance_id) { + instance_id = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceId); + } else { + instance_index = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceIndex); + base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance); + } + } + if (loads[IR::Attribute::VertexId]) { + if (profile.support_vertex_instance_id) { + vertex_id = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexId); + } else { + vertex_index = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexIndex); + base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex); + } + } + if (loads[IR::Attribute::FrontFace]) { + front_face = DefineInput(*this, U1, true, spv::BuiltIn::FrontFacing); + } + if (loads[IR::Attribute::PointSpriteS] || loads[IR::Attribute::PointSpriteT]) { + point_coord = DefineInput(*this, F32[2], true, spv::BuiltIn::PointCoord); + } + if (loads[IR::Attribute::TessellationEvaluationPointU] || + loads[IR::Attribute::TessellationEvaluationPointV]) { + tess_coord = DefineInput(*this, F32[3], false, spv::BuiltIn::TessCoord); + } + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + const AttributeType input_type{runtime_info.generic_input_types[index]}; + if (!runtime_info.previous_stage_stores.Generic(index)) { + continue; + } + if (!loads.Generic(index)) { + continue; + } + if (input_type == AttributeType::Disabled) { + continue; + } + const Id type{GetAttributeType(*this, input_type)}; + const Id id{DefineInput(*this, type, true)}; + Decorate(id, spv::Decoration::Location, static_cast<u32>(index)); + Name(id, fmt::format("in_attr{}", index)); + input_generics[index] = id; + + if (info.passthrough.Generic(index) && profile.support_geometry_shader_passthrough) { + Decorate(id, spv::Decoration::PassthroughNV); + } + if (stage != Stage::Fragment) { + continue; + } + switch (info.interpolation[index]) { + case Interpolation::Smooth: + // Default + // Decorate(id, spv::Decoration::Smooth); + break; + case Interpolation::NoPerspective: + Decorate(id, spv::Decoration::NoPerspective); + break; + case Interpolation::Flat: + Decorate(id, spv::Decoration::Flat); + break; + } + } + if (stage == Stage::TessellationEval) { + for (size_t index = 0; index < info.uses_patches.size(); ++index) { + if (!info.uses_patches[index]) { + continue; + } + const Id id{DefineInput(*this, F32[4], false)}; + Decorate(id, spv::Decoration::Patch); + Decorate(id, spv::Decoration::Location, static_cast<u32>(index)); + patches[index] = id; + } + } +} + +void EmitContext::DefineOutputs(const IR::Program& program) { + const Info& info{program.info}; + const 
std::optional<u32> invocations{program.invocations}; + if (info.stores.AnyComponent(IR::Attribute::PositionX) || stage == Stage::VertexB) { + output_position = DefineOutput(*this, F32[4], invocations, spv::BuiltIn::Position); + } + if (info.stores[IR::Attribute::PointSize] || runtime_info.fixed_state_point_size) { + if (stage == Stage::Fragment) { + throw NotImplementedException("Storing PointSize in fragment stage"); + } + output_point_size = DefineOutput(*this, F32[1], invocations, spv::BuiltIn::PointSize); + } + if (info.stores.ClipDistances()) { + if (stage == Stage::Fragment) { + throw NotImplementedException("Storing ClipDistance in fragment stage"); + } + const Id type{TypeArray(F32[1], Const(8U))}; + clip_distances = DefineOutput(*this, type, invocations, spv::BuiltIn::ClipDistance); + } + if (info.stores[IR::Attribute::Layer] && + (profile.support_viewport_index_layer_non_geometry || stage == Stage::Geometry)) { + if (stage == Stage::Fragment) { + throw NotImplementedException("Storing Layer in fragment stage"); + } + layer = DefineOutput(*this, U32[1], invocations, spv::BuiltIn::Layer); + } + if (info.stores[IR::Attribute::ViewportIndex] && + (profile.support_viewport_index_layer_non_geometry || stage == Stage::Geometry)) { + if (stage == Stage::Fragment) { + throw NotImplementedException("Storing ViewportIndex in fragment stage"); + } + viewport_index = DefineOutput(*this, U32[1], invocations, spv::BuiltIn::ViewportIndex); + } + if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) { + viewport_mask = DefineOutput(*this, TypeArray(U32[1], Const(1u)), std::nullopt, + spv::BuiltIn::ViewportMaskNV); + } + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (info.stores.Generic(index)) { + DefineGenericOutput(*this, index, invocations); + } + } + switch (stage) { + case Stage::TessellationControl: + if (info.stores_tess_level_outer) { + const Id type{TypeArray(F32[1], Const(4U))}; + output_tess_level_outer = + DefineOutput(*this, type, std::nullopt, spv::BuiltIn::TessLevelOuter); + Decorate(output_tess_level_outer, spv::Decoration::Patch); + } + if (info.stores_tess_level_inner) { + const Id type{TypeArray(F32[1], Const(2U))}; + output_tess_level_inner = + DefineOutput(*this, type, std::nullopt, spv::BuiltIn::TessLevelInner); + Decorate(output_tess_level_inner, spv::Decoration::Patch); + } + for (size_t index = 0; index < info.uses_patches.size(); ++index) { + if (!info.uses_patches[index]) { + continue; + } + const Id id{DefineOutput(*this, F32[4], std::nullopt)}; + Decorate(id, spv::Decoration::Patch); + Decorate(id, spv::Decoration::Location, static_cast<u32>(index)); + patches[index] = id; + } + break; + case Stage::Fragment: + for (u32 index = 0; index < 8; ++index) { + if (!info.stores_frag_color[index] && !profile.need_declared_frag_colors) { + continue; + } + frag_color[index] = DefineOutput(*this, F32[4], std::nullopt); + Decorate(frag_color[index], spv::Decoration::Location, index); + Name(frag_color[index], fmt::format("frag_color{}", index)); + } + if (info.stores_frag_depth) { + frag_depth = DefineOutput(*this, F32[1], std::nullopt); + Decorate(frag_depth, spv::Decoration::BuiltIn, spv::BuiltIn::FragDepth); + } + if (info.stores_sample_mask) { + sample_mask = DefineOutput(*this, U32[1], std::nullopt); + Decorate(sample_mask, spv::Decoration::BuiltIn, spv::BuiltIn::SampleMask); + } + break; + default: + break; + } +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_context.h 
b/src/shader_recompiler/backend/spirv/emit_context.h new file mode 100644 index 000000000..e277bc358 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -0,0 +1,307 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> +#include <string_view> + +#include <sirit/sirit.h> + +#include "shader_recompiler/backend/bindings.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" +#include "shader_recompiler/shader_info.h" + +namespace Shader::Backend::SPIRV { + +using Sirit::Id; + +class VectorTypes { +public: + void Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name); + + [[nodiscard]] Id operator[](size_t size) const noexcept { + return defs[size - 1]; + } + +private: + std::array<Id, 4> defs{}; +}; + +struct TextureDefinition { + Id id; + Id sampled_type; + Id pointer_type; + Id image_type; + u32 count; +}; + +struct TextureBufferDefinition { + Id id; + u32 count; +}; + +struct ImageBufferDefinition { + Id id; + Id image_type; + u32 count; +}; + +struct ImageDefinition { + Id id; + Id image_type; + u32 count; +}; + +struct UniformDefinitions { + Id U8{}; + Id S8{}; + Id U16{}; + Id S16{}; + Id U32{}; + Id F32{}; + Id U32x2{}; + Id U32x4{}; +}; + +struct StorageTypeDefinition { + Id array{}; + Id element{}; +}; + +struct StorageTypeDefinitions { + StorageTypeDefinition U8{}; + StorageTypeDefinition S8{}; + StorageTypeDefinition U16{}; + StorageTypeDefinition S16{}; + StorageTypeDefinition U32{}; + StorageTypeDefinition U64{}; + StorageTypeDefinition F32{}; + StorageTypeDefinition U32x2{}; + StorageTypeDefinition U32x4{}; +}; + +struct StorageDefinitions { + Id U8{}; + Id S8{}; + Id U16{}; + Id S16{}; + Id U32{}; + Id F32{}; + Id U64{}; + Id U32x2{}; + Id U32x4{}; +}; + +struct GenericElementInfo { + Id id{}; + u32 first_element{}; + u32 num_components{}; +}; + +class EmitContext final : public Sirit::Module { +public: + explicit EmitContext(const Profile& profile, const RuntimeInfo& runtime_info, + IR::Program& program, Bindings& binding); + ~EmitContext(); + + [[nodiscard]] Id Def(const IR::Value& value); + + [[nodiscard]] Id BitOffset8(const IR::Value& offset); + [[nodiscard]] Id BitOffset16(const IR::Value& offset); + + Id Const(u32 value) { + return Constant(U32[1], value); + } + + Id Const(u32 element_1, u32 element_2) { + return ConstantComposite(U32[2], Const(element_1), Const(element_2)); + } + + Id Const(u32 element_1, u32 element_2, u32 element_3) { + return ConstantComposite(U32[3], Const(element_1), Const(element_2), Const(element_3)); + } + + Id Const(u32 element_1, u32 element_2, u32 element_3, u32 element_4) { + return ConstantComposite(U32[4], Const(element_1), Const(element_2), Const(element_3), + Const(element_4)); + } + + Id SConst(s32 value) { + return Constant(S32[1], value); + } + + Id SConst(s32 element_1, s32 element_2) { + return ConstantComposite(S32[2], SConst(element_1), SConst(element_2)); + } + + Id SConst(s32 element_1, s32 element_2, s32 element_3) { + return ConstantComposite(S32[3], SConst(element_1), SConst(element_2), SConst(element_3)); + } + + Id SConst(s32 element_1, s32 element_2, s32 element_3, s32 element_4) { + return ConstantComposite(S32[4], SConst(element_1), SConst(element_2), SConst(element_3), + SConst(element_4)); + } + + Id Const(f32 value) { + return Constant(F32[1], value); + } + + const Profile& profile; + 
const RuntimeInfo& runtime_info; + Stage stage{}; + + Id void_id{}; + Id U1{}; + Id U8{}; + Id S8{}; + Id U16{}; + Id S16{}; + Id U64{}; + VectorTypes F32; + VectorTypes U32; + VectorTypes S32; + VectorTypes F16; + VectorTypes F64; + + Id true_value{}; + Id false_value{}; + Id u32_zero_value{}; + Id f32_zero_value{}; + + UniformDefinitions uniform_types; + StorageTypeDefinitions storage_types; + + Id private_u32{}; + + Id shared_u8{}; + Id shared_u16{}; + Id shared_u32{}; + Id shared_u64{}; + Id shared_u32x2{}; + Id shared_u32x4{}; + + Id input_f32{}; + Id input_u32{}; + Id input_s32{}; + + Id output_f32{}; + Id output_u32{}; + + Id image_buffer_type{}; + Id sampled_texture_buffer_type{}; + Id image_u32{}; + + std::array<UniformDefinitions, Info::MAX_CBUFS> cbufs{}; + std::array<StorageDefinitions, Info::MAX_SSBOS> ssbos{}; + std::vector<TextureBufferDefinition> texture_buffers; + std::vector<ImageBufferDefinition> image_buffers; + std::vector<TextureDefinition> textures; + std::vector<ImageDefinition> images; + + Id workgroup_id{}; + Id local_invocation_id{}; + Id invocation_id{}; + Id sample_id{}; + Id is_helper_invocation{}; + Id subgroup_local_invocation_id{}; + Id subgroup_mask_eq{}; + Id subgroup_mask_lt{}; + Id subgroup_mask_le{}; + Id subgroup_mask_gt{}; + Id subgroup_mask_ge{}; + Id instance_id{}; + Id instance_index{}; + Id base_instance{}; + Id vertex_id{}; + Id vertex_index{}; + Id base_vertex{}; + Id front_face{}; + Id point_coord{}; + Id tess_coord{}; + Id clip_distances{}; + Id layer{}; + Id viewport_index{}; + Id viewport_mask{}; + Id primitive_id{}; + + Id fswzadd_lut_a{}; + Id fswzadd_lut_b{}; + + Id indexed_load_func{}; + Id indexed_store_func{}; + + Id local_memory{}; + + Id shared_memory_u8{}; + Id shared_memory_u16{}; + Id shared_memory_u32{}; + Id shared_memory_u64{}; + Id shared_memory_u32x2{}; + Id shared_memory_u32x4{}; + + Id shared_memory_u32_type{}; + + Id shared_store_u8_func{}; + Id shared_store_u16_func{}; + Id increment_cas_shared{}; + Id increment_cas_ssbo{}; + Id decrement_cas_shared{}; + Id decrement_cas_ssbo{}; + Id f32_add_cas{}; + Id f16x2_add_cas{}; + Id f16x2_min_cas{}; + Id f16x2_max_cas{}; + Id f32x2_add_cas{}; + Id f32x2_min_cas{}; + Id f32x2_max_cas{}; + + Id load_global_func_u32{}; + Id load_global_func_u32x2{}; + Id load_global_func_u32x4{}; + Id write_global_func_u32{}; + Id write_global_func_u32x2{}; + Id write_global_func_u32x4{}; + + Id input_position{}; + std::array<Id, 32> input_generics{}; + + Id output_point_size{}; + Id output_position{}; + std::array<std::array<GenericElementInfo, 4>, 32> output_generics{}; + + Id output_tess_level_outer{}; + Id output_tess_level_inner{}; + std::array<Id, 30> patches{}; + + std::array<Id, 8> frag_color{}; + Id sample_mask{}; + Id frag_depth{}; + + std::vector<Id> interfaces; + +private: + void DefineCommonTypes(const Info& info); + void DefineCommonConstants(); + void DefineInterfaces(const IR::Program& program); + void DefineLocalMemory(const IR::Program& program); + void DefineSharedMemory(const IR::Program& program); + void DefineSharedMemoryFunctions(const IR::Program& program); + void DefineConstantBuffers(const Info& info, u32& binding); + void DefineStorageBuffers(const Info& info, u32& binding); + void DefineTextureBuffers(const Info& info, u32& binding); + void DefineImageBuffers(const Info& info, u32& binding); + void DefineTextures(const Info& info, u32& binding); + void DefineImages(const Info& info, u32& binding); + void DefineAttributeMemAccess(const Info& info); + void 
DefineGlobalMemoryFunctions(const Info& info); + + void DefineInputs(const IR::Program& program); + void DefineOutputs(const IR::Program& program); +}; + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp new file mode 100644 index 000000000..d7a86e270 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -0,0 +1,541 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <span> +#include <tuple> +#include <type_traits> +#include <utility> +#include <vector> + +#include "common/settings.h" +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/program.h" + +namespace Shader::Backend::SPIRV { +namespace { +template <class Func> +struct FuncTraits {}; + +template <class ReturnType_, class... Args> +struct FuncTraits<ReturnType_ (*)(Args...)> { + using ReturnType = ReturnType_; + + static constexpr size_t NUM_ARGS = sizeof...(Args); + + template <size_t I> + using ArgType = std::tuple_element_t<I, std::tuple<Args...>>; +}; + +template <auto func, typename... Args> +void SetDefinition(EmitContext& ctx, IR::Inst* inst, Args... args) { + inst->SetDefinition<Id>(func(ctx, std::forward<Args>(args)...)); +} + +template <typename ArgType> +ArgType Arg(EmitContext& ctx, const IR::Value& arg) { + if constexpr (std::is_same_v<ArgType, Id>) { + return ctx.Def(arg); + } else if constexpr (std::is_same_v<ArgType, const IR::Value&>) { + return arg; + } else if constexpr (std::is_same_v<ArgType, u32>) { + return arg.U32(); + } else if constexpr (std::is_same_v<ArgType, IR::Attribute>) { + return arg.Attribute(); + } else if constexpr (std::is_same_v<ArgType, IR::Patch>) { + return arg.Patch(); + } else if constexpr (std::is_same_v<ArgType, IR::Reg>) { + return arg.Reg(); + } +} + +template <auto func, bool is_first_arg_inst, size_t... I> +void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) { + using Traits = FuncTraits<decltype(func)>; + if constexpr (std::is_same_v<typename Traits::ReturnType, Id>) { + if constexpr (is_first_arg_inst) { + SetDefinition<func>( + ctx, inst, inst, + Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...); + } else { + SetDefinition<func>( + ctx, inst, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...); + } + } else { + if constexpr (is_first_arg_inst) { + func(ctx, inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...); + } else { + func(ctx, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...); + } + } +} + +template <auto func> +void Invoke(EmitContext& ctx, IR::Inst* inst) { + using Traits = FuncTraits<decltype(func)>; + static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments"); + if constexpr (Traits::NUM_ARGS == 1) { + Invoke<func, false>(ctx, inst, std::make_index_sequence<0>{}); + } else { + using FirstArgType = typename Traits::template ArgType<1>; + static constexpr bool is_first_arg_inst = std::is_same_v<FirstArgType, IR::Inst*>; + using Indices = std::make_index_sequence<Traits::NUM_ARGS - (is_first_arg_inst ? 
2 : 1)>; + Invoke<func, is_first_arg_inst>(ctx, inst, Indices{}); + } +} + +void EmitInst(EmitContext& ctx, IR::Inst* inst) { + switch (inst->GetOpcode()) { +#define OPCODE(name, result_type, ...) \ + case IR::Opcode::name: \ + return Invoke<&Emit##name>(ctx, inst); +#include "shader_recompiler/frontend/ir/opcodes.inc" +#undef OPCODE + } + throw LogicError("Invalid opcode {}", inst->GetOpcode()); +} + +Id TypeId(const EmitContext& ctx, IR::Type type) { + switch (type) { + case IR::Type::U1: + return ctx.U1; + case IR::Type::U32: + return ctx.U32[1]; + default: + throw NotImplementedException("Phi node type {}", type); + } +} + +void Traverse(EmitContext& ctx, IR::Program& program) { + IR::Block* current_block{}; + for (const IR::AbstractSyntaxNode& node : program.syntax_list) { + switch (node.type) { + case IR::AbstractSyntaxNode::Type::Block: { + const Id label{node.data.block->Definition<Id>()}; + if (current_block) { + ctx.OpBranch(label); + } + current_block = node.data.block; + ctx.AddLabel(label); + for (IR::Inst& inst : node.data.block->Instructions()) { + EmitInst(ctx, &inst); + } + break; + } + case IR::AbstractSyntaxNode::Type::If: { + const Id if_label{node.data.if_node.body->Definition<Id>()}; + const Id endif_label{node.data.if_node.merge->Definition<Id>()}; + ctx.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone); + ctx.OpBranchConditional(ctx.Def(node.data.if_node.cond), if_label, endif_label); + break; + } + case IR::AbstractSyntaxNode::Type::Loop: { + const Id body_label{node.data.loop.body->Definition<Id>()}; + const Id continue_label{node.data.loop.continue_block->Definition<Id>()}; + const Id endloop_label{node.data.loop.merge->Definition<Id>()}; + + ctx.OpLoopMerge(endloop_label, continue_label, spv::LoopControlMask::MaskNone); + ctx.OpBranch(body_label); + break; + } + case IR::AbstractSyntaxNode::Type::Break: { + const Id break_label{node.data.break_node.merge->Definition<Id>()}; + const Id skip_label{node.data.break_node.skip->Definition<Id>()}; + ctx.OpBranchConditional(ctx.Def(node.data.break_node.cond), break_label, skip_label); + break; + } + case IR::AbstractSyntaxNode::Type::EndIf: + if (current_block) { + ctx.OpBranch(node.data.end_if.merge->Definition<Id>()); + } + break; + case IR::AbstractSyntaxNode::Type::Repeat: { + Id cond{ctx.Def(node.data.repeat.cond)}; + if (!Settings::values.disable_shader_loop_safety_checks) { + const Id pointer_type{ctx.TypePointer(spv::StorageClass::Private, ctx.U32[1])}; + const Id safety_counter{ctx.AddGlobalVariable( + pointer_type, spv::StorageClass::Private, ctx.Const(0x2000u))}; + if (ctx.profile.supported_spirv >= 0x00010400) { + ctx.interfaces.push_back(safety_counter); + } + const Id old_counter{ctx.OpLoad(ctx.U32[1], safety_counter)}; + const Id new_counter{ctx.OpISub(ctx.U32[1], old_counter, ctx.Const(1u))}; + ctx.OpStore(safety_counter, new_counter); + + const Id safety_cond{ + ctx.OpSGreaterThanEqual(ctx.U1, new_counter, ctx.u32_zero_value)}; + cond = ctx.OpLogicalAnd(ctx.U1, cond, safety_cond); + } + const Id loop_header_label{node.data.repeat.loop_header->Definition<Id>()}; + const Id merge_label{node.data.repeat.merge->Definition<Id>()}; + ctx.OpBranchConditional(cond, loop_header_label, merge_label); + break; + } + case IR::AbstractSyntaxNode::Type::Return: + ctx.OpReturn(); + break; + case IR::AbstractSyntaxNode::Type::Unreachable: + ctx.OpUnreachable(); + break; + } + if (node.type != IR::AbstractSyntaxNode::Type::Block) { + current_block = nullptr; + } + } +} + +Id DefineMain(EmitContext& ctx, 
IR::Program& program) { + const Id void_function{ctx.TypeFunction(ctx.void_id)}; + const Id main{ctx.OpFunction(ctx.void_id, spv::FunctionControlMask::MaskNone, void_function)}; + for (IR::Block* const block : program.blocks) { + block->SetDefinition(ctx.OpLabel()); + } + Traverse(ctx, program); + ctx.OpFunctionEnd(); + return main; +} + +spv::ExecutionMode ExecutionMode(TessPrimitive primitive) { + switch (primitive) { + case TessPrimitive::Isolines: + return spv::ExecutionMode::Isolines; + case TessPrimitive::Triangles: + return spv::ExecutionMode::Triangles; + case TessPrimitive::Quads: + return spv::ExecutionMode::Quads; + } + throw InvalidArgument("Tessellation primitive {}", primitive); +} + +spv::ExecutionMode ExecutionMode(TessSpacing spacing) { + switch (spacing) { + case TessSpacing::Equal: + return spv::ExecutionMode::SpacingEqual; + case TessSpacing::FractionalOdd: + return spv::ExecutionMode::SpacingFractionalOdd; + case TessSpacing::FractionalEven: + return spv::ExecutionMode::SpacingFractionalEven; + } + throw InvalidArgument("Tessellation spacing {}", spacing); +} + +void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { + const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size()); + spv::ExecutionModel execution_model{}; + switch (program.stage) { + case Stage::Compute: { + const std::array<u32, 3> workgroup_size{program.workgroup_size}; + execution_model = spv::ExecutionModel::GLCompute; + ctx.AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0], + workgroup_size[1], workgroup_size[2]); + break; + } + case Stage::VertexB: + execution_model = spv::ExecutionModel::Vertex; + break; + case Stage::TessellationControl: + execution_model = spv::ExecutionModel::TessellationControl; + ctx.AddCapability(spv::Capability::Tessellation); + ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices, program.invocations); + break; + case Stage::TessellationEval: + execution_model = spv::ExecutionModel::TessellationEvaluation; + ctx.AddCapability(spv::Capability::Tessellation); + ctx.AddExecutionMode(main, ExecutionMode(ctx.runtime_info.tess_primitive)); + ctx.AddExecutionMode(main, ExecutionMode(ctx.runtime_info.tess_spacing)); + ctx.AddExecutionMode(main, ctx.runtime_info.tess_clockwise + ? 
spv::ExecutionMode::VertexOrderCw + : spv::ExecutionMode::VertexOrderCcw); + break; + case Stage::Geometry: + execution_model = spv::ExecutionModel::Geometry; + ctx.AddCapability(spv::Capability::Geometry); + ctx.AddCapability(spv::Capability::GeometryStreams); + switch (ctx.runtime_info.input_topology) { + case InputTopology::Points: + ctx.AddExecutionMode(main, spv::ExecutionMode::InputPoints); + break; + case InputTopology::Lines: + ctx.AddExecutionMode(main, spv::ExecutionMode::InputLines); + break; + case InputTopology::LinesAdjacency: + ctx.AddExecutionMode(main, spv::ExecutionMode::InputLinesAdjacency); + break; + case InputTopology::Triangles: + ctx.AddExecutionMode(main, spv::ExecutionMode::Triangles); + break; + case InputTopology::TrianglesAdjacency: + ctx.AddExecutionMode(main, spv::ExecutionMode::InputTrianglesAdjacency); + break; + } + switch (program.output_topology) { + case OutputTopology::PointList: + ctx.AddExecutionMode(main, spv::ExecutionMode::OutputPoints); + break; + case OutputTopology::LineStrip: + ctx.AddExecutionMode(main, spv::ExecutionMode::OutputLineStrip); + break; + case OutputTopology::TriangleStrip: + ctx.AddExecutionMode(main, spv::ExecutionMode::OutputTriangleStrip); + break; + } + if (program.info.stores[IR::Attribute::PointSize]) { + ctx.AddCapability(spv::Capability::GeometryPointSize); + } + ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices, program.output_vertices); + ctx.AddExecutionMode(main, spv::ExecutionMode::Invocations, program.invocations); + if (program.is_geometry_passthrough) { + if (ctx.profile.support_geometry_shader_passthrough) { + ctx.AddExtension("SPV_NV_geometry_shader_passthrough"); + ctx.AddCapability(spv::Capability::GeometryShaderPassthroughNV); + } else { + LOG_WARNING(Shader_SPIRV, "Geometry shader passthrough used with no support"); + } + } + break; + case Stage::Fragment: + execution_model = spv::ExecutionModel::Fragment; + if (ctx.profile.lower_left_origin_mode) { + ctx.AddExecutionMode(main, spv::ExecutionMode::OriginLowerLeft); + } else { + ctx.AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft); + } + if (program.info.stores_frag_depth) { + ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing); + } + if (ctx.runtime_info.force_early_z) { + ctx.AddExecutionMode(main, spv::ExecutionMode::EarlyFragmentTests); + } + break; + default: + throw NotImplementedException("Stage {}", program.stage); + } + ctx.AddEntryPoint(execution_model, main, "main", interfaces); +} + +void SetupDenormControl(const Profile& profile, const IR::Program& program, EmitContext& ctx, + Id main_func) { + const Info& info{program.info}; + if (info.uses_fp32_denorms_flush && info.uses_fp32_denorms_preserve) { + LOG_DEBUG(Shader_SPIRV, "Fp32 denorm flush and preserve on the same shader"); + } else if (info.uses_fp32_denorms_flush) { + if (profile.support_fp32_denorm_flush) { + ctx.AddCapability(spv::Capability::DenormFlushToZero); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormFlushToZero, 32U); + } else { + // Drivers will most likely flush denorms by default, no need to warn + } + } else if (info.uses_fp32_denorms_preserve) { + if (profile.support_fp32_denorm_preserve) { + ctx.AddCapability(spv::Capability::DenormPreserve); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 32U); + } else { + LOG_DEBUG(Shader_SPIRV, "Fp32 denorm preserve used in shader without host support"); + } + } + if (!profile.support_separate_denorm_behavior || profile.has_broken_fp16_float_controls) { + // No separate 
denorm behavior + return; + } + if (info.uses_fp16_denorms_flush && info.uses_fp16_denorms_preserve) { + LOG_DEBUG(Shader_SPIRV, "Fp16 denorm flush and preserve on the same shader"); + } else if (info.uses_fp16_denorms_flush) { + if (profile.support_fp16_denorm_flush) { + ctx.AddCapability(spv::Capability::DenormFlushToZero); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormFlushToZero, 16U); + } else { + // Same as fp32, no need to warn as most drivers will flush by default + } + } else if (info.uses_fp16_denorms_preserve) { + if (profile.support_fp16_denorm_preserve) { + ctx.AddCapability(spv::Capability::DenormPreserve); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 16U); + } else { + LOG_DEBUG(Shader_SPIRV, "Fp16 denorm preserve used in shader without host support"); + } + } +} + +void SetupSignedNanCapabilities(const Profile& profile, const IR::Program& program, + EmitContext& ctx, Id main_func) { + if (profile.has_broken_fp16_float_controls && program.info.uses_fp16) { + return; + } + if (program.info.uses_fp16 && profile.support_fp16_signed_zero_nan_preserve) { + ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 16U); + } + if (profile.support_fp32_signed_zero_nan_preserve) { + ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 32U); + } + if (program.info.uses_fp64 && profile.support_fp64_signed_zero_nan_preserve) { + ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 64U); + } +} + +void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ctx) { + if (info.uses_sampled_1d) { + ctx.AddCapability(spv::Capability::Sampled1D); + } + if (info.uses_sparse_residency) { + ctx.AddCapability(spv::Capability::SparseResidency); + } + if (info.uses_demote_to_helper_invocation && profile.support_demote_to_helper_invocation) { + ctx.AddExtension("SPV_EXT_demote_to_helper_invocation"); + ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT); + } + if (info.stores[IR::Attribute::ViewportIndex]) { + ctx.AddCapability(spv::Capability::MultiViewport); + } + if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) { + ctx.AddExtension("SPV_NV_viewport_array2"); + ctx.AddCapability(spv::Capability::ShaderViewportMaskNV); + } + if (info.stores[IR::Attribute::Layer] || info.stores[IR::Attribute::ViewportIndex]) { + if (profile.support_viewport_index_layer_non_geometry && ctx.stage != Stage::Geometry) { + ctx.AddExtension("SPV_EXT_shader_viewport_index_layer"); + ctx.AddCapability(spv::Capability::ShaderViewportIndexLayerEXT); + } + } + if (!profile.support_vertex_instance_id && + (info.loads[IR::Attribute::InstanceId] || info.loads[IR::Attribute::VertexId])) { + ctx.AddExtension("SPV_KHR_shader_draw_parameters"); + ctx.AddCapability(spv::Capability::DrawParameters); + } + if ((info.uses_subgroup_vote || info.uses_subgroup_invocation_id || + info.uses_subgroup_shuffles) && + profile.support_vote) { + ctx.AddExtension("SPV_KHR_shader_ballot"); + ctx.AddCapability(spv::Capability::SubgroupBallotKHR); + if (!profile.warp_size_potentially_larger_than_guest) { + // vote ops are only used when not taking the long path + ctx.AddExtension("SPV_KHR_subgroup_vote"); + ctx.AddCapability(spv::Capability::SubgroupVoteKHR); + } + } + if 
(info.uses_int64_bit_atomics && profile.support_int64_atomics) { + ctx.AddCapability(spv::Capability::Int64Atomics); + } + if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) { + ctx.AddCapability(spv::Capability::StorageImageReadWithoutFormat); + } + if (info.uses_typeless_image_writes) { + ctx.AddCapability(spv::Capability::StorageImageWriteWithoutFormat); + } + if (info.uses_image_buffers) { + ctx.AddCapability(spv::Capability::ImageBuffer); + } + if (info.uses_sample_id) { + ctx.AddCapability(spv::Capability::SampleRateShading); + } + if (!ctx.runtime_info.xfb_varyings.empty()) { + ctx.AddCapability(spv::Capability::TransformFeedback); + } + if (info.uses_derivatives) { + ctx.AddCapability(spv::Capability::DerivativeControl); + } + // TODO: Track this usage + ctx.AddCapability(spv::Capability::ImageGatherExtended); + ctx.AddCapability(spv::Capability::ImageQuery); + ctx.AddCapability(spv::Capability::SampledBuffer); +} + +void PatchPhiNodes(IR::Program& program, EmitContext& ctx) { + auto inst{program.blocks.front()->begin()}; + size_t block_index{0}; + ctx.PatchDeferredPhi([&](size_t phi_arg) { + if (phi_arg == 0) { + ++inst; + if (inst == program.blocks[block_index]->end() || + inst->GetOpcode() != IR::Opcode::Phi) { + do { + ++block_index; + inst = program.blocks[block_index]->begin(); + } while (inst->GetOpcode() != IR::Opcode::Phi); + } + } + return ctx.Def(inst->Arg(phi_arg)); + }); +} +} // Anonymous namespace + +std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info, + IR::Program& program, Bindings& bindings) { + EmitContext ctx{profile, runtime_info, program, bindings}; + const Id main{DefineMain(ctx, program)}; + DefineEntryPoint(program, ctx, main); + if (profile.support_float_controls) { + ctx.AddExtension("SPV_KHR_float_controls"); + SetupDenormControl(profile, program, ctx, main); + SetupSignedNanCapabilities(profile, program, ctx, main); + } + SetupCapabilities(profile, program.info, ctx); + PatchPhiNodes(program, ctx); + return ctx.Assemble(); +} + +Id EmitPhi(EmitContext& ctx, IR::Inst* inst) { + const size_t num_args{inst->NumArgs()}; + boost::container::small_vector<Id, 32> blocks; + blocks.reserve(num_args); + for (size_t index = 0; index < num_args; ++index) { + blocks.push_back(inst->PhiBlock(index)->Definition<Id>()); + } + // The type of a phi instruction is stored in its flags + const Id result_type{TypeId(ctx, inst->Flags<IR::Type>())}; + return ctx.DeferredOpPhi(result_type, std::span(blocks.data(), blocks.size())); +} + +void EmitVoid(EmitContext&) {} + +Id EmitIdentity(EmitContext& ctx, const IR::Value& value) { + const Id id{ctx.Def(value)}; + if (!Sirit::ValidId(id)) { + throw NotImplementedException("Forward identity declaration"); + } + return id; +} + +Id EmitConditionRef(EmitContext& ctx, const IR::Value& value) { + const Id id{ctx.Def(value)}; + if (!Sirit::ValidId(id)) { + throw NotImplementedException("Forward identity declaration"); + } + return id; +} + +void EmitReference(EmitContext&) {} + +void EmitPhiMove(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitGetZeroFromOp(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitGetSignFromOp(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitGetCarryFromOp(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitGetOverflowFromOp(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitGetSparseFromOp(EmitContext&) { + throw 
LogicError("Unreachable instruction"); +} + +void EmitGetInBoundsFromOp(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h new file mode 100644 index 000000000..db0c935fe --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -0,0 +1,27 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <vector> + +#include <sirit/sirit.h> + +#include "common/common_types.h" +#include "shader_recompiler/backend/bindings.h" +#include "shader_recompiler/backend/spirv/emit_context.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/profile.h" + +namespace Shader::Backend::SPIRV { + +[[nodiscard]] std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info, + IR::Program& program, Bindings& bindings); + +[[nodiscard]] inline std::vector<u32> EmitSPIRV(const Profile& profile, IR::Program& program) { + Bindings binding; + return EmitSPIRV(profile, {}, program, binding); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp new file mode 100644 index 000000000..9af8bb9e1 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp @@ -0,0 +1,448 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" + +namespace Shader::Backend::SPIRV { +namespace { +Id SharedPointer(EmitContext& ctx, Id offset, u32 index_offset = 0) { + const Id shift_id{ctx.Const(2U)}; + Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + if (index_offset > 0) { + index = ctx.OpIAdd(ctx.U32[1], index, ctx.Const(index_offset)); + } + return ctx.profile.support_explicit_workgroup_layout + ? 
ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index) + : ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index); +} + +Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element_size) { + if (offset.IsImmediate()) { + const u32 imm_offset{static_cast<u32>(offset.U32() / element_size)}; + return ctx.Const(imm_offset); + } + const u32 shift{static_cast<u32>(std::countr_zero(element_size))}; + const Id index{ctx.Def(offset)}; + if (shift == 0) { + return index; + } + const Id shift_id{ctx.Const(shift)}; + return ctx.OpShiftRightLogical(ctx.U32[1], index, shift_id); +} + +Id StoragePointer(EmitContext& ctx, const StorageTypeDefinition& type_def, + Id StorageDefinitions::*member_ptr, const IR::Value& binding, + const IR::Value& offset, size_t element_size) { + if (!binding.IsImmediate()) { + throw NotImplementedException("Dynamic storage buffer indexing"); + } + const Id ssbo{ctx.ssbos[binding.U32()].*member_ptr}; + const Id index{StorageIndex(ctx, offset, element_size)}; + return ctx.OpAccessChain(type_def.element, ssbo, ctx.u32_zero_value, index); +} + +std::pair<Id, Id> AtomicArgs(EmitContext& ctx) { + const Id scope{ctx.Const(static_cast<u32>(spv::Scope::Device))}; + const Id semantics{ctx.u32_zero_value}; + return {scope, semantics}; +} + +Id SharedAtomicU32(EmitContext& ctx, Id offset, Id value, + Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) { + const Id pointer{SharedPointer(ctx, offset)}; + const auto [scope, semantics]{AtomicArgs(ctx)}; + return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value); +} + +Id StorageAtomicU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value, + Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) { + const Id pointer{StoragePointer(ctx, ctx.storage_types.U32, &StorageDefinitions::U32, binding, + offset, sizeof(u32))}; + const auto [scope, semantics]{AtomicArgs(ctx)}; + return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value); +} + +Id StorageAtomicU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value, + Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id), + Id (Sirit::Module::*non_atomic_func)(Id, Id, Id)) { + if (ctx.profile.support_int64_atomics) { + const Id pointer{StoragePointer(ctx, ctx.storage_types.U64, &StorageDefinitions::U64, + binding, offset, sizeof(u64))}; + const auto [scope, semantics]{AtomicArgs(ctx)}; + return (ctx.*atomic_func)(ctx.U64, pointer, scope, semantics, value); + } + LOG_ERROR(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic"); + const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2, + binding, offset, sizeof(u32[2]))}; + const Id original_value{ctx.OpBitcast(ctx.U64, ctx.OpLoad(ctx.U32[2], pointer))}; + const Id result{(ctx.*non_atomic_func)(ctx.U64, value, original_value)}; + ctx.OpStore(pointer, ctx.OpBitcast(ctx.U32[2], result)); + return original_value; +} +} // Anonymous namespace + +Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicIAdd); +} + +Id EmitSharedAtomicSMin32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicSMin); +} + +Id EmitSharedAtomicUMin32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicUMin); +} + +Id EmitSharedAtomicSMax32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, 
offset, value, &Sirit::Module::OpAtomicSMax); +} + +Id EmitSharedAtomicUMax32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicUMax); +} + +Id EmitSharedAtomicInc32(EmitContext& ctx, Id offset, Id value) { + const Id shift_id{ctx.Const(2U)}; + const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + return ctx.OpFunctionCall(ctx.U32[1], ctx.increment_cas_shared, index, value); +} + +Id EmitSharedAtomicDec32(EmitContext& ctx, Id offset, Id value) { + const Id shift_id{ctx.Const(2U)}; + const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + return ctx.OpFunctionCall(ctx.U32[1], ctx.decrement_cas_shared, index, value); +} + +Id EmitSharedAtomicAnd32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicAnd); +} + +Id EmitSharedAtomicOr32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicOr); +} + +Id EmitSharedAtomicXor32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicXor); +} + +Id EmitSharedAtomicExchange32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicExchange); +} + +Id EmitSharedAtomicExchange64(EmitContext& ctx, Id offset, Id value) { + if (ctx.profile.support_int64_atomics && ctx.profile.support_explicit_workgroup_layout) { + const Id shift_id{ctx.Const(3U)}; + const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + const Id pointer{ + ctx.OpAccessChain(ctx.shared_u64, ctx.shared_memory_u64, ctx.u32_zero_value, index)}; + const auto [scope, semantics]{AtomicArgs(ctx)}; + return ctx.OpAtomicExchange(ctx.U64, pointer, scope, semantics, value); + } + LOG_ERROR(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic"); + const Id pointer_1{SharedPointer(ctx, offset, 0)}; + const Id pointer_2{SharedPointer(ctx, offset, 1)}; + const Id value_1{ctx.OpLoad(ctx.U32[1], pointer_1)}; + const Id value_2{ctx.OpLoad(ctx.U32[1], pointer_2)}; + const Id new_vector{ctx.OpBitcast(ctx.U32[2], value)}; + ctx.OpStore(pointer_1, ctx.OpCompositeExtract(ctx.U32[1], new_vector, 0U)); + ctx.OpStore(pointer_2, ctx.OpCompositeExtract(ctx.U32[1], new_vector, 1U)); + return ctx.OpBitcast(ctx.U64, ctx.OpCompositeConstruct(ctx.U32[2], value_1, value_2)); +} + +Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicIAdd); +} + +Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicSMin); +} + +Id EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicUMin); +} + +Id EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicSMax); +} + +Id EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicUMax); +} + +Id EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id 
value) { + const Id ssbo{ctx.ssbos[binding.U32()].U32}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + return ctx.OpFunctionCall(ctx.U32[1], ctx.increment_cas_ssbo, base_index, value, ssbo); +} + +Id EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()].U32}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + return ctx.OpFunctionCall(ctx.U32[1], ctx.decrement_cas_ssbo, base_index, value, ssbo); +} + +Id EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicAnd); +} + +Id EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicOr); +} + +Id EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicXor); +} + +Id EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicExchange); +} + +Id EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicIAdd, + &Sirit::Module::OpIAdd); +} + +Id EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicSMin, + &Sirit::Module::OpSMin); +} + +Id EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicUMin, + &Sirit::Module::OpUMin); +} + +Id EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicSMax, + &Sirit::Module::OpSMax); +} + +Id EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicUMax, + &Sirit::Module::OpUMax); +} + +Id EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicAnd, + &Sirit::Module::OpBitwiseAnd); +} + +Id EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicOr, + &Sirit::Module::OpBitwiseOr); +} + +Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicXor, + &Sirit::Module::OpBitwiseXor); +} + +Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + if (ctx.profile.support_int64_atomics) { + const Id pointer{StoragePointer(ctx, ctx.storage_types.U64, &StorageDefinitions::U64, + binding, offset, sizeof(u64))}; + const auto [scope, semantics]{AtomicArgs(ctx)}; + return ctx.OpAtomicExchange(ctx.U64, pointer, scope, 
semantics, value); + } + LOG_ERROR(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic"); + const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2, + binding, offset, sizeof(u32[2]))}; + const Id original{ctx.OpBitcast(ctx.U64, ctx.OpLoad(ctx.U32[2], pointer))}; + ctx.OpStore(pointer, value); + return original; +} + +Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()].U32}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + return ctx.OpFunctionCall(ctx.F32[1], ctx.f32_add_cas, base_index, value, ssbo); +} + +Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()].U32}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_add_cas, base_index, value, ssbo)}; + return ctx.OpBitcast(ctx.U32[1], result); +} + +Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()].U32}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_add_cas, base_index, value, ssbo)}; + return ctx.OpPackHalf2x16(ctx.U32[1], result); +} + +Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()].U32}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_min_cas, base_index, value, ssbo)}; + return ctx.OpBitcast(ctx.U32[1], result); +} + +Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()].U32}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_min_cas, base_index, value, ssbo)}; + return ctx.OpPackHalf2x16(ctx.U32[1], result); +} + +Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()].U32}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_max_cas, base_index, value, ssbo)}; + return ctx.OpBitcast(ctx.U32[1], result); +} + +Id EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()].U32}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_max_cas, base_index, value, ssbo)}; + return ctx.OpPackHalf2x16(ctx.U32[1], result); +} + +Id EmitGlobalAtomicIAdd32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicSMin32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicUMin32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicSMax32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicUMax32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicInc32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicDec32(EmitContext&) { + 
throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicAnd32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicOr32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicXor32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicExchange32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicIAdd64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicSMin64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicUMin64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicSMax64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicUMax64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicInc64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicDec64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicAnd64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicOr64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicXor64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicExchange64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicAddF32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicAddF16x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicAddF32x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicMinF16x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicMinF32x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicMaxF16x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicMaxF32x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp new file mode 100644 index 000000000..e0b52a001 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp @@ -0,0 +1,38 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
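
The 64-bit storage atomics above all funnel through StorageAtomicU64, which falls back to a plain read-modify-write over a u32x2 view of the buffer whenever Profile::support_int64_atomics is false; only the combining step changes between IAdd, the min/max variants, and the bitwise ops. A minimal host-side sketch of that fallback shape (illustration only; FallbackAtomicAdd64 and its parameters are hypothetical and not part of this patch):

#include <cstdint>
#include <cstring>

// Sketch of the non-atomic fallback used when 64-bit atomics are unavailable:
// read the value through two 32-bit words, combine non-atomically, write back,
// and return the original value as a real atomic would.
uint64_t FallbackAtomicAdd64(uint32_t* ssbo_words, uint64_t value) {
    uint64_t original{};
    std::memcpy(&original, ssbo_words, sizeof(original));  // OpLoad(U32[2]) + OpBitcast(U64)
    const uint64_t result = original + value;              // non-atomic OpIAdd
    std::memcpy(ssbo_words, &result, sizeof(result));      // OpBitcast(U32[2]) + OpStore
    return original;
}
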
+ +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" +#include "shader_recompiler/frontend/ir/modifiers.h" + +namespace Shader::Backend::SPIRV { +namespace { +void MemoryBarrier(EmitContext& ctx, spv::Scope scope) { + const auto semantics{ + spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory | + spv::MemorySemanticsMask::WorkgroupMemory | spv::MemorySemanticsMask::AtomicCounterMemory | + spv::MemorySemanticsMask::ImageMemory}; + ctx.OpMemoryBarrier(ctx.Const(static_cast<u32>(scope)), ctx.Const(static_cast<u32>(semantics))); +} +} // Anonymous namespace + +void EmitBarrier(EmitContext& ctx) { + const auto execution{spv::Scope::Workgroup}; + const auto memory{spv::Scope::Workgroup}; + const auto memory_semantics{spv::MemorySemanticsMask::AcquireRelease | + spv::MemorySemanticsMask::WorkgroupMemory}; + ctx.OpControlBarrier(ctx.Const(static_cast<u32>(execution)), + ctx.Const(static_cast<u32>(memory)), + ctx.Const(static_cast<u32>(memory_semantics))); +} + +void EmitWorkgroupMemoryBarrier(EmitContext& ctx) { + MemoryBarrier(ctx, spv::Scope::Workgroup); +} + +void EmitDeviceMemoryBarrier(EmitContext& ctx) { + MemoryBarrier(ctx, spv::Scope::Device); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp new file mode 100644 index 000000000..bb11f4f4e --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp @@ -0,0 +1,66 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" + +namespace Shader::Backend::SPIRV { + +void EmitBitCastU16F16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBitCastU32F32(EmitContext& ctx, Id value) { + return ctx.OpBitcast(ctx.U32[1], value); +} + +void EmitBitCastU64F64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitBitCastF16U16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBitCastF32U32(EmitContext& ctx, Id value) { + return ctx.OpBitcast(ctx.F32[1], value); +} + +void EmitBitCastF64U64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitPackUint2x32(EmitContext& ctx, Id value) { + return ctx.OpBitcast(ctx.U64, value); +} + +Id EmitUnpackUint2x32(EmitContext& ctx, Id value) { + return ctx.OpBitcast(ctx.U32[2], value); +} + +Id EmitPackFloat2x16(EmitContext& ctx, Id value) { + return ctx.OpBitcast(ctx.U32[1], value); +} + +Id EmitUnpackFloat2x16(EmitContext& ctx, Id value) { + return ctx.OpBitcast(ctx.F16[2], value); +} + +Id EmitPackHalf2x16(EmitContext& ctx, Id value) { + return ctx.OpPackHalf2x16(ctx.U32[1], value); +} + +Id EmitUnpackHalf2x16(EmitContext& ctx, Id value) { + return ctx.OpUnpackHalf2x16(ctx.F32[2], value); +} + +Id EmitPackDouble2x32(EmitContext& ctx, Id value) { + return ctx.OpBitcast(ctx.F64[1], value); +} + +Id EmitUnpackDouble2x32(EmitContext& ctx, Id value) { + return ctx.OpBitcast(ctx.U32[2], value); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp new file mode 100644 index 
000000000..10ff4ecab --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp @@ -0,0 +1,155 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" +#include "shader_recompiler/frontend/ir/modifiers.h" + +namespace Shader::Backend::SPIRV { + +Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2) { + return ctx.OpCompositeConstruct(ctx.U32[2], e1, e2); +} + +Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3) { + return ctx.OpCompositeConstruct(ctx.U32[3], e1, e2, e3); +} + +Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) { + return ctx.OpCompositeConstruct(ctx.U32[4], e1, e2, e3, e4); +} + +Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.U32[1], composite, index); +} + +Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.U32[1], composite, index); +} + +Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.U32[1], composite, index); +} + +Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.U32[2], object, composite, index); +} + +Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.U32[3], object, composite, index); +} + +Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.U32[4], object, composite, index); +} + +Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2) { + return ctx.OpCompositeConstruct(ctx.F16[2], e1, e2); +} + +Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3) { + return ctx.OpCompositeConstruct(ctx.F16[3], e1, e2, e3); +} + +Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) { + return ctx.OpCompositeConstruct(ctx.F16[4], e1, e2, e3, e4); +} + +Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.F16[1], composite, index); +} + +Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.F16[1], composite, index); +} + +Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.F16[1], composite, index); +} + +Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F16[2], object, composite, index); +} + +Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F16[3], object, composite, index); +} + +Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F16[4], object, composite, index); +} + +Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2) { + return ctx.OpCompositeConstruct(ctx.F32[2], e1, e2); +} + +Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3) { + return ctx.OpCompositeConstruct(ctx.F32[3], e1, e2, e3); +} + +Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) { + return ctx.OpCompositeConstruct(ctx.F32[4], e1, e2, e3, e4); +} + +Id 
EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.F32[1], composite, index); +} + +Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.F32[1], composite, index); +} + +Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.F32[1], composite, index); +} + +Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F32[2], object, composite, index); +} + +Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F32[3], object, composite, index); +} + +Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F32[4], object, composite, index); +} + +void EmitCompositeConstructF64x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitCompositeConstructF64x3(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitCompositeConstructF64x4(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitCompositeExtractF64x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitCompositeExtractF64x3(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitCompositeExtractF64x4(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F64[2], object, composite, index); +} + +Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F64[3], object, composite, index); +} + +Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F64[4], object, composite, index); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp new file mode 100644 index 000000000..fb8c02a77 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -0,0 +1,505 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <tuple> +#include <utility> + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" + +namespace Shader::Backend::SPIRV { +namespace { +struct AttrInfo { + Id pointer; + Id id; + bool needs_cast; +}; + +std::optional<AttrInfo> AttrTypes(EmitContext& ctx, u32 index) { + const AttributeType type{ctx.runtime_info.generic_input_types.at(index)}; + switch (type) { + case AttributeType::Float: + return AttrInfo{ctx.input_f32, ctx.F32[1], false}; + case AttributeType::UnsignedInt: + return AttrInfo{ctx.input_u32, ctx.U32[1], true}; + case AttributeType::SignedInt: + return AttrInfo{ctx.input_s32, ctx.TypeInt(32, true), true}; + case AttributeType::Disabled: + return std::nullopt; + } + throw InvalidArgument("Invalid attribute type {}", type); +} + +template <typename... Args> +Id AttrPointer(EmitContext& ctx, Id pointer_type, Id vertex, Id base, Args&&... 
args) { + switch (ctx.stage) { + case Stage::TessellationControl: + case Stage::TessellationEval: + case Stage::Geometry: + return ctx.OpAccessChain(pointer_type, base, vertex, std::forward<Args>(args)...); + default: + return ctx.OpAccessChain(pointer_type, base, std::forward<Args>(args)...); + } +} + +template <typename... Args> +Id OutputAccessChain(EmitContext& ctx, Id result_type, Id base, Args&&... args) { + if (ctx.stage == Stage::TessellationControl) { + const Id invocation_id{ctx.OpLoad(ctx.U32[1], ctx.invocation_id)}; + return ctx.OpAccessChain(result_type, base, invocation_id, std::forward<Args>(args)...); + } else { + return ctx.OpAccessChain(result_type, base, std::forward<Args>(args)...); + } +} + +struct OutAttr { + OutAttr(Id pointer_) : pointer{pointer_} {} + OutAttr(Id pointer_, Id type_) : pointer{pointer_}, type{type_} {} + + Id pointer{}; + Id type{}; +}; + +std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { + if (IR::IsGeneric(attr)) { + const u32 index{IR::GenericAttributeIndex(attr)}; + const u32 element{IR::GenericAttributeElement(attr)}; + const GenericElementInfo& info{ctx.output_generics.at(index).at(element)}; + if (info.num_components == 1) { + return info.id; + } else { + const u32 index_element{element - info.first_element}; + const Id index_id{ctx.Const(index_element)}; + return OutputAccessChain(ctx, ctx.output_f32, info.id, index_id); + } + } + switch (attr) { + case IR::Attribute::PointSize: + return ctx.output_point_size; + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: { + const u32 element{static_cast<u32>(attr) % 4}; + const Id element_id{ctx.Const(element)}; + return OutputAccessChain(ctx, ctx.output_f32, ctx.output_position, element_id); + } + case IR::Attribute::ClipDistance0: + case IR::Attribute::ClipDistance1: + case IR::Attribute::ClipDistance2: + case IR::Attribute::ClipDistance3: + case IR::Attribute::ClipDistance4: + case IR::Attribute::ClipDistance5: + case IR::Attribute::ClipDistance6: + case IR::Attribute::ClipDistance7: { + const u32 base{static_cast<u32>(IR::Attribute::ClipDistance0)}; + const u32 index{static_cast<u32>(attr) - base}; + const Id clip_num{ctx.Const(index)}; + return OutputAccessChain(ctx, ctx.output_f32, ctx.clip_distances, clip_num); + } + case IR::Attribute::Layer: + if (ctx.profile.support_viewport_index_layer_non_geometry || + ctx.stage == Shader::Stage::Geometry) { + return OutAttr{ctx.layer, ctx.U32[1]}; + } + return std::nullopt; + case IR::Attribute::ViewportIndex: + if (ctx.profile.support_viewport_index_layer_non_geometry || + ctx.stage == Shader::Stage::Geometry) { + return OutAttr{ctx.viewport_index, ctx.U32[1]}; + } + return std::nullopt; + case IR::Attribute::ViewportMask: + if (!ctx.profile.support_viewport_mask) { + return std::nullopt; + } + return OutAttr{ctx.OpAccessChain(ctx.output_u32, ctx.viewport_mask, ctx.u32_zero_value), + ctx.U32[1]}; + default: + throw NotImplementedException("Read attribute {}", attr); + } +} + +Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr, u32 element_size, + const IR::Value& binding, const IR::Value& offset) { + if (!binding.IsImmediate()) { + throw NotImplementedException("Constant buffer indexing"); + } + const Id cbuf{ctx.cbufs[binding.U32()].*member_ptr}; + const Id uniform_type{ctx.uniform_types.*member_ptr}; + if (!offset.IsImmediate()) { + Id index{ctx.Def(offset)}; + if (element_size > 1) { + const u32 
log2_element_size{static_cast<u32>(std::countr_zero(element_size))}; + const Id shift{ctx.Const(log2_element_size)}; + index = ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), shift); + } + const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, index)}; + return ctx.OpLoad(result_type, access_chain); + } + // Hardware been proved to read the aligned offset (e.g. LDC.U32 at 6 will read offset 4) + const Id imm_offset{ctx.Const(offset.U32() / element_size)}; + const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, imm_offset)}; + return ctx.OpLoad(result_type, access_chain); +} + +Id GetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + return GetCbuf(ctx, ctx.U32[1], &UniformDefinitions::U32, sizeof(u32), binding, offset); +} + +Id GetCbufU32x4(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + return GetCbuf(ctx, ctx.U32[4], &UniformDefinitions::U32x4, sizeof(u32[4]), binding, offset); +} + +Id GetCbufElement(EmitContext& ctx, Id vector, const IR::Value& offset, u32 index_offset) { + if (offset.IsImmediate()) { + const u32 element{(offset.U32() / 4) % 4 + index_offset}; + return ctx.OpCompositeExtract(ctx.U32[1], vector, element); + } + const Id shift{ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), ctx.Const(2u))}; + Id element{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.Const(3u))}; + if (index_offset > 0) { + element = ctx.OpIAdd(ctx.U32[1], element, ctx.Const(index_offset)); + } + return ctx.OpVectorExtractDynamic(ctx.U32[1], vector, element); +} +} // Anonymous namespace + +void EmitGetRegister(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitSetRegister(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitGetPred(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitSetPred(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitSetGotoVariable(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitGetGotoVariable(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitSetIndirectBranchVariable(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitGetIndirectBranchVariable(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int8) { + const Id load{GetCbuf(ctx, ctx.U8, &UniformDefinitions::U8, sizeof(u8), binding, offset)}; + return ctx.OpUConvert(ctx.U32[1], load); + } + Id element{}; + if (ctx.profile.support_descriptor_aliasing) { + element = GetCbufU32(ctx, binding, offset); + } else { + const Id vector{GetCbufU32x4(ctx, binding, offset)}; + element = GetCbufElement(ctx, vector, offset, 0u); + } + const Id bit_offset{ctx.BitOffset8(offset)}; + return ctx.OpBitFieldUExtract(ctx.U32[1], element, bit_offset, ctx.Const(8u)); +} + +Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int8) { + const Id load{GetCbuf(ctx, ctx.S8, &UniformDefinitions::S8, sizeof(s8), binding, offset)}; + return ctx.OpSConvert(ctx.U32[1], load); + } + Id element{}; + if (ctx.profile.support_descriptor_aliasing) { + element = GetCbufU32(ctx, binding, offset); + } else { + const Id vector{GetCbufU32x4(ctx, binding, offset)}; + element = GetCbufElement(ctx, 
vector, offset, 0u); + } + const Id bit_offset{ctx.BitOffset8(offset)}; + return ctx.OpBitFieldSExtract(ctx.U32[1], element, bit_offset, ctx.Const(8u)); +} + +Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int16) { + const Id load{ + GetCbuf(ctx, ctx.U16, &UniformDefinitions::U16, sizeof(u16), binding, offset)}; + return ctx.OpUConvert(ctx.U32[1], load); + } + Id element{}; + if (ctx.profile.support_descriptor_aliasing) { + element = GetCbufU32(ctx, binding, offset); + } else { + const Id vector{GetCbufU32x4(ctx, binding, offset)}; + element = GetCbufElement(ctx, vector, offset, 0u); + } + const Id bit_offset{ctx.BitOffset16(offset)}; + return ctx.OpBitFieldUExtract(ctx.U32[1], element, bit_offset, ctx.Const(16u)); +} + +Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int16) { + const Id load{ + GetCbuf(ctx, ctx.S16, &UniformDefinitions::S16, sizeof(s16), binding, offset)}; + return ctx.OpSConvert(ctx.U32[1], load); + } + Id element{}; + if (ctx.profile.support_descriptor_aliasing) { + element = GetCbufU32(ctx, binding, offset); + } else { + const Id vector{GetCbufU32x4(ctx, binding, offset)}; + element = GetCbufElement(ctx, vector, offset, 0u); + } + const Id bit_offset{ctx.BitOffset16(offset)}; + return ctx.OpBitFieldSExtract(ctx.U32[1], element, bit_offset, ctx.Const(16u)); +} + +Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + if (ctx.profile.support_descriptor_aliasing) { + return GetCbufU32(ctx, binding, offset); + } else { + const Id vector{GetCbufU32x4(ctx, binding, offset)}; + return GetCbufElement(ctx, vector, offset, 0u); + } +} + +Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + if (ctx.profile.support_descriptor_aliasing) { + return GetCbuf(ctx, ctx.F32[1], &UniformDefinitions::F32, sizeof(f32), binding, offset); + } else { + const Id vector{GetCbufU32x4(ctx, binding, offset)}; + return ctx.OpBitcast(ctx.F32[1], GetCbufElement(ctx, vector, offset, 0u)); + } +} + +Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + if (ctx.profile.support_descriptor_aliasing) { + return GetCbuf(ctx, ctx.U32[2], &UniformDefinitions::U32x2, sizeof(u32[2]), binding, + offset); + } else { + const Id vector{GetCbufU32x4(ctx, binding, offset)}; + return ctx.OpCompositeConstruct(ctx.U32[2], GetCbufElement(ctx, vector, offset, 0u), + GetCbufElement(ctx, vector, offset, 1u)); + } +} + +Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { + const u32 element{static_cast<u32>(attr) % 4}; + if (IR::IsGeneric(attr)) { + const u32 index{IR::GenericAttributeIndex(attr)}; + const std::optional<AttrInfo> type{AttrTypes(ctx, index)}; + if (!type) { + // Attribute is disabled + return ctx.Const(element == 3 ? 1.0f : 0.0f); + } + if (!ctx.runtime_info.previous_stage_stores.Generic(index, element)) { + // Varying component is not written + return ctx.Const(type && element == 3 ? 1.0f : 0.0f); + } + const Id generic_id{ctx.input_generics.at(index)}; + const Id pointer{AttrPointer(ctx, type->pointer, vertex, generic_id, ctx.Const(element))}; + const Id value{ctx.OpLoad(type->id, pointer)}; + return type->needs_cast ? 
ctx.OpBitcast(ctx.F32[1], value) : value; + } + switch (attr) { + case IR::Attribute::PrimitiveId: + return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.primitive_id)); + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: + return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, + ctx.Const(element))); + case IR::Attribute::InstanceId: + if (ctx.profile.support_vertex_instance_id) { + return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id)); + } else { + const Id index{ctx.OpLoad(ctx.U32[1], ctx.instance_index)}; + const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_instance)}; + return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base)); + } + case IR::Attribute::VertexId: + if (ctx.profile.support_vertex_instance_id) { + return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.vertex_id)); + } else { + const Id index{ctx.OpLoad(ctx.U32[1], ctx.vertex_index)}; + const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)}; + return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base)); + } + case IR::Attribute::FrontFace: + return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1, ctx.front_face), + ctx.Const(std::numeric_limits<u32>::max()), ctx.u32_zero_value); + case IR::Attribute::PointSpriteS: + return ctx.OpLoad(ctx.F32[1], + ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, ctx.u32_zero_value)); + case IR::Attribute::PointSpriteT: + return ctx.OpLoad(ctx.F32[1], + ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, ctx.Const(1U))); + case IR::Attribute::TessellationEvaluationPointU: + return ctx.OpLoad(ctx.F32[1], + ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.u32_zero_value)); + case IR::Attribute::TessellationEvaluationPointV: + return ctx.OpLoad(ctx.F32[1], + ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.Const(1U))); + + default: + throw NotImplementedException("Read attribute {}", attr); + } +} + +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, [[maybe_unused]] Id vertex) { + const std::optional<OutAttr> output{OutputAttrPointer(ctx, attr)}; + if (!output) { + return; + } + if (Sirit::ValidId(output->type)) { + value = ctx.OpBitcast(output->type, value); + } + ctx.OpStore(output->pointer, value); +} + +Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex) { + switch (ctx.stage) { + case Stage::TessellationControl: + case Stage::TessellationEval: + case Stage::Geometry: + return ctx.OpFunctionCall(ctx.F32[1], ctx.indexed_load_func, offset, vertex); + default: + return ctx.OpFunctionCall(ctx.F32[1], ctx.indexed_load_func, offset); + } +} + +void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, [[maybe_unused]] Id vertex) { + ctx.OpFunctionCall(ctx.void_id, ctx.indexed_store_func, offset, value); +} + +Id EmitGetPatch(EmitContext& ctx, IR::Patch patch) { + if (!IR::IsGeneric(patch)) { + throw NotImplementedException("Non-generic patch load"); + } + const u32 index{IR::GenericPatchIndex(patch)}; + const Id element{ctx.Const(IR::GenericPatchElement(patch))}; + const Id type{ctx.stage == Stage::TessellationControl ? 
ctx.output_f32 : ctx.input_f32}; + const Id pointer{ctx.OpAccessChain(type, ctx.patches.at(index), element)}; + return ctx.OpLoad(ctx.F32[1], pointer); +} + +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) { + const Id pointer{[&] { + if (IR::IsGeneric(patch)) { + const u32 index{IR::GenericPatchIndex(patch)}; + const Id element{ctx.Const(IR::GenericPatchElement(patch))}; + return ctx.OpAccessChain(ctx.output_f32, ctx.patches.at(index), element); + } + switch (patch) { + case IR::Patch::TessellationLodLeft: + case IR::Patch::TessellationLodRight: + case IR::Patch::TessellationLodTop: + case IR::Patch::TessellationLodBottom: { + const u32 index{static_cast<u32>(patch) - u32(IR::Patch::TessellationLodLeft)}; + const Id index_id{ctx.Const(index)}; + return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_outer, index_id); + } + case IR::Patch::TessellationLodInteriorU: + return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, + ctx.u32_zero_value); + case IR::Patch::TessellationLodInteriorV: + return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, ctx.Const(1u)); + default: + throw NotImplementedException("Patch {}", patch); + } + }()}; + ctx.OpStore(pointer, value); +} + +void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value) { + const Id component_id{ctx.Const(component)}; + const Id pointer{ctx.OpAccessChain(ctx.output_f32, ctx.frag_color.at(index), component_id)}; + ctx.OpStore(pointer, value); +} + +void EmitSetSampleMask(EmitContext& ctx, Id value) { + ctx.OpStore(ctx.sample_mask, value); +} + +void EmitSetFragDepth(EmitContext& ctx, Id value) { + ctx.OpStore(ctx.frag_depth, value); +} + +void EmitGetZFlag(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitGetSFlag(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitGetCFlag(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitGetOFlag(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSetZFlag(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSetSFlag(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSetCFlag(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSetOFlag(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitWorkgroupId(EmitContext& ctx) { + return ctx.OpLoad(ctx.U32[3], ctx.workgroup_id); +} + +Id EmitLocalInvocationId(EmitContext& ctx) { + return ctx.OpLoad(ctx.U32[3], ctx.local_invocation_id); +} + +Id EmitInvocationId(EmitContext& ctx) { + return ctx.OpLoad(ctx.U32[1], ctx.invocation_id); +} + +Id EmitSampleId(EmitContext& ctx) { + return ctx.OpLoad(ctx.U32[1], ctx.sample_id); +} + +Id EmitIsHelperInvocation(EmitContext& ctx) { + return ctx.OpLoad(ctx.U1, ctx.is_helper_invocation); +} + +Id EmitYDirection(EmitContext& ctx) { + return ctx.Const(ctx.runtime_info.y_negate ? 
-1.0f : 1.0f); +} + +Id EmitLoadLocal(EmitContext& ctx, Id word_offset) { + const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)}; + return ctx.OpLoad(ctx.U32[1], pointer); +} + +void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value) { + const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)}; + ctx.OpStore(pointer, value); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp new file mode 100644 index 000000000..d33486f28 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp @@ -0,0 +1,28 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" + +namespace Shader::Backend::SPIRV { + +void EmitJoin(EmitContext&) { + throw NotImplementedException("Join shouldn't be emitted"); +} + +void EmitDemoteToHelperInvocation(EmitContext& ctx) { + if (ctx.profile.support_demote_to_helper_invocation) { + ctx.OpDemoteToHelperInvocationEXT(); + } else { + const Id kill_label{ctx.OpLabel()}; + const Id impossible_label{ctx.OpLabel()}; + ctx.OpSelectionMerge(impossible_label, spv::SelectionControlMask::MaskNone); + ctx.OpBranchConditional(ctx.true_value, kill_label, impossible_label); + ctx.AddLabel(kill_label); + ctx.OpKill(); + ctx.AddLabel(impossible_label); + } +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp new file mode 100644 index 000000000..fd42b7a16 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp @@ -0,0 +1,269 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
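+ +// Note: when the host lacks native Int8/Int16 support, the 8-bit and 16-bit conversions below +// are emulated with 32-bit OpBitFieldUExtract/OpBitFieldSExtract (see the Extract helpers).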
+ +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" + +namespace Shader::Backend::SPIRV { +namespace { +Id ExtractU16(EmitContext& ctx, Id value) { + if (ctx.profile.support_int16) { + return ctx.OpUConvert(ctx.U16, value); + } else { + return ctx.OpBitFieldUExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.Const(16u)); + } +} + +Id ExtractS16(EmitContext& ctx, Id value) { + if (ctx.profile.support_int16) { + return ctx.OpSConvert(ctx.S16, value); + } else { + return ctx.OpBitFieldSExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.Const(16u)); + } +} + +Id ExtractU8(EmitContext& ctx, Id value) { + if (ctx.profile.support_int8) { + return ctx.OpUConvert(ctx.U8, value); + } else { + return ctx.OpBitFieldUExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.Const(8u)); + } +} + +Id ExtractS8(EmitContext& ctx, Id value) { + if (ctx.profile.support_int8) { + return ctx.OpSConvert(ctx.S8, value); + } else { + return ctx.OpBitFieldSExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.Const(8u)); + } +} +} // Anonymous namespace + +Id EmitConvertS16F16(EmitContext& ctx, Id value) { + if (ctx.profile.support_int16) { + return ctx.OpSConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); + } else { + return ExtractS16(ctx, ctx.OpConvertFToS(ctx.U32[1], value)); + } +} + +Id EmitConvertS16F32(EmitContext& ctx, Id value) { + if (ctx.profile.support_int16) { + return ctx.OpSConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); + } else { + return ExtractS16(ctx, ctx.OpConvertFToS(ctx.U32[1], value)); + } +} + +Id EmitConvertS16F64(EmitContext& ctx, Id value) { + if (ctx.profile.support_int16) { + return ctx.OpSConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); + } else { + return ExtractS16(ctx, ctx.OpConvertFToS(ctx.U32[1], value)); + } +} + +Id EmitConvertS32F16(EmitContext& ctx, Id value) { + return ctx.OpConvertFToS(ctx.U32[1], value); +} + +Id EmitConvertS32F32(EmitContext& ctx, Id value) { + if (ctx.profile.has_broken_signed_operations) { + return ctx.OpBitcast(ctx.U32[1], ctx.OpConvertFToS(ctx.S32[1], value)); + } else { + return ctx.OpConvertFToS(ctx.U32[1], value); + } +} + +Id EmitConvertS32F64(EmitContext& ctx, Id value) { + return ctx.OpConvertFToS(ctx.U32[1], value); +} + +Id EmitConvertS64F16(EmitContext& ctx, Id value) { + return ctx.OpConvertFToS(ctx.U64, value); +} + +Id EmitConvertS64F32(EmitContext& ctx, Id value) { + return ctx.OpConvertFToS(ctx.U64, value); +} + +Id EmitConvertS64F64(EmitContext& ctx, Id value) { + return ctx.OpConvertFToS(ctx.U64, value); +} + +Id EmitConvertU16F16(EmitContext& ctx, Id value) { + if (ctx.profile.support_int16) { + return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value)); + } else { + return ExtractU16(ctx, ctx.OpConvertFToU(ctx.U32[1], value)); + } +} + +Id EmitConvertU16F32(EmitContext& ctx, Id value) { + if (ctx.profile.support_int16) { + return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value)); + } else { + return ExtractU16(ctx, ctx.OpConvertFToU(ctx.U32[1], value)); + } +} + +Id EmitConvertU16F64(EmitContext& ctx, Id value) { + if (ctx.profile.support_int16) { + return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value)); + } else { + return ExtractU16(ctx, ctx.OpConvertFToU(ctx.U32[1], value)); + } +} + +Id EmitConvertU32F16(EmitContext& ctx, Id value) { + return ctx.OpConvertFToU(ctx.U32[1], value); +} + +Id EmitConvertU32F32(EmitContext& ctx, Id value) { + return ctx.OpConvertFToU(ctx.U32[1], value); +} + +Id 
EmitConvertU32F64(EmitContext& ctx, Id value) { + return ctx.OpConvertFToU(ctx.U32[1], value); +} + +Id EmitConvertU64F16(EmitContext& ctx, Id value) { + return ctx.OpConvertFToU(ctx.U64, value); +} + +Id EmitConvertU64F32(EmitContext& ctx, Id value) { + return ctx.OpConvertFToU(ctx.U64, value); +} + +Id EmitConvertU64F64(EmitContext& ctx, Id value) { + return ctx.OpConvertFToU(ctx.U64, value); +} + +Id EmitConvertU64U32(EmitContext& ctx, Id value) { + return ctx.OpUConvert(ctx.U64, value); +} + +Id EmitConvertU32U64(EmitContext& ctx, Id value) { + return ctx.OpUConvert(ctx.U32[1], value); +} + +Id EmitConvertF16F32(EmitContext& ctx, Id value) { + return ctx.OpFConvert(ctx.F16[1], value); +} + +Id EmitConvertF32F16(EmitContext& ctx, Id value) { + return ctx.OpFConvert(ctx.F32[1], value); +} + +Id EmitConvertF32F64(EmitContext& ctx, Id value) { + return ctx.OpFConvert(ctx.F32[1], value); +} + +Id EmitConvertF64F32(EmitContext& ctx, Id value) { + return ctx.OpFConvert(ctx.F64[1], value); +} + +Id EmitConvertF16S8(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F16[1], ExtractS8(ctx, value)); +} + +Id EmitConvertF16S16(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F16[1], ExtractS16(ctx, value)); +} + +Id EmitConvertF16S32(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F16[1], value); +} + +Id EmitConvertF16S64(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F16[1], value); +} + +Id EmitConvertF16U8(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F16[1], ExtractU8(ctx, value)); +} + +Id EmitConvertF16U16(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F16[1], ExtractU16(ctx, value)); +} + +Id EmitConvertF16U32(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F16[1], value); +} + +Id EmitConvertF16U64(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F16[1], value); +} + +Id EmitConvertF32S8(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F32[1], ExtractS8(ctx, value)); +} + +Id EmitConvertF32S16(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F32[1], ExtractS16(ctx, value)); +} + +Id EmitConvertF32S32(EmitContext& ctx, Id value) { + if (ctx.profile.has_broken_signed_operations) { + value = ctx.OpBitcast(ctx.S32[1], value); + } + return ctx.OpConvertSToF(ctx.F32[1], value); +} + +Id EmitConvertF32S64(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F32[1], value); +} + +Id EmitConvertF32U8(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F32[1], ExtractU8(ctx, value)); +} + +Id EmitConvertF32U16(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F32[1], ExtractU16(ctx, value)); +} + +Id EmitConvertF32U32(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F32[1], value); +} + +Id EmitConvertF32U64(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F32[1], value); +} + +Id EmitConvertF64S8(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F64[1], ExtractS8(ctx, value)); +} + +Id EmitConvertF64S16(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F64[1], ExtractS16(ctx, value)); +} + +Id EmitConvertF64S32(EmitContext& ctx, Id value) { + if (ctx.profile.has_broken_signed_operations) { + value = ctx.OpBitcast(ctx.S32[1], value); + } + return ctx.OpConvertSToF(ctx.F64[1], value); +} + +Id EmitConvertF64S64(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F64[1], value); +} + +Id EmitConvertF64U8(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F64[1], ExtractU8(ctx, 
value)); +} + +Id EmitConvertF64U16(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F64[1], ExtractU16(ctx, value)); +} + +Id EmitConvertF64U32(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F64[1], value); +} + +Id EmitConvertF64U64(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F64[1], value); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp new file mode 100644 index 000000000..61cf25f9c --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -0,0 +1,396 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" +#include "shader_recompiler/frontend/ir/modifiers.h" + +namespace Shader::Backend::SPIRV { +namespace { +Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) { + const auto flags{inst->Flags<IR::FpControl>()}; + if (flags.no_contraction) { + ctx.Decorate(op, spv::Decoration::NoContraction); + } + return op; +} + +Id Clamp(EmitContext& ctx, Id type, Id value, Id zero, Id one) { + if (ctx.profile.has_broken_spirv_clamp) { + return ctx.OpFMin(type, ctx.OpFMax(type, value, zero), one); + } else { + return ctx.OpFClamp(type, value, zero, one); + } +} + +Id FPOrdNotEqual(EmitContext& ctx, Id lhs, Id rhs) { + if (ctx.profile.ignore_nan_fp_comparisons) { + const Id comp{ctx.OpFUnordNotEqual(ctx.U1, lhs, rhs)}; + const Id lhs_not_nan{ctx.OpLogicalNot(ctx.U1, ctx.OpIsNan(ctx.U1, lhs))}; + const Id rhs_not_nan{ctx.OpLogicalNot(ctx.U1, ctx.OpIsNan(ctx.U1, rhs))}; + return ctx.OpLogicalAnd(ctx.U1, ctx.OpLogicalAnd(ctx.U1, comp, lhs_not_nan), rhs_not_nan); + } else { + return ctx.OpFOrdNotEqual(ctx.U1, lhs, rhs); + } +} + +Id FPUnordCompare(Id (EmitContext::*comp_func)(Id, Id, Id), EmitContext& ctx, Id lhs, Id rhs) { + if (ctx.profile.ignore_nan_fp_comparisons) { + const Id lhs_nan{ctx.OpIsNan(ctx.U1, lhs)}; + const Id rhs_nan{ctx.OpIsNan(ctx.U1, rhs)}; + const Id comp{(ctx.*comp_func)(ctx.U1, lhs, rhs)}; + return ctx.OpLogicalOr(ctx.U1, ctx.OpLogicalOr(ctx.U1, comp, lhs_nan), rhs_nan); + } else { + return (ctx.*comp_func)(ctx.U1, lhs, rhs); + } +} +} // Anonymous namespace + +Id EmitFPAbs16(EmitContext& ctx, Id value) { + return ctx.OpFAbs(ctx.F16[1], value); +} + +Id EmitFPAbs32(EmitContext& ctx, Id value) { + return ctx.OpFAbs(ctx.F32[1], value); +} + +Id EmitFPAbs64(EmitContext& ctx, Id value) { + return ctx.OpFAbs(ctx.F64[1], value); +} + +Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + return Decorate(ctx, inst, ctx.OpFAdd(ctx.F16[1], a, b)); +} + +Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + return Decorate(ctx, inst, ctx.OpFAdd(ctx.F32[1], a, b)); +} + +Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + return Decorate(ctx, inst, ctx.OpFAdd(ctx.F64[1], a, b)); +} + +Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { + return Decorate(ctx, inst, ctx.OpFma(ctx.F16[1], a, b, c)); +} + +Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { + return Decorate(ctx, inst, ctx.OpFma(ctx.F32[1], a, b, c)); +} + +Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { + return Decorate(ctx, inst, ctx.OpFma(ctx.F64[1], a, b, c)); +} + +Id EmitFPMax32(EmitContext& ctx, Id a, Id b) { + return
ctx.OpFMax(ctx.F32[1], a, b); +} + +Id EmitFPMax64(EmitContext& ctx, Id a, Id b) { + return ctx.OpFMax(ctx.F64[1], a, b); +} + +Id EmitFPMin32(EmitContext& ctx, Id a, Id b) { + return ctx.OpFMin(ctx.F32[1], a, b); +} + +Id EmitFPMin64(EmitContext& ctx, Id a, Id b) { + return ctx.OpFMin(ctx.F64[1], a, b); +} + +Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + return Decorate(ctx, inst, ctx.OpFMul(ctx.F16[1], a, b)); +} + +Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + return Decorate(ctx, inst, ctx.OpFMul(ctx.F32[1], a, b)); +} + +Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + return Decorate(ctx, inst, ctx.OpFMul(ctx.F64[1], a, b)); +} + +Id EmitFPNeg16(EmitContext& ctx, Id value) { + return ctx.OpFNegate(ctx.F16[1], value); +} + +Id EmitFPNeg32(EmitContext& ctx, Id value) { + return ctx.OpFNegate(ctx.F32[1], value); +} + +Id EmitFPNeg64(EmitContext& ctx, Id value) { + return ctx.OpFNegate(ctx.F64[1], value); +} + +Id EmitFPSin(EmitContext& ctx, Id value) { + return ctx.OpSin(ctx.F32[1], value); +} + +Id EmitFPCos(EmitContext& ctx, Id value) { + return ctx.OpCos(ctx.F32[1], value); +} + +Id EmitFPExp2(EmitContext& ctx, Id value) { + return ctx.OpExp2(ctx.F32[1], value); +} + +Id EmitFPLog2(EmitContext& ctx, Id value) { + return ctx.OpLog2(ctx.F32[1], value); +} + +Id EmitFPRecip32(EmitContext& ctx, Id value) { + return ctx.OpFDiv(ctx.F32[1], ctx.Const(1.0f), value); +} + +Id EmitFPRecip64(EmitContext& ctx, Id value) { + return ctx.OpFDiv(ctx.F64[1], ctx.Constant(ctx.F64[1], 1.0f), value); +} + +Id EmitFPRecipSqrt32(EmitContext& ctx, Id value) { + return ctx.OpInverseSqrt(ctx.F32[1], value); +} + +Id EmitFPRecipSqrt64(EmitContext& ctx, Id value) { + return ctx.OpInverseSqrt(ctx.F64[1], value); +} + +Id EmitFPSqrt(EmitContext& ctx, Id value) { + return ctx.OpSqrt(ctx.F32[1], value); +} + +Id EmitFPSaturate16(EmitContext& ctx, Id value) { + const Id zero{ctx.Constant(ctx.F16[1], u16{0})}; + const Id one{ctx.Constant(ctx.F16[1], u16{0x3c00})}; + return Clamp(ctx, ctx.F16[1], value, zero, one); +} + +Id EmitFPSaturate32(EmitContext& ctx, Id value) { + const Id zero{ctx.Const(f32{0.0})}; + const Id one{ctx.Const(f32{1.0})}; + return Clamp(ctx, ctx.F32[1], value, zero, one); +} + +Id EmitFPSaturate64(EmitContext& ctx, Id value) { + const Id zero{ctx.Constant(ctx.F64[1], f64{0.0})}; + const Id one{ctx.Constant(ctx.F64[1], f64{1.0})}; + return Clamp(ctx, ctx.F64[1], value, zero, one); +} + +Id EmitFPClamp16(EmitContext& ctx, Id value, Id min_value, Id max_value) { + return Clamp(ctx, ctx.F16[1], value, min_value, max_value); +} + +Id EmitFPClamp32(EmitContext& ctx, Id value, Id min_value, Id max_value) { + return Clamp(ctx, ctx.F32[1], value, min_value, max_value); +} + +Id EmitFPClamp64(EmitContext& ctx, Id value, Id min_value, Id max_value) { + return Clamp(ctx, ctx.F64[1], value, min_value, max_value); +} + +Id EmitFPRoundEven16(EmitContext& ctx, Id value) { + return ctx.OpRoundEven(ctx.F16[1], value); +} + +Id EmitFPRoundEven32(EmitContext& ctx, Id value) { + return ctx.OpRoundEven(ctx.F32[1], value); +} + +Id EmitFPRoundEven64(EmitContext& ctx, Id value) { + return ctx.OpRoundEven(ctx.F64[1], value); +} + +Id EmitFPFloor16(EmitContext& ctx, Id value) { + return ctx.OpFloor(ctx.F16[1], value); +} + +Id EmitFPFloor32(EmitContext& ctx, Id value) { + return ctx.OpFloor(ctx.F32[1], value); +} + +Id EmitFPFloor64(EmitContext& ctx, Id value) { + return ctx.OpFloor(ctx.F64[1], value); +} + +Id EmitFPCeil16(EmitContext& ctx, Id value) { + return 
ctx.OpCeil(ctx.F16[1], value); +} + +Id EmitFPCeil32(EmitContext& ctx, Id value) { + return ctx.OpCeil(ctx.F32[1], value); +} + +Id EmitFPCeil64(EmitContext& ctx, Id value) { + return ctx.OpCeil(ctx.F64[1], value); +} + +Id EmitFPTrunc16(EmitContext& ctx, Id value) { + return ctx.OpTrunc(ctx.F16[1], value); +} + +Id EmitFPTrunc32(EmitContext& ctx, Id value) { + return ctx.OpTrunc(ctx.F32[1], value); +} + +Id EmitFPTrunc64(EmitContext& ctx, Id value) { + return ctx.OpTrunc(ctx.F64[1], value); +} + +Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordEqual16(EmitContext& ctx, Id lhs, Id rhs) { + return FPUnordCompare(&EmitContext::OpFUnordEqual, ctx, lhs, rhs); +} + +Id EmitFPUnordEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return FPUnordCompare(&EmitContext::OpFUnordEqual, ctx, lhs, rhs); +} + +Id EmitFPUnordEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return FPUnordCompare(&EmitContext::OpFUnordEqual, ctx, lhs, rhs); +} + +Id EmitFPOrdNotEqual16(EmitContext& ctx, Id lhs, Id rhs) { + return FPOrdNotEqual(ctx, lhs, rhs); +} + +Id EmitFPOrdNotEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return FPOrdNotEqual(ctx, lhs, rhs); +} + +Id EmitFPOrdNotEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return FPOrdNotEqual(ctx, lhs, rhs); +} + +Id EmitFPUnordNotEqual16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordNotEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordNotEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordNotEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordNotEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordNotEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdLessThan16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdLessThan(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdLessThan32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdLessThan(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdLessThan64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdLessThan(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordLessThan16(EmitContext& ctx, Id lhs, Id rhs) { + return FPUnordCompare(&EmitContext::OpFUnordLessThan, ctx, lhs, rhs); +} + +Id EmitFPUnordLessThan32(EmitContext& ctx, Id lhs, Id rhs) { + return FPUnordCompare(&EmitContext::OpFUnordLessThan, ctx, lhs, rhs); +} + +Id EmitFPUnordLessThan64(EmitContext& ctx, Id lhs, Id rhs) { + return FPUnordCompare(&EmitContext::OpFUnordLessThan, ctx, lhs, rhs); +} + +Id EmitFPOrdGreaterThan16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdGreaterThan(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdGreaterThan32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdGreaterThan(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdGreaterThan64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdGreaterThan(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordGreaterThan16(EmitContext& ctx, Id lhs, Id rhs) { + return FPUnordCompare(&EmitContext::OpFUnordGreaterThan, ctx, lhs, rhs); +} + +Id EmitFPUnordGreaterThan32(EmitContext& ctx, Id lhs, Id rhs) { + return FPUnordCompare(&EmitContext::OpFUnordGreaterThan, ctx, lhs, rhs); +} + +Id EmitFPUnordGreaterThan64(EmitContext& ctx, Id lhs, Id rhs) { + return FPUnordCompare(&EmitContext::OpFUnordGreaterThan, ctx, lhs, rhs); +} + +Id EmitFPOrdLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdLessThanEqual(ctx.U1, lhs, rhs); +} + +Id 
EmitFPOrdLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdLessThanEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdLessThanEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs) { + return FPUnordCompare(&EmitContext::OpFUnordLessThanEqual, ctx, lhs, rhs); +} + +Id EmitFPUnordLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return FPUnordCompare(&EmitContext::OpFUnordLessThanEqual, ctx, lhs, rhs); +} + +Id EmitFPUnordLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return FPUnordCompare(&EmitContext::OpFUnordLessThanEqual, ctx, lhs, rhs); +} + +Id EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdGreaterThanEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdGreaterThanEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdGreaterThanEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs) { + return FPUnordCompare(&EmitContext::OpFUnordGreaterThanEqual, ctx, lhs, rhs); +} + +Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return FPUnordCompare(&EmitContext::OpFUnordGreaterThanEqual, ctx, lhs, rhs); +} + +Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return FPUnordCompare(&EmitContext::OpFUnordGreaterThanEqual, ctx, lhs, rhs); +} + +Id EmitFPIsNan16(EmitContext& ctx, Id value) { + return ctx.OpIsNan(ctx.U1, value); +} + +Id EmitFPIsNan32(EmitContext& ctx, Id value) { + return ctx.OpIsNan(ctx.U1, value); +} + +Id EmitFPIsNan64(EmitContext& ctx, Id value) { + return ctx.OpIsNan(ctx.U1, value); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp new file mode 100644 index 000000000..3588f052b --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -0,0 +1,462 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <boost/container/static_vector.hpp> + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" +#include "shader_recompiler/frontend/ir/modifiers.h" + +namespace Shader::Backend::SPIRV { +namespace { +class ImageOperands { +public: + explicit ImageOperands(EmitContext& ctx, bool has_bias, bool has_lod, bool has_lod_clamp, + Id lod, const IR::Value& offset) { + if (has_bias) { + const Id bias{has_lod_clamp ? ctx.OpCompositeExtract(ctx.F32[1], lod, 0) : lod}; + Add(spv::ImageOperandsMask::Bias, bias); + } + if (has_lod) { + const Id lod_value{has_lod_clamp ? ctx.OpCompositeExtract(ctx.F32[1], lod, 0) : lod}; + Add(spv::ImageOperandsMask::Lod, lod_value); + } + AddOffset(ctx, offset); + if (has_lod_clamp) { + const Id lod_clamp{has_bias ? 
ctx.OpCompositeExtract(ctx.F32[1], lod, 1) : lod}; + Add(spv::ImageOperandsMask::MinLod, lod_clamp); + } + } + + explicit ImageOperands(EmitContext& ctx, const IR::Value& offset, const IR::Value& offset2) { + if (offset2.IsEmpty()) { + if (offset.IsEmpty()) { + return; + } + Add(spv::ImageOperandsMask::Offset, ctx.Def(offset)); + return; + } + const std::array values{offset.InstRecursive(), offset2.InstRecursive()}; + if (!values[0]->AreAllArgsImmediates() || !values[1]->AreAllArgsImmediates()) { + LOG_WARNING(Shader_SPIRV, "Not all arguments in PTP are immediate, ignoring"); + return; + } + const IR::Opcode opcode{values[0]->GetOpcode()}; + if (opcode != values[1]->GetOpcode() || opcode != IR::Opcode::CompositeConstructU32x4) { + throw LogicError("Invalid PTP arguments"); + } + auto read{[&](unsigned int a, unsigned int b) { return values[a]->Arg(b).U32(); }}; + + const Id offsets{ctx.ConstantComposite( + ctx.TypeArray(ctx.U32[2], ctx.Const(4U)), ctx.Const(read(0, 0), read(0, 1)), + ctx.Const(read(0, 2), read(0, 3)), ctx.Const(read(1, 0), read(1, 1)), + ctx.Const(read(1, 2), read(1, 3)))}; + Add(spv::ImageOperandsMask::ConstOffsets, offsets); + } + + explicit ImageOperands(Id offset, Id lod, Id ms) { + if (Sirit::ValidId(lod)) { + Add(spv::ImageOperandsMask::Lod, lod); + } + if (Sirit::ValidId(offset)) { + Add(spv::ImageOperandsMask::Offset, offset); + } + if (Sirit::ValidId(ms)) { + Add(spv::ImageOperandsMask::Sample, ms); + } + } + + explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivates, u32 num_derivates, + Id offset, Id lod_clamp) { + if (!Sirit::ValidId(derivates)) { + throw LogicError("Derivates must be present"); + } + boost::container::static_vector<Id, 3> deriv_x_accum; + boost::container::static_vector<Id, 3> deriv_y_accum; + for (u32 i = 0; i < num_derivates; ++i) { + deriv_x_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2)); + deriv_y_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2 + 1)); + } + const Id derivates_X{ctx.OpCompositeConstruct( + ctx.F32[num_derivates], std::span{deriv_x_accum.data(), deriv_x_accum.size()})}; + const Id derivates_Y{ctx.OpCompositeConstruct( + ctx.F32[num_derivates], std::span{deriv_y_accum.data(), deriv_y_accum.size()})}; + Add(spv::ImageOperandsMask::Grad, derivates_X, derivates_Y); + if (Sirit::ValidId(offset)) { + Add(spv::ImageOperandsMask::Offset, offset); + } + if (has_lod_clamp) { + Add(spv::ImageOperandsMask::MinLod, lod_clamp); + } + } + + std::span<const Id> Span() const noexcept { + return std::span{operands.data(), operands.size()}; + } + + std::optional<spv::ImageOperandsMask> MaskOptional() const noexcept { + return mask != spv::ImageOperandsMask{} ? 
std::make_optional(mask) : std::nullopt; + } + + spv::ImageOperandsMask Mask() const noexcept { + return mask; + } + +private: + void AddOffset(EmitContext& ctx, const IR::Value& offset) { + if (offset.IsEmpty()) { + return; + } + if (offset.IsImmediate()) { + Add(spv::ImageOperandsMask::ConstOffset, ctx.SConst(static_cast<s32>(offset.U32()))); + return; + } + IR::Inst* const inst{offset.InstRecursive()}; + if (inst->AreAllArgsImmediates()) { + switch (inst->GetOpcode()) { + case IR::Opcode::CompositeConstructU32x2: + Add(spv::ImageOperandsMask::ConstOffset, + ctx.SConst(static_cast<s32>(inst->Arg(0).U32()), + static_cast<s32>(inst->Arg(1).U32()))); + return; + case IR::Opcode::CompositeConstructU32x3: + Add(spv::ImageOperandsMask::ConstOffset, + ctx.SConst(static_cast<s32>(inst->Arg(0).U32()), + static_cast<s32>(inst->Arg(1).U32()), + static_cast<s32>(inst->Arg(2).U32()))); + return; + case IR::Opcode::CompositeConstructU32x4: + Add(spv::ImageOperandsMask::ConstOffset, + ctx.SConst(static_cast<s32>(inst->Arg(0).U32()), + static_cast<s32>(inst->Arg(1).U32()), + static_cast<s32>(inst->Arg(2).U32()), + static_cast<s32>(inst->Arg(3).U32()))); + return; + default: + break; + } + } + Add(spv::ImageOperandsMask::Offset, ctx.Def(offset)); + } + + void Add(spv::ImageOperandsMask new_mask, Id value) { + mask = static_cast<spv::ImageOperandsMask>(static_cast<unsigned>(mask) | + static_cast<unsigned>(new_mask)); + operands.push_back(value); + } + + void Add(spv::ImageOperandsMask new_mask, Id value_1, Id value_2) { + mask = static_cast<spv::ImageOperandsMask>(static_cast<unsigned>(mask) | + static_cast<unsigned>(new_mask)); + operands.push_back(value_1); + operands.push_back(value_2); + } + + boost::container::static_vector<Id, 4> operands; + spv::ImageOperandsMask mask{}; +}; + +Id Texture(EmitContext& ctx, IR::TextureInstInfo info, [[maybe_unused]] const IR::Value& index) { + const TextureDefinition& def{ctx.textures.at(info.descriptor_index)}; + if (def.count > 1) { + const Id pointer{ctx.OpAccessChain(def.pointer_type, def.id, ctx.Def(index))}; + return ctx.OpLoad(def.sampled_type, pointer); + } else { + return ctx.OpLoad(def.sampled_type, def.id); + } +} + +Id TextureImage(EmitContext& ctx, IR::TextureInstInfo info, const IR::Value& index) { + if (!index.IsImmediate() || index.U32() != 0) { + throw NotImplementedException("Indirect image indexing"); + } + if (info.type == TextureType::Buffer) { + const TextureBufferDefinition& def{ctx.texture_buffers.at(info.descriptor_index)}; + if (def.count > 1) { + throw NotImplementedException("Indirect texture sample"); + } + const Id sampler_id{def.id}; + const Id id{ctx.OpLoad(ctx.sampled_texture_buffer_type, sampler_id)}; + return ctx.OpImage(ctx.image_buffer_type, id); + } else { + const TextureDefinition& def{ctx.textures.at(info.descriptor_index)}; + if (def.count > 1) { + throw NotImplementedException("Indirect texture sample"); + } + return ctx.OpImage(def.image_type, ctx.OpLoad(def.sampled_type, def.id)); + } +} + +Id Image(EmitContext& ctx, const IR::Value& index, IR::TextureInstInfo info) { + if (!index.IsImmediate() || index.U32() != 0) { + throw NotImplementedException("Indirect image indexing"); + } + if (info.type == TextureType::Buffer) { + const ImageBufferDefinition def{ctx.image_buffers.at(info.descriptor_index)}; + return ctx.OpLoad(def.image_type, def.id); + } else { + const ImageDefinition def{ctx.images.at(info.descriptor_index)}; + return ctx.OpLoad(def.image_type, def.id); + } +} + +Id Decorate(EmitContext& ctx, IR::Inst* inst, Id 
sample) { + const auto info{inst->Flags<IR::TextureInstInfo>()}; + if (info.relaxed_precision != 0) { + ctx.Decorate(sample, spv::Decoration::RelaxedPrecision); + } + return sample; +} + +template <typename MethodPtrType, typename... Args> +Id Emit(MethodPtrType sparse_ptr, MethodPtrType non_sparse_ptr, EmitContext& ctx, IR::Inst* inst, + Id result_type, Args&&... args) { + IR::Inst* const sparse{inst->GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; + if (!sparse) { + return Decorate(ctx, inst, (ctx.*non_sparse_ptr)(result_type, std::forward<Args>(args)...)); + } + const Id struct_type{ctx.TypeStruct(ctx.U32[1], result_type)}; + const Id sample{(ctx.*sparse_ptr)(struct_type, std::forward<Args>(args)...)}; + const Id resident_code{ctx.OpCompositeExtract(ctx.U32[1], sample, 0U)}; + sparse->SetDefinition(ctx.OpImageSparseTexelsResident(ctx.U1, resident_code)); + sparse->Invalidate(); + Decorate(ctx, inst, sample); + return ctx.OpCompositeExtract(result_type, sample, 1U); +} +} // Anonymous namespace + +Id EmitBindlessImageSampleImplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBindlessImageSampleExplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBindlessImageSampleDrefImplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBindlessImageGather(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBindlessImageGatherDref(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBindlessImageFetch(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBindlessImageQueryDimensions(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBindlessImageQueryLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBindlessImageGradient(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBindlessImageRead(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBindlessImageWrite(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBoundImageSampleImplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBoundImageSampleExplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBoundImageSampleDrefImplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBoundImageSampleDrefExplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBoundImageGather(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBoundImageGatherDref(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBoundImageFetch(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBoundImageQueryDimensions(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBoundImageQueryLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBoundImageGradient(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBoundImageRead(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBoundImageWrite(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id bias_lc, const IR::Value& 
offset) { + const auto info{inst->Flags<IR::TextureInstInfo>()}; + if (ctx.stage == Stage::Fragment) { + const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0, + bias_lc, offset); + return Emit(&EmitContext::OpImageSparseSampleImplicitLod, + &EmitContext::OpImageSampleImplicitLod, ctx, inst, ctx.F32[4], + Texture(ctx, info, index), coords, operands.MaskOptional(), operands.Span()); + } else { + // We can't use implicit lods on non-fragment stages on SPIR-V. Maxwell hardware behaves as + // if the lod was explicitly zero. This may change on Turing with implicit compute + // derivatives + const Id lod{ctx.Const(0.0f)}; + const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod, offset); + return Emit(&EmitContext::OpImageSparseSampleExplicitLod, + &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], + Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); + } +} + +Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id lod, const IR::Value& offset) { + const auto info{inst->Flags<IR::TextureInstInfo>()}; + const ImageOperands operands(ctx, false, true, false, lod, offset); + return Emit(&EmitContext::OpImageSparseSampleExplicitLod, + &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], + Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); +} + +Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + Id coords, Id dref, Id bias_lc, const IR::Value& offset) { + const auto info{inst->Flags<IR::TextureInstInfo>()}; + const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0, bias_lc, + offset); + return Emit(&EmitContext::OpImageSparseSampleDrefImplicitLod, + &EmitContext::OpImageSampleDrefImplicitLod, ctx, inst, ctx.F32[1], + Texture(ctx, info, index), coords, dref, operands.MaskOptional(), operands.Span()); +} + +Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + Id coords, Id dref, Id lod, const IR::Value& offset) { + const auto info{inst->Flags<IR::TextureInstInfo>()}; + const ImageOperands operands(ctx, false, true, false, lod, offset); + return Emit(&EmitContext::OpImageSparseSampleDrefExplicitLod, + &EmitContext::OpImageSampleDrefExplicitLod, ctx, inst, ctx.F32[1], + Texture(ctx, info, index), coords, dref, operands.Mask(), operands.Span()); +} + +Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + const IR::Value& offset, const IR::Value& offset2) { + const auto info{inst->Flags<IR::TextureInstInfo>()}; + const ImageOperands operands(ctx, offset, offset2); + return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst, + ctx.F32[4], Texture(ctx, info, index), coords, ctx.Const(info.gather_component), + operands.MaskOptional(), operands.Span()); +} + +Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + const IR::Value& offset, const IR::Value& offset2, Id dref) { + const auto info{inst->Flags<IR::TextureInstInfo>()}; + const ImageOperands operands(ctx, offset, offset2); + return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst, + ctx.F32[4], Texture(ctx, info, index), coords, dref, operands.MaskOptional(), + operands.Span()); +} + +Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, + Id lod, Id ms) { + const auto 
info{inst->Flags<IR::TextureInstInfo>()}; + if (info.type == TextureType::Buffer) { + lod = Id{}; + } + const ImageOperands operands(offset, lod, ms); + return Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst, ctx.F32[4], + TextureImage(ctx, info, index), coords, operands.MaskOptional(), operands.Span()); +} + +Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod) { + const auto info{inst->Flags<IR::TextureInstInfo>()}; + const Id image{TextureImage(ctx, info, index)}; + const Id zero{ctx.u32_zero_value}; + const auto mips{[&] { return ctx.OpImageQueryLevels(ctx.U32[1], image); }}; + switch (info.type) { + case TextureType::Color1D: + return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[1], image, lod), + zero, zero, mips()); + case TextureType::ColorArray1D: + case TextureType::Color2D: + case TextureType::ColorCube: + return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[2], image, lod), + zero, mips()); + case TextureType::ColorArray2D: + case TextureType::Color3D: + case TextureType::ColorArrayCube: + return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[3], image, lod), + mips()); + case TextureType::Buffer: + return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySize(ctx.U32[1], image), zero, + zero, mips()); + } + throw LogicError("Unspecified image type {}", info.type.Value()); +} + +Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) { + const auto info{inst->Flags<IR::TextureInstInfo>()}; + const Id zero{ctx.f32_zero_value}; + const Id sampler{Texture(ctx, info, index)}; + return ctx.OpCompositeConstruct(ctx.F32[4], ctx.OpImageQueryLod(ctx.F32[2], sampler, coords), + zero, zero); +} + +Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id derivates, Id offset, Id lod_clamp) { + const auto info{inst->Flags<IR::TextureInstInfo>()}; + const ImageOperands operands(ctx, info.has_lod_clamp != 0, derivates, info.num_derivates, + offset, lod_clamp); + return Emit(&EmitContext::OpImageSparseSampleExplicitLod, + &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], + Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); +} + +Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) { + const auto info{inst->Flags<IR::TextureInstInfo>()}; + if (info.image_format == ImageFormat::Typeless && !ctx.profile.support_typeless_image_loads) { + LOG_WARNING(Shader_SPIRV, "Typeless image read not supported by host"); + return ctx.ConstantNull(ctx.U32[4]); + } + return Emit(&EmitContext::OpImageSparseRead, &EmitContext::OpImageRead, ctx, inst, ctx.U32[4], + Image(ctx, index, info), coords, std::nullopt, std::span<const Id>{}); +} + +void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color) { + const auto info{inst->Flags<IR::TextureInstInfo>()}; + ctx.OpImageWrite(Image(ctx, index, info), coords, color); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp new file mode 100644 index 000000000..d7f1a365a --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp @@ -0,0 +1,183 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
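+ +// 32-bit image atomics are implemented with OpImageTexelPointer plus the matching OpAtomic* +// instruction; Inc/Dec and the bindless/bound variants below are unimplemented and throw.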
+ +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" +#include "shader_recompiler/frontend/ir/modifiers.h" + +namespace Shader::Backend::SPIRV { +namespace { +Id Image(EmitContext& ctx, const IR::Value& index, IR::TextureInstInfo info) { + if (!index.IsImmediate()) { + throw NotImplementedException("Indirect image indexing"); + } + if (info.type == TextureType::Buffer) { + const ImageBufferDefinition def{ctx.image_buffers.at(index.U32())}; + return def.id; + } else { + const ImageDefinition def{ctx.images.at(index.U32())}; + return def.id; + } +} + +std::pair<Id, Id> AtomicArgs(EmitContext& ctx) { + const Id scope{ctx.Const(static_cast<u32>(spv::Scope::Device))}; + const Id semantics{ctx.u32_zero_value}; + return {scope, semantics}; +} + +Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id value, + Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) { + const auto info{inst->Flags<IR::TextureInstInfo>()}; + const Id image{Image(ctx, index, info)}; + const Id pointer{ctx.OpImageTexelPointer(ctx.image_u32, image, coords, ctx.Const(0U))}; + const auto [scope, semantics]{AtomicArgs(ctx)}; + return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value); +} +} // Anonymous namespace + +Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value) { + return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicIAdd); +} + +Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value) { + return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicSMin); +} + +Id EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value) { + return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicUMin); +} + +Id EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value) { + return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicSMax); +} + +Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value) { + return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicUMax); +} + +Id EmitImageAtomicInc32(EmitContext&, IR::Inst*, const IR::Value&, Id, Id) { + // TODO: This is not yet implemented + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitImageAtomicDec32(EmitContext&, IR::Inst*, const IR::Value&, Id, Id) { + // TODO: This is not yet implemented + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value) { + return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicAnd); +} + +Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value) { + return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicOr); +} + +Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value) { + return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicXor); +} + +Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value) { + return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicExchange); +} + +Id EmitBindlessImageAtomicIAdd32(EmitContext&) { + throw 
NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBindlessImageAtomicSMin32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBindlessImageAtomicUMin32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBindlessImageAtomicSMax32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBindlessImageAtomicUMax32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBindlessImageAtomicInc32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBindlessImageAtomicDec32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBindlessImageAtomicAnd32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBindlessImageAtomicOr32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBindlessImageAtomicXor32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBindlessImageAtomicExchange32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBoundImageAtomicIAdd32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBoundImageAtomicSMin32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBoundImageAtomicUMin32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBoundImageAtomicSMax32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBoundImageAtomicUMax32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBoundImageAtomicInc32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBoundImageAtomicDec32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBoundImageAtomicAnd32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBoundImageAtomicOr32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBoundImageAtomicXor32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBoundImageAtomicExchange32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h new file mode 100644 index 000000000..f99c02848 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -0,0 +1,579 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
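+ +#pragma once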
+ +#include <sirit/sirit.h> + +#include "common/common_types.h" + +namespace Shader::IR { +enum class Attribute : u64; +enum class Patch : u64; +class Inst; +class Value; +} // namespace Shader::IR + +namespace Shader::Backend::SPIRV { + +using Sirit::Id; + +class EmitContext; + +// Microinstruction emitters +Id EmitPhi(EmitContext& ctx, IR::Inst* inst); +void EmitVoid(EmitContext& ctx); +Id EmitIdentity(EmitContext& ctx, const IR::Value& value); +Id EmitConditionRef(EmitContext& ctx, const IR::Value& value); +void EmitReference(EmitContext&); +void EmitPhiMove(EmitContext&); +void EmitJoin(EmitContext& ctx); +void EmitDemoteToHelperInvocation(EmitContext& ctx); +void EmitBarrier(EmitContext& ctx); +void EmitWorkgroupMemoryBarrier(EmitContext& ctx); +void EmitDeviceMemoryBarrier(EmitContext& ctx); +void EmitPrologue(EmitContext& ctx); +void EmitEpilogue(EmitContext& ctx); +void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream); +void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream); +void EmitGetRegister(EmitContext& ctx); +void EmitSetRegister(EmitContext& ctx); +void EmitGetPred(EmitContext& ctx); +void EmitSetPred(EmitContext& ctx); +void EmitSetGotoVariable(EmitContext& ctx); +void EmitGetGotoVariable(EmitContext& ctx); +void EmitSetIndirectBranchVariable(EmitContext& ctx); +void EmitGetIndirectBranchVariable(EmitContext& ctx); +Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex); +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, Id vertex); +Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex); +void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, Id vertex); +Id EmitGetPatch(EmitContext& ctx, IR::Patch patch); +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value); +void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value); +void EmitSetSampleMask(EmitContext& ctx, Id value); +void EmitSetFragDepth(EmitContext& ctx, Id value); +void EmitGetZFlag(EmitContext& ctx); +void EmitGetSFlag(EmitContext& ctx); +void EmitGetCFlag(EmitContext& ctx); +void EmitGetOFlag(EmitContext& ctx); +void EmitSetZFlag(EmitContext& ctx); +void EmitSetSFlag(EmitContext& ctx); +void EmitSetCFlag(EmitContext& ctx); +void EmitSetOFlag(EmitContext& ctx); +Id EmitWorkgroupId(EmitContext& ctx); +Id EmitLocalInvocationId(EmitContext& ctx); +Id EmitInvocationId(EmitContext& ctx); +Id EmitSampleId(EmitContext& ctx); +Id EmitIsHelperInvocation(EmitContext& ctx); +Id EmitYDirection(EmitContext& ctx); +Id EmitLoadLocal(EmitContext& ctx, Id word_offset); +void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value); +Id EmitUndefU1(EmitContext& ctx); +Id EmitUndefU8(EmitContext& ctx); +Id EmitUndefU16(EmitContext& ctx); +Id EmitUndefU32(EmitContext& ctx); +Id EmitUndefU64(EmitContext& ctx); +void EmitLoadGlobalU8(EmitContext& ctx); +void EmitLoadGlobalS8(EmitContext& ctx); +void 
EmitLoadGlobalU16(EmitContext& ctx); +void EmitLoadGlobalS16(EmitContext& ctx); +Id EmitLoadGlobal32(EmitContext& ctx, Id address); +Id EmitLoadGlobal64(EmitContext& ctx, Id address); +Id EmitLoadGlobal128(EmitContext& ctx, Id address); +void EmitWriteGlobalU8(EmitContext& ctx); +void EmitWriteGlobalS8(EmitContext& ctx); +void EmitWriteGlobalU16(EmitContext& ctx); +void EmitWriteGlobalS16(EmitContext& ctx); +void EmitWriteGlobal32(EmitContext& ctx, Id address, Id value); +void EmitWriteGlobal64(EmitContext& ctx, Id address, Id value); +void EmitWriteGlobal128(EmitContext& ctx, Id address, Id value); +Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitLoadSharedU8(EmitContext& ctx, Id offset); +Id EmitLoadSharedS8(EmitContext& ctx, Id offset); +Id EmitLoadSharedU16(EmitContext& ctx, Id offset); +Id EmitLoadSharedS16(EmitContext& ctx, Id offset); +Id EmitLoadSharedU32(EmitContext& ctx, Id offset); +Id EmitLoadSharedU64(EmitContext& ctx, Id offset); +Id EmitLoadSharedU128(EmitContext& ctx, Id offset); +void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value); +void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value); +void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value); +void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value); +void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value); +Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2); +Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); +Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); +Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2); +Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3); +Id 
EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); +Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2); +Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3); +Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); +Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index); +void EmitCompositeConstructF64x2(EmitContext& ctx); +void EmitCompositeConstructF64x3(EmitContext& ctx); +void EmitCompositeConstructF64x4(EmitContext& ctx); +void EmitCompositeExtractF64x2(EmitContext& ctx); +void EmitCompositeExtractF64x3(EmitContext& ctx); +void EmitCompositeExtractF64x4(EmitContext& ctx); +Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectU8(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectF16(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value); +void EmitBitCastU16F16(EmitContext& ctx); +Id EmitBitCastU32F32(EmitContext& ctx, Id value); +void EmitBitCastU64F64(EmitContext& ctx); +void EmitBitCastF16U16(EmitContext& ctx); +Id EmitBitCastF32U32(EmitContext& ctx, Id value); +void EmitBitCastF64U64(EmitContext& ctx); +Id EmitPackUint2x32(EmitContext& ctx, Id value); +Id EmitUnpackUint2x32(EmitContext& ctx, Id value); +Id EmitPackFloat2x16(EmitContext& ctx, Id value); +Id EmitUnpackFloat2x16(EmitContext& ctx, Id value); +Id EmitPackHalf2x16(EmitContext& ctx, Id value); +Id EmitUnpackHalf2x16(EmitContext& ctx, Id value); +Id EmitPackDouble2x32(EmitContext& ctx, Id value); +Id EmitUnpackDouble2x32(EmitContext& ctx, Id value); +void EmitGetZeroFromOp(EmitContext& ctx); +void EmitGetSignFromOp(EmitContext& ctx); +void EmitGetCarryFromOp(EmitContext& ctx); +void EmitGetOverflowFromOp(EmitContext& ctx); +void EmitGetSparseFromOp(EmitContext& ctx); +void EmitGetInBoundsFromOp(EmitContext& ctx); +Id EmitFPAbs16(EmitContext& ctx, Id value); +Id EmitFPAbs32(EmitContext& ctx, Id value); +Id EmitFPAbs64(EmitContext& ctx, Id value); +Id EmitFPAdd16(EmitContext& ctx, IR::Inst* 
inst, Id a, Id b); +Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); +Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); +Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); +Id EmitFPMax32(EmitContext& ctx, Id a, Id b); +Id EmitFPMax64(EmitContext& ctx, Id a, Id b); +Id EmitFPMin32(EmitContext& ctx, Id a, Id b); +Id EmitFPMin64(EmitContext& ctx, Id a, Id b); +Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPNeg16(EmitContext& ctx, Id value); +Id EmitFPNeg32(EmitContext& ctx, Id value); +Id EmitFPNeg64(EmitContext& ctx, Id value); +Id EmitFPSin(EmitContext& ctx, Id value); +Id EmitFPCos(EmitContext& ctx, Id value); +Id EmitFPExp2(EmitContext& ctx, Id value); +Id EmitFPLog2(EmitContext& ctx, Id value); +Id EmitFPRecip32(EmitContext& ctx, Id value); +Id EmitFPRecip64(EmitContext& ctx, Id value); +Id EmitFPRecipSqrt32(EmitContext& ctx, Id value); +Id EmitFPRecipSqrt64(EmitContext& ctx, Id value); +Id EmitFPSqrt(EmitContext& ctx, Id value); +Id EmitFPSaturate16(EmitContext& ctx, Id value); +Id EmitFPSaturate32(EmitContext& ctx, Id value); +Id EmitFPSaturate64(EmitContext& ctx, Id value); +Id EmitFPClamp16(EmitContext& ctx, Id value, Id min_value, Id max_value); +Id EmitFPClamp32(EmitContext& ctx, Id value, Id min_value, Id max_value); +Id EmitFPClamp64(EmitContext& ctx, Id value, Id min_value, Id max_value); +Id EmitFPRoundEven16(EmitContext& ctx, Id value); +Id EmitFPRoundEven32(EmitContext& ctx, Id value); +Id EmitFPRoundEven64(EmitContext& ctx, Id value); +Id EmitFPFloor16(EmitContext& ctx, Id value); +Id EmitFPFloor32(EmitContext& ctx, Id value); +Id EmitFPFloor64(EmitContext& ctx, Id value); +Id EmitFPCeil16(EmitContext& ctx, Id value); +Id EmitFPCeil32(EmitContext& ctx, Id value); +Id EmitFPCeil64(EmitContext& ctx, Id value); +Id EmitFPTrunc16(EmitContext& ctx, Id value); +Id EmitFPTrunc32(EmitContext& ctx, Id value); +Id EmitFPTrunc64(EmitContext& ctx, Id value); +Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdNotEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdNotEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdNotEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordNotEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordNotEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordNotEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThan16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThan32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThan64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThan16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThan32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThan64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThan16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThan32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThan64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThan16(EmitContext& ctx, Id lhs, Id rhs); +Id 
EmitFPUnordGreaterThan32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThan64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPIsNan16(EmitContext& ctx, Id value); +Id EmitFPIsNan32(EmitContext& ctx, Id value); +Id EmitFPIsNan64(EmitContext& ctx, Id value); +Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitIAdd64(EmitContext& ctx, Id a, Id b); +Id EmitISub32(EmitContext& ctx, Id a, Id b); +Id EmitISub64(EmitContext& ctx, Id a, Id b); +Id EmitIMul32(EmitContext& ctx, Id a, Id b); +Id EmitINeg32(EmitContext& ctx, Id value); +Id EmitINeg64(EmitContext& ctx, Id value); +Id EmitIAbs32(EmitContext& ctx, Id value); +Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift); +Id EmitShiftLeftLogical64(EmitContext& ctx, Id base, Id shift); +Id EmitShiftRightLogical32(EmitContext& ctx, Id base, Id shift); +Id EmitShiftRightLogical64(EmitContext& ctx, Id base, Id shift); +Id EmitShiftRightArithmetic32(EmitContext& ctx, Id base, Id shift); +Id EmitShiftRightArithmetic64(EmitContext& ctx, Id base, Id shift); +Id EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count); +Id EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count); +Id EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count); +Id EmitBitReverse32(EmitContext& ctx, Id value); +Id EmitBitCount32(EmitContext& ctx, Id value); +Id EmitBitwiseNot32(EmitContext& ctx, Id value); +Id EmitFindSMsb32(EmitContext& ctx, Id value); +Id EmitFindUMsb32(EmitContext& ctx, Id value); +Id EmitSMin32(EmitContext& ctx, Id a, Id b); +Id EmitUMin32(EmitContext& ctx, Id a, Id b); +Id EmitSMax32(EmitContext& ctx, Id a, Id b); +Id EmitUMax32(EmitContext& ctx, Id a, Id b); +Id EmitSClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max); +Id EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max); +Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs); +Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs); +Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs); +Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs); +Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id pointer_offset, Id value); +Id 
EmitSharedAtomicSMin32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicUMin32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicSMax32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicUMax32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicInc32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicDec32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicAnd32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicOr32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicXor32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicExchange32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicExchange64(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const 
IR::Value& offset, + Id value); +Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitGlobalAtomicIAdd32(EmitContext& ctx); +Id EmitGlobalAtomicSMin32(EmitContext& ctx); +Id EmitGlobalAtomicUMin32(EmitContext& ctx); +Id EmitGlobalAtomicSMax32(EmitContext& ctx); +Id EmitGlobalAtomicUMax32(EmitContext& ctx); +Id EmitGlobalAtomicInc32(EmitContext& ctx); +Id EmitGlobalAtomicDec32(EmitContext& ctx); +Id EmitGlobalAtomicAnd32(EmitContext& ctx); +Id EmitGlobalAtomicOr32(EmitContext& ctx); +Id EmitGlobalAtomicXor32(EmitContext& ctx); +Id EmitGlobalAtomicExchange32(EmitContext& ctx); +Id EmitGlobalAtomicIAdd64(EmitContext& ctx); +Id EmitGlobalAtomicSMin64(EmitContext& ctx); +Id EmitGlobalAtomicUMin64(EmitContext& ctx); +Id EmitGlobalAtomicSMax64(EmitContext& ctx); +Id EmitGlobalAtomicUMax64(EmitContext& ctx); +Id EmitGlobalAtomicInc64(EmitContext& ctx); +Id EmitGlobalAtomicDec64(EmitContext& ctx); +Id EmitGlobalAtomicAnd64(EmitContext& ctx); +Id EmitGlobalAtomicOr64(EmitContext& ctx); +Id EmitGlobalAtomicXor64(EmitContext& ctx); +Id EmitGlobalAtomicExchange64(EmitContext& ctx); +Id EmitGlobalAtomicAddF32(EmitContext& ctx); +Id EmitGlobalAtomicAddF16x2(EmitContext& ctx); +Id EmitGlobalAtomicAddF32x2(EmitContext& ctx); +Id EmitGlobalAtomicMinF16x2(EmitContext& ctx); +Id EmitGlobalAtomicMinF32x2(EmitContext& ctx); +Id EmitGlobalAtomicMaxF16x2(EmitContext& ctx); +Id EmitGlobalAtomicMaxF32x2(EmitContext& ctx); +Id EmitLogicalOr(EmitContext& ctx, Id a, Id b); +Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b); +Id EmitLogicalXor(EmitContext& ctx, Id a, Id b); +Id EmitLogicalNot(EmitContext& ctx, Id value); +Id EmitConvertS16F16(EmitContext& ctx, Id value); +Id EmitConvertS16F32(EmitContext& ctx, Id value); +Id EmitConvertS16F64(EmitContext& ctx, Id value); +Id EmitConvertS32F16(EmitContext& ctx, Id value); +Id EmitConvertS32F32(EmitContext& ctx, Id value); +Id EmitConvertS32F64(EmitContext& ctx, Id value); +Id EmitConvertS64F16(EmitContext& ctx, Id value); +Id EmitConvertS64F32(EmitContext& ctx, Id value); +Id EmitConvertS64F64(EmitContext& ctx, Id value); +Id EmitConvertU16F16(EmitContext& ctx, Id value); +Id EmitConvertU16F32(EmitContext& ctx, Id value); +Id EmitConvertU16F64(EmitContext& ctx, Id value); +Id EmitConvertU32F16(EmitContext& ctx, Id value); +Id EmitConvertU32F32(EmitContext& ctx, Id value); +Id EmitConvertU32F64(EmitContext& ctx, Id value); +Id EmitConvertU64F16(EmitContext& ctx, Id value); +Id EmitConvertU64F32(EmitContext& ctx, Id value); +Id EmitConvertU64F64(EmitContext& ctx, Id value); +Id EmitConvertU64U32(EmitContext& ctx, Id value); +Id EmitConvertU32U64(EmitContext& ctx, Id value); +Id EmitConvertF16F32(EmitContext& ctx, Id value); +Id EmitConvertF32F16(EmitContext& ctx, Id value); +Id EmitConvertF32F64(EmitContext& ctx, Id value); +Id EmitConvertF64F32(EmitContext& ctx, Id value); +Id EmitConvertF16S8(EmitContext& ctx, Id value); +Id EmitConvertF16S16(EmitContext& ctx, Id value); +Id EmitConvertF16S32(EmitContext& ctx, Id value); +Id EmitConvertF16S64(EmitContext& ctx, Id value); +Id EmitConvertF16U8(EmitContext& ctx, Id value); +Id EmitConvertF16U16(EmitContext& ctx, Id value); +Id EmitConvertF16U32(EmitContext& ctx, Id value); +Id EmitConvertF16U64(EmitContext& ctx, Id value); +Id EmitConvertF32S8(EmitContext& ctx, Id value); +Id EmitConvertF32S16(EmitContext& ctx, Id value); +Id 
EmitConvertF32S32(EmitContext& ctx, Id value); +Id EmitConvertF32S64(EmitContext& ctx, Id value); +Id EmitConvertF32U8(EmitContext& ctx, Id value); +Id EmitConvertF32U16(EmitContext& ctx, Id value); +Id EmitConvertF32U32(EmitContext& ctx, Id value); +Id EmitConvertF32U64(EmitContext& ctx, Id value); +Id EmitConvertF64S8(EmitContext& ctx, Id value); +Id EmitConvertF64S16(EmitContext& ctx, Id value); +Id EmitConvertF64S32(EmitContext& ctx, Id value); +Id EmitConvertF64S64(EmitContext& ctx, Id value); +Id EmitConvertF64U8(EmitContext& ctx, Id value); +Id EmitConvertF64U16(EmitContext& ctx, Id value); +Id EmitConvertF64U32(EmitContext& ctx, Id value); +Id EmitConvertF64U64(EmitContext& ctx, Id value); +Id EmitBindlessImageSampleImplicitLod(EmitContext&); +Id EmitBindlessImageSampleExplicitLod(EmitContext&); +Id EmitBindlessImageSampleDrefImplicitLod(EmitContext&); +Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&); +Id EmitBindlessImageGather(EmitContext&); +Id EmitBindlessImageGatherDref(EmitContext&); +Id EmitBindlessImageFetch(EmitContext&); +Id EmitBindlessImageQueryDimensions(EmitContext&); +Id EmitBindlessImageQueryLod(EmitContext&); +Id EmitBindlessImageGradient(EmitContext&); +Id EmitBindlessImageRead(EmitContext&); +Id EmitBindlessImageWrite(EmitContext&); +Id EmitBoundImageSampleImplicitLod(EmitContext&); +Id EmitBoundImageSampleExplicitLod(EmitContext&); +Id EmitBoundImageSampleDrefImplicitLod(EmitContext&); +Id EmitBoundImageSampleDrefExplicitLod(EmitContext&); +Id EmitBoundImageGather(EmitContext&); +Id EmitBoundImageGatherDref(EmitContext&); +Id EmitBoundImageFetch(EmitContext&); +Id EmitBoundImageQueryDimensions(EmitContext&); +Id EmitBoundImageQueryLod(EmitContext&); +Id EmitBoundImageGradient(EmitContext&); +Id EmitBoundImageRead(EmitContext&); +Id EmitBoundImageWrite(EmitContext&); +Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id bias_lc, const IR::Value& offset); +Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id lod, const IR::Value& offset); +Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + Id coords, Id dref, Id bias_lc, const IR::Value& offset); +Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + Id coords, Id dref, Id lod, const IR::Value& offset); +Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + const IR::Value& offset, const IR::Value& offset2); +Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + const IR::Value& offset, const IR::Value& offset2, Id dref); +Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, + Id lod, Id ms); +Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod); +Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); +Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id derivates, Id offset, Id lod_clamp); +Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); +void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color); +Id EmitBindlessImageAtomicIAdd32(EmitContext&); +Id EmitBindlessImageAtomicSMin32(EmitContext&); +Id EmitBindlessImageAtomicUMin32(EmitContext&); +Id EmitBindlessImageAtomicSMax32(EmitContext&); +Id 
EmitBindlessImageAtomicUMax32(EmitContext&); +Id EmitBindlessImageAtomicInc32(EmitContext&); +Id EmitBindlessImageAtomicDec32(EmitContext&); +Id EmitBindlessImageAtomicAnd32(EmitContext&); +Id EmitBindlessImageAtomicOr32(EmitContext&); +Id EmitBindlessImageAtomicXor32(EmitContext&); +Id EmitBindlessImageAtomicExchange32(EmitContext&); +Id EmitBoundImageAtomicIAdd32(EmitContext&); +Id EmitBoundImageAtomicSMin32(EmitContext&); +Id EmitBoundImageAtomicUMin32(EmitContext&); +Id EmitBoundImageAtomicSMax32(EmitContext&); +Id EmitBoundImageAtomicUMax32(EmitContext&); +Id EmitBoundImageAtomicInc32(EmitContext&); +Id EmitBoundImageAtomicDec32(EmitContext&); +Id EmitBoundImageAtomicAnd32(EmitContext&); +Id EmitBoundImageAtomicOr32(EmitContext&); +Id EmitBoundImageAtomicXor32(EmitContext&); +Id EmitBoundImageAtomicExchange32(EmitContext&); +Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicInc32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicDec32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitLaneId(EmitContext& ctx); +Id EmitVoteAll(EmitContext& ctx, Id pred); +Id EmitVoteAny(EmitContext& ctx, Id pred); +Id EmitVoteEqual(EmitContext& ctx, Id pred); +Id EmitSubgroupBallot(EmitContext& ctx, Id pred); +Id EmitSubgroupEqMask(EmitContext& ctx); +Id EmitSubgroupLtMask(EmitContext& ctx); +Id EmitSubgroupLeMask(EmitContext& ctx); +Id EmitSubgroupGtMask(EmitContext& ctx); +Id EmitSubgroupGeMask(EmitContext& ctx); +Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask); +Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask); +Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask); +Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask); +Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle); +Id EmitDPdxFine(EmitContext& ctx, Id op_a); +Id EmitDPdyFine(EmitContext& ctx, Id op_a); +Id EmitDPdxCoarse(EmitContext& ctx, Id op_a); +Id EmitDPdyCoarse(EmitContext& ctx, Id op_a); + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp new file mode 100644 index 000000000..3501d7495 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -0,0 +1,270 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version 
+// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" + +namespace Shader::Backend::SPIRV { +namespace { +void SetZeroFlag(EmitContext& ctx, IR::Inst* inst, Id result) { + IR::Inst* const zero{inst->GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp)}; + if (!zero) { + return; + } + zero->SetDefinition(ctx.OpIEqual(ctx.U1, result, ctx.u32_zero_value)); + zero->Invalidate(); +} + +void SetSignFlag(EmitContext& ctx, IR::Inst* inst, Id result) { + IR::Inst* const sign{inst->GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp)}; + if (!sign) { + return; + } + sign->SetDefinition(ctx.OpSLessThan(ctx.U1, result, ctx.u32_zero_value)); + sign->Invalidate(); +} +} // Anonymous namespace + +Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + Id result{}; + if (IR::Inst* const carry{inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp)}) { + const Id carry_type{ctx.TypeStruct(ctx.U32[1], ctx.U32[1])}; + const Id carry_result{ctx.OpIAddCarry(carry_type, a, b)}; + result = ctx.OpCompositeExtract(ctx.U32[1], carry_result, 0U); + + const Id carry_value{ctx.OpCompositeExtract(ctx.U32[1], carry_result, 1U)}; + carry->SetDefinition(ctx.OpINotEqual(ctx.U1, carry_value, ctx.u32_zero_value)); + carry->Invalidate(); + } else { + result = ctx.OpIAdd(ctx.U32[1], a, b); + } + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); + if (IR::Inst * overflow{inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp)}) { + // https://stackoverflow.com/questions/55468823/how-to-detect-integer-overflow-in-c + constexpr u32 s32_max{static_cast<u32>(std::numeric_limits<s32>::max())}; + const Id is_positive{ctx.OpSGreaterThanEqual(ctx.U1, a, ctx.u32_zero_value)}; + const Id sub_a{ctx.OpISub(ctx.U32[1], ctx.Const(s32_max), a)}; + + const Id positive_test{ctx.OpSGreaterThan(ctx.U1, b, sub_a)}; + const Id negative_test{ctx.OpSLessThan(ctx.U1, b, sub_a)}; + const Id carry_flag{ctx.OpSelect(ctx.U1, is_positive, positive_test, negative_test)}; + overflow->SetDefinition(carry_flag); + overflow->Invalidate(); + } + return result; +} + +Id EmitIAdd64(EmitContext& ctx, Id a, Id b) { + return ctx.OpIAdd(ctx.U64, a, b); +} + +Id EmitISub32(EmitContext& ctx, Id a, Id b) { + return ctx.OpISub(ctx.U32[1], a, b); +} + +Id EmitISub64(EmitContext& ctx, Id a, Id b) { + return ctx.OpISub(ctx.U64, a, b); +} + +Id EmitIMul32(EmitContext& ctx, Id a, Id b) { + return ctx.OpIMul(ctx.U32[1], a, b); +} + +Id EmitINeg32(EmitContext& ctx, Id value) { + return ctx.OpSNegate(ctx.U32[1], value); +} + +Id EmitINeg64(EmitContext& ctx, Id value) { + return ctx.OpSNegate(ctx.U64, value); +} + +Id EmitIAbs32(EmitContext& ctx, Id value) { + return ctx.OpSAbs(ctx.U32[1], value); +} + +Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift) { + return ctx.OpShiftLeftLogical(ctx.U32[1], base, shift); +} + +Id EmitShiftLeftLogical64(EmitContext& ctx, Id base, Id shift) { + return ctx.OpShiftLeftLogical(ctx.U64, base, shift); +} + +Id EmitShiftRightLogical32(EmitContext& ctx, Id base, Id shift) { + return ctx.OpShiftRightLogical(ctx.U32[1], base, shift); +} + +Id EmitShiftRightLogical64(EmitContext& ctx, Id base, Id shift) { + return ctx.OpShiftRightLogical(ctx.U64, base, shift); +} + +Id EmitShiftRightArithmetic32(EmitContext& ctx, Id base, Id shift) { + return ctx.OpShiftRightArithmetic(ctx.U32[1], base, shift); +} + +Id EmitShiftRightArithmetic64(EmitContext& ctx, Id base, Id shift) { + return 
ctx.OpShiftRightArithmetic(ctx.U64, base, shift); +} + +Id EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + const Id result{ctx.OpBitwiseAnd(ctx.U32[1], a, b)}; + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); + return result; +} + +Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + const Id result{ctx.OpBitwiseOr(ctx.U32[1], a, b)}; + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); + return result; +} + +Id EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + const Id result{ctx.OpBitwiseXor(ctx.U32[1], a, b)}; + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); + return result; +} + +Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count) { + return ctx.OpBitFieldInsert(ctx.U32[1], base, insert, offset, count); +} + +Id EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count) { + const Id result{ctx.OpBitFieldSExtract(ctx.U32[1], base, offset, count)}; + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); + return result; +} + +Id EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count) { + const Id result{ctx.OpBitFieldUExtract(ctx.U32[1], base, offset, count)}; + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); + return result; +} + +Id EmitBitReverse32(EmitContext& ctx, Id value) { + return ctx.OpBitReverse(ctx.U32[1], value); +} + +Id EmitBitCount32(EmitContext& ctx, Id value) { + return ctx.OpBitCount(ctx.U32[1], value); +} + +Id EmitBitwiseNot32(EmitContext& ctx, Id value) { + return ctx.OpNot(ctx.U32[1], value); +} + +Id EmitFindSMsb32(EmitContext& ctx, Id value) { + return ctx.OpFindSMsb(ctx.U32[1], value); +} + +Id EmitFindUMsb32(EmitContext& ctx, Id value) { + return ctx.OpFindUMsb(ctx.U32[1], value); +} + +Id EmitSMin32(EmitContext& ctx, Id a, Id b) { + const bool is_broken{ctx.profile.has_broken_signed_operations}; + if (is_broken) { + a = ctx.OpBitcast(ctx.S32[1], a); + b = ctx.OpBitcast(ctx.S32[1], b); + } + const Id result{ctx.OpSMin(ctx.U32[1], a, b)}; + return is_broken ? ctx.OpBitcast(ctx.U32[1], result) : result; +} + +Id EmitUMin32(EmitContext& ctx, Id a, Id b) { + return ctx.OpUMin(ctx.U32[1], a, b); +} + +Id EmitSMax32(EmitContext& ctx, Id a, Id b) { + const bool is_broken{ctx.profile.has_broken_signed_operations}; + if (is_broken) { + a = ctx.OpBitcast(ctx.S32[1], a); + b = ctx.OpBitcast(ctx.S32[1], b); + } + const Id result{ctx.OpSMax(ctx.U32[1], a, b)}; + return is_broken ? 
ctx.OpBitcast(ctx.U32[1], result) : result; +} + +Id EmitUMax32(EmitContext& ctx, Id a, Id b) { + return ctx.OpUMax(ctx.U32[1], a, b); +} + +Id EmitSClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max) { + Id result{}; + if (ctx.profile.has_broken_signed_operations || ctx.profile.has_broken_spirv_clamp) { + value = ctx.OpBitcast(ctx.S32[1], value); + min = ctx.OpBitcast(ctx.S32[1], min); + max = ctx.OpBitcast(ctx.S32[1], max); + if (ctx.profile.has_broken_spirv_clamp) { + result = ctx.OpSMax(ctx.S32[1], ctx.OpSMin(ctx.S32[1], value, max), min); + } else { + result = ctx.OpSClamp(ctx.S32[1], value, min, max); + } + result = ctx.OpBitcast(ctx.U32[1], result); + } else { + result = ctx.OpSClamp(ctx.U32[1], value, min, max); + } + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); + return result; +} + +Id EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max) { + Id result{}; + if (ctx.profile.has_broken_spirv_clamp) { + result = ctx.OpUMax(ctx.U32[1], ctx.OpUMin(ctx.U32[1], value, max), min); + } else { + result = ctx.OpUClamp(ctx.U32[1], value, min, max); + } + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); + return result; +} + +Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpSLessThan(ctx.U1, lhs, rhs); +} + +Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpULessThan(ctx.U1, lhs, rhs); +} + +Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpIEqual(ctx.U1, lhs, rhs); +} + +Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpSLessThanEqual(ctx.U1, lhs, rhs); +} + +Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpULessThanEqual(ctx.U1, lhs, rhs); +} + +Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpSGreaterThan(ctx.U1, lhs, rhs); +} + +Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpUGreaterThan(ctx.U1, lhs, rhs); +} + +Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpINotEqual(ctx.U1, lhs, rhs); +} + +Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpSGreaterThanEqual(ctx.U1, lhs, rhs); +} + +Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpUGreaterThanEqual(ctx.U1, lhs, rhs); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp new file mode 100644 index 000000000..b9a9500fc --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp @@ -0,0 +1,26 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
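The EmitIAdd32 path above (emit_spirv_integer.cpp) resolves the guest's condition codes through pseudo-operations: GetZeroFromOp, GetSignFromOp, GetCarryFromOp and GetOverflowFromOp are rewritten against the add result instead of being emitted as separate guest instructions. Below is a minimal host-side sketch of what the zero, sign and carry flags mean for a wrapping 32-bit add; it is an illustration only, the struct and function names are not part of the patch, and it assumes two's-complement wrap-around, which is what OpIAdd/OpIAddCarry provide.

#include <cstdint>
#include <cstdio>

struct AddFlags {
    std::uint32_t result;
    bool zero;   // GetZeroFromOp:  result == 0
    bool sign;   // GetSignFromOp:  result negative when read as signed
    bool carry;  // GetCarryFromOp: unsigned wrap-around occurred
};

static AddFlags IAdd32(std::uint32_t a, std::uint32_t b) {
    const std::uint32_t result = a + b;                  // wraps modulo 2^32, like OpIAdd
    return AddFlags{
        .result = result,
        .zero = result == 0,                             // OpIEqual(result, 0)
        .sign = static_cast<std::int32_t>(result) < 0,   // OpSLessThan(result, 0)
        .carry = result < a,                             // second member of OpIAddCarry != 0
    };
}

int main() {
    const AddFlags f = IAdd32(0xFFFFFFFFu, 1u);
    std::printf("result=%u zero=%d sign=%d carry=%d\n", f.result, f.zero, f.sign, f.carry);
    return 0;
}

The overflow pseudo-op is handled separately in the patch, built from signed comparisons against std::numeric_limits<s32>::max() per the linked Stack Overflow approach.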
+ +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" + +namespace Shader::Backend::SPIRV { + +Id EmitLogicalOr(EmitContext& ctx, Id a, Id b) { + return ctx.OpLogicalOr(ctx.U1, a, b); +} + +Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b) { + return ctx.OpLogicalAnd(ctx.U1, a, b); +} + +Id EmitLogicalXor(EmitContext& ctx, Id a, Id b) { + return ctx.OpLogicalNotEqual(ctx.U1, a, b); +} + +Id EmitLogicalNot(EmitContext& ctx, Id value) { + return ctx.OpLogicalNot(ctx.U1, value); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp new file mode 100644 index 000000000..679ee2684 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp @@ -0,0 +1,275 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <bit> + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" + +namespace Shader::Backend::SPIRV { +namespace { +Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element_size, + u32 index_offset = 0) { + if (offset.IsImmediate()) { + const u32 imm_offset{static_cast<u32>(offset.U32() / element_size) + index_offset}; + return ctx.Const(imm_offset); + } + const u32 shift{static_cast<u32>(std::countr_zero(element_size))}; + Id index{ctx.Def(offset)}; + if (shift != 0) { + const Id shift_id{ctx.Const(shift)}; + index = ctx.OpShiftRightLogical(ctx.U32[1], index, shift_id); + } + if (index_offset != 0) { + index = ctx.OpIAdd(ctx.U32[1], index, ctx.Const(index_offset)); + } + return index; +} + +Id StoragePointer(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + const StorageTypeDefinition& type_def, size_t element_size, + Id StorageDefinitions::*member_ptr, u32 index_offset = 0) { + if (!binding.IsImmediate()) { + throw NotImplementedException("Dynamic storage buffer indexing"); + } + const Id ssbo{ctx.ssbos[binding.U32()].*member_ptr}; + const Id index{StorageIndex(ctx, offset, element_size, index_offset)}; + return ctx.OpAccessChain(type_def.element, ssbo, ctx.u32_zero_value, index); +} + +Id LoadStorage(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id result_type, + const StorageTypeDefinition& type_def, size_t element_size, + Id StorageDefinitions::*member_ptr, u32 index_offset = 0) { + const Id pointer{ + StoragePointer(ctx, binding, offset, type_def, element_size, member_ptr, index_offset)}; + return ctx.OpLoad(result_type, pointer); +} + +Id LoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + u32 index_offset = 0) { + return LoadStorage(ctx, binding, offset, ctx.U32[1], ctx.storage_types.U32, sizeof(u32), + &StorageDefinitions::U32, index_offset); +} + +void WriteStorage(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value, + const StorageTypeDefinition& type_def, size_t element_size, + Id StorageDefinitions::*member_ptr, u32 index_offset = 0) { + const Id pointer{ + StoragePointer(ctx, binding, offset, type_def, element_size, member_ptr, index_offset)}; + ctx.OpStore(pointer, value); +} + +void WriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value, + u32 index_offset = 0) { + WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32, sizeof(u32), + 
&StorageDefinitions::U32, index_offset); +} +} // Anonymous namespace + +void EmitLoadGlobalU8(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitLoadGlobalS8(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitLoadGlobalU16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitLoadGlobalS16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitLoadGlobal32(EmitContext& ctx, Id address) { + if (ctx.profile.support_int64) { + return ctx.OpFunctionCall(ctx.U32[1], ctx.load_global_func_u32, address); + } + LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); + return ctx.Const(0u); +} + +Id EmitLoadGlobal64(EmitContext& ctx, Id address) { + if (ctx.profile.support_int64) { + return ctx.OpFunctionCall(ctx.U32[2], ctx.load_global_func_u32x2, address); + } + LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); + return ctx.Const(0u, 0u); +} + +Id EmitLoadGlobal128(EmitContext& ctx, Id address) { + if (ctx.profile.support_int64) { + return ctx.OpFunctionCall(ctx.U32[4], ctx.load_global_func_u32x4, address); + } + LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); + return ctx.Const(0u, 0u, 0u, 0u); +} + +void EmitWriteGlobalU8(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitWriteGlobalS8(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitWriteGlobalU16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitWriteGlobalS16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitWriteGlobal32(EmitContext& ctx, Id address, Id value) { + if (ctx.profile.support_int64) { + ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32, address, value); + return; + } + LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); +} + +void EmitWriteGlobal64(EmitContext& ctx, Id address, Id value) { + if (ctx.profile.support_int64) { + ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32x2, address, value); + return; + } + LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); +} + +void EmitWriteGlobal128(EmitContext& ctx, Id address, Id value) { + if (ctx.profile.support_int64) { + ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32x4, address, value); + return; + } + LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); +} + +Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + if (ctx.profile.support_int8 && ctx.profile.support_descriptor_aliasing) { + return ctx.OpUConvert(ctx.U32[1], + LoadStorage(ctx, binding, offset, ctx.U8, ctx.storage_types.U8, + sizeof(u8), &StorageDefinitions::U8)); + } else { + return ctx.OpBitFieldUExtract(ctx.U32[1], LoadStorage32(ctx, binding, offset), + ctx.BitOffset8(offset), ctx.Const(8u)); + } +} + +Id EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + if (ctx.profile.support_int8 && ctx.profile.support_descriptor_aliasing) { + return ctx.OpSConvert(ctx.U32[1], + LoadStorage(ctx, binding, offset, ctx.S8, ctx.storage_types.S8, + sizeof(s8), &StorageDefinitions::S8)); + } else { + return ctx.OpBitFieldSExtract(ctx.U32[1], LoadStorage32(ctx, binding, offset), + ctx.BitOffset8(offset), ctx.Const(8u)); + } +} + +Id EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& 
offset) { + if (ctx.profile.support_int16 && ctx.profile.support_descriptor_aliasing) { + return ctx.OpUConvert(ctx.U32[1], + LoadStorage(ctx, binding, offset, ctx.U16, ctx.storage_types.U16, + sizeof(u16), &StorageDefinitions::U16)); + } else { + return ctx.OpBitFieldUExtract(ctx.U32[1], LoadStorage32(ctx, binding, offset), + ctx.BitOffset16(offset), ctx.Const(16u)); + } +} + +Id EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + if (ctx.profile.support_int16 && ctx.profile.support_descriptor_aliasing) { + return ctx.OpSConvert(ctx.U32[1], + LoadStorage(ctx, binding, offset, ctx.S16, ctx.storage_types.S16, + sizeof(s16), &StorageDefinitions::S16)); + } else { + return ctx.OpBitFieldSExtract(ctx.U32[1], LoadStorage32(ctx, binding, offset), + ctx.BitOffset16(offset), ctx.Const(16u)); + } +} + +Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + return LoadStorage32(ctx, binding, offset); +} + +Id EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + if (ctx.profile.support_descriptor_aliasing) { + return LoadStorage(ctx, binding, offset, ctx.U32[2], ctx.storage_types.U32x2, + sizeof(u32[2]), &StorageDefinitions::U32x2); + } else { + return ctx.OpCompositeConstruct(ctx.U32[2], LoadStorage32(ctx, binding, offset, 0), + LoadStorage32(ctx, binding, offset, 1)); + } +} + +Id EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + if (ctx.profile.support_descriptor_aliasing) { + return LoadStorage(ctx, binding, offset, ctx.U32[4], ctx.storage_types.U32x4, + sizeof(u32[4]), &StorageDefinitions::U32x4); + } else { + return ctx.OpCompositeConstruct(ctx.U32[4], LoadStorage32(ctx, binding, offset, 0), + LoadStorage32(ctx, binding, offset, 1), + LoadStorage32(ctx, binding, offset, 2), + LoadStorage32(ctx, binding, offset, 3)); + } +} + +void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.U8, value), ctx.storage_types.U8, + sizeof(u8), &StorageDefinitions::U8); +} + +void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.S8, value), ctx.storage_types.S8, + sizeof(s8), &StorageDefinitions::S8); +} + +void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.U16, value), ctx.storage_types.U16, + sizeof(u16), &StorageDefinitions::U16); +} + +void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.S16, value), ctx.storage_types.S16, + sizeof(s16), &StorageDefinitions::S16); +} + +void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + WriteStorage32(ctx, binding, offset, value); +} + +void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + if (ctx.profile.support_descriptor_aliasing) { + WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32x2, sizeof(u32[2]), + &StorageDefinitions::U32x2); + } else { + for (u32 index = 0; index < 2; ++index) { + const Id element{ctx.OpCompositeExtract(ctx.U32[1], value, index)}; + WriteStorage32(ctx, binding, offset, element, index); + } + } +} + +void 
EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + if (ctx.profile.support_descriptor_aliasing) { + WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32x4, sizeof(u32[4]), + &StorageDefinitions::U32x4); + } else { + for (u32 index = 0; index < 4; ++index) { + const Id element{ctx.OpCompositeExtract(ctx.U32[1], value, index)}; + WriteStorage32(ctx, binding, offset, element, index); + } + } +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp new file mode 100644 index 000000000..c5b4f4720 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp @@ -0,0 +1,42 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" + +namespace Shader::Backend::SPIRV { + +Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value) { + return ctx.OpSelect(ctx.U1, cond, true_value, false_value); +} + +Id EmitSelectU8(EmitContext&, Id, Id, Id) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value) { + return ctx.OpSelect(ctx.U16, cond, true_value, false_value); +} + +Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value) { + return ctx.OpSelect(ctx.U32[1], cond, true_value, false_value); +} + +Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value) { + return ctx.OpSelect(ctx.U64, cond, true_value, false_value); +} + +Id EmitSelectF16(EmitContext& ctx, Id cond, Id true_value, Id false_value) { + return ctx.OpSelect(ctx.F16[1], cond, true_value, false_value); +} + +Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value) { + return ctx.OpSelect(ctx.F32[1], cond, true_value, false_value); +} + +Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value) { + return ctx.OpSelect(ctx.F64[1], cond, true_value, false_value); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp new file mode 100644 index 000000000..9a79fc7a2 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp @@ -0,0 +1,174 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
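EmitLoadStorageU8/S8 and U16/S16 above take two routes: when support_int8/support_int16 and descriptor aliasing are available they read through a matching 8- or 16-bit SSBO view and convert, otherwise they fetch the containing 32-bit word (LoadStorage32, whose index comes from StorageIndex dividing the byte offset by the element size) and bit-extract the value. The host-side sketch below illustrates that fallback; it assumes ctx.BitOffset8 evaluates to (offset % 4) * 8 (that helper is not shown in this excerpt) and little-endian byte order, and the function names are illustrative only.

#include <cstdint>
#include <cstdio>
#include <vector>

// Fallback byte load from an SSBO that is only visible as an array of u32 words.
static std::uint32_t LoadStorageU8(const std::vector<std::uint32_t>& ssbo, std::uint32_t byte_offset) {
    const std::uint32_t word = ssbo[byte_offset / 4];  // StorageIndex: offset >> countr_zero(sizeof(u32))
    const std::uint32_t bit = (byte_offset % 4) * 8;   // assumed meaning of ctx.BitOffset8(offset)
    return (word >> bit) & 0xFFu;                      // OpBitFieldUExtract(word, bit, 8)
}

static std::int32_t LoadStorageS8(const std::vector<std::uint32_t>& ssbo, std::uint32_t byte_offset) {
    return static_cast<std::int8_t>(LoadStorageU8(ssbo, byte_offset)); // OpBitFieldSExtract sign-extends
}

int main() {
    const std::vector<std::uint32_t> ssbo{0x80402010u};
    std::printf("%u %d\n", LoadStorageU8(ssbo, 3), LoadStorageS8(ssbo, 3)); // prints 128 -128
    return 0;
}

The wide accesses mirror the same decision: EmitLoadStorage64/128 and EmitWriteStorage64/128 either use a u32x2/u32x4 view directly or stitch the vector together from successive LoadStorage32/WriteStorage32 calls with index_offset 0..3.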
+ +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" + +namespace Shader::Backend::SPIRV { +namespace { +Id Pointer(EmitContext& ctx, Id pointer_type, Id array, Id offset, u32 shift) { + const Id shift_id{ctx.Const(shift)}; + const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + return ctx.OpAccessChain(pointer_type, array, ctx.u32_zero_value, index); +} + +Id Word(EmitContext& ctx, Id offset) { + const Id shift_id{ctx.Const(2U)}; + const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; + return ctx.OpLoad(ctx.U32[1], pointer); +} + +std::pair<Id, Id> ExtractArgs(EmitContext& ctx, Id offset, u32 mask, u32 count) { + const Id shift{ctx.OpShiftLeftLogical(ctx.U32[1], offset, ctx.Const(3U))}; + const Id bit{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.Const(mask))}; + const Id count_id{ctx.Const(count)}; + return {bit, count_id}; +} +} // Anonymous namespace + +Id EmitLoadSharedU8(EmitContext& ctx, Id offset) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{ + ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)}; + return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer)); + } else { + const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)}; + return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count); + } +} + +Id EmitLoadSharedS8(EmitContext& ctx, Id offset) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{ + ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)}; + return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer)); + } else { + const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)}; + return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count); + } +} + +Id EmitLoadSharedU16(EmitContext& ctx, Id offset) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)}; + return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer)); + } else { + const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)}; + return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count); + } +} + +Id EmitLoadSharedS16(EmitContext& ctx, Id offset) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)}; + return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer)); + } else { + const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)}; + return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count); + } +} + +Id EmitLoadSharedU32(EmitContext& ctx, Id offset) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2)}; + return ctx.OpLoad(ctx.U32[1], pointer); + } else { + return Word(ctx, offset); + } +} + +Id EmitLoadSharedU64(EmitContext& ctx, Id offset) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)}; + return ctx.OpLoad(ctx.U32[2], pointer); + } else { + const Id shift_id{ctx.Const(2U)}; + const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.Const(1U))}; + const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, 
ctx.shared_memory_u32, base_index)}; + const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)}; + return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer), + ctx.OpLoad(ctx.U32[1], rhs_pointer)); + } +} + +Id EmitLoadSharedU128(EmitContext& ctx, Id offset) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)}; + return ctx.OpLoad(ctx.U32[4], pointer); + } + const Id shift_id{ctx.Const(2U)}; + const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + std::array<Id, 4> values{}; + for (u32 i = 0; i < 4; ++i) { + const Id index{i == 0 ? base_index : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Const(i))}; + const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; + values[i] = ctx.OpLoad(ctx.U32[1], pointer); + } + return ctx.OpCompositeConstruct(ctx.U32[4], values); +} + +void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{ + ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)}; + ctx.OpStore(pointer, ctx.OpUConvert(ctx.U8, value)); + } else { + ctx.OpFunctionCall(ctx.void_id, ctx.shared_store_u8_func, offset, value); + } +} + +void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)}; + ctx.OpStore(pointer, ctx.OpUConvert(ctx.U16, value)); + } else { + ctx.OpFunctionCall(ctx.void_id, ctx.shared_store_u16_func, offset, value); + } +} + +void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) { + Id pointer{}; + if (ctx.profile.support_explicit_workgroup_layout) { + pointer = Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2); + } else { + const Id shift{ctx.Const(2U)}; + const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; + pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset); + } + ctx.OpStore(pointer, value); +} + +void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)}; + ctx.OpStore(pointer, value); + return; + } + const Id shift{ctx.Const(2U)}; + const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; + const Id next_offset{ctx.OpIAdd(ctx.U32[1], word_offset, ctx.Const(1U))}; + const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset)}; + const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_offset)}; + ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U)); + ctx.OpStore(rhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U)); +} + +void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)}; + ctx.OpStore(pointer, value); + return; + } + const Id shift{ctx.Const(2U)}; + const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; + for (u32 i = 0; i < 4; ++i) { + const Id index{i == 0 ? 
base_index : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Const(i))}; + const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; + ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, i)); + } +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp new file mode 100644 index 000000000..9e7eb3cb1 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp @@ -0,0 +1,150 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" + +namespace Shader::Backend::SPIRV { +namespace { +void ConvertDepthMode(EmitContext& ctx) { + const Id type{ctx.F32[1]}; + const Id position{ctx.OpLoad(ctx.F32[4], ctx.output_position)}; + const Id z{ctx.OpCompositeExtract(type, position, 2u)}; + const Id w{ctx.OpCompositeExtract(type, position, 3u)}; + const Id screen_depth{ctx.OpFMul(type, ctx.OpFAdd(type, z, w), ctx.Constant(type, 0.5f))}; + const Id vector{ctx.OpCompositeInsert(ctx.F32[4], screen_depth, position, 2u)}; + ctx.OpStore(ctx.output_position, vector); +} + +void SetFixedPipelinePointSize(EmitContext& ctx) { + if (ctx.runtime_info.fixed_state_point_size) { + const float point_size{*ctx.runtime_info.fixed_state_point_size}; + ctx.OpStore(ctx.output_point_size, ctx.Const(point_size)); + } +} + +Id DefaultVarying(EmitContext& ctx, u32 num_components, u32 element, Id zero, Id one, + Id default_vector) { + switch (num_components) { + case 1: + return element == 3 ? one : zero; + case 2: + return ctx.ConstantComposite(ctx.F32[2], zero, element + 1 == 3 ? one : zero); + case 3: + return ctx.ConstantComposite(ctx.F32[3], zero, zero, element + 2 == 3 ? 
one : zero); + case 4: + return default_vector; + } + throw InvalidArgument("Bad element"); +} + +Id ComparisonFunction(EmitContext& ctx, CompareFunction comparison, Id operand_1, Id operand_2) { + switch (comparison) { + case CompareFunction::Never: + return ctx.false_value; + case CompareFunction::Less: + return ctx.OpFOrdLessThan(ctx.U1, operand_1, operand_2); + case CompareFunction::Equal: + return ctx.OpFOrdEqual(ctx.U1, operand_1, operand_2); + case CompareFunction::LessThanEqual: + return ctx.OpFOrdLessThanEqual(ctx.U1, operand_1, operand_2); + case CompareFunction::Greater: + return ctx.OpFOrdGreaterThan(ctx.U1, operand_1, operand_2); + case CompareFunction::NotEqual: + return ctx.OpFOrdNotEqual(ctx.U1, operand_1, operand_2); + case CompareFunction::GreaterThanEqual: + return ctx.OpFOrdGreaterThanEqual(ctx.U1, operand_1, operand_2); + case CompareFunction::Always: + return ctx.true_value; + } + throw InvalidArgument("Comparison function {}", comparison); +} + +void AlphaTest(EmitContext& ctx) { + if (!ctx.runtime_info.alpha_test_func) { + return; + } + const auto comparison{*ctx.runtime_info.alpha_test_func}; + if (comparison == CompareFunction::Always) { + return; + } + if (!Sirit::ValidId(ctx.frag_color[0])) { + return; + } + + const Id type{ctx.F32[1]}; + const Id rt0_color{ctx.OpLoad(ctx.F32[4], ctx.frag_color[0])}; + const Id alpha{ctx.OpCompositeExtract(type, rt0_color, 3u)}; + + const Id true_label{ctx.OpLabel()}; + const Id discard_label{ctx.OpLabel()}; + const Id alpha_reference{ctx.Const(ctx.runtime_info.alpha_test_reference)}; + const Id condition{ComparisonFunction(ctx, comparison, alpha, alpha_reference)}; + + ctx.OpSelectionMerge(true_label, spv::SelectionControlMask::MaskNone); + ctx.OpBranchConditional(condition, true_label, discard_label); + ctx.AddLabel(discard_label); + ctx.OpKill(); + ctx.AddLabel(true_label); +} +} // Anonymous namespace + +void EmitPrologue(EmitContext& ctx) { + if (ctx.stage == Stage::VertexB) { + const Id zero{ctx.Const(0.0f)}; + const Id one{ctx.Const(1.0f)}; + const Id default_vector{ctx.ConstantComposite(ctx.F32[4], zero, zero, zero, one)}; + ctx.OpStore(ctx.output_position, default_vector); + for (const auto& info : ctx.output_generics) { + if (info[0].num_components == 0) { + continue; + } + u32 element{0}; + while (element < 4) { + const auto& element_info{info[element]}; + const u32 num{element_info.num_components}; + const Id value{DefaultVarying(ctx, num, element, zero, one, default_vector)}; + ctx.OpStore(element_info.id, value); + element += num; + } + } + } + if (ctx.stage == Stage::VertexB || ctx.stage == Stage::Geometry) { + SetFixedPipelinePointSize(ctx); + } +} + +void EmitEpilogue(EmitContext& ctx) { + if (ctx.stage == Stage::VertexB && ctx.runtime_info.convert_depth_mode) { + ConvertDepthMode(ctx); + } + if (ctx.stage == Stage::Fragment) { + AlphaTest(ctx); + } +} + +void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { + if (ctx.runtime_info.convert_depth_mode) { + ConvertDepthMode(ctx); + } + if (stream.IsImmediate()) { + ctx.OpEmitStreamVertex(ctx.Def(stream)); + } else { + LOG_WARNING(Shader_SPIRV, "Stream is not immediate"); + ctx.OpEmitStreamVertex(ctx.u32_zero_value); + } + // Restore fixed pipeline point size after emitting the vertex + SetFixedPipelinePointSize(ctx); +} + +void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { + if (stream.IsImmediate()) { + ctx.OpEndStreamPrimitive(ctx.Def(stream)); + } else { + LOG_WARNING(Shader_SPIRV, "Stream is not immediate"); + 
ctx.OpEndStreamPrimitive(ctx.u32_zero_value); + } +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp new file mode 100644 index 000000000..c9f469e90 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp @@ -0,0 +1,30 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" + +namespace Shader::Backend::SPIRV { + +Id EmitUndefU1(EmitContext& ctx) { + return ctx.OpUndef(ctx.U1); +} + +Id EmitUndefU8(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitUndefU16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitUndefU32(EmitContext& ctx) { + return ctx.OpUndef(ctx.U32[1]); +} + +Id EmitUndefU64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp new file mode 100644 index 000000000..78b1e1ba7 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp @@ -0,0 +1,203 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" + +namespace Shader::Backend::SPIRV { +namespace { +Id WarpExtract(EmitContext& ctx, Id value) { + const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index); +} + +Id LoadMask(EmitContext& ctx, Id mask) { + const Id value{ctx.OpLoad(ctx.U32[4], mask)}; + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + return ctx.OpCompositeExtract(ctx.U32[1], value, 0U); + } + return WarpExtract(ctx, value); +} + +void SetInBoundsFlag(IR::Inst* inst, Id result) { + IR::Inst* const in_bounds{inst->GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)}; + if (!in_bounds) { + return; + } + in_bounds->SetDefinition(result); + in_bounds->Invalidate(); +} + +Id ComputeMinThreadId(EmitContext& ctx, Id thread_id, Id segmentation_mask) { + return ctx.OpBitwiseAnd(ctx.U32[1], thread_id, segmentation_mask); +} + +Id ComputeMaxThreadId(EmitContext& ctx, Id min_thread_id, Id clamp, Id not_seg_mask) { + return ctx.OpBitwiseOr(ctx.U32[1], min_thread_id, + ctx.OpBitwiseAnd(ctx.U32[1], clamp, not_seg_mask)); +} + +Id GetMaxThreadId(EmitContext& ctx, Id thread_id, Id clamp, Id segmentation_mask) { + const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)}; + const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)}; + return ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask); +} + +Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) { + return ctx.OpSelect(ctx.U32[1], in_range, + ctx.OpSubgroupReadInvocationKHR(ctx.U32[1], value, src_thread_id), value); +} +} // Anonymous namespace + +Id EmitLaneId(EmitContext& ctx) { + const Id id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + return id; + } + return ctx.OpBitwiseAnd(ctx.U32[1], id, 
ctx.Const(31U)); +} + +Id EmitVoteAll(EmitContext& ctx, Id pred) { + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + return ctx.OpSubgroupAllKHR(ctx.U1, pred); + } + const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; + const Id active_mask{WarpExtract(ctx, mask_ballot)}; + const Id ballot{WarpExtract(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; + const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)}; + return ctx.OpIEqual(ctx.U1, lhs, active_mask); +} + +Id EmitVoteAny(EmitContext& ctx, Id pred) { + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + return ctx.OpSubgroupAnyKHR(ctx.U1, pred); + } + const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; + const Id active_mask{WarpExtract(ctx, mask_ballot)}; + const Id ballot{WarpExtract(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; + const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)}; + return ctx.OpINotEqual(ctx.U1, lhs, ctx.u32_zero_value); +} + +Id EmitVoteEqual(EmitContext& ctx, Id pred) { + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + return ctx.OpSubgroupAllEqualKHR(ctx.U1, pred); + } + const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; + const Id active_mask{WarpExtract(ctx, mask_ballot)}; + const Id ballot{WarpExtract(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; + const Id lhs{ctx.OpBitwiseXor(ctx.U32[1], ballot, active_mask)}; + return ctx.OpLogicalOr(ctx.U1, ctx.OpIEqual(ctx.U1, lhs, ctx.u32_zero_value), + ctx.OpIEqual(ctx.U1, lhs, active_mask)); +} + +Id EmitSubgroupBallot(EmitContext& ctx, Id pred) { + const Id ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], pred)}; + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + return ctx.OpCompositeExtract(ctx.U32[1], ballot, 0U); + } + return WarpExtract(ctx, ballot); +} + +Id EmitSubgroupEqMask(EmitContext& ctx) { + return LoadMask(ctx, ctx.subgroup_mask_eq); +} + +Id EmitSubgroupLtMask(EmitContext& ctx) { + return LoadMask(ctx, ctx.subgroup_mask_lt); +} + +Id EmitSubgroupLeMask(EmitContext& ctx) { + return LoadMask(ctx, ctx.subgroup_mask_le); +} + +Id EmitSubgroupGtMask(EmitContext& ctx) { + return LoadMask(ctx, ctx.subgroup_mask_gt); +} + +Id EmitSubgroupGeMask(EmitContext& ctx) { + return LoadMask(ctx, ctx.subgroup_mask_ge); +} + +Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask) { + const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)}; + const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)}; + const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)}; + + const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], index, not_seg_mask)}; + const Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)}; + const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; + + SetInBoundsFlag(inst, in_range); + return SelectValue(ctx, in_range, value, src_thread_id); +} + +Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask) { + const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; + const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)}; + const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)}; + + SetInBoundsFlag(inst, in_range); + return 
SelectValue(ctx, in_range, value, src_thread_id); +} + +Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask) { + const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; + const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)}; + const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; + + SetInBoundsFlag(inst, in_range); + return SelectValue(ctx, in_range, value, src_thread_id); +} + +Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask) { + const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; + const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)}; + const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; + + SetInBoundsFlag(inst, in_range); + return SelectValue(ctx, in_range, value, src_thread_id); +} + +Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle) { + const Id three{ctx.Const(3U)}; + Id mask{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + mask = ctx.OpBitwiseAnd(ctx.U32[1], mask, three); + mask = ctx.OpShiftLeftLogical(ctx.U32[1], mask, ctx.Const(1U)); + mask = ctx.OpShiftRightLogical(ctx.U32[1], swizzle, mask); + mask = ctx.OpBitwiseAnd(ctx.U32[1], mask, three); + + const Id modifier_a{ctx.OpVectorExtractDynamic(ctx.F32[1], ctx.fswzadd_lut_a, mask)}; + const Id modifier_b{ctx.OpVectorExtractDynamic(ctx.F32[1], ctx.fswzadd_lut_b, mask)}; + + const Id result_a{ctx.OpFMul(ctx.F32[1], op_a, modifier_a)}; + const Id result_b{ctx.OpFMul(ctx.F32[1], op_b, modifier_b)}; + return ctx.OpFAdd(ctx.F32[1], result_a, result_b); +} + +Id EmitDPdxFine(EmitContext& ctx, Id op_a) { + return ctx.OpDPdxFine(ctx.F32[1], op_a); +} + +Id EmitDPdyFine(EmitContext& ctx, Id op_a) { + return ctx.OpDPdyFine(ctx.F32[1], op_a); +} + +Id EmitDPdxCoarse(EmitContext& ctx, Id op_a) { + return ctx.OpDPdxCoarse(ctx.F32[1], op_a); +} + +Id EmitDPdyCoarse(EmitContext& ctx, Id op_a) { + return ctx.OpDPdyCoarse(ctx.F32[1], op_a); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h new file mode 100644 index 000000000..8369d0d84 --- /dev/null +++ b/src/shader_recompiler/environment.h @@ -0,0 +1,53 @@ +#pragma once + +#include <array> + +#include "common/common_types.h" +#include "shader_recompiler/program_header.h" +#include "shader_recompiler/shader_info.h" +#include "shader_recompiler/stage.h" + +namespace Shader { + +class Environment { +public: + virtual ~Environment() = default; + + [[nodiscard]] virtual u64 ReadInstruction(u32 address) = 0; + + [[nodiscard]] virtual u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) = 0; + + [[nodiscard]] virtual TextureType ReadTextureType(u32 raw_handle) = 0; + + [[nodiscard]] virtual u32 TextureBoundBuffer() const = 0; + + [[nodiscard]] virtual u32 LocalMemorySize() const = 0; + + [[nodiscard]] virtual u32 SharedMemorySize() const = 0; + + [[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() const = 0; + + [[nodiscard]] const ProgramHeader& SPH() const noexcept { + return sph; + } + + [[nodiscard]] const std::array<u32, 8>& GpPassthroughMask() const noexcept { + return gp_passthrough_mask; + } + + [[nodiscard]] Stage ShaderStage() const noexcept { + return stage; + } + + 
[[nodiscard]] u32 StartAddress() const noexcept { + return start_address; + } + +protected: + ProgramHeader sph{}; + std::array<u32, 8> gp_passthrough_mask{}; + Stage stage{}; + u32 start_address{}; +}; + +} // namespace Shader diff --git a/src/shader_recompiler/exception.h b/src/shader_recompiler/exception.h new file mode 100644 index 000000000..337e7f0c8 --- /dev/null +++ b/src/shader_recompiler/exception.h @@ -0,0 +1,66 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <stdexcept> +#include <string> +#include <string_view> +#include <utility> + +#include <fmt/format.h> + +namespace Shader { + +class Exception : public std::exception { +public: + explicit Exception(std::string message) noexcept : err_message{std::move(message)} {} + + const char* what() const noexcept override { + return err_message.c_str(); + } + + void Prepend(std::string_view prepend) { + err_message.insert(0, prepend); + } + + void Append(std::string_view append) { + err_message += append; + } + +private: + std::string err_message; +}; + +class LogicError : public Exception { +public: + template <typename... Args> + LogicError(const char* message, Args&&... args) + : Exception{fmt::format(fmt::runtime(message), std::forward<Args>(args)...)} {} +}; + +class RuntimeError : public Exception { +public: + template <typename... Args> + RuntimeError(const char* message, Args&&... args) + : Exception{fmt::format(fmt::runtime(message), std::forward<Args>(args)...)} {} +}; + +class NotImplementedException : public Exception { +public: + template <typename... Args> + NotImplementedException(const char* message, Args&&... args) + : Exception{fmt::format(fmt::runtime(message), std::forward<Args>(args)...)} { + Append(" is not implemented"); + } +}; + +class InvalidArgument : public Exception { +public: + template <typename... Args> + InvalidArgument(const char* message, Args&&... args) + : Exception{fmt::format(fmt::runtime(message), std::forward<Args>(args)...)} {} +}; + +} // namespace Shader diff --git a/src/shader_recompiler/frontend/ir/abstract_syntax_list.h b/src/shader_recompiler/frontend/ir/abstract_syntax_list.h new file mode 100644 index 000000000..b61773487 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/abstract_syntax_list.h @@ -0,0 +1,58 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <vector> + +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::IR { + +class Block; + +struct AbstractSyntaxNode { + enum class Type { + Block, + If, + EndIf, + Loop, + Repeat, + Break, + Return, + Unreachable, + }; + union Data { + Block* block; + struct { + U1 cond; + Block* body; + Block* merge; + } if_node; + struct { + Block* merge; + } end_if; + struct { + Block* body; + Block* continue_block; + Block* merge; + } loop; + struct { + U1 cond; + Block* loop_header; + Block* merge; + } repeat; + struct { + U1 cond; + Block* merge; + Block* skip; + } break_node; + }; + + Data data{}; + Type type{}; +}; +using AbstractSyntaxList = std::vector<AbstractSyntaxNode>; + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/attribute.cpp b/src/shader_recompiler/frontend/ir/attribute.cpp new file mode 100644 index 000000000..4d0b8b8e5 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/attribute.cpp @@ -0,0 +1,454 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <fmt/format.h> + +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/attribute.h" + +namespace Shader::IR { + +bool IsGeneric(Attribute attribute) noexcept { + return attribute >= Attribute::Generic0X && attribute <= Attribute::Generic31X; +} + +u32 GenericAttributeIndex(Attribute attribute) { + if (!IsGeneric(attribute)) { + throw InvalidArgument("Attribute is not generic {}", attribute); + } + return (static_cast<u32>(attribute) - static_cast<u32>(Attribute::Generic0X)) / 4u; +} + +u32 GenericAttributeElement(Attribute attribute) { + if (!IsGeneric(attribute)) { + throw InvalidArgument("Attribute is not generic {}", attribute); + } + return static_cast<u32>(attribute) % 4; +} + +std::string NameOf(Attribute attribute) { + switch (attribute) { + case Attribute::PrimitiveId: + return "PrimitiveId"; + case Attribute::Layer: + return "Layer"; + case Attribute::ViewportIndex: + return "ViewportIndex"; + case Attribute::PointSize: + return "PointSize"; + case Attribute::PositionX: + return "Position.X"; + case Attribute::PositionY: + return "Position.Y"; + case Attribute::PositionZ: + return "Position.Z"; + case Attribute::PositionW: + return "Position.W"; + case Attribute::Generic0X: + return "Generic[0].X"; + case Attribute::Generic0Y: + return "Generic[0].Y"; + case Attribute::Generic0Z: + return "Generic[0].Z"; + case Attribute::Generic0W: + return "Generic[0].W"; + case Attribute::Generic1X: + return "Generic[1].X"; + case Attribute::Generic1Y: + return "Generic[1].Y"; + case Attribute::Generic1Z: + return "Generic[1].Z"; + case Attribute::Generic1W: + return "Generic[1].W"; + case Attribute::Generic2X: + return "Generic[2].X"; + case Attribute::Generic2Y: + return "Generic[2].Y"; + case Attribute::Generic2Z: + return "Generic[2].Z"; + case Attribute::Generic2W: + return "Generic[2].W"; + case Attribute::Generic3X: + return "Generic[3].X"; + case Attribute::Generic3Y: + return "Generic[3].Y"; + case Attribute::Generic3Z: + return "Generic[3].Z"; + case Attribute::Generic3W: + return "Generic[3].W"; + case Attribute::Generic4X: + return "Generic[4].X"; + case Attribute::Generic4Y: + return "Generic[4].Y"; + case Attribute::Generic4Z: + return "Generic[4].Z"; + case Attribute::Generic4W: + return "Generic[4].W"; + case Attribute::Generic5X: + return "Generic[5].X"; + case Attribute::Generic5Y: + return "Generic[5].Y"; + case Attribute::Generic5Z: 
+ return "Generic[5].Z"; + case Attribute::Generic5W: + return "Generic[5].W"; + case Attribute::Generic6X: + return "Generic[6].X"; + case Attribute::Generic6Y: + return "Generic[6].Y"; + case Attribute::Generic6Z: + return "Generic[6].Z"; + case Attribute::Generic6W: + return "Generic[6].W"; + case Attribute::Generic7X: + return "Generic[7].X"; + case Attribute::Generic7Y: + return "Generic[7].Y"; + case Attribute::Generic7Z: + return "Generic[7].Z"; + case Attribute::Generic7W: + return "Generic[7].W"; + case Attribute::Generic8X: + return "Generic[8].X"; + case Attribute::Generic8Y: + return "Generic[8].Y"; + case Attribute::Generic8Z: + return "Generic[8].Z"; + case Attribute::Generic8W: + return "Generic[8].W"; + case Attribute::Generic9X: + return "Generic[9].X"; + case Attribute::Generic9Y: + return "Generic[9].Y"; + case Attribute::Generic9Z: + return "Generic[9].Z"; + case Attribute::Generic9W: + return "Generic[9].W"; + case Attribute::Generic10X: + return "Generic[10].X"; + case Attribute::Generic10Y: + return "Generic[10].Y"; + case Attribute::Generic10Z: + return "Generic[10].Z"; + case Attribute::Generic10W: + return "Generic[10].W"; + case Attribute::Generic11X: + return "Generic[11].X"; + case Attribute::Generic11Y: + return "Generic[11].Y"; + case Attribute::Generic11Z: + return "Generic[11].Z"; + case Attribute::Generic11W: + return "Generic[11].W"; + case Attribute::Generic12X: + return "Generic[12].X"; + case Attribute::Generic12Y: + return "Generic[12].Y"; + case Attribute::Generic12Z: + return "Generic[12].Z"; + case Attribute::Generic12W: + return "Generic[12].W"; + case Attribute::Generic13X: + return "Generic[13].X"; + case Attribute::Generic13Y: + return "Generic[13].Y"; + case Attribute::Generic13Z: + return "Generic[13].Z"; + case Attribute::Generic13W: + return "Generic[13].W"; + case Attribute::Generic14X: + return "Generic[14].X"; + case Attribute::Generic14Y: + return "Generic[14].Y"; + case Attribute::Generic14Z: + return "Generic[14].Z"; + case Attribute::Generic14W: + return "Generic[14].W"; + case Attribute::Generic15X: + return "Generic[15].X"; + case Attribute::Generic15Y: + return "Generic[15].Y"; + case Attribute::Generic15Z: + return "Generic[15].Z"; + case Attribute::Generic15W: + return "Generic[15].W"; + case Attribute::Generic16X: + return "Generic[16].X"; + case Attribute::Generic16Y: + return "Generic[16].Y"; + case Attribute::Generic16Z: + return "Generic[16].Z"; + case Attribute::Generic16W: + return "Generic[16].W"; + case Attribute::Generic17X: + return "Generic[17].X"; + case Attribute::Generic17Y: + return "Generic[17].Y"; + case Attribute::Generic17Z: + return "Generic[17].Z"; + case Attribute::Generic17W: + return "Generic[17].W"; + case Attribute::Generic18X: + return "Generic[18].X"; + case Attribute::Generic18Y: + return "Generic[18].Y"; + case Attribute::Generic18Z: + return "Generic[18].Z"; + case Attribute::Generic18W: + return "Generic[18].W"; + case Attribute::Generic19X: + return "Generic[19].X"; + case Attribute::Generic19Y: + return "Generic[19].Y"; + case Attribute::Generic19Z: + return "Generic[19].Z"; + case Attribute::Generic19W: + return "Generic[19].W"; + case Attribute::Generic20X: + return "Generic[20].X"; + case Attribute::Generic20Y: + return "Generic[20].Y"; + case Attribute::Generic20Z: + return "Generic[20].Z"; + case Attribute::Generic20W: + return "Generic[20].W"; + case Attribute::Generic21X: + return "Generic[21].X"; + case Attribute::Generic21Y: + return "Generic[21].Y"; + case Attribute::Generic21Z: + 
return "Generic[21].Z"; + case Attribute::Generic21W: + return "Generic[21].W"; + case Attribute::Generic22X: + return "Generic[22].X"; + case Attribute::Generic22Y: + return "Generic[22].Y"; + case Attribute::Generic22Z: + return "Generic[22].Z"; + case Attribute::Generic22W: + return "Generic[22].W"; + case Attribute::Generic23X: + return "Generic[23].X"; + case Attribute::Generic23Y: + return "Generic[23].Y"; + case Attribute::Generic23Z: + return "Generic[23].Z"; + case Attribute::Generic23W: + return "Generic[23].W"; + case Attribute::Generic24X: + return "Generic[24].X"; + case Attribute::Generic24Y: + return "Generic[24].Y"; + case Attribute::Generic24Z: + return "Generic[24].Z"; + case Attribute::Generic24W: + return "Generic[24].W"; + case Attribute::Generic25X: + return "Generic[25].X"; + case Attribute::Generic25Y: + return "Generic[25].Y"; + case Attribute::Generic25Z: + return "Generic[25].Z"; + case Attribute::Generic25W: + return "Generic[25].W"; + case Attribute::Generic26X: + return "Generic[26].X"; + case Attribute::Generic26Y: + return "Generic[26].Y"; + case Attribute::Generic26Z: + return "Generic[26].Z"; + case Attribute::Generic26W: + return "Generic[26].W"; + case Attribute::Generic27X: + return "Generic[27].X"; + case Attribute::Generic27Y: + return "Generic[27].Y"; + case Attribute::Generic27Z: + return "Generic[27].Z"; + case Attribute::Generic27W: + return "Generic[27].W"; + case Attribute::Generic28X: + return "Generic[28].X"; + case Attribute::Generic28Y: + return "Generic[28].Y"; + case Attribute::Generic28Z: + return "Generic[28].Z"; + case Attribute::Generic28W: + return "Generic[28].W"; + case Attribute::Generic29X: + return "Generic[29].X"; + case Attribute::Generic29Y: + return "Generic[29].Y"; + case Attribute::Generic29Z: + return "Generic[29].Z"; + case Attribute::Generic29W: + return "Generic[29].W"; + case Attribute::Generic30X: + return "Generic[30].X"; + case Attribute::Generic30Y: + return "Generic[30].Y"; + case Attribute::Generic30Z: + return "Generic[30].Z"; + case Attribute::Generic30W: + return "Generic[30].W"; + case Attribute::Generic31X: + return "Generic[31].X"; + case Attribute::Generic31Y: + return "Generic[31].Y"; + case Attribute::Generic31Z: + return "Generic[31].Z"; + case Attribute::Generic31W: + return "Generic[31].W"; + case Attribute::ColorFrontDiffuseR: + return "ColorFrontDiffuse.R"; + case Attribute::ColorFrontDiffuseG: + return "ColorFrontDiffuse.G"; + case Attribute::ColorFrontDiffuseB: + return "ColorFrontDiffuse.B"; + case Attribute::ColorFrontDiffuseA: + return "ColorFrontDiffuse.A"; + case Attribute::ColorFrontSpecularR: + return "ColorFrontSpecular.R"; + case Attribute::ColorFrontSpecularG: + return "ColorFrontSpecular.G"; + case Attribute::ColorFrontSpecularB: + return "ColorFrontSpecular.B"; + case Attribute::ColorFrontSpecularA: + return "ColorFrontSpecular.A"; + case Attribute::ColorBackDiffuseR: + return "ColorBackDiffuse.R"; + case Attribute::ColorBackDiffuseG: + return "ColorBackDiffuse.G"; + case Attribute::ColorBackDiffuseB: + return "ColorBackDiffuse.B"; + case Attribute::ColorBackDiffuseA: + return "ColorBackDiffuse.A"; + case Attribute::ColorBackSpecularR: + return "ColorBackSpecular.R"; + case Attribute::ColorBackSpecularG: + return "ColorBackSpecular.G"; + case Attribute::ColorBackSpecularB: + return "ColorBackSpecular.B"; + case Attribute::ColorBackSpecularA: + return "ColorBackSpecular.A"; + case Attribute::ClipDistance0: + return "ClipDistance[0]"; + case Attribute::ClipDistance1: + return 
"ClipDistance[1]"; + case Attribute::ClipDistance2: + return "ClipDistance[2]"; + case Attribute::ClipDistance3: + return "ClipDistance[3]"; + case Attribute::ClipDistance4: + return "ClipDistance[4]"; + case Attribute::ClipDistance5: + return "ClipDistance[5]"; + case Attribute::ClipDistance6: + return "ClipDistance[6]"; + case Attribute::ClipDistance7: + return "ClipDistance[7]"; + case Attribute::PointSpriteS: + return "PointSprite.S"; + case Attribute::PointSpriteT: + return "PointSprite.T"; + case Attribute::FogCoordinate: + return "FogCoordinate"; + case Attribute::TessellationEvaluationPointU: + return "TessellationEvaluationPoint.U"; + case Attribute::TessellationEvaluationPointV: + return "TessellationEvaluationPoint.V"; + case Attribute::InstanceId: + return "InstanceId"; + case Attribute::VertexId: + return "VertexId"; + case Attribute::FixedFncTexture0S: + return "FixedFncTexture[0].S"; + case Attribute::FixedFncTexture0T: + return "FixedFncTexture[0].T"; + case Attribute::FixedFncTexture0R: + return "FixedFncTexture[0].R"; + case Attribute::FixedFncTexture0Q: + return "FixedFncTexture[0].Q"; + case Attribute::FixedFncTexture1S: + return "FixedFncTexture[1].S"; + case Attribute::FixedFncTexture1T: + return "FixedFncTexture[1].T"; + case Attribute::FixedFncTexture1R: + return "FixedFncTexture[1].R"; + case Attribute::FixedFncTexture1Q: + return "FixedFncTexture[1].Q"; + case Attribute::FixedFncTexture2S: + return "FixedFncTexture[2].S"; + case Attribute::FixedFncTexture2T: + return "FixedFncTexture[2].T"; + case Attribute::FixedFncTexture2R: + return "FixedFncTexture[2].R"; + case Attribute::FixedFncTexture2Q: + return "FixedFncTexture[2].Q"; + case Attribute::FixedFncTexture3S: + return "FixedFncTexture[3].S"; + case Attribute::FixedFncTexture3T: + return "FixedFncTexture[3].T"; + case Attribute::FixedFncTexture3R: + return "FixedFncTexture[3].R"; + case Attribute::FixedFncTexture3Q: + return "FixedFncTexture[3].Q"; + case Attribute::FixedFncTexture4S: + return "FixedFncTexture[4].S"; + case Attribute::FixedFncTexture4T: + return "FixedFncTexture[4].T"; + case Attribute::FixedFncTexture4R: + return "FixedFncTexture[4].R"; + case Attribute::FixedFncTexture4Q: + return "FixedFncTexture[4].Q"; + case Attribute::FixedFncTexture5S: + return "FixedFncTexture[5].S"; + case Attribute::FixedFncTexture5T: + return "FixedFncTexture[5].T"; + case Attribute::FixedFncTexture5R: + return "FixedFncTexture[5].R"; + case Attribute::FixedFncTexture5Q: + return "FixedFncTexture[5].Q"; + case Attribute::FixedFncTexture6S: + return "FixedFncTexture[6].S"; + case Attribute::FixedFncTexture6T: + return "FixedFncTexture[6].T"; + case Attribute::FixedFncTexture6R: + return "FixedFncTexture[6].R"; + case Attribute::FixedFncTexture6Q: + return "FixedFncTexture[6].Q"; + case Attribute::FixedFncTexture7S: + return "FixedFncTexture[7].S"; + case Attribute::FixedFncTexture7T: + return "FixedFncTexture[7].T"; + case Attribute::FixedFncTexture7R: + return "FixedFncTexture[7].R"; + case Attribute::FixedFncTexture7Q: + return "FixedFncTexture[7].Q"; + case Attribute::FixedFncTexture8S: + return "FixedFncTexture[8].S"; + case Attribute::FixedFncTexture8T: + return "FixedFncTexture[8].T"; + case Attribute::FixedFncTexture8R: + return "FixedFncTexture[8].R"; + case Attribute::FixedFncTexture8Q: + return "FixedFncTexture[8].Q"; + case Attribute::FixedFncTexture9S: + return "FixedFncTexture[9].S"; + case Attribute::FixedFncTexture9T: + return "FixedFncTexture[9].T"; + case Attribute::FixedFncTexture9R: + return 
"FixedFncTexture[9].R"; + case Attribute::FixedFncTexture9Q: + return "FixedFncTexture[9].Q"; + case Attribute::ViewportMask: + return "ViewportMask"; + case Attribute::FrontFace: + return "FrontFace"; + } + return fmt::format("<reserved attribute {}>", static_cast<int>(attribute)); +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/attribute.h b/src/shader_recompiler/frontend/ir/attribute.h new file mode 100644 index 000000000..ca1199494 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/attribute.h @@ -0,0 +1,250 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <fmt/format.h> + +#include "common/common_types.h" + +namespace Shader::IR { + +enum class Attribute : u64 { + PrimitiveId = 24, + Layer = 25, + ViewportIndex = 26, + PointSize = 27, + PositionX = 28, + PositionY = 29, + PositionZ = 30, + PositionW = 31, + Generic0X = 32, + Generic0Y = 33, + Generic0Z = 34, + Generic0W = 35, + Generic1X = 36, + Generic1Y = 37, + Generic1Z = 38, + Generic1W = 39, + Generic2X = 40, + Generic2Y = 41, + Generic2Z = 42, + Generic2W = 43, + Generic3X = 44, + Generic3Y = 45, + Generic3Z = 46, + Generic3W = 47, + Generic4X = 48, + Generic4Y = 49, + Generic4Z = 50, + Generic4W = 51, + Generic5X = 52, + Generic5Y = 53, + Generic5Z = 54, + Generic5W = 55, + Generic6X = 56, + Generic6Y = 57, + Generic6Z = 58, + Generic6W = 59, + Generic7X = 60, + Generic7Y = 61, + Generic7Z = 62, + Generic7W = 63, + Generic8X = 64, + Generic8Y = 65, + Generic8Z = 66, + Generic8W = 67, + Generic9X = 68, + Generic9Y = 69, + Generic9Z = 70, + Generic9W = 71, + Generic10X = 72, + Generic10Y = 73, + Generic10Z = 74, + Generic10W = 75, + Generic11X = 76, + Generic11Y = 77, + Generic11Z = 78, + Generic11W = 79, + Generic12X = 80, + Generic12Y = 81, + Generic12Z = 82, + Generic12W = 83, + Generic13X = 84, + Generic13Y = 85, + Generic13Z = 86, + Generic13W = 87, + Generic14X = 88, + Generic14Y = 89, + Generic14Z = 90, + Generic14W = 91, + Generic15X = 92, + Generic15Y = 93, + Generic15Z = 94, + Generic15W = 95, + Generic16X = 96, + Generic16Y = 97, + Generic16Z = 98, + Generic16W = 99, + Generic17X = 100, + Generic17Y = 101, + Generic17Z = 102, + Generic17W = 103, + Generic18X = 104, + Generic18Y = 105, + Generic18Z = 106, + Generic18W = 107, + Generic19X = 108, + Generic19Y = 109, + Generic19Z = 110, + Generic19W = 111, + Generic20X = 112, + Generic20Y = 113, + Generic20Z = 114, + Generic20W = 115, + Generic21X = 116, + Generic21Y = 117, + Generic21Z = 118, + Generic21W = 119, + Generic22X = 120, + Generic22Y = 121, + Generic22Z = 122, + Generic22W = 123, + Generic23X = 124, + Generic23Y = 125, + Generic23Z = 126, + Generic23W = 127, + Generic24X = 128, + Generic24Y = 129, + Generic24Z = 130, + Generic24W = 131, + Generic25X = 132, + Generic25Y = 133, + Generic25Z = 134, + Generic25W = 135, + Generic26X = 136, + Generic26Y = 137, + Generic26Z = 138, + Generic26W = 139, + Generic27X = 140, + Generic27Y = 141, + Generic27Z = 142, + Generic27W = 143, + Generic28X = 144, + Generic28Y = 145, + Generic28Z = 146, + Generic28W = 147, + Generic29X = 148, + Generic29Y = 149, + Generic29Z = 150, + Generic29W = 151, + Generic30X = 152, + Generic30Y = 153, + Generic30Z = 154, + Generic30W = 155, + Generic31X = 156, + Generic31Y = 157, + Generic31Z = 158, + Generic31W = 159, + ColorFrontDiffuseR = 160, + ColorFrontDiffuseG = 161, + ColorFrontDiffuseB = 162, + ColorFrontDiffuseA = 163, + ColorFrontSpecularR 
= 164, + ColorFrontSpecularG = 165, + ColorFrontSpecularB = 166, + ColorFrontSpecularA = 167, + ColorBackDiffuseR = 168, + ColorBackDiffuseG = 169, + ColorBackDiffuseB = 170, + ColorBackDiffuseA = 171, + ColorBackSpecularR = 172, + ColorBackSpecularG = 173, + ColorBackSpecularB = 174, + ColorBackSpecularA = 175, + ClipDistance0 = 176, + ClipDistance1 = 177, + ClipDistance2 = 178, + ClipDistance3 = 179, + ClipDistance4 = 180, + ClipDistance5 = 181, + ClipDistance6 = 182, + ClipDistance7 = 183, + PointSpriteS = 184, + PointSpriteT = 185, + FogCoordinate = 186, + TessellationEvaluationPointU = 188, + TessellationEvaluationPointV = 189, + InstanceId = 190, + VertexId = 191, + FixedFncTexture0S = 192, + FixedFncTexture0T = 193, + FixedFncTexture0R = 194, + FixedFncTexture0Q = 195, + FixedFncTexture1S = 196, + FixedFncTexture1T = 197, + FixedFncTexture1R = 198, + FixedFncTexture1Q = 199, + FixedFncTexture2S = 200, + FixedFncTexture2T = 201, + FixedFncTexture2R = 202, + FixedFncTexture2Q = 203, + FixedFncTexture3S = 204, + FixedFncTexture3T = 205, + FixedFncTexture3R = 206, + FixedFncTexture3Q = 207, + FixedFncTexture4S = 208, + FixedFncTexture4T = 209, + FixedFncTexture4R = 210, + FixedFncTexture4Q = 211, + FixedFncTexture5S = 212, + FixedFncTexture5T = 213, + FixedFncTexture5R = 214, + FixedFncTexture5Q = 215, + FixedFncTexture6S = 216, + FixedFncTexture6T = 217, + FixedFncTexture6R = 218, + FixedFncTexture6Q = 219, + FixedFncTexture7S = 220, + FixedFncTexture7T = 221, + FixedFncTexture7R = 222, + FixedFncTexture7Q = 223, + FixedFncTexture8S = 224, + FixedFncTexture8T = 225, + FixedFncTexture8R = 226, + FixedFncTexture8Q = 227, + FixedFncTexture9S = 228, + FixedFncTexture9T = 229, + FixedFncTexture9R = 230, + FixedFncTexture9Q = 231, + ViewportMask = 232, + FrontFace = 255, +}; + +constexpr size_t NUM_GENERICS = 32; + +[[nodiscard]] bool IsGeneric(Attribute attribute) noexcept; + +[[nodiscard]] u32 GenericAttributeIndex(Attribute attribute); + +[[nodiscard]] u32 GenericAttributeElement(Attribute attribute); + +[[nodiscard]] std::string NameOf(Attribute attribute); + +[[nodiscard]] constexpr IR::Attribute operator+(IR::Attribute attribute, size_t value) noexcept { + return static_cast<IR::Attribute>(static_cast<size_t>(attribute) + value); +} + +} // namespace Shader::IR + +template <> +struct fmt::formatter<Shader::IR::Attribute> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(const Shader::IR::Attribute& attribute, FormatContext& ctx) { + return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(attribute)); + } +}; diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp new file mode 100644 index 000000000..7c08b25ce --- /dev/null +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp @@ -0,0 +1,149 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <algorithm> +#include <initializer_list> +#include <map> +#include <memory> + +#include "common/bit_cast.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::IR { + +Block::Block(ObjectPool<Inst>& inst_pool_) : inst_pool{&inst_pool_} {} + +Block::~Block() = default; + +void Block::AppendNewInst(Opcode op, std::initializer_list<Value> args) { + PrependNewInst(end(), op, args); +} + +Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op, + std::initializer_list<Value> args, u32 flags) { + Inst* const inst{inst_pool->Create(op, flags)}; + const auto result_it{instructions.insert(insertion_point, *inst)}; + + if (inst->NumArgs() != args.size()) { + throw InvalidArgument("Invalid number of arguments {} in {}", args.size(), op); + } + std::ranges::for_each(args, [inst, index = size_t{0}](const Value& arg) mutable { + inst->SetArg(index, arg); + ++index; + }); + return result_it; +} + +void Block::AddBranch(Block* block) { + if (std::ranges::find(imm_successors, block) != imm_successors.end()) { + throw LogicError("Successor already inserted"); + } + if (std::ranges::find(block->imm_predecessors, this) != block->imm_predecessors.end()) { + throw LogicError("Predecessor already inserted"); + } + imm_successors.push_back(block); + block->imm_predecessors.push_back(this); +} + +static std::string BlockToIndex(const std::map<const Block*, size_t>& block_to_index, + Block* block) { + if (const auto it{block_to_index.find(block)}; it != block_to_index.end()) { + return fmt::format("{{Block ${}}}", it->second); + } + return fmt::format("$<unknown block {:016x}>", reinterpret_cast<u64>(block)); +} + +static size_t InstIndex(std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index, + const Inst* inst) { + const auto [it, is_inserted]{inst_to_index.emplace(inst, inst_index + 1)}; + if (is_inserted) { + ++inst_index; + } + return it->second; +} + +static std::string ArgToIndex(std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index, + const Value& arg) { + if (arg.IsEmpty()) { + return "<null>"; + } + if (!arg.IsImmediate() || arg.IsIdentity()) { + return fmt::format("%{}", InstIndex(inst_to_index, inst_index, arg.Inst())); + } + switch (arg.Type()) { + case Type::U1: + return fmt::format("#{}", arg.U1() ? 
"true" : "false"); + case Type::U8: + return fmt::format("#{}", arg.U8()); + case Type::U16: + return fmt::format("#{}", arg.U16()); + case Type::U32: + return fmt::format("#{}", arg.U32()); + case Type::U64: + return fmt::format("#{}", arg.U64()); + case Type::F32: + return fmt::format("#{}", arg.F32()); + case Type::Reg: + return fmt::format("{}", arg.Reg()); + case Type::Pred: + return fmt::format("{}", arg.Pred()); + case Type::Attribute: + return fmt::format("{}", arg.Attribute()); + default: + return "<unknown immediate type>"; + } +} + +std::string DumpBlock(const Block& block) { + size_t inst_index{0}; + std::map<const Inst*, size_t> inst_to_index; + return DumpBlock(block, {}, inst_to_index, inst_index); +} + +std::string DumpBlock(const Block& block, const std::map<const Block*, size_t>& block_to_index, + std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index) { + std::string ret{"Block"}; + if (const auto it{block_to_index.find(&block)}; it != block_to_index.end()) { + ret += fmt::format(" ${}", it->second); + } + ret += '\n'; + for (const Inst& inst : block) { + const Opcode op{inst.GetOpcode()}; + ret += fmt::format("[{:016x}] ", reinterpret_cast<u64>(&inst)); + if (TypeOf(op) != Type::Void) { + ret += fmt::format("%{:<5} = {}", InstIndex(inst_to_index, inst_index, &inst), op); + } else { + ret += fmt::format(" {}", op); // '%00000 = ' -> 1 + 5 + 3 = 9 spaces + } + const size_t arg_count{inst.NumArgs()}; + for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) { + const Value arg{inst.Arg(arg_index)}; + const std::string arg_str{ArgToIndex(inst_to_index, inst_index, arg)}; + ret += arg_index != 0 ? ", " : " "; + if (op == Opcode::Phi) { + ret += fmt::format("[ {}, {} ]", arg_str, + BlockToIndex(block_to_index, inst.PhiBlock(arg_index))); + } else { + ret += arg_str; + } + if (op != Opcode::Phi) { + const Type actual_type{arg.Type()}; + const Type expected_type{ArgTypeOf(op, arg_index)}; + if (!AreTypesCompatible(actual_type, expected_type)) { + ret += fmt::format("<type error: {} != {}>", actual_type, expected_type); + } + } + } + if (TypeOf(op) != Type::Void) { + ret += fmt::format(" (uses: {})\n", inst.UseCount()); + } else { + ret += '\n'; + } + } + return ret; +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h new file mode 100644 index 000000000..7e134b4c7 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/basic_block.h @@ -0,0 +1,185 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <initializer_list> +#include <map> +#include <span> +#include <vector> + +#include <boost/intrusive/list.hpp> + +#include "common/bit_cast.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/condition.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/object_pool.h" + +namespace Shader::IR { + +class Block { +public: + using InstructionList = boost::intrusive::list<Inst>; + using size_type = InstructionList::size_type; + using iterator = InstructionList::iterator; + using const_iterator = InstructionList::const_iterator; + using reverse_iterator = InstructionList::reverse_iterator; + using const_reverse_iterator = InstructionList::const_reverse_iterator; + + explicit Block(ObjectPool<Inst>& inst_pool_); + ~Block(); + + Block(const Block&) = delete; + Block& operator=(const Block&) = delete; + + Block(Block&&) = default; + Block& operator=(Block&&) = default; + + /// Appends a new instruction to the end of this basic block. + void AppendNewInst(Opcode op, std::initializer_list<Value> args); + + /// Prepends a new instruction to this basic block before the insertion point. + iterator PrependNewInst(iterator insertion_point, Opcode op, + std::initializer_list<Value> args = {}, u32 flags = 0); + + /// Adds a new branch to this basic block. + void AddBranch(Block* block); + + /// Gets a mutable reference to the instruction list for this basic block. + [[nodiscard]] InstructionList& Instructions() noexcept { + return instructions; + } + /// Gets an immutable reference to the instruction list for this basic block. + [[nodiscard]] const InstructionList& Instructions() const noexcept { + return instructions; + } + + /// Gets an immutable span to the immediate predecessors. + [[nodiscard]] std::span<Block* const> ImmPredecessors() const noexcept { + return imm_predecessors; + } + /// Gets an immutable span to the immediate successors. + [[nodiscard]] std::span<Block* const> ImmSuccessors() const noexcept { + return imm_successors; + } + + /// Intrusively store the host definition of this instruction. + template <typename DefinitionType> + void SetDefinition(DefinitionType def) { + definition = Common::BitCast<u32>(def); + } + + /// Return the intrusively stored host definition of this instruction. 
+ template <typename DefinitionType> + [[nodiscard]] DefinitionType Definition() const noexcept { + return Common::BitCast<DefinitionType>(definition); + } + + void SetSsaRegValue(IR::Reg reg, const Value& value) noexcept { + ssa_reg_values[RegIndex(reg)] = value; + } + const Value& SsaRegValue(IR::Reg reg) const noexcept { + return ssa_reg_values[RegIndex(reg)]; + } + + void SsaSeal() noexcept { + is_ssa_sealed = true; + } + [[nodiscard]] bool IsSsaSealed() const noexcept { + return is_ssa_sealed; + } + + [[nodiscard]] bool empty() const { + return instructions.empty(); + } + [[nodiscard]] size_type size() const { + return instructions.size(); + } + + [[nodiscard]] Inst& front() { + return instructions.front(); + } + [[nodiscard]] const Inst& front() const { + return instructions.front(); + } + + [[nodiscard]] Inst& back() { + return instructions.back(); + } + [[nodiscard]] const Inst& back() const { + return instructions.back(); + } + + [[nodiscard]] iterator begin() { + return instructions.begin(); + } + [[nodiscard]] const_iterator begin() const { + return instructions.begin(); + } + [[nodiscard]] iterator end() { + return instructions.end(); + } + [[nodiscard]] const_iterator end() const { + return instructions.end(); + } + + [[nodiscard]] reverse_iterator rbegin() { + return instructions.rbegin(); + } + [[nodiscard]] const_reverse_iterator rbegin() const { + return instructions.rbegin(); + } + [[nodiscard]] reverse_iterator rend() { + return instructions.rend(); + } + [[nodiscard]] const_reverse_iterator rend() const { + return instructions.rend(); + } + + [[nodiscard]] const_iterator cbegin() const { + return instructions.cbegin(); + } + [[nodiscard]] const_iterator cend() const { + return instructions.cend(); + } + + [[nodiscard]] const_reverse_iterator crbegin() const { + return instructions.crbegin(); + } + [[nodiscard]] const_reverse_iterator crend() const { + return instructions.crend(); + } + +private: + /// Memory pool for instruction list + ObjectPool<Inst>* inst_pool; + + /// List of instructions in this block + InstructionList instructions; + + /// Block immediate predecessors + std::vector<Block*> imm_predecessors; + /// Block immediate successors + std::vector<Block*> imm_successors; + + /// Intrusively store the value of a register in the block. + std::array<Value, NUM_REGS> ssa_reg_values; + /// Intrusively store if the block is sealed in the SSA pass. + bool is_ssa_sealed{false}; + + /// Intrusively stored host definition of this block. + u32 definition{}; +}; + +using BlockList = std::vector<Block*>; + +[[nodiscard]] std::string DumpBlock(const Block& block); + +[[nodiscard]] std::string DumpBlock(const Block& block, + const std::map<const Block*, size_t>& block_to_index, + std::map<const Inst*, size_t>& inst_to_index, + size_t& inst_index); + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/breadth_first_search.h b/src/shader_recompiler/frontend/ir/breadth_first_search.h new file mode 100644 index 000000000..a52ccbd58 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/breadth_first_search.h @@ -0,0 +1,56 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <optional> +#include <type_traits> +#include <queue> + +#include <boost/container/small_vector.hpp> + +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::IR { + +template <typename Pred> +auto BreadthFirstSearch(const Value& value, Pred&& pred) + -> std::invoke_result_t<Pred, const Inst*> { + if (value.IsImmediate()) { + // Nothing to do with immediates + return std::nullopt; + } + // Breadth-first search visiting the right most arguments first + // Small vector has been determined from shaders in Super Smash Bros. Ultimate + boost::container::small_vector<const Inst*, 2> visited; + std::queue<const Inst*> queue; + queue.push(value.InstRecursive()); + + while (!queue.empty()) { + // Pop one instruction from the queue + const Inst* const inst{queue.front()}; + queue.pop(); + if (const std::optional result = pred(inst)) { + // This is the instruction we were looking for + return result; + } + // Visit the right most arguments first + for (size_t arg = inst->NumArgs(); arg--;) { + const Value arg_value{inst->Arg(arg)}; + if (arg_value.IsImmediate()) { + continue; + } + // Queue instruction if it hasn't been visited + const Inst* const arg_inst{arg_value.InstRecursive()}; + if (std::ranges::find(visited, arg_inst) == visited.end()) { + visited.push_back(arg_inst); + queue.push(arg_inst); + } + } + } + // SSA tree has been traversed and the result hasn't been found + return std::nullopt; +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/condition.cpp b/src/shader_recompiler/frontend/ir/condition.cpp new file mode 100644 index 000000000..fc18ea2a2 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/condition.cpp @@ -0,0 +1,29 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string> + +#include <fmt/format.h> + +#include "shader_recompiler/frontend/ir/condition.h" + +namespace Shader::IR { + +std::string NameOf(Condition condition) { + std::string ret; + if (condition.GetFlowTest() != FlowTest::T) { + ret = fmt::to_string(condition.GetFlowTest()); + } + const auto [pred, negated]{condition.GetPred()}; + if (!ret.empty()) { + ret += '&'; + } + if (negated) { + ret += '!'; + } + ret += fmt::to_string(pred); + return ret; +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/condition.h b/src/shader_recompiler/frontend/ir/condition.h new file mode 100644 index 000000000..aa8597c60 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/condition.h @@ -0,0 +1,60 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <compare> +#include <string> + +#include <fmt/format.h> + +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/flow_test.h" +#include "shader_recompiler/frontend/ir/pred.h" + +namespace Shader::IR { + +class Condition { +public: + Condition() noexcept = default; + + explicit Condition(FlowTest flow_test_, Pred pred_, bool pred_negated_ = false) noexcept + : flow_test{static_cast<u16>(flow_test_)}, pred{static_cast<u8>(pred_)}, + pred_negated{pred_negated_ ? 
u8{1} : u8{0}} {} + + explicit Condition(Pred pred_, bool pred_negated_ = false) noexcept + : Condition(FlowTest::T, pred_, pred_negated_) {} + + explicit Condition(bool value) : Condition(Pred::PT, !value) {} + + auto operator<=>(const Condition&) const noexcept = default; + + [[nodiscard]] IR::FlowTest GetFlowTest() const noexcept { + return static_cast<IR::FlowTest>(flow_test); + } + + [[nodiscard]] std::pair<IR::Pred, bool> GetPred() const noexcept { + return {static_cast<IR::Pred>(pred), pred_negated != 0}; + } + +private: + u16 flow_test; + u8 pred; + u8 pred_negated; +}; + +std::string NameOf(Condition condition); + +} // namespace Shader::IR + +template <> +struct fmt::formatter<Shader::IR::Condition> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(const Shader::IR::Condition& cond, FormatContext& ctx) { + return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(cond)); + } +}; diff --git a/src/shader_recompiler/frontend/ir/flow_test.cpp b/src/shader_recompiler/frontend/ir/flow_test.cpp new file mode 100644 index 000000000..6ebb4ad89 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/flow_test.cpp @@ -0,0 +1,83 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string> + +#include <fmt/format.h> + +#include "shader_recompiler/frontend/ir/flow_test.h" + +namespace Shader::IR { + +std::string NameOf(FlowTest flow_test) { + switch (flow_test) { + case FlowTest::F: + return "F"; + case FlowTest::LT: + return "LT"; + case FlowTest::EQ: + return "EQ"; + case FlowTest::LE: + return "LE"; + case FlowTest::GT: + return "GT"; + case FlowTest::NE: + return "NE"; + case FlowTest::GE: + return "GE"; + case FlowTest::NUM: + return "NUM"; + case FlowTest::NaN: + return "NAN"; + case FlowTest::LTU: + return "LTU"; + case FlowTest::EQU: + return "EQU"; + case FlowTest::LEU: + return "LEU"; + case FlowTest::GTU: + return "GTU"; + case FlowTest::NEU: + return "NEU"; + case FlowTest::GEU: + return "GEU"; + case FlowTest::T: + return "T"; + case FlowTest::OFF: + return "OFF"; + case FlowTest::LO: + return "LO"; + case FlowTest::SFF: + return "SFF"; + case FlowTest::LS: + return "LS"; + case FlowTest::HI: + return "HI"; + case FlowTest::SFT: + return "SFT"; + case FlowTest::HS: + return "HS"; + case FlowTest::OFT: + return "OFT"; + case FlowTest::CSM_TA: + return "CSM_TA"; + case FlowTest::CSM_TR: + return "CSM_TR"; + case FlowTest::CSM_MX: + return "CSM_MX"; + case FlowTest::FCSM_TA: + return "FCSM_TA"; + case FlowTest::FCSM_TR: + return "FCSM_TR"; + case FlowTest::FCSM_MX: + return "FCSM_MX"; + case FlowTest::RLE: + return "RLE"; + case FlowTest::RGT: + return "RGT"; + } + return fmt::format("<invalid flow test {}>", static_cast<int>(flow_test)); +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/flow_test.h b/src/shader_recompiler/frontend/ir/flow_test.h new file mode 100644 index 000000000..09e113773 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/flow_test.h @@ -0,0 +1,62 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <string> +#include <fmt/format.h> + +#include "common/common_types.h" + +namespace Shader::IR { + +enum class FlowTest : u64 { + F, + LT, + EQ, + LE, + GT, + NE, + GE, + NUM, + NaN, + LTU, + EQU, + LEU, + GTU, + NEU, + GEU, + T, + OFF, + LO, + SFF, + LS, + HI, + SFT, + HS, + OFT, + CSM_TA, + CSM_TR, + CSM_MX, + FCSM_TA, + FCSM_TR, + FCSM_MX, + RLE, + RGT, +}; + +[[nodiscard]] std::string NameOf(FlowTest flow_test); + +} // namespace Shader::IR + +template <> +struct fmt::formatter<Shader::IR::FlowTest> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(const Shader::IR::FlowTest& flow_test, FormatContext& ctx) { + return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(flow_test)); + } +}; diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp new file mode 100644 index 000000000..13159a68d --- /dev/null +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -0,0 +1,2017 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_cast.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::IR { +namespace { +[[noreturn]] void ThrowInvalidType(Type type) { + throw InvalidArgument("Invalid type {}", type); +} + +Value MakeLodClampPair(IREmitter& ir, const F32& bias_lod, const F32& lod_clamp) { + if (!bias_lod.IsEmpty() && !lod_clamp.IsEmpty()) { + return ir.CompositeConstruct(bias_lod, lod_clamp); + } else if (!bias_lod.IsEmpty()) { + return bias_lod; + } else if (!lod_clamp.IsEmpty()) { + return lod_clamp; + } else { + return Value{}; + } +} +} // Anonymous namespace + +U1 IREmitter::Imm1(bool value) const { + return U1{Value{value}}; +} + +U8 IREmitter::Imm8(u8 value) const { + return U8{Value{value}}; +} + +U16 IREmitter::Imm16(u16 value) const { + return U16{Value{value}}; +} + +U32 IREmitter::Imm32(u32 value) const { + return U32{Value{value}}; +} + +U32 IREmitter::Imm32(s32 value) const { + return U32{Value{static_cast<u32>(value)}}; +} + +F32 IREmitter::Imm32(f32 value) const { + return F32{Value{value}}; +} + +U64 IREmitter::Imm64(u64 value) const { + return U64{Value{value}}; +} + +U64 IREmitter::Imm64(s64 value) const { + return U64{Value{static_cast<u64>(value)}}; +} + +F64 IREmitter::Imm64(f64 value) const { + return F64{Value{value}}; +} + +U1 IREmitter::ConditionRef(const U1& value) { + return Inst<U1>(Opcode::ConditionRef, value); +} + +void IREmitter::Reference(const Value& value) { + Inst(Opcode::Reference, value); +} + +void IREmitter::PhiMove(IR::Inst& phi, const Value& value) { + Inst(Opcode::PhiMove, Value{&phi}, value); +} + +void IREmitter::Prologue() { + Inst(Opcode::Prologue); +} + +void IREmitter::Epilogue() { + Inst(Opcode::Epilogue); +} + +void IREmitter::DemoteToHelperInvocation() { + Inst(Opcode::DemoteToHelperInvocation); +} + +void IREmitter::EmitVertex(const U32& stream) { + Inst(Opcode::EmitVertex, stream); +} + +void IREmitter::EndPrimitive(const U32& stream) { + Inst(Opcode::EndPrimitive, stream); +} + +void IREmitter::Barrier() { + Inst(Opcode::Barrier); +} + +void IREmitter::WorkgroupMemoryBarrier() { + Inst(Opcode::WorkgroupMemoryBarrier); +} + +void IREmitter::DeviceMemoryBarrier() { + Inst(Opcode::DeviceMemoryBarrier); +} + +U32 IREmitter::GetReg(IR::Reg reg) { + return Inst<U32>(Opcode::GetRegister, reg); +} + +void 
IREmitter::SetReg(IR::Reg reg, const U32& value) { + Inst(Opcode::SetRegister, reg, value); +} + +U1 IREmitter::GetPred(IR::Pred pred, bool is_negated) { + if (pred == Pred::PT) { + return Imm1(!is_negated); + } + const U1 value{Inst<U1>(Opcode::GetPred, pred)}; + if (is_negated) { + return Inst<U1>(Opcode::LogicalNot, value); + } else { + return value; + } +} + +void IREmitter::SetPred(IR::Pred pred, const U1& value) { + if (pred != IR::Pred::PT) { + Inst(Opcode::SetPred, pred, value); + } +} + +U1 IREmitter::GetGotoVariable(u32 id) { + return Inst<U1>(Opcode::GetGotoVariable, id); +} + +void IREmitter::SetGotoVariable(u32 id, const U1& value) { + Inst(Opcode::SetGotoVariable, id, value); +} + +U32 IREmitter::GetIndirectBranchVariable() { + return Inst<U32>(Opcode::GetIndirectBranchVariable); +} + +void IREmitter::SetIndirectBranchVariable(const U32& value) { + Inst(Opcode::SetIndirectBranchVariable, value); +} + +U32 IREmitter::GetCbuf(const U32& binding, const U32& byte_offset) { + return Inst<U32>(Opcode::GetCbufU32, binding, byte_offset); +} + +Value IREmitter::GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize, + bool is_signed) { + switch (bitsize) { + case 8: + return Inst<U32>(is_signed ? Opcode::GetCbufS8 : Opcode::GetCbufU8, binding, byte_offset); + case 16: + return Inst<U32>(is_signed ? Opcode::GetCbufS16 : Opcode::GetCbufU16, binding, byte_offset); + case 32: + return Inst<U32>(Opcode::GetCbufU32, binding, byte_offset); + case 64: + return Inst(Opcode::GetCbufU32x2, binding, byte_offset); + default: + throw InvalidArgument("Invalid bit size {}", bitsize); + } +} + +F32 IREmitter::GetFloatCbuf(const U32& binding, const U32& byte_offset) { + return Inst<F32>(Opcode::GetCbufF32, binding, byte_offset); +} + +U1 IREmitter::GetZFlag() { + return Inst<U1>(Opcode::GetZFlag); +} + +U1 IREmitter::GetSFlag() { + return Inst<U1>(Opcode::GetSFlag); +} + +U1 IREmitter::GetCFlag() { + return Inst<U1>(Opcode::GetCFlag); +} + +U1 IREmitter::GetOFlag() { + return Inst<U1>(Opcode::GetOFlag); +} + +void IREmitter::SetZFlag(const U1& value) { + Inst(Opcode::SetZFlag, value); +} + +void IREmitter::SetSFlag(const U1& value) { + Inst(Opcode::SetSFlag, value); +} + +void IREmitter::SetCFlag(const U1& value) { + Inst(Opcode::SetCFlag, value); +} + +void IREmitter::SetOFlag(const U1& value) { + Inst(Opcode::SetOFlag, value); +} + +static U1 GetFlowTest(IREmitter& ir, FlowTest flow_test) { + switch (flow_test) { + case FlowTest::F: + return ir.Imm1(false); + case FlowTest::LT: + return ir.LogicalXor(ir.LogicalAnd(ir.GetSFlag(), ir.LogicalNot(ir.GetZFlag())), + ir.GetOFlag()); + case FlowTest::EQ: + return ir.LogicalAnd(ir.LogicalNot(ir.GetSFlag()), ir.GetZFlag()); + case FlowTest::LE: + return ir.LogicalXor(ir.GetSFlag(), ir.LogicalOr(ir.GetZFlag(), ir.GetOFlag())); + case FlowTest::GT: + return ir.LogicalAnd(ir.LogicalXor(ir.LogicalNot(ir.GetSFlag()), ir.GetOFlag()), + ir.LogicalNot(ir.GetZFlag())); + case FlowTest::NE: + return ir.LogicalNot(ir.GetZFlag()); + case FlowTest::GE: + return ir.LogicalNot(ir.LogicalXor(ir.GetSFlag(), ir.GetOFlag())); + case FlowTest::NUM: + return ir.LogicalOr(ir.LogicalNot(ir.GetSFlag()), ir.LogicalNot(ir.GetZFlag())); + case FlowTest::NaN: + return ir.LogicalAnd(ir.GetSFlag(), ir.GetZFlag()); + case FlowTest::LTU: + return ir.LogicalXor(ir.GetSFlag(), ir.GetOFlag()); + case FlowTest::EQU: + return ir.GetZFlag(); + case FlowTest::LEU: + return ir.LogicalOr(ir.LogicalXor(ir.GetSFlag(), ir.GetOFlag()), ir.GetZFlag()); + case FlowTest::GTU: + return 
ir.LogicalXor(ir.LogicalNot(ir.GetSFlag()), + ir.LogicalOr(ir.GetZFlag(), ir.GetOFlag())); + case FlowTest::NEU: + return ir.LogicalOr(ir.GetSFlag(), ir.LogicalNot(ir.GetZFlag())); + case FlowTest::GEU: + return ir.LogicalXor(ir.LogicalOr(ir.LogicalNot(ir.GetSFlag()), ir.GetZFlag()), + ir.GetOFlag()); + case FlowTest::T: + return ir.Imm1(true); + case FlowTest::OFF: + return ir.LogicalNot(ir.GetOFlag()); + case FlowTest::LO: + return ir.LogicalNot(ir.GetCFlag()); + case FlowTest::SFF: + return ir.LogicalNot(ir.GetSFlag()); + case FlowTest::LS: + return ir.LogicalOr(ir.GetZFlag(), ir.LogicalNot(ir.GetCFlag())); + case FlowTest::HI: + return ir.LogicalAnd(ir.GetCFlag(), ir.LogicalNot(ir.GetZFlag())); + case FlowTest::SFT: + return ir.GetSFlag(); + case FlowTest::HS: + return ir.GetCFlag(); + case FlowTest::OFT: + return ir.GetOFlag(); + case FlowTest::RLE: + return ir.LogicalOr(ir.GetSFlag(), ir.GetZFlag()); + case FlowTest::RGT: + return ir.LogicalAnd(ir.LogicalNot(ir.GetSFlag()), ir.LogicalNot(ir.GetZFlag())); + case FlowTest::FCSM_TR: + LOG_WARNING(Shader, "(STUBBED) FCSM_TR"); + return ir.Imm1(false); + case FlowTest::CSM_TA: + case FlowTest::CSM_TR: + case FlowTest::CSM_MX: + case FlowTest::FCSM_TA: + case FlowTest::FCSM_MX: + default: + throw NotImplementedException("Flow test {}", flow_test); + } +} + +U1 IREmitter::Condition(IR::Condition cond) { + const FlowTest flow_test{cond.GetFlowTest()}; + const auto [pred, is_negated]{cond.GetPred()}; + if (flow_test == FlowTest::T) { + return GetPred(pred, is_negated); + } + return LogicalAnd(GetPred(pred, is_negated), GetFlowTest(*this, flow_test)); +} + +U1 IREmitter::GetFlowTestResult(FlowTest test) { + return GetFlowTest(*this, test); +} + +F32 IREmitter::GetAttribute(IR::Attribute attribute) { + return GetAttribute(attribute, Imm32(0)); +} + +F32 IREmitter::GetAttribute(IR::Attribute attribute, const U32& vertex) { + return Inst<F32>(Opcode::GetAttribute, attribute, vertex); +} + +void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex) { + Inst(Opcode::SetAttribute, attribute, value, vertex); +} + +F32 IREmitter::GetAttributeIndexed(const U32& phys_address) { + return GetAttributeIndexed(phys_address, Imm32(0)); +} + +F32 IREmitter::GetAttributeIndexed(const U32& phys_address, const U32& vertex) { + return Inst<F32>(Opcode::GetAttributeIndexed, phys_address, vertex); +} + +void IREmitter::SetAttributeIndexed(const U32& phys_address, const F32& value, const U32& vertex) { + Inst(Opcode::SetAttributeIndexed, phys_address, value, vertex); +} + +F32 IREmitter::GetPatch(Patch patch) { + return Inst<F32>(Opcode::GetPatch, patch); +} + +void IREmitter::SetPatch(Patch patch, const F32& value) { + Inst(Opcode::SetPatch, patch, value); +} + +void IREmitter::SetFragColor(u32 index, u32 component, const F32& value) { + Inst(Opcode::SetFragColor, Imm32(index), Imm32(component), value); +} + +void IREmitter::SetSampleMask(const U32& value) { + Inst(Opcode::SetSampleMask, value); +} + +void IREmitter::SetFragDepth(const F32& value) { + Inst(Opcode::SetFragDepth, value); +} + +U32 IREmitter::WorkgroupIdX() { + return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 0)}; +} + +U32 IREmitter::WorkgroupIdY() { + return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 1)}; +} + +U32 IREmitter::WorkgroupIdZ() { + return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 2)}; +} + +Value IREmitter::LocalInvocationId() { + return Inst(Opcode::LocalInvocationId); +} + +U32 IREmitter::LocalInvocationIdX() { + return 
U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 0)}; +} + +U32 IREmitter::LocalInvocationIdY() { + return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 1)}; +} + +U32 IREmitter::LocalInvocationIdZ() { + return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 2)}; +} + +U32 IREmitter::InvocationId() { + return Inst<U32>(Opcode::InvocationId); +} + +U32 IREmitter::SampleId() { + return Inst<U32>(Opcode::SampleId); +} + +U1 IREmitter::IsHelperInvocation() { + return Inst<U1>(Opcode::IsHelperInvocation); +} + +F32 IREmitter::YDirection() { + return Inst<F32>(Opcode::YDirection); +} + +U32 IREmitter::LaneId() { + return Inst<U32>(Opcode::LaneId); +} + +U32 IREmitter::LoadGlobalU8(const U64& address) { + return Inst<U32>(Opcode::LoadGlobalU8, address); +} + +U32 IREmitter::LoadGlobalS8(const U64& address) { + return Inst<U32>(Opcode::LoadGlobalS8, address); +} + +U32 IREmitter::LoadGlobalU16(const U64& address) { + return Inst<U32>(Opcode::LoadGlobalU16, address); +} + +U32 IREmitter::LoadGlobalS16(const U64& address) { + return Inst<U32>(Opcode::LoadGlobalS16, address); +} + +U32 IREmitter::LoadGlobal32(const U64& address) { + return Inst<U32>(Opcode::LoadGlobal32, address); +} + +Value IREmitter::LoadGlobal64(const U64& address) { + return Inst<Value>(Opcode::LoadGlobal64, address); +} + +Value IREmitter::LoadGlobal128(const U64& address) { + return Inst<Value>(Opcode::LoadGlobal128, address); +} + +void IREmitter::WriteGlobalU8(const U64& address, const U32& value) { + Inst(Opcode::WriteGlobalU8, address, value); +} + +void IREmitter::WriteGlobalS8(const U64& address, const U32& value) { + Inst(Opcode::WriteGlobalS8, address, value); +} + +void IREmitter::WriteGlobalU16(const U64& address, const U32& value) { + Inst(Opcode::WriteGlobalU16, address, value); +} + +void IREmitter::WriteGlobalS16(const U64& address, const U32& value) { + Inst(Opcode::WriteGlobalS16, address, value); +} + +void IREmitter::WriteGlobal32(const U64& address, const U32& value) { + Inst(Opcode::WriteGlobal32, address, value); +} + +void IREmitter::WriteGlobal64(const U64& address, const IR::Value& vector) { + Inst(Opcode::WriteGlobal64, address, vector); +} + +void IREmitter::WriteGlobal128(const U64& address, const IR::Value& vector) { + Inst(Opcode::WriteGlobal128, address, vector); +} + +U32 IREmitter::LoadLocal(const IR::U32& word_offset) { + return Inst<U32>(Opcode::LoadLocal, word_offset); +} + +void IREmitter::WriteLocal(const IR::U32& word_offset, const IR::U32& value) { + Inst(Opcode::WriteLocal, word_offset, value); +} + +Value IREmitter::LoadShared(int bit_size, bool is_signed, const IR::U32& offset) { + switch (bit_size) { + case 8: + return Inst(is_signed ? Opcode::LoadSharedS8 : Opcode::LoadSharedU8, offset); + case 16: + return Inst(is_signed ? 
Opcode::LoadSharedS16 : Opcode::LoadSharedU16, offset); + case 32: + return Inst(Opcode::LoadSharedU32, offset); + case 64: + return Inst(Opcode::LoadSharedU64, offset); + case 128: + return Inst(Opcode::LoadSharedU128, offset); + } + throw InvalidArgument("Invalid bit size {}", bit_size); +} + +void IREmitter::WriteShared(int bit_size, const IR::U32& offset, const IR::Value& value) { + switch (bit_size) { + case 8: + Inst(Opcode::WriteSharedU8, offset, value); + break; + case 16: + Inst(Opcode::WriteSharedU16, offset, value); + break; + case 32: + Inst(Opcode::WriteSharedU32, offset, value); + break; + case 64: + Inst(Opcode::WriteSharedU64, offset, value); + break; + case 128: + Inst(Opcode::WriteSharedU128, offset, value); + break; + default: + throw InvalidArgument("Invalid bit size {}", bit_size); + } +} + +U1 IREmitter::GetZeroFromOp(const Value& op) { + return Inst<U1>(Opcode::GetZeroFromOp, op); +} + +U1 IREmitter::GetSignFromOp(const Value& op) { + return Inst<U1>(Opcode::GetSignFromOp, op); +} + +U1 IREmitter::GetCarryFromOp(const Value& op) { + return Inst<U1>(Opcode::GetCarryFromOp, op); +} + +U1 IREmitter::GetOverflowFromOp(const Value& op) { + return Inst<U1>(Opcode::GetOverflowFromOp, op); +} + +U1 IREmitter::GetSparseFromOp(const Value& op) { + return Inst<U1>(Opcode::GetSparseFromOp, op); +} + +U1 IREmitter::GetInBoundsFromOp(const Value& op) { + return Inst<U1>(Opcode::GetInBoundsFromOp, op); +} + +F16F32F64 IREmitter::FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control) { + if (a.Type() != b.Type()) { + throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); + } + switch (a.Type()) { + case Type::F16: + return Inst<F16>(Opcode::FPAdd16, Flags{control}, a, b); + case Type::F32: + return Inst<F32>(Opcode::FPAdd32, Flags{control}, a, b); + case Type::F64: + return Inst<F64>(Opcode::FPAdd64, Flags{control}, a, b); + default: + ThrowInvalidType(a.Type()); + } +} + +Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2) { + if (e1.Type() != e2.Type()) { + throw InvalidArgument("Mismatching types {} and {}", e1.Type(), e2.Type()); + } + switch (e1.Type()) { + case Type::U32: + return Inst(Opcode::CompositeConstructU32x2, e1, e2); + case Type::F16: + return Inst(Opcode::CompositeConstructF16x2, e1, e2); + case Type::F32: + return Inst(Opcode::CompositeConstructF32x2, e1, e2); + case Type::F64: + return Inst(Opcode::CompositeConstructF64x2, e1, e2); + default: + ThrowInvalidType(e1.Type()); + } +} + +Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Value& e3) { + if (e1.Type() != e2.Type() || e1.Type() != e3.Type()) { + throw InvalidArgument("Mismatching types {}, {}, and {}", e1.Type(), e2.Type(), e3.Type()); + } + switch (e1.Type()) { + case Type::U32: + return Inst(Opcode::CompositeConstructU32x3, e1, e2, e3); + case Type::F16: + return Inst(Opcode::CompositeConstructF16x3, e1, e2, e3); + case Type::F32: + return Inst(Opcode::CompositeConstructF32x3, e1, e2, e3); + case Type::F64: + return Inst(Opcode::CompositeConstructF64x3, e1, e2, e3); + default: + ThrowInvalidType(e1.Type()); + } +} + +Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Value& e3, + const Value& e4) { + if (e1.Type() != e2.Type() || e1.Type() != e3.Type() || e1.Type() != e4.Type()) { + throw InvalidArgument("Mismatching types {}, {}, {}, and {}", e1.Type(), e2.Type(), + e3.Type(), e4.Type()); + } + switch (e1.Type()) { + case Type::U32: + return Inst(Opcode::CompositeConstructU32x4, e1, e2, e3, e4); + case 
Type::F16: + return Inst(Opcode::CompositeConstructF16x4, e1, e2, e3, e4); + case Type::F32: + return Inst(Opcode::CompositeConstructF32x4, e1, e2, e3, e4); + case Type::F64: + return Inst(Opcode::CompositeConstructF64x4, e1, e2, e3, e4); + default: + ThrowInvalidType(e1.Type()); + } +} + +Value IREmitter::CompositeExtract(const Value& vector, size_t element) { + const auto read{[&](Opcode opcode, size_t limit) -> Value { + if (element >= limit) { + throw InvalidArgument("Out of bounds element {}", element); + } + return Inst(opcode, vector, Value{static_cast<u32>(element)}); + }}; + switch (vector.Type()) { + case Type::U32x2: + return read(Opcode::CompositeExtractU32x2, 2); + case Type::U32x3: + return read(Opcode::CompositeExtractU32x3, 3); + case Type::U32x4: + return read(Opcode::CompositeExtractU32x4, 4); + case Type::F16x2: + return read(Opcode::CompositeExtractF16x2, 2); + case Type::F16x3: + return read(Opcode::CompositeExtractF16x3, 3); + case Type::F16x4: + return read(Opcode::CompositeExtractF16x4, 4); + case Type::F32x2: + return read(Opcode::CompositeExtractF32x2, 2); + case Type::F32x3: + return read(Opcode::CompositeExtractF32x3, 3); + case Type::F32x4: + return read(Opcode::CompositeExtractF32x4, 4); + case Type::F64x2: + return read(Opcode::CompositeExtractF64x2, 2); + case Type::F64x3: + return read(Opcode::CompositeExtractF64x3, 3); + case Type::F64x4: + return read(Opcode::CompositeExtractF64x4, 4); + default: + ThrowInvalidType(vector.Type()); + } +} + +Value IREmitter::CompositeInsert(const Value& vector, const Value& object, size_t element) { + const auto insert{[&](Opcode opcode, size_t limit) { + if (element >= limit) { + throw InvalidArgument("Out of bounds element {}", element); + } + return Inst(opcode, vector, object, Value{static_cast<u32>(element)}); + }}; + switch (vector.Type()) { + case Type::U32x2: + return insert(Opcode::CompositeInsertU32x2, 2); + case Type::U32x3: + return insert(Opcode::CompositeInsertU32x3, 3); + case Type::U32x4: + return insert(Opcode::CompositeInsertU32x4, 4); + case Type::F16x2: + return insert(Opcode::CompositeInsertF16x2, 2); + case Type::F16x3: + return insert(Opcode::CompositeInsertF16x3, 3); + case Type::F16x4: + return insert(Opcode::CompositeInsertF16x4, 4); + case Type::F32x2: + return insert(Opcode::CompositeInsertF32x2, 2); + case Type::F32x3: + return insert(Opcode::CompositeInsertF32x3, 3); + case Type::F32x4: + return insert(Opcode::CompositeInsertF32x4, 4); + case Type::F64x2: + return insert(Opcode::CompositeInsertF64x2, 2); + case Type::F64x3: + return insert(Opcode::CompositeInsertF64x3, 3); + case Type::F64x4: + return insert(Opcode::CompositeInsertF64x4, 4); + default: + ThrowInvalidType(vector.Type()); + } +} + +Value IREmitter::Select(const U1& condition, const Value& true_value, const Value& false_value) { + if (true_value.Type() != false_value.Type()) { + throw InvalidArgument("Mismatching types {} and {}", true_value.Type(), false_value.Type()); + } + switch (true_value.Type()) { + case Type::U1: + return Inst(Opcode::SelectU1, condition, true_value, false_value); + case Type::U8: + return Inst(Opcode::SelectU8, condition, true_value, false_value); + case Type::U16: + return Inst(Opcode::SelectU16, condition, true_value, false_value); + case Type::U32: + return Inst(Opcode::SelectU32, condition, true_value, false_value); + case Type::U64: + return Inst(Opcode::SelectU64, condition, true_value, false_value); + case Type::F32: + return Inst(Opcode::SelectF32, condition, true_value, false_value); + case 
Type::F64: + return Inst(Opcode::SelectF64, condition, true_value, false_value); + default: + throw InvalidArgument("Invalid type {}", true_value.Type()); + } +} + +template <> +IR::U32 IREmitter::BitCast<IR::U32, IR::F32>(const IR::F32& value) { + return Inst<IR::U32>(Opcode::BitCastU32F32, value); +} + +template <> +IR::F32 IREmitter::BitCast<IR::F32, IR::U32>(const IR::U32& value) { + return Inst<IR::F32>(Opcode::BitCastF32U32, value); +} + +template <> +IR::U16 IREmitter::BitCast<IR::U16, IR::F16>(const IR::F16& value) { + return Inst<IR::U16>(Opcode::BitCastU16F16, value); +} + +template <> +IR::F16 IREmitter::BitCast<IR::F16, IR::U16>(const IR::U16& value) { + return Inst<IR::F16>(Opcode::BitCastF16U16, value); +} + +template <> +IR::U64 IREmitter::BitCast<IR::U64, IR::F64>(const IR::F64& value) { + return Inst<IR::U64>(Opcode::BitCastU64F64, value); +} + +template <> +IR::F64 IREmitter::BitCast<IR::F64, IR::U64>(const IR::U64& value) { + return Inst<IR::F64>(Opcode::BitCastF64U64, value); +} + +U64 IREmitter::PackUint2x32(const Value& vector) { + return Inst<U64>(Opcode::PackUint2x32, vector); +} + +Value IREmitter::UnpackUint2x32(const U64& value) { + return Inst<Value>(Opcode::UnpackUint2x32, value); +} + +U32 IREmitter::PackFloat2x16(const Value& vector) { + return Inst<U32>(Opcode::PackFloat2x16, vector); +} + +Value IREmitter::UnpackFloat2x16(const U32& value) { + return Inst(Opcode::UnpackFloat2x16, value); +} + +U32 IREmitter::PackHalf2x16(const Value& vector) { + return Inst<U32>(Opcode::PackHalf2x16, vector); +} + +Value IREmitter::UnpackHalf2x16(const U32& value) { + return Inst(Opcode::UnpackHalf2x16, value); +} + +F64 IREmitter::PackDouble2x32(const Value& vector) { + return Inst<F64>(Opcode::PackDouble2x32, vector); +} + +Value IREmitter::UnpackDouble2x32(const F64& value) { + return Inst<Value>(Opcode::UnpackDouble2x32, value); +} + +F16F32F64 IREmitter::FPMul(const F16F32F64& a, const F16F32F64& b, FpControl control) { + if (a.Type() != b.Type()) { + throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); + } + switch (a.Type()) { + case Type::F16: + return Inst<F16>(Opcode::FPMul16, Flags{control}, a, b); + case Type::F32: + return Inst<F32>(Opcode::FPMul32, Flags{control}, a, b); + case Type::F64: + return Inst<F64>(Opcode::FPMul64, Flags{control}, a, b); + default: + ThrowInvalidType(a.Type()); + } +} + +F16F32F64 IREmitter::FPFma(const F16F32F64& a, const F16F32F64& b, const F16F32F64& c, + FpControl control) { + if (a.Type() != b.Type() || a.Type() != c.Type()) { + throw InvalidArgument("Mismatching types {}, {}, and {}", a.Type(), b.Type(), c.Type()); + } + switch (a.Type()) { + case Type::F16: + return Inst<F16>(Opcode::FPFma16, Flags{control}, a, b, c); + case Type::F32: + return Inst<F32>(Opcode::FPFma32, Flags{control}, a, b, c); + case Type::F64: + return Inst<F64>(Opcode::FPFma64, Flags{control}, a, b, c); + default: + ThrowInvalidType(a.Type()); + } +} + +F16F32F64 IREmitter::FPAbs(const F16F32F64& value) { + switch (value.Type()) { + case Type::F16: + return Inst<F16>(Opcode::FPAbs16, value); + case Type::F32: + return Inst<F32>(Opcode::FPAbs32, value); + case Type::F64: + return Inst<F64>(Opcode::FPAbs64, value); + default: + ThrowInvalidType(value.Type()); + } +} + +F16F32F64 IREmitter::FPNeg(const F16F32F64& value) { + switch (value.Type()) { + case Type::F16: + return Inst<F16>(Opcode::FPNeg16, value); + case Type::F32: + return Inst<F32>(Opcode::FPNeg32, value); + case Type::F64: + return Inst<F64>(Opcode::FPNeg64, value); + 
default: + ThrowInvalidType(value.Type()); + } +} + +F16F32F64 IREmitter::FPAbsNeg(const F16F32F64& value, bool abs, bool neg) { + F16F32F64 result{value}; + if (abs) { + result = FPAbs(result); + } + if (neg) { + result = FPNeg(result); + } + return result; +} + +F32 IREmitter::FPCos(const F32& value) { + return Inst<F32>(Opcode::FPCos, value); +} + +F32 IREmitter::FPSin(const F32& value) { + return Inst<F32>(Opcode::FPSin, value); +} + +F32 IREmitter::FPExp2(const F32& value) { + return Inst<F32>(Opcode::FPExp2, value); +} + +F32 IREmitter::FPLog2(const F32& value) { + return Inst<F32>(Opcode::FPLog2, value); +} + +F32F64 IREmitter::FPRecip(const F32F64& value) { + switch (value.Type()) { + case Type::F32: + return Inst<F32>(Opcode::FPRecip32, value); + case Type::F64: + return Inst<F64>(Opcode::FPRecip64, value); + default: + ThrowInvalidType(value.Type()); + } +} + +F32F64 IREmitter::FPRecipSqrt(const F32F64& value) { + switch (value.Type()) { + case Type::F32: + return Inst<F32>(Opcode::FPRecipSqrt32, value); + case Type::F64: + return Inst<F64>(Opcode::FPRecipSqrt64, value); + default: + ThrowInvalidType(value.Type()); + } +} + +F32 IREmitter::FPSqrt(const F32& value) { + return Inst<F32>(Opcode::FPSqrt, value); +} + +F16F32F64 IREmitter::FPSaturate(const F16F32F64& value) { + switch (value.Type()) { + case Type::F16: + return Inst<F16>(Opcode::FPSaturate16, value); + case Type::F32: + return Inst<F32>(Opcode::FPSaturate32, value); + case Type::F64: + return Inst<F64>(Opcode::FPSaturate64, value); + default: + ThrowInvalidType(value.Type()); + } +} + +F16F32F64 IREmitter::FPClamp(const F16F32F64& value, const F16F32F64& min_value, + const F16F32F64& max_value) { + if (value.Type() != min_value.Type() || value.Type() != max_value.Type()) { + throw InvalidArgument("Mismatching types {}, {}, and {}", value.Type(), min_value.Type(), + max_value.Type()); + } + switch (value.Type()) { + case Type::F16: + return Inst<F16>(Opcode::FPClamp16, value, min_value, max_value); + case Type::F32: + return Inst<F32>(Opcode::FPClamp32, value, min_value, max_value); + case Type::F64: + return Inst<F64>(Opcode::FPClamp64, value, min_value, max_value); + default: + ThrowInvalidType(value.Type()); + } +} + +F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value, FpControl control) { + switch (value.Type()) { + case Type::F16: + return Inst<F16>(Opcode::FPRoundEven16, Flags{control}, value); + case Type::F32: + return Inst<F32>(Opcode::FPRoundEven32, Flags{control}, value); + case Type::F64: + return Inst<F64>(Opcode::FPRoundEven64, Flags{control}, value); + default: + ThrowInvalidType(value.Type()); + } +} + +F16F32F64 IREmitter::FPFloor(const F16F32F64& value, FpControl control) { + switch (value.Type()) { + case Type::F16: + return Inst<F16>(Opcode::FPFloor16, Flags{control}, value); + case Type::F32: + return Inst<F32>(Opcode::FPFloor32, Flags{control}, value); + case Type::F64: + return Inst<F64>(Opcode::FPFloor64, Flags{control}, value); + default: + ThrowInvalidType(value.Type()); + } +} + +F16F32F64 IREmitter::FPCeil(const F16F32F64& value, FpControl control) { + switch (value.Type()) { + case Type::F16: + return Inst<F16>(Opcode::FPCeil16, Flags{control}, value); + case Type::F32: + return Inst<F32>(Opcode::FPCeil32, Flags{control}, value); + case Type::F64: + return Inst<F64>(Opcode::FPCeil64, Flags{control}, value); + default: + ThrowInvalidType(value.Type()); + } +} + +F16F32F64 IREmitter::FPTrunc(const F16F32F64& value, FpControl control) { + switch (value.Type()) { + case Type::F16: + return 
Inst<F16>(Opcode::FPTrunc16, Flags{control}, value); + case Type::F32: + return Inst<F32>(Opcode::FPTrunc32, Flags{control}, value); + case Type::F64: + return Inst<F64>(Opcode::FPTrunc64, Flags{control}, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U1 IREmitter::FPEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control, bool ordered) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::F16: + return Inst<U1>(ordered ? Opcode::FPOrdEqual16 : Opcode::FPUnordEqual16, Flags{control}, + lhs, rhs); + case Type::F32: + return Inst<U1>(ordered ? Opcode::FPOrdEqual32 : Opcode::FPUnordEqual32, Flags{control}, + lhs, rhs); + case Type::F64: + return Inst<U1>(ordered ? Opcode::FPOrdEqual64 : Opcode::FPUnordEqual64, Flags{control}, + lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + +U1 IREmitter::FPNotEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control, + bool ordered) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::F16: + return Inst<U1>(ordered ? Opcode::FPOrdNotEqual16 : Opcode::FPUnordNotEqual16, + Flags{control}, lhs, rhs); + case Type::F32: + return Inst<U1>(ordered ? Opcode::FPOrdNotEqual32 : Opcode::FPUnordNotEqual32, + Flags{control}, lhs, rhs); + case Type::F64: + return Inst<U1>(ordered ? Opcode::FPOrdNotEqual64 : Opcode::FPUnordNotEqual64, + Flags{control}, lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + +U1 IREmitter::FPLessThan(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control, + bool ordered) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::F16: + return Inst<U1>(ordered ? Opcode::FPOrdLessThan16 : Opcode::FPUnordLessThan16, + Flags{control}, lhs, rhs); + case Type::F32: + return Inst<U1>(ordered ? Opcode::FPOrdLessThan32 : Opcode::FPUnordLessThan32, + Flags{control}, lhs, rhs); + case Type::F64: + return Inst<U1>(ordered ? Opcode::FPOrdLessThan64 : Opcode::FPUnordLessThan64, + Flags{control}, lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + +U1 IREmitter::FPGreaterThan(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control, + bool ordered) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::F16: + return Inst<U1>(ordered ? Opcode::FPOrdGreaterThan16 : Opcode::FPUnordGreaterThan16, + Flags{control}, lhs, rhs); + case Type::F32: + return Inst<U1>(ordered ? Opcode::FPOrdGreaterThan32 : Opcode::FPUnordGreaterThan32, + Flags{control}, lhs, rhs); + case Type::F64: + return Inst<U1>(ordered ? Opcode::FPOrdGreaterThan64 : Opcode::FPUnordGreaterThan64, + Flags{control}, lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + +U1 IREmitter::FPLessThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control, + bool ordered) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::F16: + return Inst<U1>(ordered ? Opcode::FPOrdLessThanEqual16 : Opcode::FPUnordLessThanEqual16, + Flags{control}, lhs, rhs); + case Type::F32: + return Inst<U1>(ordered ? 
Opcode::FPOrdLessThanEqual32 : Opcode::FPUnordLessThanEqual32, + Flags{control}, lhs, rhs); + case Type::F64: + return Inst<U1>(ordered ? Opcode::FPOrdLessThanEqual64 : Opcode::FPUnordLessThanEqual64, + Flags{control}, lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + +U1 IREmitter::FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control, + bool ordered) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::F16: + return Inst<U1>(ordered ? Opcode::FPOrdGreaterThanEqual16 + : Opcode::FPUnordGreaterThanEqual16, + Flags{control}, lhs, rhs); + case Type::F32: + return Inst<U1>(ordered ? Opcode::FPOrdGreaterThanEqual32 + : Opcode::FPUnordGreaterThanEqual32, + Flags{control}, lhs, rhs); + case Type::F64: + return Inst<U1>(ordered ? Opcode::FPOrdGreaterThanEqual64 + : Opcode::FPUnordGreaterThanEqual64, + Flags{control}, lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + +U1 IREmitter::FPIsNan(const F16F32F64& value) { + switch (value.Type()) { + case Type::F16: + return Inst<U1>(Opcode::FPIsNan16, value); + case Type::F32: + return Inst<U1>(Opcode::FPIsNan32, value); + case Type::F64: + return Inst<U1>(Opcode::FPIsNan64, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U1 IREmitter::FPOrdered(const F16F32F64& lhs, const F16F32F64& rhs) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + return LogicalAnd(LogicalNot(FPIsNan(lhs)), LogicalNot(FPIsNan(rhs))); +} + +U1 IREmitter::FPUnordered(const F16F32F64& lhs, const F16F32F64& rhs) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + return LogicalOr(FPIsNan(lhs), FPIsNan(rhs)); +} + +F32F64 IREmitter::FPMax(const F32F64& lhs, const F32F64& rhs, FpControl control) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::F32: + return Inst<F32>(Opcode::FPMax32, Flags{control}, lhs, rhs); + case Type::F64: + return Inst<F64>(Opcode::FPMax64, Flags{control}, lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + +F32F64 IREmitter::FPMin(const F32F64& lhs, const F32F64& rhs, FpControl control) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::F32: + return Inst<F32>(Opcode::FPMin32, Flags{control}, lhs, rhs); + case Type::F64: + return Inst<F64>(Opcode::FPMin64, Flags{control}, lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + +U32U64 IREmitter::IAdd(const U32U64& a, const U32U64& b) { + if (a.Type() != b.Type()) { + throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); + } + switch (a.Type()) { + case Type::U32: + return Inst<U32>(Opcode::IAdd32, a, b); + case Type::U64: + return Inst<U64>(Opcode::IAdd64, a, b); + default: + ThrowInvalidType(a.Type()); + } +} + +U32U64 IREmitter::ISub(const U32U64& a, const U32U64& b) { + if (a.Type() != b.Type()) { + throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); + } + switch (a.Type()) { + case Type::U32: + return Inst<U32>(Opcode::ISub32, a, b); + case Type::U64: + return Inst<U64>(Opcode::ISub64, a, b); + default: + ThrowInvalidType(a.Type()); + } +} + +U32 IREmitter::IMul(const U32& a, const U32& b) { + return Inst<U32>(Opcode::IMul32, a, b); 
+} + +U32U64 IREmitter::INeg(const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst<U32>(Opcode::INeg32, value); + case Type::U64: + return Inst<U64>(Opcode::INeg64, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32 IREmitter::IAbs(const U32& value) { + return Inst<U32>(Opcode::IAbs32, value); +} + +U32U64 IREmitter::ShiftLeftLogical(const U32U64& base, const U32& shift) { + switch (base.Type()) { + case Type::U32: + return Inst<U32>(Opcode::ShiftLeftLogical32, base, shift); + case Type::U64: + return Inst<U64>(Opcode::ShiftLeftLogical64, base, shift); + default: + ThrowInvalidType(base.Type()); + } +} + +U32U64 IREmitter::ShiftRightLogical(const U32U64& base, const U32& shift) { + switch (base.Type()) { + case Type::U32: + return Inst<U32>(Opcode::ShiftRightLogical32, base, shift); + case Type::U64: + return Inst<U64>(Opcode::ShiftRightLogical64, base, shift); + default: + ThrowInvalidType(base.Type()); + } +} + +U32U64 IREmitter::ShiftRightArithmetic(const U32U64& base, const U32& shift) { + switch (base.Type()) { + case Type::U32: + return Inst<U32>(Opcode::ShiftRightArithmetic32, base, shift); + case Type::U64: + return Inst<U64>(Opcode::ShiftRightArithmetic64, base, shift); + default: + ThrowInvalidType(base.Type()); + } +} + +U32 IREmitter::BitwiseAnd(const U32& a, const U32& b) { + return Inst<U32>(Opcode::BitwiseAnd32, a, b); +} + +U32 IREmitter::BitwiseOr(const U32& a, const U32& b) { + return Inst<U32>(Opcode::BitwiseOr32, a, b); +} + +U32 IREmitter::BitwiseXor(const U32& a, const U32& b) { + return Inst<U32>(Opcode::BitwiseXor32, a, b); +} + +U32 IREmitter::BitFieldInsert(const U32& base, const U32& insert, const U32& offset, + const U32& count) { + return Inst<U32>(Opcode::BitFieldInsert, base, insert, offset, count); +} + +U32 IREmitter::BitFieldExtract(const U32& base, const U32& offset, const U32& count, + bool is_signed) { + return Inst<U32>(is_signed ? Opcode::BitFieldSExtract : Opcode::BitFieldUExtract, base, offset, + count); +} + +U32 IREmitter::BitReverse(const U32& value) { + return Inst<U32>(Opcode::BitReverse32, value); +} + +U32 IREmitter::BitCount(const U32& value) { + return Inst<U32>(Opcode::BitCount32, value); +} + +U32 IREmitter::BitwiseNot(const U32& value) { + return Inst<U32>(Opcode::BitwiseNot32, value); +} + +U32 IREmitter::FindSMsb(const U32& value) { + return Inst<U32>(Opcode::FindSMsb32, value); +} + +U32 IREmitter::FindUMsb(const U32& value) { + return Inst<U32>(Opcode::FindUMsb32, value); +} + +U32 IREmitter::SMin(const U32& a, const U32& b) { + return Inst<U32>(Opcode::SMin32, a, b); +} + +U32 IREmitter::UMin(const U32& a, const U32& b) { + return Inst<U32>(Opcode::UMin32, a, b); +} + +U32 IREmitter::IMin(const U32& a, const U32& b, bool is_signed) { + return is_signed ? SMin(a, b) : UMin(a, b); +} + +U32 IREmitter::SMax(const U32& a, const U32& b) { + return Inst<U32>(Opcode::SMax32, a, b); +} + +U32 IREmitter::UMax(const U32& a, const U32& b) { + return Inst<U32>(Opcode::UMax32, a, b); +} + +U32 IREmitter::IMax(const U32& a, const U32& b, bool is_signed) { + return is_signed ? SMax(a, b) : UMax(a, b); +} + +U32 IREmitter::SClamp(const U32& value, const U32& min, const U32& max) { + return Inst<U32>(Opcode::SClamp32, value, min, max); +} + +U32 IREmitter::UClamp(const U32& value, const U32& min, const U32& max) { + return Inst<U32>(Opcode::UClamp32, value, min, max); +} + +U1 IREmitter::ILessThan(const U32& lhs, const U32& rhs, bool is_signed) { + return Inst<U1>(is_signed ? 
Opcode::SLessThan : Opcode::ULessThan, lhs, rhs); +} + +U1 IREmitter::IEqual(const U32U64& lhs, const U32U64& rhs) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::U32: + return Inst<U1>(Opcode::IEqual, lhs, rhs); + case Type::U64: { + // Manually compare the unpacked values + const Value lhs_vector{UnpackUint2x32(lhs)}; + const Value rhs_vector{UnpackUint2x32(rhs)}; + return LogicalAnd(IEqual(IR::U32{CompositeExtract(lhs_vector, 0)}, + IR::U32{CompositeExtract(rhs_vector, 0)}), + IEqual(IR::U32{CompositeExtract(lhs_vector, 1)}, + IR::U32{CompositeExtract(rhs_vector, 1)})); + } + default: + ThrowInvalidType(lhs.Type()); + } +} + +U1 IREmitter::ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed) { + return Inst<U1>(is_signed ? Opcode::SLessThanEqual : Opcode::ULessThanEqual, lhs, rhs); +} + +U1 IREmitter::IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed) { + return Inst<U1>(is_signed ? Opcode::SGreaterThan : Opcode::UGreaterThan, lhs, rhs); +} + +U1 IREmitter::INotEqual(const U32& lhs, const U32& rhs) { + return Inst<U1>(Opcode::INotEqual, lhs, rhs); +} + +U1 IREmitter::IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed) { + return Inst<U1>(is_signed ? Opcode::SGreaterThanEqual : Opcode::UGreaterThanEqual, lhs, rhs); +} + +U32 IREmitter::SharedAtomicIAdd(const U32& pointer_offset, const U32& value) { + return Inst<U32>(Opcode::SharedAtomicIAdd32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicSMin(const U32& pointer_offset, const U32& value) { + return Inst<U32>(Opcode::SharedAtomicSMin32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicUMin(const U32& pointer_offset, const U32& value) { + return Inst<U32>(Opcode::SharedAtomicUMin32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicIMin(const U32& pointer_offset, const U32& value, bool is_signed) { + return is_signed ? SharedAtomicSMin(pointer_offset, value) + : SharedAtomicUMin(pointer_offset, value); +} + +U32 IREmitter::SharedAtomicSMax(const U32& pointer_offset, const U32& value) { + return Inst<U32>(Opcode::SharedAtomicSMax32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicUMax(const U32& pointer_offset, const U32& value) { + return Inst<U32>(Opcode::SharedAtomicUMax32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicIMax(const U32& pointer_offset, const U32& value, bool is_signed) { + return is_signed ? 
SharedAtomicSMax(pointer_offset, value) + : SharedAtomicUMax(pointer_offset, value); +} + +U32 IREmitter::SharedAtomicInc(const U32& pointer_offset, const U32& value) { + return Inst<U32>(Opcode::SharedAtomicInc32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicDec(const U32& pointer_offset, const U32& value) { + return Inst<U32>(Opcode::SharedAtomicDec32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicAnd(const U32& pointer_offset, const U32& value) { + return Inst<U32>(Opcode::SharedAtomicAnd32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicOr(const U32& pointer_offset, const U32& value) { + return Inst<U32>(Opcode::SharedAtomicOr32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicXor(const U32& pointer_offset, const U32& value) { + return Inst<U32>(Opcode::SharedAtomicXor32, pointer_offset, value); +} + +U32U64 IREmitter::SharedAtomicExchange(const U32& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst<U32>(Opcode::SharedAtomicExchange32, pointer_offset, value); + case Type::U64: + return Inst<U64>(Opcode::SharedAtomicExchange64, pointer_offset, value); + default: + ThrowInvalidType(pointer_offset.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicIAdd(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst<U32>(Opcode::GlobalAtomicIAdd32, pointer_offset, value); + case Type::U64: + return Inst<U64>(Opcode::GlobalAtomicIAdd64, pointer_offset, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicSMin(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst<U32>(Opcode::GlobalAtomicSMin32, pointer_offset, value); + case Type::U64: + return Inst<U64>(Opcode::GlobalAtomicSMin64, pointer_offset, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicUMin(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst<U32>(Opcode::GlobalAtomicUMin32, pointer_offset, value); + case Type::U64: + return Inst<U64>(Opcode::GlobalAtomicUMin64, pointer_offset, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicIMin(const U64& pointer_offset, const U32U64& value, bool is_signed) { + return is_signed ? GlobalAtomicSMin(pointer_offset, value) + : GlobalAtomicUMin(pointer_offset, value); +} + +U32U64 IREmitter::GlobalAtomicSMax(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst<U32>(Opcode::GlobalAtomicSMax32, pointer_offset, value); + case Type::U64: + return Inst<U64>(Opcode::GlobalAtomicSMax64, pointer_offset, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicUMax(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst<U32>(Opcode::GlobalAtomicUMax32, pointer_offset, value); + case Type::U64: + return Inst<U64>(Opcode::GlobalAtomicUMax64, pointer_offset, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicIMax(const U64& pointer_offset, const U32U64& value, bool is_signed) { + return is_signed ? 
GlobalAtomicSMax(pointer_offset, value) + : GlobalAtomicUMax(pointer_offset, value); +} + +U32 IREmitter::GlobalAtomicInc(const U64& pointer_offset, const U32& value) { + return Inst<U32>(Opcode::GlobalAtomicInc32, pointer_offset, value); +} + +U32 IREmitter::GlobalAtomicDec(const U64& pointer_offset, const U32& value) { + return Inst<U32>(Opcode::GlobalAtomicDec32, pointer_offset, value); +} + +U32U64 IREmitter::GlobalAtomicAnd(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst<U32>(Opcode::GlobalAtomicAnd32, pointer_offset, value); + case Type::U64: + return Inst<U64>(Opcode::GlobalAtomicAnd64, pointer_offset, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicOr(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst<U32>(Opcode::GlobalAtomicOr32, pointer_offset, value); + case Type::U64: + return Inst<U64>(Opcode::GlobalAtomicOr64, pointer_offset, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicXor(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst<U32>(Opcode::GlobalAtomicXor32, pointer_offset, value); + case Type::U64: + return Inst<U64>(Opcode::GlobalAtomicXor64, pointer_offset, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicExchange(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst<U32>(Opcode::GlobalAtomicExchange32, pointer_offset, value); + case Type::U64: + return Inst<U64>(Opcode::GlobalAtomicExchange64, pointer_offset, value); + default: + ThrowInvalidType(pointer_offset.Type()); + } +} + +F32 IREmitter::GlobalAtomicF32Add(const U64& pointer_offset, const Value& value, + const FpControl control) { + return Inst<F32>(Opcode::GlobalAtomicAddF32, Flags{control}, pointer_offset, value); +} + +Value IREmitter::GlobalAtomicF16x2Add(const U64& pointer_offset, const Value& value, + const FpControl control) { + return Inst(Opcode::GlobalAtomicAddF16x2, Flags{control}, pointer_offset, value); +} + +Value IREmitter::GlobalAtomicF16x2Min(const U64& pointer_offset, const Value& value, + const FpControl control) { + return Inst(Opcode::GlobalAtomicMinF16x2, Flags{control}, pointer_offset, value); +} + +Value IREmitter::GlobalAtomicF16x2Max(const U64& pointer_offset, const Value& value, + const FpControl control) { + return Inst(Opcode::GlobalAtomicMaxF16x2, Flags{control}, pointer_offset, value); +} + +U1 IREmitter::LogicalOr(const U1& a, const U1& b) { + return Inst<U1>(Opcode::LogicalOr, a, b); +} + +U1 IREmitter::LogicalAnd(const U1& a, const U1& b) { + return Inst<U1>(Opcode::LogicalAnd, a, b); +} + +U1 IREmitter::LogicalXor(const U1& a, const U1& b) { + return Inst<U1>(Opcode::LogicalXor, a, b); +} + +U1 IREmitter::LogicalNot(const U1& value) { + return Inst<U1>(Opcode::LogicalNot, value); +} + +U32U64 IREmitter::ConvertFToS(size_t bitsize, const F16F32F64& value) { + switch (bitsize) { + case 16: + switch (value.Type()) { + case Type::F16: + return Inst<U32>(Opcode::ConvertS16F16, value); + case Type::F32: + return Inst<U32>(Opcode::ConvertS16F32, value); + case Type::F64: + return Inst<U32>(Opcode::ConvertS16F64, value); + default: + ThrowInvalidType(value.Type()); + } + case 32: + switch (value.Type()) { + case Type::F16: + return Inst<U32>(Opcode::ConvertS32F16, value); + case Type::F32: + return Inst<U32>(Opcode::ConvertS32F32, 
value); + case Type::F64: + return Inst<U32>(Opcode::ConvertS32F64, value); + default: + ThrowInvalidType(value.Type()); + } + case 64: + switch (value.Type()) { + case Type::F16: + return Inst<U64>(Opcode::ConvertS64F16, value); + case Type::F32: + return Inst<U64>(Opcode::ConvertS64F32, value); + case Type::F64: + return Inst<U64>(Opcode::ConvertS64F64, value); + default: + ThrowInvalidType(value.Type()); + } + default: + throw InvalidArgument("Invalid destination bitsize {}", bitsize); + } +} + +U32U64 IREmitter::ConvertFToU(size_t bitsize, const F16F32F64& value) { + switch (bitsize) { + case 16: + switch (value.Type()) { + case Type::F16: + return Inst<U32>(Opcode::ConvertU16F16, value); + case Type::F32: + return Inst<U32>(Opcode::ConvertU16F32, value); + case Type::F64: + return Inst<U32>(Opcode::ConvertU16F64, value); + default: + ThrowInvalidType(value.Type()); + } + case 32: + switch (value.Type()) { + case Type::F16: + return Inst<U32>(Opcode::ConvertU32F16, value); + case Type::F32: + return Inst<U32>(Opcode::ConvertU32F32, value); + case Type::F64: + return Inst<U32>(Opcode::ConvertU32F64, value); + default: + ThrowInvalidType(value.Type()); + } + case 64: + switch (value.Type()) { + case Type::F16: + return Inst<U64>(Opcode::ConvertU64F16, value); + case Type::F32: + return Inst<U64>(Opcode::ConvertU64F32, value); + case Type::F64: + return Inst<U64>(Opcode::ConvertU64F64, value); + default: + ThrowInvalidType(value.Type()); + } + default: + throw InvalidArgument("Invalid destination bitsize {}", bitsize); + } +} + +U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value) { + return is_signed ? ConvertFToS(bitsize, value) : ConvertFToU(bitsize, value); +} + +F16F32F64 IREmitter::ConvertSToF(size_t dest_bitsize, size_t src_bitsize, const Value& value, + FpControl control) { + switch (dest_bitsize) { + case 16: + switch (src_bitsize) { + case 8: + return Inst<F16>(Opcode::ConvertF16S8, Flags{control}, value); + case 16: + return Inst<F16>(Opcode::ConvertF16S16, Flags{control}, value); + case 32: + return Inst<F16>(Opcode::ConvertF16S32, Flags{control}, value); + case 64: + return Inst<F16>(Opcode::ConvertF16S64, Flags{control}, value); + } + break; + case 32: + switch (src_bitsize) { + case 8: + return Inst<F32>(Opcode::ConvertF32S8, Flags{control}, value); + case 16: + return Inst<F32>(Opcode::ConvertF32S16, Flags{control}, value); + case 32: + return Inst<F32>(Opcode::ConvertF32S32, Flags{control}, value); + case 64: + return Inst<F32>(Opcode::ConvertF32S64, Flags{control}, value); + } + break; + case 64: + switch (src_bitsize) { + case 8: + return Inst<F64>(Opcode::ConvertF64S8, Flags{control}, value); + case 16: + return Inst<F64>(Opcode::ConvertF64S16, Flags{control}, value); + case 32: + return Inst<F64>(Opcode::ConvertF64S32, Flags{control}, value); + case 64: + return Inst<F64>(Opcode::ConvertF64S64, Flags{control}, value); + } + break; + } + throw InvalidArgument("Invalid bit size combination dst={} src={}", dest_bitsize, src_bitsize); +} + +F16F32F64 IREmitter::ConvertUToF(size_t dest_bitsize, size_t src_bitsize, const Value& value, + FpControl control) { + switch (dest_bitsize) { + case 16: + switch (src_bitsize) { + case 8: + return Inst<F16>(Opcode::ConvertF16U8, Flags{control}, value); + case 16: + return Inst<F16>(Opcode::ConvertF16U16, Flags{control}, value); + case 32: + return Inst<F16>(Opcode::ConvertF16U32, Flags{control}, value); + case 64: + return Inst<F16>(Opcode::ConvertF16U64, Flags{control}, value); + } + break; + case 32: 
+ switch (src_bitsize) { + case 8: + return Inst<F32>(Opcode::ConvertF32U8, Flags{control}, value); + case 16: + return Inst<F32>(Opcode::ConvertF32U16, Flags{control}, value); + case 32: + return Inst<F32>(Opcode::ConvertF32U32, Flags{control}, value); + case 64: + return Inst<F32>(Opcode::ConvertF32U64, Flags{control}, value); + } + break; + case 64: + switch (src_bitsize) { + case 8: + return Inst<F64>(Opcode::ConvertF64U8, Flags{control}, value); + case 16: + return Inst<F64>(Opcode::ConvertF64U16, Flags{control}, value); + case 32: + return Inst<F64>(Opcode::ConvertF64U32, Flags{control}, value); + case 64: + return Inst<F64>(Opcode::ConvertF64U64, Flags{control}, value); + } + break; + } + throw InvalidArgument("Invalid bit size combination dst={} src={}", dest_bitsize, src_bitsize); +} + +F16F32F64 IREmitter::ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_signed, + const Value& value, FpControl control) { + return is_signed ? ConvertSToF(dest_bitsize, src_bitsize, value, control) + : ConvertUToF(dest_bitsize, src_bitsize, value, control); +} + +U32U64 IREmitter::UConvert(size_t result_bitsize, const U32U64& value) { + switch (result_bitsize) { + case 32: + switch (value.Type()) { + case Type::U32: + // Nothing to do + return value; + case Type::U64: + return Inst<U32>(Opcode::ConvertU32U64, value); + default: + break; + } + break; + case 64: + switch (value.Type()) { + case Type::U32: + return Inst<U64>(Opcode::ConvertU64U32, value); + case Type::U64: + // Nothing to do + return value; + default: + break; + } + } + throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize); +} + +F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value, FpControl control) { + switch (result_bitsize) { + case 16: + switch (value.Type()) { + case Type::F16: + // Nothing to do + return value; + case Type::F32: + return Inst<F16>(Opcode::ConvertF16F32, Flags{control}, value); + case Type::F64: + throw LogicError("Illegal conversion from F64 to F16"); + default: + break; + } + break; + case 32: + switch (value.Type()) { + case Type::F16: + return Inst<F32>(Opcode::ConvertF32F16, Flags{control}, value); + case Type::F32: + // Nothing to do + return value; + case Type::F64: + return Inst<F32>(Opcode::ConvertF32F64, Flags{control}, value); + default: + break; + } + break; + case 64: + switch (value.Type()) { + case Type::F16: + throw LogicError("Illegal conversion from F16 to F64"); + case Type::F32: + return Inst<F64>(Opcode::ConvertF64F32, Flags{control}, value); + case Type::F64: + // Nothing to do + return value; + default: + break; + } + break; + } + throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize); +} + +Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& coords, const F32& bias, + const Value& offset, const F32& lod_clamp, + TextureInstInfo info) { + const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)}; + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleImplicitLod + : Opcode::BindlessImageSampleImplicitLod}; + return Inst(op, Flags{info}, handle, coords, bias_lc, offset); +} + +Value IREmitter::ImageSampleExplicitLod(const Value& handle, const Value& coords, const F32& lod, + const Value& offset, TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? 
Opcode::BoundImageSampleExplicitLod + : Opcode::BindlessImageSampleExplicitLod}; + return Inst(op, Flags{info}, handle, coords, lod, offset); +} + +F32 IREmitter::ImageSampleDrefImplicitLod(const Value& handle, const Value& coords, const F32& dref, + const F32& bias, const Value& offset, + const F32& lod_clamp, TextureInstInfo info) { + const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)}; + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleDrefImplicitLod + : Opcode::BindlessImageSampleDrefImplicitLod}; + return Inst<F32>(op, Flags{info}, handle, coords, dref, bias_lc, offset); +} + +F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, const F32& dref, + const F32& lod, const Value& offset, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleDrefExplicitLod + : Opcode::BindlessImageSampleDrefExplicitLod}; + return Inst<F32>(op, Flags{info}, handle, coords, dref, lod, offset); +} + +Value IREmitter::ImageGather(const Value& handle, const Value& coords, const Value& offset, + const Value& offset2, TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageGather : Opcode::BindlessImageGather}; + return Inst(op, Flags{info}, handle, coords, offset, offset2); +} + +Value IREmitter::ImageGatherDref(const Value& handle, const Value& coords, const Value& offset, + const Value& offset2, const F32& dref, TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageGatherDref + : Opcode::BindlessImageGatherDref}; + return Inst(op, Flags{info}, handle, coords, offset, offset2, dref); +} + +Value IREmitter::ImageFetch(const Value& handle, const Value& coords, const Value& offset, + const U32& lod, const U32& multisampling, TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageFetch : Opcode::BindlessImageFetch}; + return Inst(op, Flags{info}, handle, coords, offset, lod, multisampling); +} + +Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageQueryDimensions + : Opcode::BindlessImageQueryDimensions}; + return Inst(op, handle, lod); +} + +Value IREmitter::ImageQueryLod(const Value& handle, const Value& coords, TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageQueryLod + : Opcode::BindlessImageQueryLod}; + return Inst(op, Flags{info}, handle, coords); +} + +Value IREmitter::ImageGradient(const Value& handle, const Value& coords, const Value& derivates, + const Value& offset, const F32& lod_clamp, TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageGradient + : Opcode::BindlessImageGradient}; + return Inst(op, Flags{info}, handle, coords, derivates, offset, lod_clamp); +} + +Value IREmitter::ImageRead(const Value& handle, const Value& coords, TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageRead : Opcode::BindlessImageRead}; + return Inst(op, Flags{info}, handle, coords); +} + +void IREmitter::ImageWrite(const Value& handle, const Value& coords, const Value& color, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageWrite : Opcode::BindlessImageWrite}; + Inst(op, Flags{info}, handle, coords, color); +} + +Value IREmitter::ImageAtomicIAdd(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? 
Opcode::BoundImageAtomicIAdd32 + : Opcode::BindlessImageAtomicIAdd32}; + return Inst(op, Flags{info}, handle, coords, value); +} + +Value IREmitter::ImageAtomicSMin(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicSMin32 + : Opcode::BindlessImageAtomicSMin32}; + return Inst(op, Flags{info}, handle, coords, value); +} + +Value IREmitter::ImageAtomicUMin(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicUMin32 + : Opcode::BindlessImageAtomicUMin32}; + return Inst(op, Flags{info}, handle, coords, value); +} + +Value IREmitter::ImageAtomicIMin(const Value& handle, const Value& coords, const Value& value, + bool is_signed, TextureInstInfo info) { + return is_signed ? ImageAtomicSMin(handle, coords, value, info) + : ImageAtomicUMin(handle, coords, value, info); +} + +Value IREmitter::ImageAtomicSMax(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicSMax32 + : Opcode::BindlessImageAtomicSMax32}; + return Inst(op, Flags{info}, handle, coords, value); +} + +Value IREmitter::ImageAtomicUMax(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicUMax32 + : Opcode::BindlessImageAtomicUMax32}; + return Inst(op, Flags{info}, handle, coords, value); +} + +Value IREmitter::ImageAtomicIMax(const Value& handle, const Value& coords, const Value& value, + bool is_signed, TextureInstInfo info) { + return is_signed ? ImageAtomicSMax(handle, coords, value, info) + : ImageAtomicUMax(handle, coords, value, info); +} + +Value IREmitter::ImageAtomicInc(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicInc32 + : Opcode::BindlessImageAtomicInc32}; + return Inst(op, Flags{info}, handle, coords, value); +} + +Value IREmitter::ImageAtomicDec(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicDec32 + : Opcode::BindlessImageAtomicDec32}; + return Inst(op, Flags{info}, handle, coords, value); +} + +Value IREmitter::ImageAtomicAnd(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicAnd32 + : Opcode::BindlessImageAtomicAnd32}; + return Inst(op, Flags{info}, handle, coords, value); +} + +Value IREmitter::ImageAtomicOr(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicOr32 + : Opcode::BindlessImageAtomicOr32}; + return Inst(op, Flags{info}, handle, coords, value); +} + +Value IREmitter::ImageAtomicXor(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicXor32 + : Opcode::BindlessImageAtomicXor32}; + return Inst(op, Flags{info}, handle, coords, value); +} + +Value IREmitter::ImageAtomicExchange(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? 
Opcode::BoundImageAtomicExchange32 + : Opcode::BindlessImageAtomicExchange32}; + return Inst(op, Flags{info}, handle, coords, value); +} + +U1 IREmitter::VoteAll(const U1& value) { + return Inst<U1>(Opcode::VoteAll, value); +} + +U1 IREmitter::VoteAny(const U1& value) { + return Inst<U1>(Opcode::VoteAny, value); +} + +U1 IREmitter::VoteEqual(const U1& value) { + return Inst<U1>(Opcode::VoteEqual, value); +} + +U32 IREmitter::SubgroupBallot(const U1& value) { + return Inst<U32>(Opcode::SubgroupBallot, value); +} + +U32 IREmitter::SubgroupEqMask() { + return Inst<U32>(Opcode::SubgroupEqMask); +} + +U32 IREmitter::SubgroupLtMask() { + return Inst<U32>(Opcode::SubgroupLtMask); +} + +U32 IREmitter::SubgroupLeMask() { + return Inst<U32>(Opcode::SubgroupLeMask); +} + +U32 IREmitter::SubgroupGtMask() { + return Inst<U32>(Opcode::SubgroupGtMask); +} + +U32 IREmitter::SubgroupGeMask() { + return Inst<U32>(Opcode::SubgroupGeMask); +} + +U32 IREmitter::ShuffleIndex(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, + const IR::U32& seg_mask) { + return Inst<U32>(Opcode::ShuffleIndex, value, index, clamp, seg_mask); +} + +U32 IREmitter::ShuffleUp(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, + const IR::U32& seg_mask) { + return Inst<U32>(Opcode::ShuffleUp, value, index, clamp, seg_mask); +} + +U32 IREmitter::ShuffleDown(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, + const IR::U32& seg_mask) { + return Inst<U32>(Opcode::ShuffleDown, value, index, clamp, seg_mask); +} + +U32 IREmitter::ShuffleButterfly(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, + const IR::U32& seg_mask) { + return Inst<U32>(Opcode::ShuffleButterfly, value, index, clamp, seg_mask); +} + +F32 IREmitter::FSwizzleAdd(const F32& a, const F32& b, const U32& swizzle, FpControl control) { + return Inst<F32>(Opcode::FSwizzleAdd, Flags{control}, a, b, swizzle); +} + +F32 IREmitter::DPdxFine(const F32& a) { + return Inst<F32>(Opcode::DPdxFine, a); +} + +F32 IREmitter::DPdyFine(const F32& a) { + return Inst<F32>(Opcode::DPdyFine, a); +} + +F32 IREmitter::DPdxCoarse(const F32& a) { + return Inst<F32>(Opcode::DPdxCoarse, a); +} + +F32 IREmitter::DPdyCoarse(const F32& a) { + return Inst<F32>(Opcode::DPdyCoarse, a); +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h new file mode 100644 index 000000000..53f7b3b06 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -0,0 +1,413 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <cstring> +#include <type_traits> + +#include "shader_recompiler/frontend/ir/attribute.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::IR { + +class IREmitter { +public: + explicit IREmitter(Block& block_) : block{&block_}, insertion_point{block->end()} {} + explicit IREmitter(Block& block_, Block::iterator insertion_point_) + : block{&block_}, insertion_point{insertion_point_} {} + + Block* block; + + [[nodiscard]] U1 Imm1(bool value) const; + [[nodiscard]] U8 Imm8(u8 value) const; + [[nodiscard]] U16 Imm16(u16 value) const; + [[nodiscard]] U32 Imm32(u32 value) const; + [[nodiscard]] U32 Imm32(s32 value) const; + [[nodiscard]] F32 Imm32(f32 value) const; + [[nodiscard]] U64 Imm64(u64 value) const; + [[nodiscard]] U64 Imm64(s64 value) const; + [[nodiscard]] F64 Imm64(f64 value) const; + + U1 ConditionRef(const U1& value); + void Reference(const Value& value); + + void PhiMove(IR::Inst& phi, const Value& value); + + void Prologue(); + void Epilogue(); + void DemoteToHelperInvocation(); + void EmitVertex(const U32& stream); + void EndPrimitive(const U32& stream); + + [[nodiscard]] U32 GetReg(IR::Reg reg); + void SetReg(IR::Reg reg, const U32& value); + + [[nodiscard]] U1 GetPred(IR::Pred pred, bool is_negated = false); + void SetPred(IR::Pred pred, const U1& value); + + [[nodiscard]] U1 GetGotoVariable(u32 id); + void SetGotoVariable(u32 id, const U1& value); + + [[nodiscard]] U32 GetIndirectBranchVariable(); + void SetIndirectBranchVariable(const U32& value); + + [[nodiscard]] U32 GetCbuf(const U32& binding, const U32& byte_offset); + [[nodiscard]] Value GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize, + bool is_signed); + [[nodiscard]] F32 GetFloatCbuf(const U32& binding, const U32& byte_offset); + + [[nodiscard]] U1 GetZFlag(); + [[nodiscard]] U1 GetSFlag(); + [[nodiscard]] U1 GetCFlag(); + [[nodiscard]] U1 GetOFlag(); + + void SetZFlag(const U1& value); + void SetSFlag(const U1& value); + void SetCFlag(const U1& value); + void SetOFlag(const U1& value); + + [[nodiscard]] U1 Condition(IR::Condition cond); + [[nodiscard]] U1 GetFlowTestResult(FlowTest test); + + [[nodiscard]] F32 GetAttribute(IR::Attribute attribute); + [[nodiscard]] F32 GetAttribute(IR::Attribute attribute, const U32& vertex); + void SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex); + + [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address); + [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address, const U32& vertex); + void SetAttributeIndexed(const U32& phys_address, const F32& value, const U32& vertex); + + [[nodiscard]] F32 GetPatch(Patch patch); + void SetPatch(Patch patch, const F32& value); + + void SetFragColor(u32 index, u32 component, const F32& value); + void SetSampleMask(const U32& value); + void SetFragDepth(const F32& value); + + [[nodiscard]] U32 WorkgroupIdX(); + [[nodiscard]] U32 WorkgroupIdY(); + [[nodiscard]] U32 WorkgroupIdZ(); + + [[nodiscard]] Value LocalInvocationId(); + [[nodiscard]] U32 LocalInvocationIdX(); + [[nodiscard]] U32 LocalInvocationIdY(); + [[nodiscard]] U32 LocalInvocationIdZ(); + + [[nodiscard]] U32 InvocationId(); + [[nodiscard]] U32 SampleId(); + [[nodiscard]] U1 IsHelperInvocation(); + [[nodiscard]] F32 YDirection(); + + [[nodiscard]] U32 LaneId(); + + [[nodiscard]] U32 LoadGlobalU8(const U64& address); + [[nodiscard]] U32 LoadGlobalS8(const U64& address); + 
[[nodiscard]] U32 LoadGlobalU16(const U64& address); + [[nodiscard]] U32 LoadGlobalS16(const U64& address); + [[nodiscard]] U32 LoadGlobal32(const U64& address); + [[nodiscard]] Value LoadGlobal64(const U64& address); + [[nodiscard]] Value LoadGlobal128(const U64& address); + + void WriteGlobalU8(const U64& address, const U32& value); + void WriteGlobalS8(const U64& address, const U32& value); + void WriteGlobalU16(const U64& address, const U32& value); + void WriteGlobalS16(const U64& address, const U32& value); + void WriteGlobal32(const U64& address, const U32& value); + void WriteGlobal64(const U64& address, const IR::Value& vector); + void WriteGlobal128(const U64& address, const IR::Value& vector); + + [[nodiscard]] U32 LoadLocal(const U32& word_offset); + void WriteLocal(const U32& word_offset, const U32& value); + + [[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset); + void WriteShared(int bit_size, const U32& offset, const Value& value); + + [[nodiscard]] U1 GetZeroFromOp(const Value& op); + [[nodiscard]] U1 GetSignFromOp(const Value& op); + [[nodiscard]] U1 GetCarryFromOp(const Value& op); + [[nodiscard]] U1 GetOverflowFromOp(const Value& op); + [[nodiscard]] U1 GetSparseFromOp(const Value& op); + [[nodiscard]] U1 GetInBoundsFromOp(const Value& op); + + [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2); + [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3); + [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3, + const Value& e4); + [[nodiscard]] Value CompositeExtract(const Value& vector, size_t element); + [[nodiscard]] Value CompositeInsert(const Value& vector, const Value& object, size_t element); + + [[nodiscard]] Value Select(const U1& condition, const Value& true_value, + const Value& false_value); + + void Barrier(); + void WorkgroupMemoryBarrier(); + void DeviceMemoryBarrier(); + + template <typename Dest, typename Source> + [[nodiscard]] Dest BitCast(const Source& value); + + [[nodiscard]] U64 PackUint2x32(const Value& vector); + [[nodiscard]] Value UnpackUint2x32(const U64& value); + + [[nodiscard]] U32 PackFloat2x16(const Value& vector); + [[nodiscard]] Value UnpackFloat2x16(const U32& value); + + [[nodiscard]] U32 PackHalf2x16(const Value& vector); + [[nodiscard]] Value UnpackHalf2x16(const U32& value); + + [[nodiscard]] F64 PackDouble2x32(const Value& vector); + [[nodiscard]] Value UnpackDouble2x32(const F64& value); + + [[nodiscard]] F16F32F64 FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control = {}); + [[nodiscard]] F16F32F64 FPMul(const F16F32F64& a, const F16F32F64& b, FpControl control = {}); + [[nodiscard]] F16F32F64 FPFma(const F16F32F64& a, const F16F32F64& b, const F16F32F64& c, + FpControl control = {}); + + [[nodiscard]] F16F32F64 FPAbs(const F16F32F64& value); + [[nodiscard]] F16F32F64 FPNeg(const F16F32F64& value); + [[nodiscard]] F16F32F64 FPAbsNeg(const F16F32F64& value, bool abs, bool neg); + + [[nodiscard]] F32 FPCos(const F32& value); + [[nodiscard]] F32 FPSin(const F32& value); + [[nodiscard]] F32 FPExp2(const F32& value); + [[nodiscard]] F32 FPLog2(const F32& value); + [[nodiscard]] F32F64 FPRecip(const F32F64& value); + [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value); + [[nodiscard]] F32 FPSqrt(const F32& value); + [[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value); + [[nodiscard]] F16F32F64 FPClamp(const F16F32F64& value, const F16F32F64& min_value, + const F16F32F64& max_value); + 
[[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value, FpControl control = {}); + [[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value, FpControl control = {}); + [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {}); + [[nodiscard]] F16F32F64 FPTrunc(const F16F32F64& value, FpControl control = {}); + + [[nodiscard]] U1 FPEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control = {}, + bool ordered = true); + [[nodiscard]] U1 FPNotEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control = {}, + bool ordered = true); + [[nodiscard]] U1 FPLessThan(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control = {}, + bool ordered = true); + [[nodiscard]] U1 FPGreaterThan(const F16F32F64& lhs, const F16F32F64& rhs, + FpControl control = {}, bool ordered = true); + [[nodiscard]] U1 FPLessThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, + FpControl control = {}, bool ordered = true); + [[nodiscard]] U1 FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, + FpControl control = {}, bool ordered = true); + [[nodiscard]] U1 FPIsNan(const F16F32F64& value); + [[nodiscard]] U1 FPOrdered(const F16F32F64& lhs, const F16F32F64& rhs); + [[nodiscard]] U1 FPUnordered(const F16F32F64& lhs, const F16F32F64& rhs); + [[nodiscard]] F32F64 FPMax(const F32F64& lhs, const F32F64& rhs, FpControl control = {}); + [[nodiscard]] F32F64 FPMin(const F32F64& lhs, const F32F64& rhs, FpControl control = {}); + + [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b); + [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b); + [[nodiscard]] U32 IMul(const U32& a, const U32& b); + [[nodiscard]] U32U64 INeg(const U32U64& value); + [[nodiscard]] U32 IAbs(const U32& value); + [[nodiscard]] U32U64 ShiftLeftLogical(const U32U64& base, const U32& shift); + [[nodiscard]] U32U64 ShiftRightLogical(const U32U64& base, const U32& shift); + [[nodiscard]] U32U64 ShiftRightArithmetic(const U32U64& base, const U32& shift); + [[nodiscard]] U32 BitwiseAnd(const U32& a, const U32& b); + [[nodiscard]] U32 BitwiseOr(const U32& a, const U32& b); + [[nodiscard]] U32 BitwiseXor(const U32& a, const U32& b); + [[nodiscard]] U32 BitFieldInsert(const U32& base, const U32& insert, const U32& offset, + const U32& count); + [[nodiscard]] U32 BitFieldExtract(const U32& base, const U32& offset, const U32& count, + bool is_signed = false); + [[nodiscard]] U32 BitReverse(const U32& value); + [[nodiscard]] U32 BitCount(const U32& value); + [[nodiscard]] U32 BitwiseNot(const U32& value); + + [[nodiscard]] U32 FindSMsb(const U32& value); + [[nodiscard]] U32 FindUMsb(const U32& value); + [[nodiscard]] U32 SMin(const U32& a, const U32& b); + [[nodiscard]] U32 UMin(const U32& a, const U32& b); + [[nodiscard]] U32 IMin(const U32& a, const U32& b, bool is_signed); + [[nodiscard]] U32 SMax(const U32& a, const U32& b); + [[nodiscard]] U32 UMax(const U32& a, const U32& b); + [[nodiscard]] U32 IMax(const U32& a, const U32& b, bool is_signed); + [[nodiscard]] U32 SClamp(const U32& value, const U32& min, const U32& max); + [[nodiscard]] U32 UClamp(const U32& value, const U32& min, const U32& max); + + [[nodiscard]] U1 ILessThan(const U32& lhs, const U32& rhs, bool is_signed); + [[nodiscard]] U1 IEqual(const U32U64& lhs, const U32U64& rhs); + [[nodiscard]] U1 ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed); + [[nodiscard]] U1 IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed); + [[nodiscard]] U1 INotEqual(const U32& lhs, const U32& rhs); + [[nodiscard]] U1 
IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed); + + [[nodiscard]] U32 SharedAtomicIAdd(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicSMin(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicUMin(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicIMin(const U32& pointer_offset, const U32& value, bool is_signed); + [[nodiscard]] U32 SharedAtomicSMax(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicUMax(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicIMax(const U32& pointer_offset, const U32& value, bool is_signed); + [[nodiscard]] U32 SharedAtomicInc(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicDec(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicAnd(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicOr(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicXor(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32U64 SharedAtomicExchange(const U32& pointer_offset, const U32U64& value); + + [[nodiscard]] U32U64 GlobalAtomicIAdd(const U64& pointer_offset, const U32U64& value); + [[nodiscard]] U32U64 GlobalAtomicSMin(const U64& pointer_offset, const U32U64& value); + [[nodiscard]] U32U64 GlobalAtomicUMin(const U64& pointer_offset, const U32U64& value); + [[nodiscard]] U32U64 GlobalAtomicIMin(const U64& pointer_offset, const U32U64& value, + bool is_signed); + [[nodiscard]] U32U64 GlobalAtomicSMax(const U64& pointer_offset, const U32U64& value); + [[nodiscard]] U32U64 GlobalAtomicUMax(const U64& pointer_offset, const U32U64& value); + [[nodiscard]] U32U64 GlobalAtomicIMax(const U64& pointer_offset, const U32U64& value, + bool is_signed); + [[nodiscard]] U32 GlobalAtomicInc(const U64& pointer_offset, const U32& value); + [[nodiscard]] U32 GlobalAtomicDec(const U64& pointer_offset, const U32& value); + [[nodiscard]] U32U64 GlobalAtomicAnd(const U64& pointer_offset, const U32U64& value); + [[nodiscard]] U32U64 GlobalAtomicOr(const U64& pointer_offset, const U32U64& value); + [[nodiscard]] U32U64 GlobalAtomicXor(const U64& pointer_offset, const U32U64& value); + [[nodiscard]] U32U64 GlobalAtomicExchange(const U64& pointer_offset, const U32U64& value); + + [[nodiscard]] F32 GlobalAtomicF32Add(const U64& pointer_offset, const Value& value, + const FpControl control = {}); + [[nodiscard]] Value GlobalAtomicF16x2Add(const U64& pointer_offset, const Value& value, + const FpControl control = {}); + [[nodiscard]] Value GlobalAtomicF16x2Min(const U64& pointer_offset, const Value& value, + const FpControl control = {}); + [[nodiscard]] Value GlobalAtomicF16x2Max(const U64& pointer_offset, const Value& value, + const FpControl control = {}); + + [[nodiscard]] U1 LogicalOr(const U1& a, const U1& b); + [[nodiscard]] U1 LogicalAnd(const U1& a, const U1& b); + [[nodiscard]] U1 LogicalXor(const U1& a, const U1& b); + [[nodiscard]] U1 LogicalNot(const U1& value); + + [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const F16F32F64& value); + [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value); + [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value); + [[nodiscard]] F16F32F64 ConvertSToF(size_t dest_bitsize, size_t src_bitsize, const Value& value, + FpControl control = {}); + [[nodiscard]] F16F32F64 ConvertUToF(size_t dest_bitsize, size_t src_bitsize, const Value& value, + 
FpControl control = {});
+    [[nodiscard]] F16F32F64 ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_signed,
+                                        const Value& value, FpControl control = {});
+
+    [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value);
+    [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value,
+                                      FpControl control = {});
+
+    [[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& coords,
+                                               const F32& bias, const Value& offset,
+                                               const F32& lod_clamp, TextureInstInfo info);
+    [[nodiscard]] Value ImageSampleExplicitLod(const Value& handle, const Value& coords,
+                                               const F32& lod, const Value& offset,
+                                               TextureInstInfo info);
+    [[nodiscard]] F32 ImageSampleDrefImplicitLod(const Value& handle, const Value& coords,
+                                                 const F32& dref, const F32& bias,
+                                                 const Value& offset, const F32& lod_clamp,
+                                                 TextureInstInfo info);
+    [[nodiscard]] F32 ImageSampleDrefExplicitLod(const Value& handle, const Value& coords,
+                                                 const F32& dref, const F32& lod,
+                                                 const Value& offset, TextureInstInfo info);
+    [[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod);
+
+    [[nodiscard]] Value ImageQueryLod(const Value& handle, const Value& coords,
+                                      TextureInstInfo info);
+    [[nodiscard]] Value ImageGather(const Value& handle, const Value& coords, const Value& offset,
+                                    const Value& offset2, TextureInstInfo info);
+    [[nodiscard]] Value ImageGatherDref(const Value& handle, const Value& coords,
+                                        const Value& offset, const Value& offset2, const F32& dref,
+                                        TextureInstInfo info);
+    [[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const Value& offset,
+                                   const U32& lod, const U32& multisampling, TextureInstInfo info);
+    [[nodiscard]] Value ImageGradient(const Value& handle, const Value& coords,
+                                      const Value& derivates, const Value& offset,
+                                      const F32& lod_clamp, TextureInstInfo info);
+    [[nodiscard]] Value ImageRead(const Value& handle, const Value& coords, TextureInstInfo info);
+    void ImageWrite(const Value& handle, const Value& coords, const Value& color,
+                    TextureInstInfo info);
+
+    [[nodiscard]] Value ImageAtomicIAdd(const Value& handle, const Value& coords,
+                                        const Value& value, TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicSMin(const Value& handle, const Value& coords,
+                                        const Value& value, TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicUMin(const Value& handle, const Value& coords,
+                                        const Value& value, TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicIMin(const Value& handle, const Value& coords,
+                                        const Value& value, bool is_signed, TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicSMax(const Value& handle, const Value& coords,
+                                        const Value& value, TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicUMax(const Value& handle, const Value& coords,
+                                        const Value& value, TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicIMax(const Value& handle, const Value& coords,
+                                        const Value& value, bool is_signed, TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicInc(const Value& handle, const Value& coords, const Value& value,
+                                       TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicDec(const Value& handle, const Value& coords, const Value& value,
+                                       TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicAnd(const Value& handle, const Value& coords, const Value& value,
+                                       TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicOr(const Value& handle, const Value& coords, const Value& value,
+                                      TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicXor(const Value&
handle, const Value& coords, const Value& value, + TextureInstInfo info); + [[nodiscard]] Value ImageAtomicExchange(const Value& handle, const Value& coords, + const Value& value, TextureInstInfo info); + [[nodiscard]] U1 VoteAll(const U1& value); + [[nodiscard]] U1 VoteAny(const U1& value); + [[nodiscard]] U1 VoteEqual(const U1& value); + [[nodiscard]] U32 SubgroupBallot(const U1& value); + [[nodiscard]] U32 SubgroupEqMask(); + [[nodiscard]] U32 SubgroupLtMask(); + [[nodiscard]] U32 SubgroupLeMask(); + [[nodiscard]] U32 SubgroupGtMask(); + [[nodiscard]] U32 SubgroupGeMask(); + [[nodiscard]] U32 ShuffleIndex(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, + const IR::U32& seg_mask); + [[nodiscard]] U32 ShuffleUp(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, + const IR::U32& seg_mask); + [[nodiscard]] U32 ShuffleDown(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, + const IR::U32& seg_mask); + [[nodiscard]] U32 ShuffleButterfly(const IR::U32& value, const IR::U32& index, + const IR::U32& clamp, const IR::U32& seg_mask); + [[nodiscard]] F32 FSwizzleAdd(const F32& a, const F32& b, const U32& swizzle, + FpControl control = {}); + + [[nodiscard]] F32 DPdxFine(const F32& a); + + [[nodiscard]] F32 DPdyFine(const F32& a); + + [[nodiscard]] F32 DPdxCoarse(const F32& a); + + [[nodiscard]] F32 DPdyCoarse(const F32& a); + +private: + IR::Block::iterator insertion_point; + + template <typename T = Value, typename... Args> + T Inst(Opcode op, Args... args) { + auto it{block->PrependNewInst(insertion_point, op, {Value{args}...})}; + return T{Value{&*it}}; + } + + template <typename T> + requires(sizeof(T) <= sizeof(u32) && std::is_trivially_copyable_v<T>) struct Flags { + Flags() = default; + Flags(T proxy_) : proxy{proxy_} {} + + T proxy; + }; + + template <typename T = Value, typename FlagType, typename... Args> + T Inst(Opcode op, Flags<FlagType> flags, Args... args) { + u32 raw_flags{}; + std::memcpy(&raw_flags, &flags.proxy, sizeof(flags.proxy)); + auto it{block->PrependNewInst(insertion_point, op, {Value{args}...}, raw_flags)}; + return T{Value{&*it}}; + } +}; + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp new file mode 100644 index 000000000..3dfa5a880 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -0,0 +1,411 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
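+// Inst implements the value and use bookkeeping behind the emitter: Use() and
+// UndoUse() maintain use counts, and pseudo-instructions such as GetZeroFromOp
+// attach themselves to the instruction that produced their argument so that
+// backends can later retrieve them with GetAssociatedPseudoOperation. A
+// hypothetical sketch, assuming an existing IREmitter `ir` and U32 values `a`
+// and `b`:
+//
+//     const IR::U32 sum{ir.IAdd(a, b)};
+//     const IR::U1 is_zero{ir.GetZeroFromOp(sum)}; // recorded on sum's defining Inst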
+ +#include <algorithm> +#include <memory> + +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/type.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::IR { +namespace { +void CheckPseudoInstruction(IR::Inst* inst, IR::Opcode opcode) { + if (inst && inst->GetOpcode() != opcode) { + throw LogicError("Invalid pseudo-instruction"); + } +} + +void SetPseudoInstruction(IR::Inst*& dest_inst, IR::Inst* pseudo_inst) { + if (dest_inst) { + throw LogicError("Only one of each type of pseudo-op allowed"); + } + dest_inst = pseudo_inst; +} + +void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) { + if (inst->GetOpcode() != expected_opcode) { + throw LogicError("Undoing use of invalid pseudo-op"); + } + inst = nullptr; +} + +void AllocAssociatedInsts(std::unique_ptr<AssociatedInsts>& associated_insts) { + if (!associated_insts) { + associated_insts = std::make_unique<AssociatedInsts>(); + } +} +} // Anonymous namespace + +Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} { + if (op == Opcode::Phi) { + std::construct_at(&phi_args); + } else { + std::construct_at(&args); + } +} + +Inst::~Inst() { + if (op == Opcode::Phi) { + std::destroy_at(&phi_args); + } else { + std::destroy_at(&args); + } +} + +bool Inst::MayHaveSideEffects() const noexcept { + switch (op) { + case Opcode::ConditionRef: + case Opcode::Reference: + case Opcode::PhiMove: + case Opcode::Prologue: + case Opcode::Epilogue: + case Opcode::Join: + case Opcode::DemoteToHelperInvocation: + case Opcode::Barrier: + case Opcode::WorkgroupMemoryBarrier: + case Opcode::DeviceMemoryBarrier: + case Opcode::EmitVertex: + case Opcode::EndPrimitive: + case Opcode::SetAttribute: + case Opcode::SetAttributeIndexed: + case Opcode::SetPatch: + case Opcode::SetFragColor: + case Opcode::SetSampleMask: + case Opcode::SetFragDepth: + case Opcode::WriteGlobalU8: + case Opcode::WriteGlobalS8: + case Opcode::WriteGlobalU16: + case Opcode::WriteGlobalS16: + case Opcode::WriteGlobal32: + case Opcode::WriteGlobal64: + case Opcode::WriteGlobal128: + case Opcode::WriteStorageU8: + case Opcode::WriteStorageS8: + case Opcode::WriteStorageU16: + case Opcode::WriteStorageS16: + case Opcode::WriteStorage32: + case Opcode::WriteStorage64: + case Opcode::WriteStorage128: + case Opcode::WriteLocal: + case Opcode::WriteSharedU8: + case Opcode::WriteSharedU16: + case Opcode::WriteSharedU32: + case Opcode::WriteSharedU64: + case Opcode::WriteSharedU128: + case Opcode::SharedAtomicIAdd32: + case Opcode::SharedAtomicSMin32: + case Opcode::SharedAtomicUMin32: + case Opcode::SharedAtomicSMax32: + case Opcode::SharedAtomicUMax32: + case Opcode::SharedAtomicInc32: + case Opcode::SharedAtomicDec32: + case Opcode::SharedAtomicAnd32: + case Opcode::SharedAtomicOr32: + case Opcode::SharedAtomicXor32: + case Opcode::SharedAtomicExchange32: + case Opcode::SharedAtomicExchange64: + case Opcode::GlobalAtomicIAdd32: + case Opcode::GlobalAtomicSMin32: + case Opcode::GlobalAtomicUMin32: + case Opcode::GlobalAtomicSMax32: + case Opcode::GlobalAtomicUMax32: + case Opcode::GlobalAtomicInc32: + case Opcode::GlobalAtomicDec32: + case Opcode::GlobalAtomicAnd32: + case Opcode::GlobalAtomicOr32: + case Opcode::GlobalAtomicXor32: + case Opcode::GlobalAtomicExchange32: + case Opcode::GlobalAtomicIAdd64: + case Opcode::GlobalAtomicSMin64: + case Opcode::GlobalAtomicUMin64: + case Opcode::GlobalAtomicSMax64: + case Opcode::GlobalAtomicUMax64: + case Opcode::GlobalAtomicAnd64: + case Opcode::GlobalAtomicOr64: + 
case Opcode::GlobalAtomicXor64: + case Opcode::GlobalAtomicExchange64: + case Opcode::GlobalAtomicAddF32: + case Opcode::GlobalAtomicAddF16x2: + case Opcode::GlobalAtomicAddF32x2: + case Opcode::GlobalAtomicMinF16x2: + case Opcode::GlobalAtomicMinF32x2: + case Opcode::GlobalAtomicMaxF16x2: + case Opcode::GlobalAtomicMaxF32x2: + case Opcode::StorageAtomicIAdd32: + case Opcode::StorageAtomicSMin32: + case Opcode::StorageAtomicUMin32: + case Opcode::StorageAtomicSMax32: + case Opcode::StorageAtomicUMax32: + case Opcode::StorageAtomicInc32: + case Opcode::StorageAtomicDec32: + case Opcode::StorageAtomicAnd32: + case Opcode::StorageAtomicOr32: + case Opcode::StorageAtomicXor32: + case Opcode::StorageAtomicExchange32: + case Opcode::StorageAtomicIAdd64: + case Opcode::StorageAtomicSMin64: + case Opcode::StorageAtomicUMin64: + case Opcode::StorageAtomicSMax64: + case Opcode::StorageAtomicUMax64: + case Opcode::StorageAtomicAnd64: + case Opcode::StorageAtomicOr64: + case Opcode::StorageAtomicXor64: + case Opcode::StorageAtomicExchange64: + case Opcode::StorageAtomicAddF32: + case Opcode::StorageAtomicAddF16x2: + case Opcode::StorageAtomicAddF32x2: + case Opcode::StorageAtomicMinF16x2: + case Opcode::StorageAtomicMinF32x2: + case Opcode::StorageAtomicMaxF16x2: + case Opcode::StorageAtomicMaxF32x2: + case Opcode::BindlessImageWrite: + case Opcode::BoundImageWrite: + case Opcode::ImageWrite: + case IR::Opcode::BindlessImageAtomicIAdd32: + case IR::Opcode::BindlessImageAtomicSMin32: + case IR::Opcode::BindlessImageAtomicUMin32: + case IR::Opcode::BindlessImageAtomicSMax32: + case IR::Opcode::BindlessImageAtomicUMax32: + case IR::Opcode::BindlessImageAtomicInc32: + case IR::Opcode::BindlessImageAtomicDec32: + case IR::Opcode::BindlessImageAtomicAnd32: + case IR::Opcode::BindlessImageAtomicOr32: + case IR::Opcode::BindlessImageAtomicXor32: + case IR::Opcode::BindlessImageAtomicExchange32: + case IR::Opcode::BoundImageAtomicIAdd32: + case IR::Opcode::BoundImageAtomicSMin32: + case IR::Opcode::BoundImageAtomicUMin32: + case IR::Opcode::BoundImageAtomicSMax32: + case IR::Opcode::BoundImageAtomicUMax32: + case IR::Opcode::BoundImageAtomicInc32: + case IR::Opcode::BoundImageAtomicDec32: + case IR::Opcode::BoundImageAtomicAnd32: + case IR::Opcode::BoundImageAtomicOr32: + case IR::Opcode::BoundImageAtomicXor32: + case IR::Opcode::BoundImageAtomicExchange32: + case IR::Opcode::ImageAtomicIAdd32: + case IR::Opcode::ImageAtomicSMin32: + case IR::Opcode::ImageAtomicUMin32: + case IR::Opcode::ImageAtomicSMax32: + case IR::Opcode::ImageAtomicUMax32: + case IR::Opcode::ImageAtomicInc32: + case IR::Opcode::ImageAtomicDec32: + case IR::Opcode::ImageAtomicAnd32: + case IR::Opcode::ImageAtomicOr32: + case IR::Opcode::ImageAtomicXor32: + case IR::Opcode::ImageAtomicExchange32: + return true; + default: + return false; + } +} + +bool Inst::IsPseudoInstruction() const noexcept { + switch (op) { + case Opcode::GetZeroFromOp: + case Opcode::GetSignFromOp: + case Opcode::GetCarryFromOp: + case Opcode::GetOverflowFromOp: + case Opcode::GetSparseFromOp: + case Opcode::GetInBoundsFromOp: + return true; + default: + return false; + } +} + +bool Inst::AreAllArgsImmediates() const { + if (op == Opcode::Phi) { + throw LogicError("Testing for all arguments are immediates on phi instruction"); + } + return std::all_of(args.begin(), args.begin() + NumArgs(), + [](const IR::Value& value) { return value.IsImmediate(); }); +} + +Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) { + if (!associated_insts) { + return nullptr; + } + 
switch (opcode) {
+    case Opcode::GetZeroFromOp:
+        CheckPseudoInstruction(associated_insts->zero_inst, Opcode::GetZeroFromOp);
+        return associated_insts->zero_inst;
+    case Opcode::GetSignFromOp:
+        CheckPseudoInstruction(associated_insts->sign_inst, Opcode::GetSignFromOp);
+        return associated_insts->sign_inst;
+    case Opcode::GetCarryFromOp:
+        CheckPseudoInstruction(associated_insts->carry_inst, Opcode::GetCarryFromOp);
+        return associated_insts->carry_inst;
+    case Opcode::GetOverflowFromOp:
+        CheckPseudoInstruction(associated_insts->overflow_inst, Opcode::GetOverflowFromOp);
+        return associated_insts->overflow_inst;
+    case Opcode::GetSparseFromOp:
+        CheckPseudoInstruction(associated_insts->sparse_inst, Opcode::GetSparseFromOp);
+        return associated_insts->sparse_inst;
+    case Opcode::GetInBoundsFromOp:
+        CheckPseudoInstruction(associated_insts->in_bounds_inst, Opcode::GetInBoundsFromOp);
+        return associated_insts->in_bounds_inst;
+    default:
+        throw InvalidArgument("{} is not a pseudo-instruction", opcode);
+    }
+}
+
+IR::Type Inst::Type() const {
+    return TypeOf(op);
+}
+
+void Inst::SetArg(size_t index, Value value) {
+    if (index >= NumArgs()) {
+        throw InvalidArgument("Out of bounds argument index {} in opcode {}", index, op);
+    }
+    const IR::Value arg{Arg(index)};
+    if (!arg.IsImmediate()) {
+        UndoUse(arg);
+    }
+    if (!value.IsImmediate()) {
+        Use(value);
+    }
+    if (op == Opcode::Phi) {
+        phi_args[index].second = value;
+    } else {
+        args[index] = value;
+    }
+}
+
+Block* Inst::PhiBlock(size_t index) const {
+    if (op != Opcode::Phi) {
+        throw LogicError("{} is not a Phi instruction", op);
+    }
+    if (index >= phi_args.size()) {
+        throw InvalidArgument("Out of bounds argument index {} in phi instruction", index);
+    }
+    return phi_args[index].first;
+}
+
+void Inst::AddPhiOperand(Block* predecessor, const Value& value) {
+    if (!value.IsImmediate()) {
+        Use(value);
+    }
+    phi_args.emplace_back(predecessor, value);
+}
+
+void Inst::Invalidate() {
+    ClearArgs();
+    ReplaceOpcode(Opcode::Void);
+}
+
+void Inst::ClearArgs() {
+    if (op == Opcode::Phi) {
+        for (auto& pair : phi_args) {
+            IR::Value& value{pair.second};
+            if (!value.IsImmediate()) {
+                UndoUse(value);
+            }
+        }
+        phi_args.clear();
+    } else {
+        for (auto& value : args) {
+            if (!value.IsImmediate()) {
+                UndoUse(value);
+            }
+        }
+        // Reset arguments to null
+        // std::memset was measured to be faster on MSVC than std::ranges::fill
+        std::memset(reinterpret_cast<char*>(&args), 0, sizeof(args));
+    }
+}
+
+void Inst::ReplaceUsesWith(Value replacement) {
+    Invalidate();
+    ReplaceOpcode(Opcode::Identity);
+    if (!replacement.IsImmediate()) {
+        Use(replacement);
+    }
+    args[0] = replacement;
+}
+
+void Inst::ReplaceOpcode(IR::Opcode opcode) {
+    if (opcode == IR::Opcode::Phi) {
+        throw LogicError("Cannot transition into Phi");
+    }
+    if (op == Opcode::Phi) {
+        // Transition out of phi arguments into non-phi
+        std::destroy_at(&phi_args);
+        std::construct_at(&args);
+    }
+    op = opcode;
+}
+
+void Inst::Use(const Value& value) {
+    Inst* const inst{value.Inst()};
+    ++inst->use_count;
+
+    std::unique_ptr<AssociatedInsts>& assoc_inst{inst->associated_insts};
+    switch (op) {
+    case Opcode::GetZeroFromOp:
+        AllocAssociatedInsts(assoc_inst);
+        SetPseudoInstruction(assoc_inst->zero_inst, this);
+        break;
+    case Opcode::GetSignFromOp:
+        AllocAssociatedInsts(assoc_inst);
+        SetPseudoInstruction(assoc_inst->sign_inst, this);
+        break;
+    case Opcode::GetCarryFromOp:
+        AllocAssociatedInsts(assoc_inst);
+        SetPseudoInstruction(assoc_inst->carry_inst, this);
+        break;
+    case
Opcode::GetOverflowFromOp: + AllocAssociatedInsts(assoc_inst); + SetPseudoInstruction(assoc_inst->overflow_inst, this); + break; + case Opcode::GetSparseFromOp: + AllocAssociatedInsts(assoc_inst); + SetPseudoInstruction(assoc_inst->sparse_inst, this); + break; + case Opcode::GetInBoundsFromOp: + AllocAssociatedInsts(assoc_inst); + SetPseudoInstruction(assoc_inst->in_bounds_inst, this); + break; + default: + break; + } +} + +void Inst::UndoUse(const Value& value) { + Inst* const inst{value.Inst()}; + --inst->use_count; + + std::unique_ptr<AssociatedInsts>& assoc_inst{inst->associated_insts}; + switch (op) { + case Opcode::GetZeroFromOp: + AllocAssociatedInsts(assoc_inst); + RemovePseudoInstruction(assoc_inst->zero_inst, Opcode::GetZeroFromOp); + break; + case Opcode::GetSignFromOp: + AllocAssociatedInsts(assoc_inst); + RemovePseudoInstruction(assoc_inst->sign_inst, Opcode::GetSignFromOp); + break; + case Opcode::GetCarryFromOp: + AllocAssociatedInsts(assoc_inst); + RemovePseudoInstruction(assoc_inst->carry_inst, Opcode::GetCarryFromOp); + break; + case Opcode::GetOverflowFromOp: + AllocAssociatedInsts(assoc_inst); + RemovePseudoInstruction(assoc_inst->overflow_inst, Opcode::GetOverflowFromOp); + break; + case Opcode::GetSparseFromOp: + AllocAssociatedInsts(assoc_inst); + RemovePseudoInstruction(assoc_inst->sparse_inst, Opcode::GetSparseFromOp); + break; + case Opcode::GetInBoundsFromOp: + AllocAssociatedInsts(assoc_inst); + RemovePseudoInstruction(assoc_inst->in_bounds_inst, Opcode::GetInBoundsFromOp); + break; + default: + break; + } +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h new file mode 100644 index 000000000..77cda1f8a --- /dev/null +++ b/src/shader_recompiler/frontend/ir/modifiers.h @@ -0,0 +1,49 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
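+// These modifiers are small trivially-copyable structs that travel with an
+// instruction inside its 32-bit flags word (IREmitter packs them through its
+// Flags<T> helper with std::memcpy). A hypothetical example of attaching
+// rounding control to an emitted operation, given an IREmitter `ir` and F32
+// values `a` and `b`:
+//
+//     IR::FpControl control{};
+//     control.rounding = IR::FpRounding::RZ;
+//     const IR::F32 product{ir.FPMul(a, b, control)};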
+ +#pragma once + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/shader_info.h" + +namespace Shader::IR { + +enum class FmzMode : u8 { + DontCare, // Not specified for this instruction + FTZ, // Flush denorms to zero, NAN is propagated (D3D11, NVN, GL, VK) + FMZ, // Flush denorms to zero, x * 0 == 0 (D3D9) + None, // Denorms are not flushed, NAN is propagated (nouveau) +}; + +enum class FpRounding : u8 { + DontCare, // Not specified for this instruction + RN, // Round to nearest even, + RM, // Round towards negative infinity + RP, // Round towards positive infinity + RZ, // Round towards zero +}; + +struct FpControl { + bool no_contraction{false}; + FpRounding rounding{FpRounding::DontCare}; + FmzMode fmz_mode{FmzMode::DontCare}; +}; +static_assert(sizeof(FpControl) <= sizeof(u32)); + +union TextureInstInfo { + u32 raw; + BitField<0, 16, u32> descriptor_index; + BitField<16, 3, TextureType> type; + BitField<19, 1, u32> is_depth; + BitField<20, 1, u32> has_bias; + BitField<21, 1, u32> has_lod_clamp; + BitField<22, 1, u32> relaxed_precision; + BitField<23, 2, u32> gather_component; + BitField<25, 2, u32> num_derivates; + BitField<27, 3, ImageFormat> image_format; +}; +static_assert(sizeof(TextureInstInfo) <= sizeof(u32)); + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/opcodes.cpp b/src/shader_recompiler/frontend/ir/opcodes.cpp new file mode 100644 index 000000000..24d024ad7 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/opcodes.cpp @@ -0,0 +1,15 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/frontend/ir/opcodes.h" + +namespace Shader::IR { + +std::string_view NameOf(Opcode op) { + return Detail::META_TABLE[static_cast<size_t>(op)].name; +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/opcodes.h b/src/shader_recompiler/frontend/ir/opcodes.h new file mode 100644 index 000000000..9ab108292 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/opcodes.h @@ -0,0 +1,110 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <algorithm> +#include <array> +#include <string_view> + +#include <fmt/format.h> + +#include "shader_recompiler/frontend/ir/type.h" + +namespace Shader::IR { + +enum class Opcode { +#define OPCODE(name, ...) 
name,
+#include "opcodes.inc"
+#undef OPCODE
+};
+
+namespace Detail {
+struct OpcodeMeta {
+    std::string_view name;
+    Type type;
+    std::array<Type, 5> arg_types;
+};
+
+// using enum Type;
+constexpr Type Void{Type::Void};
+constexpr Type Opaque{Type::Opaque};
+constexpr Type Reg{Type::Reg};
+constexpr Type Pred{Type::Pred};
+constexpr Type Attribute{Type::Attribute};
+constexpr Type Patch{Type::Patch};
+constexpr Type U1{Type::U1};
+constexpr Type U8{Type::U8};
+constexpr Type U16{Type::U16};
+constexpr Type U32{Type::U32};
+constexpr Type U64{Type::U64};
+constexpr Type F16{Type::F16};
+constexpr Type F32{Type::F32};
+constexpr Type F64{Type::F64};
+constexpr Type U32x2{Type::U32x2};
+constexpr Type U32x3{Type::U32x3};
+constexpr Type U32x4{Type::U32x4};
+constexpr Type F16x2{Type::F16x2};
+constexpr Type F16x3{Type::F16x3};
+constexpr Type F16x4{Type::F16x4};
+constexpr Type F32x2{Type::F32x2};
+constexpr Type F32x3{Type::F32x3};
+constexpr Type F32x4{Type::F32x4};
+constexpr Type F64x2{Type::F64x2};
+constexpr Type F64x3{Type::F64x3};
+constexpr Type F64x4{Type::F64x4};
+
+constexpr OpcodeMeta META_TABLE[]{
+#define OPCODE(name_token, type_token, ...)                                                        \
+    {                                                                                              \
+        .name{#name_token},                                                                        \
+        .type = type_token,                                                                        \
+        .arg_types{__VA_ARGS__},                                                                   \
+    },
+#include "opcodes.inc"
+#undef OPCODE
+};
+constexpr size_t CalculateNumArgsOf(Opcode op) {
+    const auto& arg_types{META_TABLE[static_cast<size_t>(op)].arg_types};
+    return static_cast<size_t>(
+        std::distance(arg_types.begin(), std::ranges::find(arg_types, Type::Void)));
+}
+
+constexpr u8 NUM_ARGS[]{
+#define OPCODE(name_token, type_token, ...) static_cast<u8>(CalculateNumArgsOf(Opcode::name_token)),
+#include "opcodes.inc"
+#undef OPCODE
+};
+} // namespace Detail
+
+/// Get return type of an opcode
+[[nodiscard]] inline Type TypeOf(Opcode op) noexcept {
+    return Detail::META_TABLE[static_cast<size_t>(op)].type;
+}
+
+/// Get the number of arguments an opcode accepts
+[[nodiscard]] inline size_t NumArgsOf(Opcode op) noexcept {
+    return static_cast<size_t>(Detail::NUM_ARGS[static_cast<size_t>(op)]);
+}
+
+/// Get the required type of an argument of an opcode
+[[nodiscard]] inline Type ArgTypeOf(Opcode op, size_t arg_index) noexcept {
+    return Detail::META_TABLE[static_cast<size_t>(op)].arg_types[arg_index];
+}
+
+/// Get the name of an opcode
+[[nodiscard]] std::string_view NameOf(Opcode op);
+
+} // namespace Shader::IR
+
+template <>
+struct fmt::formatter<Shader::IR::Opcode> {
+    constexpr auto parse(format_parse_context& ctx) {
+        return ctx.begin();
+    }
+    template <typename FormatContext>
+    auto format(const Shader::IR::Opcode& op, FormatContext& ctx) {
+        return format_to(ctx.out(), "{}", Shader::IR::NameOf(op));
+    }
+};
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
new file mode 100644
index 000000000..d91098c80
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -0,0 +1,550 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+// opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, arg5 type, ...
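+//
+// This file is an X-macro table: opcodes.h includes it several times with
+// different OPCODE() definitions to generate the Opcode enum, the metadata
+// table and the per-opcode argument counts. As an illustration, the entry
+//     OPCODE(IAdd32, U32, U32, U32, )
+// becomes `IAdd32,` inside `enum class Opcode` and roughly
+//     {.name{"IAdd32"}, .type = U32, .arg_types{U32, U32}},
+// inside Detail::META_TABLE, so NumArgsOf(Opcode::IAdd32) evaluates to 2.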
+OPCODE(Phi, Opaque, ) +OPCODE(Identity, Opaque, Opaque, ) +OPCODE(Void, Void, ) +OPCODE(ConditionRef, U1, U1, ) +OPCODE(Reference, Void, Opaque, ) +OPCODE(PhiMove, Void, Opaque, Opaque, ) + +// Special operations +OPCODE(Prologue, Void, ) +OPCODE(Epilogue, Void, ) +OPCODE(Join, Void, ) +OPCODE(DemoteToHelperInvocation, Void, ) +OPCODE(EmitVertex, Void, U32, ) +OPCODE(EndPrimitive, Void, U32, ) + +// Barriers +OPCODE(Barrier, Void, ) +OPCODE(WorkgroupMemoryBarrier, Void, ) +OPCODE(DeviceMemoryBarrier, Void, ) + +// Context getters/setters +OPCODE(GetRegister, U32, Reg, ) +OPCODE(SetRegister, Void, Reg, U32, ) +OPCODE(GetPred, U1, Pred, ) +OPCODE(SetPred, Void, Pred, U1, ) +OPCODE(GetGotoVariable, U1, U32, ) +OPCODE(SetGotoVariable, Void, U32, U1, ) +OPCODE(GetIndirectBranchVariable, U32, ) +OPCODE(SetIndirectBranchVariable, Void, U32, ) +OPCODE(GetCbufU8, U32, U32, U32, ) +OPCODE(GetCbufS8, U32, U32, U32, ) +OPCODE(GetCbufU16, U32, U32, U32, ) +OPCODE(GetCbufS16, U32, U32, U32, ) +OPCODE(GetCbufU32, U32, U32, U32, ) +OPCODE(GetCbufF32, F32, U32, U32, ) +OPCODE(GetCbufU32x2, U32x2, U32, U32, ) +OPCODE(GetAttribute, F32, Attribute, U32, ) +OPCODE(SetAttribute, Void, Attribute, F32, U32, ) +OPCODE(GetAttributeIndexed, F32, U32, U32, ) +OPCODE(SetAttributeIndexed, Void, U32, F32, U32, ) +OPCODE(GetPatch, F32, Patch, ) +OPCODE(SetPatch, Void, Patch, F32, ) +OPCODE(SetFragColor, Void, U32, U32, F32, ) +OPCODE(SetSampleMask, Void, U32, ) +OPCODE(SetFragDepth, Void, F32, ) +OPCODE(GetZFlag, U1, Void, ) +OPCODE(GetSFlag, U1, Void, ) +OPCODE(GetCFlag, U1, Void, ) +OPCODE(GetOFlag, U1, Void, ) +OPCODE(SetZFlag, Void, U1, ) +OPCODE(SetSFlag, Void, U1, ) +OPCODE(SetCFlag, Void, U1, ) +OPCODE(SetOFlag, Void, U1, ) +OPCODE(WorkgroupId, U32x3, ) +OPCODE(LocalInvocationId, U32x3, ) +OPCODE(InvocationId, U32, ) +OPCODE(SampleId, U32, ) +OPCODE(IsHelperInvocation, U1, ) +OPCODE(YDirection, F32, ) + +// Undefined +OPCODE(UndefU1, U1, ) +OPCODE(UndefU8, U8, ) +OPCODE(UndefU16, U16, ) +OPCODE(UndefU32, U32, ) +OPCODE(UndefU64, U64, ) + +// Memory operations +OPCODE(LoadGlobalU8, U32, Opaque, ) +OPCODE(LoadGlobalS8, U32, Opaque, ) +OPCODE(LoadGlobalU16, U32, Opaque, ) +OPCODE(LoadGlobalS16, U32, Opaque, ) +OPCODE(LoadGlobal32, U32, Opaque, ) +OPCODE(LoadGlobal64, U32x2, Opaque, ) +OPCODE(LoadGlobal128, U32x4, Opaque, ) +OPCODE(WriteGlobalU8, Void, Opaque, U32, ) +OPCODE(WriteGlobalS8, Void, Opaque, U32, ) +OPCODE(WriteGlobalU16, Void, Opaque, U32, ) +OPCODE(WriteGlobalS16, Void, Opaque, U32, ) +OPCODE(WriteGlobal32, Void, Opaque, U32, ) +OPCODE(WriteGlobal64, Void, Opaque, U32x2, ) +OPCODE(WriteGlobal128, Void, Opaque, U32x4, ) + +// Storage buffer operations +OPCODE(LoadStorageU8, U32, U32, U32, ) +OPCODE(LoadStorageS8, U32, U32, U32, ) +OPCODE(LoadStorageU16, U32, U32, U32, ) +OPCODE(LoadStorageS16, U32, U32, U32, ) +OPCODE(LoadStorage32, U32, U32, U32, ) +OPCODE(LoadStorage64, U32x2, U32, U32, ) +OPCODE(LoadStorage128, U32x4, U32, U32, ) +OPCODE(WriteStorageU8, Void, U32, U32, U32, ) +OPCODE(WriteStorageS8, Void, U32, U32, U32, ) +OPCODE(WriteStorageU16, Void, U32, U32, U32, ) +OPCODE(WriteStorageS16, Void, U32, U32, U32, ) +OPCODE(WriteStorage32, Void, U32, U32, U32, ) +OPCODE(WriteStorage64, Void, U32, U32, U32x2, ) +OPCODE(WriteStorage128, Void, U32, U32, U32x4, ) + +// Local memory operations +OPCODE(LoadLocal, U32, U32, ) +OPCODE(WriteLocal, Void, U32, U32, ) + +// Shared memory operations +OPCODE(LoadSharedU8, U32, U32, ) +OPCODE(LoadSharedS8, U32, U32, ) +OPCODE(LoadSharedU16, U32, U32, ) 
+OPCODE(LoadSharedS16, U32, U32, ) +OPCODE(LoadSharedU32, U32, U32, ) +OPCODE(LoadSharedU64, U32x2, U32, ) +OPCODE(LoadSharedU128, U32x4, U32, ) +OPCODE(WriteSharedU8, Void, U32, U32, ) +OPCODE(WriteSharedU16, Void, U32, U32, ) +OPCODE(WriteSharedU32, Void, U32, U32, ) +OPCODE(WriteSharedU64, Void, U32, U32x2, ) +OPCODE(WriteSharedU128, Void, U32, U32x4, ) + +// Vector utility +OPCODE(CompositeConstructU32x2, U32x2, U32, U32, ) +OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, ) +OPCODE(CompositeConstructU32x4, U32x4, U32, U32, U32, U32, ) +OPCODE(CompositeExtractU32x2, U32, U32x2, U32, ) +OPCODE(CompositeExtractU32x3, U32, U32x3, U32, ) +OPCODE(CompositeExtractU32x4, U32, U32x4, U32, ) +OPCODE(CompositeInsertU32x2, U32x2, U32x2, U32, U32, ) +OPCODE(CompositeInsertU32x3, U32x3, U32x3, U32, U32, ) +OPCODE(CompositeInsertU32x4, U32x4, U32x4, U32, U32, ) +OPCODE(CompositeConstructF16x2, F16x2, F16, F16, ) +OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, ) +OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, ) +OPCODE(CompositeExtractF16x2, F16, F16x2, U32, ) +OPCODE(CompositeExtractF16x3, F16, F16x3, U32, ) +OPCODE(CompositeExtractF16x4, F16, F16x4, U32, ) +OPCODE(CompositeInsertF16x2, F16x2, F16x2, F16, U32, ) +OPCODE(CompositeInsertF16x3, F16x3, F16x3, F16, U32, ) +OPCODE(CompositeInsertF16x4, F16x4, F16x4, F16, U32, ) +OPCODE(CompositeConstructF32x2, F32x2, F32, F32, ) +OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, ) +OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, ) +OPCODE(CompositeExtractF32x2, F32, F32x2, U32, ) +OPCODE(CompositeExtractF32x3, F32, F32x3, U32, ) +OPCODE(CompositeExtractF32x4, F32, F32x4, U32, ) +OPCODE(CompositeInsertF32x2, F32x2, F32x2, F32, U32, ) +OPCODE(CompositeInsertF32x3, F32x3, F32x3, F32, U32, ) +OPCODE(CompositeInsertF32x4, F32x4, F32x4, F32, U32, ) +OPCODE(CompositeConstructF64x2, F64x2, F64, F64, ) +OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, ) +OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, ) +OPCODE(CompositeExtractF64x2, F64, F64x2, U32, ) +OPCODE(CompositeExtractF64x3, F64, F64x3, U32, ) +OPCODE(CompositeExtractF64x4, F64, F64x4, U32, ) +OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, ) +OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, ) +OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, ) + +// Select operations +OPCODE(SelectU1, U1, U1, U1, U1, ) +OPCODE(SelectU8, U8, U1, U8, U8, ) +OPCODE(SelectU16, U16, U1, U16, U16, ) +OPCODE(SelectU32, U32, U1, U32, U32, ) +OPCODE(SelectU64, U64, U1, U64, U64, ) +OPCODE(SelectF16, F16, U1, F16, F16, ) +OPCODE(SelectF32, F32, U1, F32, F32, ) +OPCODE(SelectF64, F64, U1, F64, F64, ) + +// Bitwise conversions +OPCODE(BitCastU16F16, U16, F16, ) +OPCODE(BitCastU32F32, U32, F32, ) +OPCODE(BitCastU64F64, U64, F64, ) +OPCODE(BitCastF16U16, F16, U16, ) +OPCODE(BitCastF32U32, F32, U32, ) +OPCODE(BitCastF64U64, F64, U64, ) +OPCODE(PackUint2x32, U64, U32x2, ) +OPCODE(UnpackUint2x32, U32x2, U64, ) +OPCODE(PackFloat2x16, U32, F16x2, ) +OPCODE(UnpackFloat2x16, F16x2, U32, ) +OPCODE(PackHalf2x16, U32, F32x2, ) +OPCODE(UnpackHalf2x16, F32x2, U32, ) +OPCODE(PackDouble2x32, F64, U32x2, ) +OPCODE(UnpackDouble2x32, U32x2, F64, ) + +// Pseudo-operation, handled specially at final emit +OPCODE(GetZeroFromOp, U1, Opaque, ) +OPCODE(GetSignFromOp, U1, Opaque, ) +OPCODE(GetCarryFromOp, U1, Opaque, ) +OPCODE(GetOverflowFromOp, U1, Opaque, ) +OPCODE(GetSparseFromOp, U1, Opaque, ) +OPCODE(GetInBoundsFromOp, U1, Opaque, ) + +// Floating-point operations +OPCODE(FPAbs16, F16, 
F16, ) +OPCODE(FPAbs32, F32, F32, ) +OPCODE(FPAbs64, F64, F64, ) +OPCODE(FPAdd16, F16, F16, F16, ) +OPCODE(FPAdd32, F32, F32, F32, ) +OPCODE(FPAdd64, F64, F64, F64, ) +OPCODE(FPFma16, F16, F16, F16, F16, ) +OPCODE(FPFma32, F32, F32, F32, F32, ) +OPCODE(FPFma64, F64, F64, F64, F64, ) +OPCODE(FPMax32, F32, F32, F32, ) +OPCODE(FPMax64, F64, F64, F64, ) +OPCODE(FPMin32, F32, F32, F32, ) +OPCODE(FPMin64, F64, F64, F64, ) +OPCODE(FPMul16, F16, F16, F16, ) +OPCODE(FPMul32, F32, F32, F32, ) +OPCODE(FPMul64, F64, F64, F64, ) +OPCODE(FPNeg16, F16, F16, ) +OPCODE(FPNeg32, F32, F32, ) +OPCODE(FPNeg64, F64, F64, ) +OPCODE(FPRecip32, F32, F32, ) +OPCODE(FPRecip64, F64, F64, ) +OPCODE(FPRecipSqrt32, F32, F32, ) +OPCODE(FPRecipSqrt64, F64, F64, ) +OPCODE(FPSqrt, F32, F32, ) +OPCODE(FPSin, F32, F32, ) +OPCODE(FPExp2, F32, F32, ) +OPCODE(FPCos, F32, F32, ) +OPCODE(FPLog2, F32, F32, ) +OPCODE(FPSaturate16, F16, F16, ) +OPCODE(FPSaturate32, F32, F32, ) +OPCODE(FPSaturate64, F64, F64, ) +OPCODE(FPClamp16, F16, F16, F16, F16, ) +OPCODE(FPClamp32, F32, F32, F32, F32, ) +OPCODE(FPClamp64, F64, F64, F64, F64, ) +OPCODE(FPRoundEven16, F16, F16, ) +OPCODE(FPRoundEven32, F32, F32, ) +OPCODE(FPRoundEven64, F64, F64, ) +OPCODE(FPFloor16, F16, F16, ) +OPCODE(FPFloor32, F32, F32, ) +OPCODE(FPFloor64, F64, F64, ) +OPCODE(FPCeil16, F16, F16, ) +OPCODE(FPCeil32, F32, F32, ) +OPCODE(FPCeil64, F64, F64, ) +OPCODE(FPTrunc16, F16, F16, ) +OPCODE(FPTrunc32, F32, F32, ) +OPCODE(FPTrunc64, F64, F64, ) + +OPCODE(FPOrdEqual16, U1, F16, F16, ) +OPCODE(FPOrdEqual32, U1, F32, F32, ) +OPCODE(FPOrdEqual64, U1, F64, F64, ) +OPCODE(FPUnordEqual16, U1, F16, F16, ) +OPCODE(FPUnordEqual32, U1, F32, F32, ) +OPCODE(FPUnordEqual64, U1, F64, F64, ) +OPCODE(FPOrdNotEqual16, U1, F16, F16, ) +OPCODE(FPOrdNotEqual32, U1, F32, F32, ) +OPCODE(FPOrdNotEqual64, U1, F64, F64, ) +OPCODE(FPUnordNotEqual16, U1, F16, F16, ) +OPCODE(FPUnordNotEqual32, U1, F32, F32, ) +OPCODE(FPUnordNotEqual64, U1, F64, F64, ) +OPCODE(FPOrdLessThan16, U1, F16, F16, ) +OPCODE(FPOrdLessThan32, U1, F32, F32, ) +OPCODE(FPOrdLessThan64, U1, F64, F64, ) +OPCODE(FPUnordLessThan16, U1, F16, F16, ) +OPCODE(FPUnordLessThan32, U1, F32, F32, ) +OPCODE(FPUnordLessThan64, U1, F64, F64, ) +OPCODE(FPOrdGreaterThan16, U1, F16, F16, ) +OPCODE(FPOrdGreaterThan32, U1, F32, F32, ) +OPCODE(FPOrdGreaterThan64, U1, F64, F64, ) +OPCODE(FPUnordGreaterThan16, U1, F16, F16, ) +OPCODE(FPUnordGreaterThan32, U1, F32, F32, ) +OPCODE(FPUnordGreaterThan64, U1, F64, F64, ) +OPCODE(FPOrdLessThanEqual16, U1, F16, F16, ) +OPCODE(FPOrdLessThanEqual32, U1, F32, F32, ) +OPCODE(FPOrdLessThanEqual64, U1, F64, F64, ) +OPCODE(FPUnordLessThanEqual16, U1, F16, F16, ) +OPCODE(FPUnordLessThanEqual32, U1, F32, F32, ) +OPCODE(FPUnordLessThanEqual64, U1, F64, F64, ) +OPCODE(FPOrdGreaterThanEqual16, U1, F16, F16, ) +OPCODE(FPOrdGreaterThanEqual32, U1, F32, F32, ) +OPCODE(FPOrdGreaterThanEqual64, U1, F64, F64, ) +OPCODE(FPUnordGreaterThanEqual16, U1, F16, F16, ) +OPCODE(FPUnordGreaterThanEqual32, U1, F32, F32, ) +OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, ) +OPCODE(FPIsNan16, U1, F16, ) +OPCODE(FPIsNan32, U1, F32, ) +OPCODE(FPIsNan64, U1, F64, ) + +// Integer operations +OPCODE(IAdd32, U32, U32, U32, ) +OPCODE(IAdd64, U64, U64, U64, ) +OPCODE(ISub32, U32, U32, U32, ) +OPCODE(ISub64, U64, U64, U64, ) +OPCODE(IMul32, U32, U32, U32, ) +OPCODE(INeg32, U32, U32, ) +OPCODE(INeg64, U64, U64, ) +OPCODE(IAbs32, U32, U32, ) +OPCODE(ShiftLeftLogical32, U32, U32, U32, ) +OPCODE(ShiftLeftLogical64, U64, U64, U32, ) 
+OPCODE(ShiftRightLogical32, U32, U32, U32, ) +OPCODE(ShiftRightLogical64, U64, U64, U32, ) +OPCODE(ShiftRightArithmetic32, U32, U32, U32, ) +OPCODE(ShiftRightArithmetic64, U64, U64, U32, ) +OPCODE(BitwiseAnd32, U32, U32, U32, ) +OPCODE(BitwiseOr32, U32, U32, U32, ) +OPCODE(BitwiseXor32, U32, U32, U32, ) +OPCODE(BitFieldInsert, U32, U32, U32, U32, U32, ) +OPCODE(BitFieldSExtract, U32, U32, U32, U32, ) +OPCODE(BitFieldUExtract, U32, U32, U32, U32, ) +OPCODE(BitReverse32, U32, U32, ) +OPCODE(BitCount32, U32, U32, ) +OPCODE(BitwiseNot32, U32, U32, ) + +OPCODE(FindSMsb32, U32, U32, ) +OPCODE(FindUMsb32, U32, U32, ) +OPCODE(SMin32, U32, U32, U32, ) +OPCODE(UMin32, U32, U32, U32, ) +OPCODE(SMax32, U32, U32, U32, ) +OPCODE(UMax32, U32, U32, U32, ) +OPCODE(SClamp32, U32, U32, U32, U32, ) +OPCODE(UClamp32, U32, U32, U32, U32, ) +OPCODE(SLessThan, U1, U32, U32, ) +OPCODE(ULessThan, U1, U32, U32, ) +OPCODE(IEqual, U1, U32, U32, ) +OPCODE(SLessThanEqual, U1, U32, U32, ) +OPCODE(ULessThanEqual, U1, U32, U32, ) +OPCODE(SGreaterThan, U1, U32, U32, ) +OPCODE(UGreaterThan, U1, U32, U32, ) +OPCODE(INotEqual, U1, U32, U32, ) +OPCODE(SGreaterThanEqual, U1, U32, U32, ) +OPCODE(UGreaterThanEqual, U1, U32, U32, ) + +// Atomic operations +OPCODE(SharedAtomicIAdd32, U32, U32, U32, ) +OPCODE(SharedAtomicSMin32, U32, U32, U32, ) +OPCODE(SharedAtomicUMin32, U32, U32, U32, ) +OPCODE(SharedAtomicSMax32, U32, U32, U32, ) +OPCODE(SharedAtomicUMax32, U32, U32, U32, ) +OPCODE(SharedAtomicInc32, U32, U32, U32, ) +OPCODE(SharedAtomicDec32, U32, U32, U32, ) +OPCODE(SharedAtomicAnd32, U32, U32, U32, ) +OPCODE(SharedAtomicOr32, U32, U32, U32, ) +OPCODE(SharedAtomicXor32, U32, U32, U32, ) +OPCODE(SharedAtomicExchange32, U32, U32, U32, ) +OPCODE(SharedAtomicExchange64, U64, U32, U64, ) + +OPCODE(GlobalAtomicIAdd32, U32, U64, U32, ) +OPCODE(GlobalAtomicSMin32, U32, U64, U32, ) +OPCODE(GlobalAtomicUMin32, U32, U64, U32, ) +OPCODE(GlobalAtomicSMax32, U32, U64, U32, ) +OPCODE(GlobalAtomicUMax32, U32, U64, U32, ) +OPCODE(GlobalAtomicInc32, U32, U64, U32, ) +OPCODE(GlobalAtomicDec32, U32, U64, U32, ) +OPCODE(GlobalAtomicAnd32, U32, U64, U32, ) +OPCODE(GlobalAtomicOr32, U32, U64, U32, ) +OPCODE(GlobalAtomicXor32, U32, U64, U32, ) +OPCODE(GlobalAtomicExchange32, U32, U64, U32, ) +OPCODE(GlobalAtomicIAdd64, U64, U64, U64, ) +OPCODE(GlobalAtomicSMin64, U64, U64, U64, ) +OPCODE(GlobalAtomicUMin64, U64, U64, U64, ) +OPCODE(GlobalAtomicSMax64, U64, U64, U64, ) +OPCODE(GlobalAtomicUMax64, U64, U64, U64, ) +OPCODE(GlobalAtomicAnd64, U64, U64, U64, ) +OPCODE(GlobalAtomicOr64, U64, U64, U64, ) +OPCODE(GlobalAtomicXor64, U64, U64, U64, ) +OPCODE(GlobalAtomicExchange64, U64, U64, U64, ) +OPCODE(GlobalAtomicAddF32, F32, U64, F32, ) +OPCODE(GlobalAtomicAddF16x2, U32, U64, F16x2, ) +OPCODE(GlobalAtomicAddF32x2, U32, U64, F32x2, ) +OPCODE(GlobalAtomicMinF16x2, U32, U64, F16x2, ) +OPCODE(GlobalAtomicMinF32x2, U32, U64, F32x2, ) +OPCODE(GlobalAtomicMaxF16x2, U32, U64, F16x2, ) +OPCODE(GlobalAtomicMaxF32x2, U32, U64, F32x2, ) + +OPCODE(StorageAtomicIAdd32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicSMin32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicUMin32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicSMax32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicUMax32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicInc32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicDec32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicAnd32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicOr32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicXor32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicExchange32, 
U32, U32, U32, U32, ) +OPCODE(StorageAtomicIAdd64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicSMin64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicUMin64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicSMax64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicUMax64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicAnd64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicOr64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicXor64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicExchange64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicAddF32, F32, U32, U32, F32, ) +OPCODE(StorageAtomicAddF16x2, U32, U32, U32, F16x2, ) +OPCODE(StorageAtomicAddF32x2, U32, U32, U32, F32x2, ) +OPCODE(StorageAtomicMinF16x2, U32, U32, U32, F16x2, ) +OPCODE(StorageAtomicMinF32x2, U32, U32, U32, F32x2, ) +OPCODE(StorageAtomicMaxF16x2, U32, U32, U32, F16x2, ) +OPCODE(StorageAtomicMaxF32x2, U32, U32, U32, F32x2, ) + +// Logical operations +OPCODE(LogicalOr, U1, U1, U1, ) +OPCODE(LogicalAnd, U1, U1, U1, ) +OPCODE(LogicalXor, U1, U1, U1, ) +OPCODE(LogicalNot, U1, U1, ) + +// Conversion operations +OPCODE(ConvertS16F16, U32, F16, ) +OPCODE(ConvertS16F32, U32, F32, ) +OPCODE(ConvertS16F64, U32, F64, ) +OPCODE(ConvertS32F16, U32, F16, ) +OPCODE(ConvertS32F32, U32, F32, ) +OPCODE(ConvertS32F64, U32, F64, ) +OPCODE(ConvertS64F16, U64, F16, ) +OPCODE(ConvertS64F32, U64, F32, ) +OPCODE(ConvertS64F64, U64, F64, ) +OPCODE(ConvertU16F16, U32, F16, ) +OPCODE(ConvertU16F32, U32, F32, ) +OPCODE(ConvertU16F64, U32, F64, ) +OPCODE(ConvertU32F16, U32, F16, ) +OPCODE(ConvertU32F32, U32, F32, ) +OPCODE(ConvertU32F64, U32, F64, ) +OPCODE(ConvertU64F16, U64, F16, ) +OPCODE(ConvertU64F32, U64, F32, ) +OPCODE(ConvertU64F64, U64, F64, ) +OPCODE(ConvertU64U32, U64, U32, ) +OPCODE(ConvertU32U64, U32, U64, ) +OPCODE(ConvertF16F32, F16, F32, ) +OPCODE(ConvertF32F16, F32, F16, ) +OPCODE(ConvertF32F64, F32, F64, ) +OPCODE(ConvertF64F32, F64, F32, ) +OPCODE(ConvertF16S8, F16, U32, ) +OPCODE(ConvertF16S16, F16, U32, ) +OPCODE(ConvertF16S32, F16, U32, ) +OPCODE(ConvertF16S64, F16, U64, ) +OPCODE(ConvertF16U8, F16, U32, ) +OPCODE(ConvertF16U16, F16, U32, ) +OPCODE(ConvertF16U32, F16, U32, ) +OPCODE(ConvertF16U64, F16, U64, ) +OPCODE(ConvertF32S8, F32, U32, ) +OPCODE(ConvertF32S16, F32, U32, ) +OPCODE(ConvertF32S32, F32, U32, ) +OPCODE(ConvertF32S64, F32, U64, ) +OPCODE(ConvertF32U8, F32, U32, ) +OPCODE(ConvertF32U16, F32, U32, ) +OPCODE(ConvertF32U32, F32, U32, ) +OPCODE(ConvertF32U64, F32, U64, ) +OPCODE(ConvertF64S8, F64, U32, ) +OPCODE(ConvertF64S16, F64, U32, ) +OPCODE(ConvertF64S32, F64, U32, ) +OPCODE(ConvertF64S64, F64, U64, ) +OPCODE(ConvertF64U8, F64, U32, ) +OPCODE(ConvertF64U16, F64, U32, ) +OPCODE(ConvertF64U32, F64, U32, ) +OPCODE(ConvertF64U64, F64, U64, ) + +// Image operations +OPCODE(BindlessImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) +OPCODE(BindlessImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) +OPCODE(BindlessImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) +OPCODE(BindlessImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) +OPCODE(BindlessImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) +OPCODE(BindlessImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) +OPCODE(BindlessImageFetch, F32x4, U32, Opaque, Opaque, U32, Opaque, ) +OPCODE(BindlessImageQueryDimensions, U32x4, U32, U32, ) +OPCODE(BindlessImageQueryLod, F32x4, U32, Opaque, ) +OPCODE(BindlessImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(BindlessImageRead, U32x4, U32, Opaque, ) +OPCODE(BindlessImageWrite, Void, 
U32, Opaque, U32x4, ) + +OPCODE(BoundImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) +OPCODE(BoundImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) +OPCODE(BoundImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) +OPCODE(BoundImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) +OPCODE(BoundImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) +OPCODE(BoundImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) +OPCODE(BoundImageFetch, F32x4, U32, Opaque, Opaque, U32, Opaque, ) +OPCODE(BoundImageQueryDimensions, U32x4, U32, U32, ) +OPCODE(BoundImageQueryLod, F32x4, U32, Opaque, ) +OPCODE(BoundImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(BoundImageRead, U32x4, U32, Opaque, ) +OPCODE(BoundImageWrite, Void, U32, Opaque, U32x4, ) + +OPCODE(ImageSampleImplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(ImageSampleExplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(ImageSampleDrefImplicitLod, F32, Opaque, Opaque, F32, Opaque, Opaque, ) +OPCODE(ImageSampleDrefExplicitLod, F32, Opaque, Opaque, F32, Opaque, Opaque, ) +OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, Opaque, F32, ) +OPCODE(ImageFetch, F32x4, Opaque, Opaque, Opaque, U32, Opaque, ) +OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, ) +OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, ) +OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(ImageRead, U32x4, Opaque, Opaque, ) +OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, ) + +// Atomic Image operations + +OPCODE(BindlessImageAtomicIAdd32, U32, U32, Opaque, U32, ) +OPCODE(BindlessImageAtomicSMin32, U32, U32, Opaque, U32, ) +OPCODE(BindlessImageAtomicUMin32, U32, U32, Opaque, U32, ) +OPCODE(BindlessImageAtomicSMax32, U32, U32, Opaque, U32, ) +OPCODE(BindlessImageAtomicUMax32, U32, U32, Opaque, U32, ) +OPCODE(BindlessImageAtomicInc32, U32, U32, Opaque, U32, ) +OPCODE(BindlessImageAtomicDec32, U32, U32, Opaque, U32, ) +OPCODE(BindlessImageAtomicAnd32, U32, U32, Opaque, U32, ) +OPCODE(BindlessImageAtomicOr32, U32, U32, Opaque, U32, ) +OPCODE(BindlessImageAtomicXor32, U32, U32, Opaque, U32, ) +OPCODE(BindlessImageAtomicExchange32, U32, U32, Opaque, U32, ) + +OPCODE(BoundImageAtomicIAdd32, U32, U32, Opaque, U32, ) +OPCODE(BoundImageAtomicSMin32, U32, U32, Opaque, U32, ) +OPCODE(BoundImageAtomicUMin32, U32, U32, Opaque, U32, ) +OPCODE(BoundImageAtomicSMax32, U32, U32, Opaque, U32, ) +OPCODE(BoundImageAtomicUMax32, U32, U32, Opaque, U32, ) +OPCODE(BoundImageAtomicInc32, U32, U32, Opaque, U32, ) +OPCODE(BoundImageAtomicDec32, U32, U32, Opaque, U32, ) +OPCODE(BoundImageAtomicAnd32, U32, U32, Opaque, U32, ) +OPCODE(BoundImageAtomicOr32, U32, U32, Opaque, U32, ) +OPCODE(BoundImageAtomicXor32, U32, U32, Opaque, U32, ) +OPCODE(BoundImageAtomicExchange32, U32, U32, Opaque, U32, ) + +OPCODE(ImageAtomicIAdd32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicSMin32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicUMin32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicSMax32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicUMax32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicInc32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicDec32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicAnd32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicOr32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicXor32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicExchange32, U32, Opaque, Opaque, U32, ) + +// Warp operations +OPCODE(LaneId, 
U32, ) +OPCODE(VoteAll, U1, U1, ) +OPCODE(VoteAny, U1, U1, ) +OPCODE(VoteEqual, U1, U1, ) +OPCODE(SubgroupBallot, U32, U1, ) +OPCODE(SubgroupEqMask, U32, ) +OPCODE(SubgroupLtMask, U32, ) +OPCODE(SubgroupLeMask, U32, ) +OPCODE(SubgroupGtMask, U32, ) +OPCODE(SubgroupGeMask, U32, ) +OPCODE(ShuffleIndex, U32, U32, U32, U32, U32, ) +OPCODE(ShuffleUp, U32, U32, U32, U32, U32, ) +OPCODE(ShuffleDown, U32, U32, U32, U32, U32, ) +OPCODE(ShuffleButterfly, U32, U32, U32, U32, U32, ) +OPCODE(FSwizzleAdd, F32, F32, F32, U32, ) +OPCODE(DPdxFine, F32, F32, ) +OPCODE(DPdyFine, F32, F32, ) +OPCODE(DPdxCoarse, F32, F32, ) +OPCODE(DPdyCoarse, F32, F32, ) diff --git a/src/shader_recompiler/frontend/ir/patch.cpp b/src/shader_recompiler/frontend/ir/patch.cpp new file mode 100644 index 000000000..4c956a970 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/patch.cpp @@ -0,0 +1,28 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/patch.h" + +namespace Shader::IR { + +bool IsGeneric(Patch patch) noexcept { + return patch >= Patch::Component0 && patch <= Patch::Component119; +} + +u32 GenericPatchIndex(Patch patch) { + if (!IsGeneric(patch)) { + throw InvalidArgument("Patch {} is not generic", patch); + } + return (static_cast<u32>(patch) - static_cast<u32>(Patch::Component0)) / 4; +} + +u32 GenericPatchElement(Patch patch) { + if (!IsGeneric(patch)) { + throw InvalidArgument("Patch {} is not generic", patch); + } + return (static_cast<u32>(patch) - static_cast<u32>(Patch::Component0)) % 4; +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/patch.h b/src/shader_recompiler/frontend/ir/patch.h new file mode 100644 index 000000000..6d66ff0d6 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/patch.h @@ -0,0 +1,149 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
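+// Patch attribute space: six tessellation levels, two padding slots, then 120
+// generic components addressed as 30 four-component patch attributes, for 128
+// entries in total (hence the static_assert after the enum). For example,
+// Patch::Component5 yields GenericPatchIndex == 1 and GenericPatchElement == 1.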
+ +#pragma once + +#include "common/common_types.h" + +namespace Shader::IR { + +enum class Patch : u64 { + TessellationLodLeft, + TessellationLodTop, + TessellationLodRight, + TessellationLodBottom, + TessellationLodInteriorU, + TessellationLodInteriorV, + ComponentPadding0, + ComponentPadding1, + Component0, + Component1, + Component2, + Component3, + Component4, + Component5, + Component6, + Component7, + Component8, + Component9, + Component10, + Component11, + Component12, + Component13, + Component14, + Component15, + Component16, + Component17, + Component18, + Component19, + Component20, + Component21, + Component22, + Component23, + Component24, + Component25, + Component26, + Component27, + Component28, + Component29, + Component30, + Component31, + Component32, + Component33, + Component34, + Component35, + Component36, + Component37, + Component38, + Component39, + Component40, + Component41, + Component42, + Component43, + Component44, + Component45, + Component46, + Component47, + Component48, + Component49, + Component50, + Component51, + Component52, + Component53, + Component54, + Component55, + Component56, + Component57, + Component58, + Component59, + Component60, + Component61, + Component62, + Component63, + Component64, + Component65, + Component66, + Component67, + Component68, + Component69, + Component70, + Component71, + Component72, + Component73, + Component74, + Component75, + Component76, + Component77, + Component78, + Component79, + Component80, + Component81, + Component82, + Component83, + Component84, + Component85, + Component86, + Component87, + Component88, + Component89, + Component90, + Component91, + Component92, + Component93, + Component94, + Component95, + Component96, + Component97, + Component98, + Component99, + Component100, + Component101, + Component102, + Component103, + Component104, + Component105, + Component106, + Component107, + Component108, + Component109, + Component110, + Component111, + Component112, + Component113, + Component114, + Component115, + Component116, + Component117, + Component118, + Component119, +}; +static_assert(static_cast<u64>(Patch::Component119) == 127); + +[[nodiscard]] bool IsGeneric(Patch patch) noexcept; + +[[nodiscard]] u32 GenericPatchIndex(Patch patch); + +[[nodiscard]] u32 GenericPatchElement(Patch patch); + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/post_order.cpp b/src/shader_recompiler/frontend/ir/post_order.cpp new file mode 100644 index 000000000..16bc44101 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/post_order.cpp @@ -0,0 +1,46 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <algorithm> + +#include <boost/container/flat_set.hpp> +#include <boost/container/small_vector.hpp> + +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/post_order.h" + +namespace Shader::IR { + +BlockList PostOrder(const AbstractSyntaxNode& root) { + boost::container::small_vector<Block*, 16> block_stack; + boost::container::flat_set<Block*> visited; + BlockList post_order_blocks; + + if (root.type != AbstractSyntaxNode::Type::Block) { + throw LogicError("First node in abstract syntax list root is not a block"); + } + Block* const first_block{root.data.block}; + visited.insert(first_block); + block_stack.push_back(first_block); + + while (!block_stack.empty()) { + Block* const block{block_stack.back()}; + const auto visit{[&](Block* branch) { + if (!visited.insert(branch).second) { + return false; + } + // Calling push_back twice is faster than insert on MSVC + block_stack.push_back(block); + block_stack.push_back(branch); + return true; + }}; + block_stack.pop_back(); + if (std::ranges::none_of(block->ImmSuccessors(), visit)) { + post_order_blocks.push_back(block); + } + } + return post_order_blocks; +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/post_order.h b/src/shader_recompiler/frontend/ir/post_order.h new file mode 100644 index 000000000..07bfbadc3 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/post_order.h @@ -0,0 +1,14 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "shader_recompiler/frontend/ir/abstract_syntax_list.h" +#include "shader_recompiler/frontend/ir/basic_block.h" + +namespace Shader::IR { + +BlockList PostOrder(const AbstractSyntaxNode& root); + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/pred.h b/src/shader_recompiler/frontend/ir/pred.h new file mode 100644 index 000000000..4e7f32423 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/pred.h @@ -0,0 +1,44 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <fmt/format.h> + +namespace Shader::IR { + +enum class Pred : u64 { + P0, + P1, + P2, + P3, + P4, + P5, + P6, + PT, +}; + +constexpr size_t NUM_USER_PREDS = 7; +constexpr size_t NUM_PREDS = 8; + +[[nodiscard]] constexpr size_t PredIndex(Pred pred) noexcept { + return static_cast<size_t>(pred); +} + +} // namespace Shader::IR + +template <> +struct fmt::formatter<Shader::IR::Pred> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(const Shader::IR::Pred& pred, FormatContext& ctx) { + if (pred == Shader::IR::Pred::PT) { + return fmt::format_to(ctx.out(), "PT"); + } else { + return fmt::format_to(ctx.out(), "P{}", static_cast<int>(pred)); + } + } +}; diff --git a/src/shader_recompiler/frontend/ir/program.cpp b/src/shader_recompiler/frontend/ir/program.cpp new file mode 100644 index 000000000..3fc06f855 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/program.cpp @@ -0,0 +1,32 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
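PostOrder() above is an iterative depth-first traversal: it pops a block and, if any immediate successor is still unvisited, re-pushes the block together with that successor; a block is emitted only once every successor has been seen. A minimal sketch of the same idea on a plain integer adjacency list (hypothetical graph, not the IR types):

```cpp
// Sketch of the iterative post-order trick: re-push the parent before
// descending into an unvisited successor, emit a node only when none_of
// its successors is new.
#include <algorithm>
#include <cstdio>
#include <set>
#include <vector>

std::vector<int> PostOrder(const std::vector<std::vector<int>>& succ, int root) {
    std::vector<int> stack{root};
    std::set<int> visited{root};
    std::vector<int> post_order;
    while (!stack.empty()) {
        const int node = stack.back();
        stack.pop_back();
        const auto visit = [&](int next) {
            if (!visited.insert(next).second) {
                return false;
            }
            stack.push_back(node); // revisit the parent later
            stack.push_back(next); // descend into this successor first
            return true;
        };
        if (std::none_of(succ[node].begin(), succ[node].end(), visit)) {
            post_order.push_back(node);
        }
    }
    return post_order;
}

int main() {
    // 0 -> {1, 2}, 1 -> {3}, 2 -> {3}, 3 -> {}
    const std::vector<std::vector<int>> succ{{1, 2}, {3}, {3}, {}};
    for (const int n : PostOrder(succ, 0)) {
        std::printf("%d ", n); // prints: 3 1 2 0
    }
}
```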
+ +#include <map> +#include <string> + +#include <fmt/format.h> + +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::IR { + +std::string DumpProgram(const Program& program) { + size_t index{0}; + std::map<const IR::Inst*, size_t> inst_to_index; + std::map<const IR::Block*, size_t> block_to_index; + + for (const IR::Block* const block : program.blocks) { + block_to_index.emplace(block, index); + ++index; + } + std::string ret; + for (const auto& block : program.blocks) { + ret += IR::DumpBlock(*block, block_to_index, inst_to_index, index) + '\n'; + } + return ret; +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h new file mode 100644 index 000000000..ebcaa8bc2 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/program.h @@ -0,0 +1,35 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> +#include <string> + +#include "shader_recompiler/frontend/ir/abstract_syntax_list.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/program_header.h" +#include "shader_recompiler/shader_info.h" +#include "shader_recompiler/stage.h" + +namespace Shader::IR { + +struct Program { + AbstractSyntaxList syntax_list; + BlockList blocks; + BlockList post_order_blocks; + Info info; + Stage stage{}; + std::array<u32, 3> workgroup_size{}; + OutputTopology output_topology{}; + u32 output_vertices{}; + u32 invocations{}; + u32 local_memory_size{}; + u32 shared_memory_size{}; + bool is_geometry_passthrough{}; +}; + +[[nodiscard]] std::string DumpProgram(const Program& program); + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/reg.h b/src/shader_recompiler/frontend/ir/reg.h new file mode 100644 index 000000000..a4b635792 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/reg.h @@ -0,0 +1,332 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
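Program above carries both blocks and post_order_blocks. A general compiler-construction note (not something stated in this change): walking a post order in reverse yields a reverse post order, which visits a block before its non-back-edge successors and therefore suits forward passes. A trivial sketch, reusing the post order produced in the previous example:

```cpp
// Sketch: reverse iteration over a post-order list gives a reverse post order.
#include <cstdio>
#include <ranges>
#include <vector>

int main() {
    const std::vector<int> post_order{3, 1, 2, 0}; // from the sketch above
    for (const int block : post_order | std::views::reverse) {
        std::printf("%d ", block); // prints: 0 2 1 3, a valid reverse post order
    }
}
```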
+ +#pragma once + +#include <fmt/format.h> + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" + +namespace Shader::IR { + +enum class Reg : u64 { + R0, + R1, + R2, + R3, + R4, + R5, + R6, + R7, + R8, + R9, + R10, + R11, + R12, + R13, + R14, + R15, + R16, + R17, + R18, + R19, + R20, + R21, + R22, + R23, + R24, + R25, + R26, + R27, + R28, + R29, + R30, + R31, + R32, + R33, + R34, + R35, + R36, + R37, + R38, + R39, + R40, + R41, + R42, + R43, + R44, + R45, + R46, + R47, + R48, + R49, + R50, + R51, + R52, + R53, + R54, + R55, + R56, + R57, + R58, + R59, + R60, + R61, + R62, + R63, + R64, + R65, + R66, + R67, + R68, + R69, + R70, + R71, + R72, + R73, + R74, + R75, + R76, + R77, + R78, + R79, + R80, + R81, + R82, + R83, + R84, + R85, + R86, + R87, + R88, + R89, + R90, + R91, + R92, + R93, + R94, + R95, + R96, + R97, + R98, + R99, + R100, + R101, + R102, + R103, + R104, + R105, + R106, + R107, + R108, + R109, + R110, + R111, + R112, + R113, + R114, + R115, + R116, + R117, + R118, + R119, + R120, + R121, + R122, + R123, + R124, + R125, + R126, + R127, + R128, + R129, + R130, + R131, + R132, + R133, + R134, + R135, + R136, + R137, + R138, + R139, + R140, + R141, + R142, + R143, + R144, + R145, + R146, + R147, + R148, + R149, + R150, + R151, + R152, + R153, + R154, + R155, + R156, + R157, + R158, + R159, + R160, + R161, + R162, + R163, + R164, + R165, + R166, + R167, + R168, + R169, + R170, + R171, + R172, + R173, + R174, + R175, + R176, + R177, + R178, + R179, + R180, + R181, + R182, + R183, + R184, + R185, + R186, + R187, + R188, + R189, + R190, + R191, + R192, + R193, + R194, + R195, + R196, + R197, + R198, + R199, + R200, + R201, + R202, + R203, + R204, + R205, + R206, + R207, + R208, + R209, + R210, + R211, + R212, + R213, + R214, + R215, + R216, + R217, + R218, + R219, + R220, + R221, + R222, + R223, + R224, + R225, + R226, + R227, + R228, + R229, + R230, + R231, + R232, + R233, + R234, + R235, + R236, + R237, + R238, + R239, + R240, + R241, + R242, + R243, + R244, + R245, + R246, + R247, + R248, + R249, + R250, + R251, + R252, + R253, + R254, + RZ, +}; +static_assert(static_cast<int>(Reg::RZ) == 255); + +constexpr size_t NUM_USER_REGS = 255; +constexpr size_t NUM_REGS = 256; + +[[nodiscard]] constexpr Reg operator+(Reg reg, int num) { + if (reg == Reg::RZ) { + // Adding or subtracting registers from RZ yields RZ + return Reg::RZ; + } + const int result{static_cast<int>(reg) + num}; + if (result >= static_cast<int>(Reg::RZ)) { + throw LogicError("Overflow on register arithmetic"); + } + if (result < 0) { + throw LogicError("Underflow on register arithmetic"); + } + return static_cast<Reg>(result); +} + +[[nodiscard]] constexpr Reg operator-(Reg reg, int num) { + return reg + (-num); +} + +constexpr Reg operator++(Reg& reg) { + reg = reg + 1; + return reg; +} + +constexpr Reg operator++(Reg& reg, int) { + const Reg copy{reg}; + reg = reg + 1; + return copy; +} + +[[nodiscard]] constexpr size_t RegIndex(Reg reg) noexcept { + return static_cast<size_t>(reg); +} + +[[nodiscard]] constexpr bool IsAligned(Reg reg, size_t align) { + return RegIndex(reg) % align == 0 || reg == Reg::RZ; +} + +} // namespace Shader::IR + +template <> +struct fmt::formatter<Shader::IR::Reg> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(const Shader::IR::Reg& reg, FormatContext& ctx) { + if (reg == Shader::IR::Reg::RZ) { + return fmt::format_to(ctx.out(), "RZ"); + } else if (static_cast<int>(reg) >= 0 && 
static_cast<int>(reg) < 255) { + return fmt::format_to(ctx.out(), "R{}", static_cast<int>(reg)); + } else { + throw Shader::LogicError("Invalid register with raw value {}", static_cast<int>(reg)); + } + } +}; diff --git a/src/shader_recompiler/frontend/ir/type.cpp b/src/shader_recompiler/frontend/ir/type.cpp new file mode 100644 index 000000000..f28341bfe --- /dev/null +++ b/src/shader_recompiler/frontend/ir/type.cpp @@ -0,0 +1,38 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <array> +#include <string> + +#include "shader_recompiler/frontend/ir/type.h" + +namespace Shader::IR { + +std::string NameOf(Type type) { + static constexpr std::array names{ + "Opaque", "Label", "Reg", "Pred", "Attribute", "U1", "U8", "U16", "U32", + "U64", "F16", "F32", "F64", "U32x2", "U32x3", "U32x4", "F16x2", "F16x3", + "F16x4", "F32x2", "F32x3", "F32x4", "F64x2", "F64x3", "F64x4", + }; + const size_t bits{static_cast<size_t>(type)}; + if (bits == 0) { + return "Void"; + } + std::string result; + for (size_t i = 0; i < names.size(); i++) { + if ((bits & (size_t{1} << i)) != 0) { + if (!result.empty()) { + result += '|'; + } + result += names[i]; + } + } + return result; +} + +bool AreTypesCompatible(Type lhs, Type rhs) noexcept { + return lhs == rhs || lhs == Type::Opaque || rhs == Type::Opaque; +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/type.h b/src/shader_recompiler/frontend/ir/type.h new file mode 100644 index 000000000..294b230c4 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/type.h @@ -0,0 +1,61 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <string> + +#include <fmt/format.h> + +#include "common/common_funcs.h" +#include "shader_recompiler/exception.h" + +namespace Shader::IR { + +enum class Type { + Void = 0, + Opaque = 1 << 0, + Reg = 1 << 1, + Pred = 1 << 2, + Attribute = 1 << 3, + Patch = 1 << 4, + U1 = 1 << 5, + U8 = 1 << 6, + U16 = 1 << 7, + U32 = 1 << 8, + U64 = 1 << 9, + F16 = 1 << 10, + F32 = 1 << 11, + F64 = 1 << 12, + U32x2 = 1 << 13, + U32x3 = 1 << 14, + U32x4 = 1 << 15, + F16x2 = 1 << 16, + F16x3 = 1 << 17, + F16x4 = 1 << 18, + F32x2 = 1 << 19, + F32x3 = 1 << 20, + F32x4 = 1 << 21, + F64x2 = 1 << 22, + F64x3 = 1 << 23, + F64x4 = 1 << 24, +}; +DECLARE_ENUM_FLAG_OPERATORS(Type) + +[[nodiscard]] std::string NameOf(Type type); + +[[nodiscard]] bool AreTypesCompatible(Type lhs, Type rhs) noexcept; + +} // namespace Shader::IR + +template <> +struct fmt::formatter<Shader::IR::Type> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(const Shader::IR::Type& type, FormatContext& ctx) { + return fmt::format_to(ctx.out(), "{}", NameOf(type)); + } +}; diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp new file mode 100644 index 000000000..d365ea1bc --- /dev/null +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -0,0 +1,99 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
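The Type enum above is a set of one-hot bits, which is what lets value.h further down build aliases such as F32F64 out of a bitwise OR and lets AreTypesCompatible and the TypedValue constructor test compatibility with a single AND. A minimal sketch with simplified flags (hand-rolled operators standing in for DECLARE_ENUM_FLAG_OPERATORS, same bit positions as the header):

```cpp
// Sketch: a "one of several types" constraint is a bitwise OR of one-hot bits,
// and checking a concrete type against it is a single AND.
#include <cstdint>
#include <cstdio>

enum class Type : std::uint32_t {
    Void = 0,
    U32 = 1 << 8,
    F32 = 1 << 11,
    F64 = 1 << 12,
};

constexpr Type operator|(Type a, Type b) {
    return static_cast<Type>(static_cast<std::uint32_t>(a) | static_cast<std::uint32_t>(b));
}
constexpr Type operator&(Type a, Type b) {
    return static_cast<Type>(static_cast<std::uint32_t>(a) & static_cast<std::uint32_t>(b));
}

int main() {
    constexpr Type f32f64 = Type::F32 | Type::F64; // the F32F64 alias constraint
    std::printf("F32 accepted: %d\n", (Type::F32 & f32f64) != Type::Void); // 1
    std::printf("U32 accepted: %d\n", (Type::U32 & f32f64) != Type::Void); // 0
}
```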
+ +#include "shader_recompiler/frontend/ir/opcodes.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::IR { + +Value::Value(IR::Inst* value) noexcept : type{Type::Opaque}, inst{value} {} + +Value::Value(IR::Reg value) noexcept : type{Type::Reg}, reg{value} {} + +Value::Value(IR::Pred value) noexcept : type{Type::Pred}, pred{value} {} + +Value::Value(IR::Attribute value) noexcept : type{Type::Attribute}, attribute{value} {} + +Value::Value(IR::Patch value) noexcept : type{Type::Patch}, patch{value} {} + +Value::Value(bool value) noexcept : type{Type::U1}, imm_u1{value} {} + +Value::Value(u8 value) noexcept : type{Type::U8}, imm_u8{value} {} + +Value::Value(u16 value) noexcept : type{Type::U16}, imm_u16{value} {} + +Value::Value(u32 value) noexcept : type{Type::U32}, imm_u32{value} {} + +Value::Value(f32 value) noexcept : type{Type::F32}, imm_f32{value} {} + +Value::Value(u64 value) noexcept : type{Type::U64}, imm_u64{value} {} + +Value::Value(f64 value) noexcept : type{Type::F64}, imm_f64{value} {} + +IR::Type Value::Type() const noexcept { + if (IsPhi()) { + // The type of a phi node is stored in its flags + return inst->Flags<IR::Type>(); + } + if (IsIdentity()) { + return inst->Arg(0).Type(); + } + if (type == Type::Opaque) { + return inst->Type(); + } + return type; +} + +bool Value::operator==(const Value& other) const { + if (type != other.type) { + return false; + } + switch (type) { + case Type::Void: + return true; + case Type::Opaque: + return inst == other.inst; + case Type::Reg: + return reg == other.reg; + case Type::Pred: + return pred == other.pred; + case Type::Attribute: + return attribute == other.attribute; + case Type::Patch: + return patch == other.patch; + case Type::U1: + return imm_u1 == other.imm_u1; + case Type::U8: + return imm_u8 == other.imm_u8; + case Type::U16: + case Type::F16: + return imm_u16 == other.imm_u16; + case Type::U32: + case Type::F32: + return imm_u32 == other.imm_u32; + case Type::U64: + case Type::F64: + return imm_u64 == other.imm_u64; + case Type::U32x2: + case Type::U32x3: + case Type::U32x4: + case Type::F16x2: + case Type::F16x3: + case Type::F16x4: + case Type::F32x2: + case Type::F32x3: + case Type::F32x4: + case Type::F64x2: + case Type::F64x3: + case Type::F64x4: + break; + } + throw LogicError("Invalid type {}", type); +} + +bool Value::operator!=(const Value& other) const { + return !operator==(other); +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h new file mode 100644 index 000000000..0c6bf684d --- /dev/null +++ b/src/shader_recompiler/frontend/ir/value.h @@ -0,0 +1,398 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <array> +#include <cstring> +#include <memory> +#include <type_traits> +#include <utility> +#include <vector> + +#include <boost/container/small_vector.hpp> +#include <boost/intrusive/list.hpp> + +#include "common/assert.h" +#include "common/bit_cast.h" +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/attribute.h" +#include "shader_recompiler/frontend/ir/opcodes.h" +#include "shader_recompiler/frontend/ir/patch.h" +#include "shader_recompiler/frontend/ir/pred.h" +#include "shader_recompiler/frontend/ir/reg.h" +#include "shader_recompiler/frontend/ir/type.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::IR { + +class Block; +class Inst; + +struct AssociatedInsts; + +class Value { +public: + Value() noexcept = default; + explicit Value(IR::Inst* value) noexcept; + explicit Value(IR::Reg value) noexcept; + explicit Value(IR::Pred value) noexcept; + explicit Value(IR::Attribute value) noexcept; + explicit Value(IR::Patch value) noexcept; + explicit Value(bool value) noexcept; + explicit Value(u8 value) noexcept; + explicit Value(u16 value) noexcept; + explicit Value(u32 value) noexcept; + explicit Value(f32 value) noexcept; + explicit Value(u64 value) noexcept; + explicit Value(f64 value) noexcept; + + [[nodiscard]] bool IsIdentity() const noexcept; + [[nodiscard]] bool IsPhi() const noexcept; + [[nodiscard]] bool IsEmpty() const noexcept; + [[nodiscard]] bool IsImmediate() const noexcept; + [[nodiscard]] IR::Type Type() const noexcept; + + [[nodiscard]] IR::Inst* Inst() const; + [[nodiscard]] IR::Inst* InstRecursive() const; + [[nodiscard]] IR::Value Resolve() const; + [[nodiscard]] IR::Reg Reg() const; + [[nodiscard]] IR::Pred Pred() const; + [[nodiscard]] IR::Attribute Attribute() const; + [[nodiscard]] IR::Patch Patch() const; + [[nodiscard]] bool U1() const; + [[nodiscard]] u8 U8() const; + [[nodiscard]] u16 U16() const; + [[nodiscard]] u32 U32() const; + [[nodiscard]] f32 F32() const; + [[nodiscard]] u64 U64() const; + [[nodiscard]] f64 F64() const; + + [[nodiscard]] bool operator==(const Value& other) const; + [[nodiscard]] bool operator!=(const Value& other) const; + +private: + IR::Type type{}; + union { + IR::Inst* inst{}; + IR::Reg reg; + IR::Pred pred; + IR::Attribute attribute; + IR::Patch patch; + bool imm_u1; + u8 imm_u8; + u16 imm_u16; + u32 imm_u32; + f32 imm_f32; + u64 imm_u64; + f64 imm_f64; + }; +}; +static_assert(static_cast<u32>(IR::Type::Void) == 0, "memset relies on IR::Type being zero"); +static_assert(std::is_trivially_copyable_v<Value>); + +template <IR::Type type_> +class TypedValue : public Value { +public: + TypedValue() = default; + + template <IR::Type other_type> + requires((other_type & type_) != IR::Type::Void) explicit(false) + TypedValue(const TypedValue<other_type>& value) + : Value(value) {} + + explicit TypedValue(const Value& value) : Value(value) { + if ((value.Type() & type_) == IR::Type::Void) { + throw InvalidArgument("Incompatible types {} and {}", type_, value.Type()); + } + } + + explicit TypedValue(IR::Inst* inst_) : TypedValue(Value(inst_)) {} +}; + +class Inst : public boost::intrusive::list_base_hook<> { +public: + explicit Inst(IR::Opcode op_, u32 flags_) noexcept; + ~Inst(); + + Inst& operator=(const Inst&) = delete; + Inst(const Inst&) = delete; + + Inst& operator=(Inst&&) = delete; + Inst(Inst&&) = delete; + + /// Get the number of uses this instruction has. 
+ [[nodiscard]] int UseCount() const noexcept { + return use_count; + } + + /// Determines whether this instruction has uses or not. + [[nodiscard]] bool HasUses() const noexcept { + return use_count > 0; + } + + /// Get the opcode this microinstruction represents. + [[nodiscard]] IR::Opcode GetOpcode() const noexcept { + return op; + } + + /// Determines if there is a pseudo-operation associated with this instruction. + [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept { + return associated_insts != nullptr; + } + + /// Determines whether or not this instruction may have side effects. + [[nodiscard]] bool MayHaveSideEffects() const noexcept; + + /// Determines whether or not this instruction is a pseudo-instruction. + /// Pseudo-instructions depend on their parent instructions for their semantics. + [[nodiscard]] bool IsPseudoInstruction() const noexcept; + + /// Determines if all arguments of this instruction are immediates. + [[nodiscard]] bool AreAllArgsImmediates() const; + + /// Gets a pseudo-operation associated with this instruction + [[nodiscard]] Inst* GetAssociatedPseudoOperation(IR::Opcode opcode); + + /// Get the type this instruction returns. + [[nodiscard]] IR::Type Type() const; + + /// Get the number of arguments this instruction has. + [[nodiscard]] size_t NumArgs() const { + return op == IR::Opcode::Phi ? phi_args.size() : NumArgsOf(op); + } + + /// Get the value of a given argument index. + [[nodiscard]] Value Arg(size_t index) const noexcept { + if (op == IR::Opcode::Phi) { + return phi_args[index].second; + } else { + return args[index]; + } + } + + /// Set the value of a given argument index. + void SetArg(size_t index, Value value); + + /// Get a pointer to the block of a phi argument. + [[nodiscard]] Block* PhiBlock(size_t index) const; + /// Add phi operand to a phi instruction. + void AddPhiOperand(Block* predecessor, const Value& value); + + void Invalidate(); + void ClearArgs(); + + void ReplaceUsesWith(Value replacement); + + void ReplaceOpcode(IR::Opcode opcode); + + template <typename FlagsType> + requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>) + [[nodiscard]] FlagsType Flags() const noexcept { + FlagsType ret; + std::memcpy(reinterpret_cast<char*>(&ret), &flags, sizeof(ret)); + return ret; + } + + template <typename FlagsType> + requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>) + [[nodiscard]] void SetFlags(FlagsType value) noexcept { + std::memcpy(&flags, &value, sizeof(value)); + } + + /// Intrusively store the host definition of this instruction. + template <typename DefinitionType> + void SetDefinition(DefinitionType def) { + definition = Common::BitCast<u32>(def); + } + + /// Return the intrusively stored host definition of this instruction. 
+ template <typename DefinitionType> + [[nodiscard]] DefinitionType Definition() const noexcept { + return Common::BitCast<DefinitionType>(definition); + } + + /// Destructively remove one reference count from the instruction + /// Useful for register allocation + void DestructiveRemoveUsage() { + --use_count; + } + + /// Destructively add usages to the instruction + /// Useful for register allocation + void DestructiveAddUsage(int count) { + use_count += count; + } + +private: + struct NonTriviallyDummy { + NonTriviallyDummy() noexcept {} + }; + + void Use(const Value& value); + void UndoUse(const Value& value); + + IR::Opcode op{}; + int use_count{}; + u32 flags{}; + u32 definition{}; + union { + NonTriviallyDummy dummy{}; + boost::container::small_vector<std::pair<Block*, Value>, 2> phi_args; + std::array<Value, 5> args; + }; + std::unique_ptr<AssociatedInsts> associated_insts; +}; +static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased"); + +struct AssociatedInsts { + union { + Inst* in_bounds_inst; + Inst* sparse_inst; + Inst* zero_inst{}; + }; + Inst* sign_inst{}; + Inst* carry_inst{}; + Inst* overflow_inst{}; +}; + +using U1 = TypedValue<Type::U1>; +using U8 = TypedValue<Type::U8>; +using U16 = TypedValue<Type::U16>; +using U32 = TypedValue<Type::U32>; +using U64 = TypedValue<Type::U64>; +using F16 = TypedValue<Type::F16>; +using F32 = TypedValue<Type::F32>; +using F64 = TypedValue<Type::F64>; +using U32U64 = TypedValue<Type::U32 | Type::U64>; +using F32F64 = TypedValue<Type::F32 | Type::F64>; +using U16U32U64 = TypedValue<Type::U16 | Type::U32 | Type::U64>; +using F16F32F64 = TypedValue<Type::F16 | Type::F32 | Type::F64>; +using UAny = TypedValue<Type::U8 | Type::U16 | Type::U32 | Type::U64>; + +inline bool Value::IsIdentity() const noexcept { + return type == Type::Opaque && inst->GetOpcode() == Opcode::Identity; +} + +inline bool Value::IsPhi() const noexcept { + return type == Type::Opaque && inst->GetOpcode() == Opcode::Phi; +} + +inline bool Value::IsEmpty() const noexcept { + return type == Type::Void; +} + +inline bool Value::IsImmediate() const noexcept { + IR::Type current_type{type}; + const IR::Inst* current_inst{inst}; + while (current_type == Type::Opaque && current_inst->GetOpcode() == Opcode::Identity) { + const Value& arg{current_inst->Arg(0)}; + current_type = arg.type; + current_inst = arg.inst; + } + return current_type != Type::Opaque; +} + +inline IR::Inst* Value::Inst() const { + DEBUG_ASSERT(type == Type::Opaque); + return inst; +} + +inline IR::Inst* Value::InstRecursive() const { + DEBUG_ASSERT(type == Type::Opaque); + if (IsIdentity()) { + return inst->Arg(0).InstRecursive(); + } + return inst; +} + +inline IR::Value Value::Resolve() const { + if (IsIdentity()) { + return inst->Arg(0).Resolve(); + } + return *this; +} + +inline IR::Reg Value::Reg() const { + DEBUG_ASSERT(type == Type::Reg); + return reg; +} + +inline IR::Pred Value::Pred() const { + DEBUG_ASSERT(type == Type::Pred); + return pred; +} + +inline IR::Attribute Value::Attribute() const { + DEBUG_ASSERT(type == Type::Attribute); + return attribute; +} + +inline IR::Patch Value::Patch() const { + DEBUG_ASSERT(type == Type::Patch); + return patch; +} + +inline bool Value::U1() const { + if (IsIdentity()) { + return inst->Arg(0).U1(); + } + DEBUG_ASSERT(type == Type::U1); + return imm_u1; +} + +inline u8 Value::U8() const { + if (IsIdentity()) { + return inst->Arg(0).U8(); + } + DEBUG_ASSERT(type == Type::U8); + return imm_u8; +} + +inline u16 Value::U16() const { + if 
(IsIdentity()) { + return inst->Arg(0).U16(); + } + DEBUG_ASSERT(type == Type::U16); + return imm_u16; +} + +inline u32 Value::U32() const { + if (IsIdentity()) { + return inst->Arg(0).U32(); + } + DEBUG_ASSERT(type == Type::U32); + return imm_u32; +} + +inline f32 Value::F32() const { + if (IsIdentity()) { + return inst->Arg(0).F32(); + } + DEBUG_ASSERT(type == Type::F32); + return imm_f32; +} + +inline u64 Value::U64() const { + if (IsIdentity()) { + return inst->Arg(0).U64(); + } + DEBUG_ASSERT(type == Type::U64); + return imm_u64; +} + +inline f64 Value::F64() const { + if (IsIdentity()) { + return inst->Arg(0).F64(); + } + DEBUG_ASSERT(type == Type::F64); + return imm_f64; +} + +[[nodiscard]] inline bool IsPhi(const Inst& inst) { + return inst.GetOpcode() == Opcode::Phi; +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp new file mode 100644 index 000000000..1a954a509 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -0,0 +1,642 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <array> +#include <optional> +#include <string> +#include <utility> + +#include <fmt/format.h> + +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/frontend/maxwell/decode.h" +#include "shader_recompiler/frontend/maxwell/indirect_branch_table_track.h" +#include "shader_recompiler/frontend/maxwell/location.h" + +namespace Shader::Maxwell::Flow { +namespace { +struct Compare { + bool operator()(const Block& lhs, Location rhs) const noexcept { + return lhs.begin < rhs; + } + + bool operator()(Location lhs, const Block& rhs) const noexcept { + return lhs < rhs.begin; + } + + bool operator()(const Block& lhs, const Block& rhs) const noexcept { + return lhs.begin < rhs.begin; + } +}; + +u32 BranchOffset(Location pc, Instruction inst) { + return pc.Offset() + static_cast<u32>(inst.branch.Offset()) + 8u; +} + +void Split(Block* old_block, Block* new_block, Location pc) { + if (pc <= old_block->begin || pc >= old_block->end) { + throw InvalidArgument("Invalid address to split={}", pc); + } + *new_block = Block{}; + new_block->begin = pc; + new_block->end = old_block->end; + new_block->end_class = old_block->end_class; + new_block->cond = old_block->cond; + new_block->stack = old_block->stack; + new_block->branch_true = old_block->branch_true; + new_block->branch_false = old_block->branch_false; + new_block->function_call = old_block->function_call; + new_block->return_block = old_block->return_block; + new_block->branch_reg = old_block->branch_reg; + new_block->branch_offset = old_block->branch_offset; + new_block->indirect_branches = std::move(old_block->indirect_branches); + + const Location old_begin{old_block->begin}; + Stack old_stack{std::move(old_block->stack)}; + *old_block = Block{}; + old_block->begin = old_begin; + old_block->end = pc; + old_block->end_class = EndClass::Branch; + old_block->cond = IR::Condition(true); + old_block->stack = old_stack; + old_block->branch_true = new_block; + old_block->branch_false = nullptr; +} + +Token OpcodeToken(Opcode opcode) { + switch (opcode) { + case Opcode::PBK: + case Opcode::BRK: + return Token::PBK; + case Opcode::PCNT: + case Opcode::CONT: + return Token::PBK; + case Opcode::PEXIT: + case Opcode::EXIT: + return Token::PEXIT; + case 
Opcode::PLONGJMP: + case Opcode::LONGJMP: + return Token::PLONGJMP; + case Opcode::PRET: + case Opcode::RET: + case Opcode::CAL: + return Token::PRET; + case Opcode::SSY: + case Opcode::SYNC: + return Token::SSY; + default: + throw InvalidArgument("{}", opcode); + } +} + +bool IsAbsoluteJump(Opcode opcode) { + switch (opcode) { + case Opcode::JCAL: + case Opcode::JMP: + case Opcode::JMX: + return true; + default: + return false; + } +} + +bool HasFlowTest(Opcode opcode) { + switch (opcode) { + case Opcode::BRA: + case Opcode::BRX: + case Opcode::EXIT: + case Opcode::JMP: + case Opcode::JMX: + case Opcode::KIL: + case Opcode::BRK: + case Opcode::CONT: + case Opcode::LONGJMP: + case Opcode::RET: + case Opcode::SYNC: + return true; + case Opcode::CAL: + case Opcode::JCAL: + return false; + default: + throw InvalidArgument("Invalid branch {}", opcode); + } +} + +std::string NameOf(const Block& block) { + if (block.begin.IsVirtual()) { + return fmt::format("\"Virtual {}\"", block.begin); + } else { + return fmt::format("\"{}\"", block.begin); + } +} +} // Anonymous namespace + +void Stack::Push(Token token, Location target) { + entries.push_back({ + .token = token, + .target{target}, + }); +} + +std::pair<Location, Stack> Stack::Pop(Token token) const { + const std::optional<Location> pc{Peek(token)}; + if (!pc) { + throw LogicError("Token could not be found"); + } + return {*pc, Remove(token)}; +} + +std::optional<Location> Stack::Peek(Token token) const { + const auto it{std::find_if(entries.rbegin(), entries.rend(), + [token](const auto& entry) { return entry.token == token; })}; + if (it == entries.rend()) { + return std::nullopt; + } + return it->target; +} + +Stack Stack::Remove(Token token) const { + const auto it{std::find_if(entries.rbegin(), entries.rend(), + [token](const auto& entry) { return entry.token == token; })}; + const auto pos{std::distance(entries.rbegin(), it)}; + Stack result; + result.entries.insert(result.entries.end(), entries.begin(), entries.end() - pos - 1); + return result; +} + +bool Block::Contains(Location pc) const noexcept { + return pc >= begin && pc < end; +} + +Function::Function(ObjectPool<Block>& block_pool, Location start_address) + : entrypoint{start_address} { + Label& label{labels.emplace_back()}; + label.address = start_address; + label.block = block_pool.Create(Block{}); + label.block->begin = start_address; + label.block->end = start_address; + label.block->end_class = EndClass::Branch; + label.block->cond = IR::Condition(true); + label.block->branch_true = nullptr; + label.block->branch_false = nullptr; +} + +CFG::CFG(Environment& env_, ObjectPool<Block>& block_pool_, Location start_address, + bool exits_to_dispatcher_) + : env{env_}, block_pool{block_pool_}, program_start{start_address}, exits_to_dispatcher{ + exits_to_dispatcher_} { + if (exits_to_dispatcher) { + dispatch_block = block_pool.Create(Block{}); + dispatch_block->begin = {}; + dispatch_block->end = {}; + dispatch_block->end_class = EndClass::Exit; + dispatch_block->cond = IR::Condition(true); + dispatch_block->stack = {}; + dispatch_block->branch_true = nullptr; + dispatch_block->branch_false = nullptr; + } + functions.emplace_back(block_pool, start_address); + for (FunctionId function_id = 0; function_id < functions.size(); ++function_id) { + while (!functions[function_id].labels.empty()) { + Function& function{functions[function_id]}; + Label label{function.labels.back()}; + function.labels.pop_back(); + AnalyzeLabel(function_id, label); + } + } + if (exits_to_dispatcher) { + 
const auto last_block{functions[0].blocks.rbegin()}; + dispatch_block->begin = last_block->end + 1; + dispatch_block->end = last_block->end + 1; + functions[0].blocks.insert(*dispatch_block); + } +} + +void CFG::AnalyzeLabel(FunctionId function_id, Label& label) { + if (InspectVisitedBlocks(function_id, label)) { + // Label address has been visited + return; + } + // Try to find the next block + Function* const function{&functions[function_id]}; + Location pc{label.address}; + const auto next_it{function->blocks.upper_bound(pc, Compare{})}; + const bool is_last{next_it == function->blocks.end()}; + Block* const next{is_last ? nullptr : &*next_it}; + // Insert before the next block + Block* const block{label.block}; + // Analyze instructions until it reaches an already visited block or there's a branch + bool is_branch{false}; + while (!next || pc < next->begin) { + is_branch = AnalyzeInst(block, function_id, pc) == AnalysisState::Branch; + if (is_branch) { + break; + } + ++pc; + } + if (!is_branch) { + // If the block finished without a branch, + // it means that the next instruction is already visited, jump to it + block->end = pc; + block->cond = IR::Condition{true}; + block->branch_true = next; + block->branch_false = nullptr; + } + // Function's pointer might be invalid, resolve it again + // Insert the new block + functions[function_id].blocks.insert(*block); +} + +bool CFG::InspectVisitedBlocks(FunctionId function_id, const Label& label) { + const Location pc{label.address}; + Function& function{functions[function_id]}; + const auto it{ + std::ranges::find_if(function.blocks, [pc](auto& block) { return block.Contains(pc); })}; + if (it == function.blocks.end()) { + // Address has not been visited + return false; + } + Block* const visited_block{&*it}; + if (visited_block->begin == pc) { + throw LogicError("Dangling block"); + } + Block* const new_block{label.block}; + Split(visited_block, new_block, pc); + function.blocks.insert(it, *new_block); + return true; +} + +CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Location pc) { + const Instruction inst{env.ReadInstruction(pc.Offset())}; + const Opcode opcode{Decode(inst.raw)}; + switch (opcode) { + case Opcode::BRA: + case Opcode::JMP: + case Opcode::RET: + if (!AnalyzeBranch(block, function_id, pc, inst, opcode)) { + return AnalysisState::Continue; + } + switch (opcode) { + case Opcode::BRA: + case Opcode::JMP: + AnalyzeBRA(block, function_id, pc, inst, IsAbsoluteJump(opcode)); + break; + case Opcode::RET: + block->end_class = EndClass::Return; + break; + default: + break; + } + block->end = pc; + return AnalysisState::Branch; + case Opcode::BRK: + case Opcode::CONT: + case Opcode::LONGJMP: + case Opcode::SYNC: { + if (!AnalyzeBranch(block, function_id, pc, inst, opcode)) { + return AnalysisState::Continue; + } + const auto [stack_pc, new_stack]{block->stack.Pop(OpcodeToken(opcode))}; + block->branch_true = AddLabel(block, new_stack, stack_pc, function_id); + block->end = pc; + return AnalysisState::Branch; + } + case Opcode::KIL: { + const Predicate pred{inst.Pred()}; + const auto ir_pred{static_cast<IR::Pred>(pred.index)}; + const IR::Condition cond{inst.branch.flow_test, ir_pred, pred.negated}; + AnalyzeCondInst(block, function_id, pc, EndClass::Kill, cond); + return AnalysisState::Branch; + } + case Opcode::PBK: + case Opcode::PCNT: + case Opcode::PEXIT: + case Opcode::PLONGJMP: + case Opcode::SSY: + block->stack.Push(OpcodeToken(opcode), BranchOffset(pc, inst)); + return AnalysisState::Continue; + case 
Opcode::BRX: + case Opcode::JMX: + return AnalyzeBRX(block, pc, inst, IsAbsoluteJump(opcode), function_id); + case Opcode::EXIT: + return AnalyzeEXIT(block, function_id, pc, inst); + case Opcode::PRET: + throw NotImplementedException("PRET flow analysis"); + case Opcode::CAL: + case Opcode::JCAL: { + const bool is_absolute{IsAbsoluteJump(opcode)}; + const Location cal_pc{is_absolute ? inst.branch.Absolute() : BranchOffset(pc, inst)}; + // Technically CAL pushes into PRET, but that's implicit in the function call for us + // Insert the function into the list if it doesn't exist + const auto it{std::ranges::find(functions, cal_pc, &Function::entrypoint)}; + const bool exists{it != functions.end()}; + const FunctionId call_id{exists ? static_cast<size_t>(std::distance(functions.begin(), it)) + : functions.size()}; + if (!exists) { + functions.emplace_back(block_pool, cal_pc); + } + block->end_class = EndClass::Call; + block->function_call = call_id; + block->return_block = AddLabel(block, block->stack, pc + 1, function_id); + block->end = pc; + return AnalysisState::Branch; + } + default: + break; + } + const Predicate pred{inst.Pred()}; + if (pred == Predicate{true} || pred == Predicate{false}) { + return AnalysisState::Continue; + } + const IR::Condition cond{static_cast<IR::Pred>(pred.index), pred.negated}; + AnalyzeCondInst(block, function_id, pc, EndClass::Branch, cond); + return AnalysisState::Branch; +} + +void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, + EndClass insn_end_class, IR::Condition cond) { + if (block->begin != pc) { + // If the block doesn't start in the conditional instruction + // mark it as a label to visit it later + block->end = pc; + block->cond = IR::Condition{true}; + block->branch_true = AddLabel(block, block->stack, pc, function_id); + block->branch_false = nullptr; + return; + } + // Create a virtual block and a conditional block + Block* const conditional_block{block_pool.Create()}; + Block virtual_block{}; + virtual_block.begin = block->begin.Virtual(); + virtual_block.end = block->begin.Virtual(); + virtual_block.end_class = EndClass::Branch; + virtual_block.stack = block->stack; + virtual_block.cond = cond; + virtual_block.branch_true = conditional_block; + virtual_block.branch_false = nullptr; + // Save the contents of the visited block in the conditional block + *conditional_block = std::move(*block); + // Impersonate the visited block with a virtual block + *block = std::move(virtual_block); + // Set the end properties of the conditional instruction + conditional_block->end = pc + 1; + conditional_block->end_class = insn_end_class; + // Add a label to the instruction after the conditional instruction + Block* const endif_block{AddLabel(conditional_block, block->stack, pc + 1, function_id)}; + // Branch to the next instruction from the virtual block + block->branch_false = endif_block; + // And branch to it from the conditional instruction if it is a branch or a kill instruction + // Kill instructions are considered a branch because they demote to a helper invocation and + // execution may continue. 
+ if (insn_end_class == EndClass::Branch || insn_end_class == EndClass::Kill) { + conditional_block->cond = IR::Condition{true}; + conditional_block->branch_true = endif_block; + conditional_block->branch_false = nullptr; + } + // Finally insert the condition block into the list of blocks + functions[function_id].blocks.insert(*conditional_block); +} + +bool CFG::AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst, + Opcode opcode) { + if (inst.branch.is_cbuf) { + throw NotImplementedException("Branch with constant buffer offset"); + } + const Predicate pred{inst.Pred()}; + if (pred == Predicate{false}) { + return false; + } + const bool has_flow_test{HasFlowTest(opcode)}; + const IR::FlowTest flow_test{has_flow_test ? inst.branch.flow_test.Value() : IR::FlowTest::T}; + if (pred != Predicate{true} || flow_test != IR::FlowTest::T) { + block->cond = IR::Condition(flow_test, static_cast<IR::Pred>(pred.index), pred.negated); + block->branch_false = AddLabel(block, block->stack, pc + 1, function_id); + } else { + block->cond = IR::Condition{true}; + } + return true; +} + +void CFG::AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruction inst, + bool is_absolute) { + const Location bra_pc{is_absolute ? inst.branch.Absolute() : BranchOffset(pc, inst)}; + block->branch_true = AddLabel(block, block->stack, bra_pc, function_id); +} + +CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute, + FunctionId function_id) { + const std::optional brx_table{TrackIndirectBranchTable(env, pc, program_start)}; + if (!brx_table) { + TrackIndirectBranchTable(env, pc, program_start); + throw NotImplementedException("Failed to track indirect branch"); + } + const IR::FlowTest flow_test{inst.branch.flow_test}; + const Predicate pred{inst.Pred()}; + if (flow_test != IR::FlowTest::T || pred != Predicate{true}) { + throw NotImplementedException("Conditional indirect branch"); + } + std::vector<u32> targets; + targets.reserve(brx_table->num_entries); + for (u32 i = 0; i < brx_table->num_entries; ++i) { + u32 target{env.ReadCbufValue(brx_table->cbuf_index, brx_table->cbuf_offset + i * 4)}; + if (!is_absolute) { + target += pc.Offset(); + } + target += static_cast<u32>(brx_table->branch_offset); + target += 8; + targets.push_back(target); + } + std::ranges::sort(targets); + targets.erase(std::unique(targets.begin(), targets.end()), targets.end()); + + block->indirect_branches.reserve(targets.size()); + for (const u32 target : targets) { + Block* const branch{AddLabel(block, block->stack, target, function_id)}; + block->indirect_branches.push_back({ + .block = branch, + .address = target, + }); + } + block->cond = IR::Condition{true}; + block->end = pc + 1; + block->end_class = EndClass::IndirectBranch; + block->branch_reg = brx_table->branch_reg; + block->branch_offset = brx_table->branch_offset + 8; + if (!is_absolute) { + block->branch_offset += pc.Offset(); + } + return AnalysisState::Branch; +} + +CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Location pc, + Instruction inst) { + const IR::FlowTest flow_test{inst.branch.flow_test}; + const Predicate pred{inst.Pred()}; + if (pred == Predicate{false} || flow_test == IR::FlowTest::F) { + // EXIT will never be taken + return AnalysisState::Continue; + } + if (exits_to_dispatcher && function_id != 0) { + throw NotImplementedException("Dispatch EXIT on external function"); + } + if (pred != Predicate{true} || flow_test != IR::FlowTest::T) { + if 
(block->stack.Peek(Token::PEXIT).has_value()) { + throw NotImplementedException("Conditional EXIT with PEXIT token"); + } + const IR::Condition cond{flow_test, static_cast<IR::Pred>(pred.index), pred.negated}; + if (exits_to_dispatcher) { + block->end = pc; + block->end_class = EndClass::Branch; + block->cond = cond; + block->branch_true = dispatch_block; + block->branch_false = AddLabel(block, block->stack, pc + 1, function_id); + return AnalysisState::Branch; + } + AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond); + return AnalysisState::Branch; + } + if (const std::optional<Location> exit_pc{block->stack.Peek(Token::PEXIT)}) { + const Stack popped_stack{block->stack.Remove(Token::PEXIT)}; + block->cond = IR::Condition{true}; + block->branch_true = AddLabel(block, popped_stack, *exit_pc, function_id); + block->branch_false = nullptr; + return AnalysisState::Branch; + } + if (exits_to_dispatcher) { + block->cond = IR::Condition{true}; + block->end = pc; + block->end_class = EndClass::Branch; + block->branch_true = dispatch_block; + block->branch_false = nullptr; + return AnalysisState::Branch; + } + block->end = pc + 1; + block->end_class = EndClass::Exit; + return AnalysisState::Branch; +} + +Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function_id) { + Function& function{functions[function_id]}; + if (block->begin == pc) { + // Jumps to itself + return block; + } + if (const auto it{function.blocks.find(pc, Compare{})}; it != function.blocks.end()) { + // Block already exists and it has been visited + if (function.blocks.begin() != it) { + // Check if the previous node is the virtual variant of the label + // This won't exist if a virtual node is not needed or it hasn't been visited + // If it hasn't been visited and a virtual node is needed, this will still behave as + // expected because the node impersonated with its virtual node. 
+ const auto prev{std::prev(it)}; + if (it->begin.Virtual() == prev->begin) { + return &*prev; + } + } + return &*it; + } + // Make sure we don't insert the same layer twice + const auto label_it{std::ranges::find(function.labels, pc, &Label::address)}; + if (label_it != function.labels.end()) { + return label_it->block; + } + Block* const new_block{block_pool.Create()}; + new_block->begin = pc; + new_block->end = pc; + new_block->end_class = EndClass::Branch; + new_block->cond = IR::Condition(true); + new_block->stack = stack; + new_block->branch_true = nullptr; + new_block->branch_false = nullptr; + function.labels.push_back(Label{ + .address{pc}, + .block = new_block, + .stack{std::move(stack)}, + }); + return new_block; +} + +std::string CFG::Dot() const { + int node_uid{0}; + + std::string dot{"digraph shader {\n"}; + for (const Function& function : functions) { + dot += fmt::format("\tsubgraph cluster_{} {{\n", function.entrypoint); + dot += fmt::format("\t\tnode [style=filled];\n"); + for (const Block& block : function.blocks) { + const std::string name{NameOf(block)}; + const auto add_branch = [&](Block* branch, bool add_label) { + dot += fmt::format("\t\t{}->{}", name, NameOf(*branch)); + if (add_label && block.cond != IR::Condition{true} && + block.cond != IR::Condition{false}) { + dot += fmt::format(" [label=\"{}\"]", block.cond); + } + dot += '\n'; + }; + dot += fmt::format("\t\t{};\n", name); + switch (block.end_class) { + case EndClass::Branch: + if (block.cond != IR::Condition{false}) { + add_branch(block.branch_true, true); + } + if (block.cond != IR::Condition{true}) { + add_branch(block.branch_false, false); + } + break; + case EndClass::IndirectBranch: + for (const IndirectBranch& branch : block.indirect_branches) { + add_branch(branch.block, false); + } + break; + case EndClass::Call: + dot += fmt::format("\t\t{}->N{};\n", name, node_uid); + dot += fmt::format("\t\tN{}->{};\n", node_uid, NameOf(*block.return_block)); + dot += fmt::format("\t\tN{} [label=\"Call {}\"][shape=square][style=stripped];\n", + node_uid, block.function_call); + dot += '\n'; + ++node_uid; + break; + case EndClass::Exit: + dot += fmt::format("\t\t{}->N{};\n", name, node_uid); + dot += fmt::format("\t\tN{} [label=\"Exit\"][shape=square][style=stripped];\n", + node_uid); + ++node_uid; + break; + case EndClass::Return: + dot += fmt::format("\t\t{}->N{};\n", name, node_uid); + dot += fmt::format("\t\tN{} [label=\"Return\"][shape=square][style=stripped];\n", + node_uid); + ++node_uid; + break; + case EndClass::Kill: + dot += fmt::format("\t\t{}->N{};\n", name, node_uid); + dot += fmt::format("\t\tN{} [label=\"Kill\"][shape=square][style=stripped];\n", + node_uid); + ++node_uid; + break; + } + } + if (function.entrypoint == 8) { + dot += fmt::format("\t\tlabel = \"main\";\n"); + } else { + dot += fmt::format("\t\tlabel = \"Function {}\";\n", function.entrypoint); + } + dot += "\t}\n"; + } + if (!functions.empty()) { + auto& function{functions.front()}; + if (function.blocks.empty()) { + dot += "Start;\n"; + } else { + dot += fmt::format("\tStart -> {};\n", NameOf(*function.blocks.begin())); + } + dot += fmt::format("\tStart [shape=diamond];\n"); + } + dot += "}\n"; + return dot; +} + +} // namespace Shader::Maxwell::Flow diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h new file mode 100644 index 000000000..a6bd3e196 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -0,0 +1,169 @@ +// Copyright 2021 yuzu 
Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <compare> +#include <optional> +#include <span> +#include <string> +#include <vector> + +#include <boost/container/small_vector.hpp> +#include <boost/intrusive/set.hpp> + +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/condition.h" +#include "shader_recompiler/frontend/maxwell/instruction.h" +#include "shader_recompiler/frontend/maxwell/location.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" +#include "shader_recompiler/object_pool.h" + +namespace Shader::Maxwell::Flow { + +struct Block; + +using FunctionId = size_t; + +enum class EndClass { + Branch, + IndirectBranch, + Call, + Exit, + Return, + Kill, +}; + +enum class Token { + SSY, + PBK, + PEXIT, + PRET, + PCNT, + PLONGJMP, +}; + +struct StackEntry { + auto operator<=>(const StackEntry&) const noexcept = default; + + Token token; + Location target; +}; + +class Stack { +public: + void Push(Token token, Location target); + [[nodiscard]] std::pair<Location, Stack> Pop(Token token) const; + [[nodiscard]] std::optional<Location> Peek(Token token) const; + [[nodiscard]] Stack Remove(Token token) const; + +private: + boost::container::small_vector<StackEntry, 3> entries; +}; + +struct IndirectBranch { + Block* block; + u32 address; +}; + +struct Block : boost::intrusive::set_base_hook< + // Normal link is ~2.5% faster compared to safe link + boost::intrusive::link_mode<boost::intrusive::normal_link>> { + [[nodiscard]] bool Contains(Location pc) const noexcept; + + bool operator<(const Block& rhs) const noexcept { + return begin < rhs.begin; + } + + Location begin; + Location end; + EndClass end_class{}; + IR::Condition cond{}; + Stack stack; + Block* branch_true{}; + Block* branch_false{}; + FunctionId function_call{}; + Block* return_block{}; + IR::Reg branch_reg{}; + s32 branch_offset{}; + std::vector<IndirectBranch> indirect_branches; +}; + +struct Label { + Location address; + Block* block; + Stack stack; +}; + +struct Function { + explicit Function(ObjectPool<Block>& block_pool, Location start_address); + + Location entrypoint; + boost::container::small_vector<Label, 16> labels; + boost::intrusive::set<Block> blocks; +}; + +class CFG { + enum class AnalysisState { + Branch, + Continue, + }; + +public: + explicit CFG(Environment& env, ObjectPool<Block>& block_pool, Location start_address, + bool exits_to_dispatcher = false); + + CFG& operator=(const CFG&) = delete; + CFG(const CFG&) = delete; + + CFG& operator=(CFG&&) = delete; + CFG(CFG&&) = delete; + + [[nodiscard]] std::string Dot() const; + + [[nodiscard]] std::span<const Function> Functions() const noexcept { + return std::span(functions.data(), functions.size()); + } + [[nodiscard]] std::span<Function> Functions() noexcept { + return std::span(functions.data(), functions.size()); + } + + [[nodiscard]] bool ExitsToDispatcher() const { + return exits_to_dispatcher; + } + +private: + void AnalyzeLabel(FunctionId function_id, Label& label); + + /// Inspect already visited blocks. 
+ /// Return true when the block has already been visited + bool InspectVisitedBlocks(FunctionId function_id, const Label& label); + + AnalysisState AnalyzeInst(Block* block, FunctionId function_id, Location pc); + + void AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, EndClass insn_end_class, + IR::Condition cond); + + /// Return true when the branch instruction is confirmed to be a branch + bool AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst, + Opcode opcode); + + void AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruction inst, + bool is_absolute); + AnalysisState AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute, + FunctionId function_id); + AnalysisState AnalyzeEXIT(Block* block, FunctionId function_id, Location pc, Instruction inst); + + /// Return the branch target block id + Block* AddLabel(Block* block, Stack stack, Location pc, FunctionId function_id); + + Environment& env; + ObjectPool<Block>& block_pool; + boost::container::small_vector<Function, 1> functions; + Location program_start; + bool exits_to_dispatcher{}; + Block* dispatch_block{}; +}; + +} // namespace Shader::Maxwell::Flow diff --git a/src/shader_recompiler/frontend/maxwell/decode.cpp b/src/shader_recompiler/frontend/maxwell/decode.cpp new file mode 100644 index 000000000..972f677dc --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/decode.cpp @@ -0,0 +1,149 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <array> +#include <bit> +#include <memory> +#include <string_view> + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/decode.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" + +namespace Shader::Maxwell { +namespace { +struct MaskValue { + u64 mask; + u64 value; +}; + +constexpr MaskValue MaskValueFromEncoding(const char* encoding) { + u64 mask{}; + u64 value{}; + u64 bit{u64(1) << 63}; + while (*encoding) { + switch (*encoding) { + case '0': + mask |= bit; + break; + case '1': + mask |= bit; + value |= bit; + break; + case '-': + break; + case ' ': + break; + default: + throw LogicError("Invalid encoding character '{}'", *encoding); + } + ++encoding; + if (*encoding != ' ') { + bit >>= 1; + } + } + return MaskValue{.mask = mask, .value = value}; +} + +struct InstEncoding { + MaskValue mask_value; + Opcode opcode; +}; +constexpr std::array UNORDERED_ENCODINGS{ +#define INST(name, cute, encode) \ + InstEncoding{ \ + .mask_value{MaskValueFromEncoding(encode)}, \ + .opcode = Opcode::name, \ + }, +#include "maxwell.inc" +#undef INST +}; + +constexpr auto SortedEncodings() { + std::array encodings{UNORDERED_ENCODINGS}; + std::ranges::sort(encodings, [](const InstEncoding& lhs, const InstEncoding& rhs) { + return std::popcount(lhs.mask_value.mask) > std::popcount(rhs.mask_value.mask); + }); + return encodings; +} +constexpr auto ENCODINGS{SortedEncodings()}; + +constexpr int WidestLeftBits() { + int bits{64}; + for (const InstEncoding& encoding : ENCODINGS) { + bits = std::min(bits, std::countr_zero(encoding.mask_value.mask)); + } + return 64 - bits; +} +constexpr int WIDEST_LEFT_BITS{WidestLeftBits()}; +constexpr int MASK_SHIFT{64 - WIDEST_LEFT_BITS}; + +constexpr size_t ToFastLookupIndex(u64 value) { + return static_cast<size_t>(value >> MASK_SHIFT); +} + +constexpr size_t FastLookupSize() { + size_t max_width{}; + 
for (const InstEncoding& encoding : ENCODINGS) { + max_width = std::max(max_width, ToFastLookupIndex(encoding.mask_value.mask)); + } + return max_width + 1; +} +constexpr size_t FAST_LOOKUP_SIZE{FastLookupSize()}; + +struct InstInfo { + [[nodiscard]] u64 Mask() const noexcept { + return static_cast<u64>(high_mask) << MASK_SHIFT; + } + + [[nodiscard]] u64 Value() const noexcept { + return static_cast<u64>(high_value) << MASK_SHIFT; + } + + u16 high_mask; + u16 high_value; + Opcode opcode; +}; + +constexpr auto MakeFastLookupTableIndex(size_t index) { + std::array<InstInfo, 2> encodings{}; + size_t element{}; + for (const auto& encoding : ENCODINGS) { + const size_t mask{ToFastLookupIndex(encoding.mask_value.mask)}; + const size_t value{ToFastLookupIndex(encoding.mask_value.value)}; + if ((index & mask) == value) { + encodings.at(element) = InstInfo{ + .high_mask = static_cast<u16>(encoding.mask_value.mask >> MASK_SHIFT), + .high_value = static_cast<u16>(encoding.mask_value.value >> MASK_SHIFT), + .opcode = encoding.opcode, + }; + ++element; + } + } + return encodings; +} + +/*constexpr*/ auto MakeFastLookupTable() { + auto encodings{std::make_unique<std::array<std::array<InstInfo, 2>, FAST_LOOKUP_SIZE>>()}; + for (size_t index = 0; index < FAST_LOOKUP_SIZE; ++index) { + (*encodings)[index] = MakeFastLookupTableIndex(index); + } + return encodings; +} +const auto FAST_LOOKUP_TABLE{MakeFastLookupTable()}; +} // Anonymous namespace + +Opcode Decode(u64 insn) { + const auto& table{(*FAST_LOOKUP_TABLE)[ToFastLookupIndex(insn)]}; + const auto it{std::ranges::find_if( + table, [insn](const InstInfo& info) { return (insn & info.Mask()) == info.Value(); })}; + if (it == table.end()) { + throw NotImplementedException("Instruction 0x{:016x} is unknown / unimplemented", insn); + } + return it->opcode; +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/decode.h b/src/shader_recompiler/frontend/maxwell/decode.h new file mode 100644 index 000000000..b4f080fd7 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/decode.h @@ -0,0 +1,14 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" + +namespace Shader::Maxwell { + +[[nodiscard]] Opcode Decode(u64 insn); + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp new file mode 100644 index 000000000..008625cb3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp @@ -0,0 +1,108 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
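+// Walks backwards from a BRX/JMX instruction, within its basic block, to recover the indirect
+// branch table: the LDC that loaded the branch target supplies the constant buffer index and
+// offset, its address register is traced to the SHL that scaled the table index, and the IMNMX
+// that clamped that index gives the last valid entry (num_entries = immediate + 1).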
+ +#include <optional> + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/decode.h" +#include "shader_recompiler/frontend/maxwell/indirect_branch_table_track.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/load_constant.h" + +namespace Shader::Maxwell { +namespace { +union Encoding { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<20, 19, u64> immediate; + BitField<56, 1, u64> is_negative; + BitField<20, 24, s64> brx_offset; +}; + +template <typename Callable> +std::optional<u64> Track(Environment& env, Location block_begin, Location& pos, Callable&& func) { + while (pos >= block_begin) { + const u64 insn{env.ReadInstruction(pos.Offset())}; + --pos; + if (func(insn, Decode(insn))) { + return insn; + } + } + return std::nullopt; +} + +std::optional<u64> TrackLDC(Environment& env, Location block_begin, Location& pos, + IR::Reg brx_reg) { + return Track(env, block_begin, pos, [brx_reg](u64 insn, Opcode opcode) { + const LDC::Encoding ldc{insn}; + return opcode == Opcode::LDC && ldc.dest_reg == brx_reg && ldc.size == LDC::Size::B32 && + ldc.mode == LDC::Mode::Default; + }); +} + +std::optional<u64> TrackSHL(Environment& env, Location block_begin, Location& pos, + IR::Reg ldc_reg) { + return Track(env, block_begin, pos, [ldc_reg](u64 insn, Opcode opcode) { + const Encoding shl{insn}; + return opcode == Opcode::SHL_imm && shl.dest_reg == ldc_reg; + }); +} + +std::optional<u64> TrackIMNMX(Environment& env, Location block_begin, Location& pos, + IR::Reg shl_reg) { + return Track(env, block_begin, pos, [shl_reg](u64 insn, Opcode opcode) { + const Encoding imnmx{insn}; + return opcode == Opcode::IMNMX_imm && imnmx.dest_reg == shl_reg; + }); +} +} // Anonymous namespace + +std::optional<IndirectBranchTableInfo> TrackIndirectBranchTable(Environment& env, Location brx_pos, + Location block_begin) { + const u64 brx_insn{env.ReadInstruction(brx_pos.Offset())}; + const Opcode brx_opcode{Decode(brx_insn)}; + if (brx_opcode != Opcode::BRX && brx_opcode != Opcode::JMX) { + throw LogicError("Tracked instruction is not BRX or JMX"); + } + const IR::Reg brx_reg{Encoding{brx_insn}.src_reg}; + const s32 brx_offset{static_cast<s32>(Encoding{brx_insn}.brx_offset)}; + + Location pos{brx_pos}; + const std::optional<u64> ldc_insn{TrackLDC(env, block_begin, pos, brx_reg)}; + if (!ldc_insn) { + return std::nullopt; + } + const LDC::Encoding ldc{*ldc_insn}; + const u32 cbuf_index{static_cast<u32>(ldc.index)}; + const u32 cbuf_offset{static_cast<u32>(static_cast<s32>(ldc.offset.Value()))}; + const IR::Reg ldc_reg{ldc.src_reg}; + + const std::optional<u64> shl_insn{TrackSHL(env, block_begin, pos, ldc_reg)}; + if (!shl_insn) { + return std::nullopt; + } + const Encoding shl{*shl_insn}; + const IR::Reg shl_reg{shl.src_reg}; + + const std::optional<u64> imnmx_insn{TrackIMNMX(env, block_begin, pos, shl_reg)}; + if (!imnmx_insn) { + return std::nullopt; + } + const Encoding imnmx{*imnmx_insn}; + if (imnmx.is_negative != 0) { + return std::nullopt; + } + const u32 imnmx_immediate{static_cast<u32>(imnmx.immediate.Value())}; + return IndirectBranchTableInfo{ + .cbuf_index = cbuf_index, + .cbuf_offset = cbuf_offset, + .num_entries = imnmx_immediate + 1, + .branch_offset = brx_offset, + .branch_reg = brx_reg, + }; +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h 
b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h new file mode 100644 index 000000000..eee5102fa --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h @@ -0,0 +1,28 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <optional> + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/reg.h" +#include "shader_recompiler/frontend/maxwell/location.h" + +namespace Shader::Maxwell { + +struct IndirectBranchTableInfo { + u32 cbuf_index{}; + u32 cbuf_offset{}; + u32 num_entries{}; + s32 branch_offset{}; + IR::Reg branch_reg{}; +}; + +std::optional<IndirectBranchTableInfo> TrackIndirectBranchTable(Environment& env, Location brx_pos, + Location block_begin); + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/instruction.h b/src/shader_recompiler/frontend/maxwell/instruction.h new file mode 100644 index 000000000..743d68d61 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/instruction.h @@ -0,0 +1,63 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/flow_test.h" +#include "shader_recompiler/frontend/ir/reg.h" + +namespace Shader::Maxwell { + +struct Predicate { + Predicate() = default; + Predicate(unsigned index_, bool negated_ = false) : index{index_}, negated{negated_} {} + Predicate(bool value) : index{7}, negated{!value} {} + Predicate(u64 raw) : index{static_cast<unsigned>(raw & 7)}, negated{(raw & 8) != 0} {} + + unsigned index; + bool negated; +}; + +inline bool operator==(const Predicate& lhs, const Predicate& rhs) noexcept { + return lhs.index == rhs.index && lhs.negated == rhs.negated; +} + +inline bool operator!=(const Predicate& lhs, const Predicate& rhs) noexcept { + return !(lhs == rhs); +} + +union Instruction { + Instruction(u64 raw_) : raw{raw_} {} + + u64 raw; + + union { + BitField<5, 1, u64> is_cbuf; + BitField<0, 5, IR::FlowTest> flow_test; + + [[nodiscard]] u32 Absolute() const noexcept { + return static_cast<u32>(absolute); + } + + [[nodiscard]] s32 Offset() const noexcept { + return static_cast<s32>(offset); + } + + private: + BitField<20, 24, s64> offset; + BitField<20, 32, u64> absolute; + } branch; + + [[nodiscard]] Predicate Pred() const noexcept { + return Predicate{pred}; + } + +private: + BitField<16, 4, u64> pred; +}; +static_assert(std::is_trivially_copyable_v<Instruction>); + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/location.h b/src/shader_recompiler/frontend/maxwell/location.h new file mode 100644 index 000000000..26d29eae2 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/location.h @@ -0,0 +1,112 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
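+// Location wraps a program byte offset that is always a multiple of 8. Align/Step/Back skip
+// offsets that are multiples of 32, i.e. the first 8-byte slot of every 32-byte bundle
+// (presumably the Maxwell scheduling/control word), and Virtual() biases the offset by 4 so
+// synthetic locations can never collide with real instruction addresses.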
+ +#pragma once + +#include <compare> +#include <iterator> + +#include <fmt/format.h> + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" + +namespace Shader::Maxwell { + +class Location { + static constexpr u32 VIRTUAL_BIAS{4}; + +public: + constexpr Location() = default; + + constexpr Location(u32 initial_offset) : offset{initial_offset} { + if (initial_offset % 8 != 0) { + throw InvalidArgument("initial_offset={} is not a multiple of 8", initial_offset); + } + Align(); + } + + constexpr Location Virtual() const noexcept { + Location virtual_location; + virtual_location.offset = offset - VIRTUAL_BIAS; + return virtual_location; + } + + [[nodiscard]] constexpr u32 Offset() const noexcept { + return offset; + } + + [[nodiscard]] constexpr bool IsVirtual() const { + return offset % 8 == VIRTUAL_BIAS; + } + + constexpr auto operator<=>(const Location&) const noexcept = default; + + constexpr Location operator++() noexcept { + const Location copy{*this}; + Step(); + return copy; + } + + constexpr Location operator++(int) noexcept { + Step(); + return *this; + } + + constexpr Location operator--() noexcept { + const Location copy{*this}; + Back(); + return copy; + } + + constexpr Location operator--(int) noexcept { + Back(); + return *this; + } + + constexpr Location operator+(int number) const { + Location new_pc{*this}; + while (number > 0) { + --number; + ++new_pc; + } + while (number < 0) { + ++number; + --new_pc; + } + return new_pc; + } + + constexpr Location operator-(int number) const { + return operator+(-number); + } + +private: + constexpr void Align() { + offset += offset % 32 == 0 ? 8 : 0; + } + + constexpr void Step() { + offset += 8 + (offset % 32 == 24 ? 8 : 0); + } + + constexpr void Back() { + offset -= 8 + (offset % 32 == 8 ? 8 : 0); + } + + u32 offset{0xcccccccc}; +}; + +} // namespace Shader::Maxwell + +template <> +struct fmt::formatter<Shader::Maxwell::Location> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(const Shader::Maxwell::Location& location, FormatContext& ctx) { + return fmt::format_to(ctx.out(), "{:04x}", location.Offset()); + } +}; diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc new file mode 100644 index 000000000..2fee591bb --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc @@ -0,0 +1,286 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
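+// Encoding strings are parsed by MaskValueFromEncoding in decode.cpp: characters are consumed
+// from bit 63 downwards, '0'/'1' are fixed opcode bits, '-' marks a don't-care bit and spaces
+// are ignored. For example, "1110 1111 1010 0---" fixes bits 63:51 and yields
+// mask = 0xFFF8000000000000, value = 0xEFA0000000000000.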
+ +INST(AL2P, "AL2P", "1110 1111 1010 0---") +INST(ALD, "ALD", "1110 1111 1101 1---") +INST(AST, "AST", "1110 1111 1111 0---") +INST(ATOM_cas, "ATOM (cas)", "1110 1110 1111 ----") +INST(ATOM, "ATOM", "1110 1101 ---- ----") +INST(ATOMS_cas, "ATOMS (cas)", "1110 1110 ---- ----") +INST(ATOMS, "ATOMS", "1110 1100 ---- ----") +INST(B2R, "B2R", "1111 0000 1011 1---") +INST(BAR, "BAR", "1111 0000 1010 1---") +INST(BFE_reg, "BFE (reg)", "0101 1100 0000 0---") +INST(BFE_cbuf, "BFE (cbuf)", "0100 1100 0000 0---") +INST(BFE_imm, "BFE (imm)", "0011 100- 0000 0---") +INST(BFI_reg, "BFI (reg)", "0101 1011 1111 0---") +INST(BFI_rc, "BFI (rc)", "0101 0011 1111 0---") +INST(BFI_cr, "BFI (cr)", "0100 1011 1111 0---") +INST(BFI_imm, "BFI (imm)", "0011 011- 1111 0---") +INST(BPT, "BPT", "1110 0011 1010 ----") +INST(BRA, "BRA", "1110 0010 0100 ----") +INST(BRK, "BRK", "1110 0011 0100 ----") +INST(BRX, "BRX", "1110 0010 0101 ----") +INST(CAL, "CAL", "1110 0010 0110 ----") +INST(CCTL, "CCTL", "1110 1111 011- ----") +INST(CCTLL, "CCTLL", "1110 1111 100- ----") +INST(CONT, "CONT", "1110 0011 0101 ----") +INST(CS2R, "CS2R", "0101 0000 1100 1---") +INST(CSET, "CSET", "0101 0000 1001 1---") +INST(CSETP, "CSETP", "0101 0000 1010 0---") +INST(DADD_reg, "DADD (reg)", "0101 1100 0111 0---") +INST(DADD_cbuf, "DADD (cbuf)", "0100 1100 0111 0---") +INST(DADD_imm, "DADD (imm)", "0011 100- 0111 0---") +INST(DEPBAR, "DEPBAR", "1111 0000 1111 0---") +INST(DFMA_reg, "DFMA (reg)", "0101 1011 0111 ----") +INST(DFMA_rc, "DFMA (rc)", "0101 0011 0111 ----") +INST(DFMA_cr, "DFMA (cr)", "0100 1011 0111 ----") +INST(DFMA_imm, "DFMA (imm)", "0011 011- 0111 ----") +INST(DMNMX_reg, "DMNMX (reg)", "0101 1100 0101 0---") +INST(DMNMX_cbuf, "DMNMX (cbuf)", "0100 1100 0101 0---") +INST(DMNMX_imm, "DMNMX (imm)", "0011 100- 0101 0---") +INST(DMUL_reg, "DMUL (reg)", "0101 1100 1000 0---") +INST(DMUL_cbuf, "DMUL (cbuf)", "0100 1100 1000 0---") +INST(DMUL_imm, "DMUL (imm)", "0011 100- 1000 0---") +INST(DSET_reg, "DSET (reg)", "0101 1001 0--- ----") +INST(DSET_cbuf, "DSET (cbuf)", "0100 1001 0--- ----") +INST(DSET_imm, "DSET (imm)", "0011 001- 0--- ----") +INST(DSETP_reg, "DSETP (reg)", "0101 1011 1000 ----") +INST(DSETP_cbuf, "DSETP (cbuf)", "0100 1011 1000 ----") +INST(DSETP_imm, "DSETP (imm)", "0011 011- 1000 ----") +INST(EXIT, "EXIT", "1110 0011 0000 ----") +INST(F2F_reg, "F2F (reg)", "0101 1100 1010 1---") +INST(F2F_cbuf, "F2F (cbuf)", "0100 1100 1010 1---") +INST(F2F_imm, "F2F (imm)", "0011 100- 1010 1---") +INST(F2I_reg, "F2I (reg)", "0101 1100 1011 0---") +INST(F2I_cbuf, "F2I (cbuf)", "0100 1100 1011 0---") +INST(F2I_imm, "F2I (imm)", "0011 100- 1011 0---") +INST(FADD_reg, "FADD (reg)", "0101 1100 0101 1---") +INST(FADD_cbuf, "FADD (cbuf)", "0100 1100 0101 1---") +INST(FADD_imm, "FADD (imm)", "0011 100- 0101 1---") +INST(FADD32I, "FADD32I", "0000 10-- ---- ----") +INST(FCHK_reg, "FCHK (reg)", "0101 1100 1000 1---") +INST(FCHK_cbuf, "FCHK (cbuf)", "0100 1100 1000 1---") +INST(FCHK_imm, "FCHK (imm)", "0011 100- 1000 1---") +INST(FCMP_reg, "FCMP (reg)", "0101 1011 1010 ----") +INST(FCMP_rc, "FCMP (rc)", "0101 0011 1010 ----") +INST(FCMP_cr, "FCMP (cr)", "0100 1011 1010 ----") +INST(FCMP_imm, "FCMP (imm)", "0011 011- 1010 ----") +INST(FFMA_reg, "FFMA (reg)", "0101 1001 1--- ----") +INST(FFMA_rc, "FFMA (rc)", "0101 0001 1--- ----") +INST(FFMA_cr, "FFMA (cr)", "0100 1001 1--- ----") +INST(FFMA_imm, "FFMA (imm)", "0011 001- 1--- ----") +INST(FFMA32I, "FFMA32I", "0000 11-- ---- ----") +INST(FLO_reg, "FLO (reg)", "0101 1100 0011 0---") +INST(FLO_cbuf, 
"FLO (cbuf)", "0100 1100 0011 0---") +INST(FLO_imm, "FLO (imm)", "0011 100- 0011 0---") +INST(FMNMX_reg, "FMNMX (reg)", "0101 1100 0110 0---") +INST(FMNMX_cbuf, "FMNMX (cbuf)", "0100 1100 0110 0---") +INST(FMNMX_imm, "FMNMX (imm)", "0011 100- 0110 0---") +INST(FMUL_reg, "FMUL (reg)", "0101 1100 0110 1---") +INST(FMUL_cbuf, "FMUL (cbuf)", "0100 1100 0110 1---") +INST(FMUL_imm, "FMUL (imm)", "0011 100- 0110 1---") +INST(FMUL32I, "FMUL32I", "0001 1110 ---- ----") +INST(FSET_reg, "FSET (reg)", "0101 1000 ---- ----") +INST(FSET_cbuf, "FSET (cbuf)", "0100 1000 ---- ----") +INST(FSET_imm, "FSET (imm)", "0011 000- ---- ----") +INST(FSETP_reg, "FSETP (reg)", "0101 1011 1011 ----") +INST(FSETP_cbuf, "FSETP (cbuf)", "0100 1011 1011 ----") +INST(FSETP_imm, "FSETP (imm)", "0011 011- 1011 ----") +INST(FSWZADD, "FSWZADD", "0101 0000 1111 1---") +INST(GETCRSPTR, "GETCRSPTR", "1110 0010 1100 ----") +INST(GETLMEMBASE, "GETLMEMBASE", "1110 0010 1101 ----") +INST(HADD2_reg, "HADD2 (reg)", "0101 1101 0001 0---") +INST(HADD2_cbuf, "HADD2 (cbuf)", "0111 101- 1--- ----") +INST(HADD2_imm, "HADD2 (imm)", "0111 101- 0--- ----") +INST(HADD2_32I, "HADD2_32I", "0010 110- ---- ----") +INST(HFMA2_reg, "HFMA2 (reg)", "0101 1101 0000 0---") +INST(HFMA2_rc, "HFMA2 (rc)", "0110 0--- 1--- ----") +INST(HFMA2_cr, "HFMA2 (cr)", "0111 0--- 1--- ----") +INST(HFMA2_imm, "HFMA2 (imm)", "0111 0--- 0--- ----") +INST(HFMA2_32I, "HFMA2_32I", "0010 100- ---- ----") +INST(HMUL2_reg, "HMUL2 (reg)", "0101 1101 0000 1---") +INST(HMUL2_cbuf, "HMUL2 (cbuf)", "0111 100- 1--- ----") +INST(HMUL2_imm, "HMUL2 (imm)", "0111 100- 0--- ----") +INST(HMUL2_32I, "HMUL2_32I", "0010 101- ---- ----") +INST(HSET2_reg, "HSET2 (reg)", "0101 1101 0001 1---") +INST(HSET2_cbuf, "HSET2 (cbuf)", "0111 110- 1--- ----") +INST(HSET2_imm, "HSET2 (imm)", "0111 110- 0--- ----") +INST(HSETP2_reg, "HSETP2 (reg)", "0101 1101 0010 0---") +INST(HSETP2_cbuf, "HSETP2 (cbuf)", "0111 111- 1--- ----") +INST(HSETP2_imm, "HSETP2 (imm)", "0111 111- 0--- ----") +INST(I2F_reg, "I2F (reg)", "0101 1100 1011 1---") +INST(I2F_cbuf, "I2F (cbuf)", "0100 1100 1011 1---") +INST(I2F_imm, "I2F (imm)", "0011 100- 1011 1---") +INST(I2I_reg, "I2I (reg)", "0101 1100 1110 0---") +INST(I2I_cbuf, "I2I (cbuf)", "0100 1100 1110 0---") +INST(I2I_imm, "I2I (imm)", "0011 100- 1110 0---") +INST(IADD_reg, "IADD (reg)", "0101 1100 0001 0---") +INST(IADD_cbuf, "IADD (cbuf)", "0100 1100 0001 0---") +INST(IADD_imm, "IADD (imm)", "0011 100- 0001 0---") +INST(IADD3_reg, "IADD3 (reg)", "0101 1100 1100 ----") +INST(IADD3_cbuf, "IADD3 (cbuf)", "0100 1100 1100 ----") +INST(IADD3_imm, "IADD3 (imm)", "0011 100- 1100 ----") +INST(IADD32I, "IADD32I", "0001 110- ---- ----") +INST(ICMP_reg, "ICMP (reg)", "0101 1011 0100 ----") +INST(ICMP_rc, "ICMP (rc)", "0101 0011 0100 ----") +INST(ICMP_cr, "ICMP (cr)", "0100 1011 0100 ----") +INST(ICMP_imm, "ICMP (imm)", "0011 011- 0100 ----") +INST(IDE, "IDE", "1110 0011 1001 ----") +INST(IDP_reg, "IDP (reg)", "0101 0011 1111 1---") +INST(IDP_imm, "IDP (imm)", "0101 0011 1101 1---") +INST(IMAD_reg, "IMAD (reg)", "0101 1010 0--- ----") +INST(IMAD_rc, "IMAD (rc)", "0101 0010 0--- ----") +INST(IMAD_cr, "IMAD (cr)", "0100 1010 0--- ----") +INST(IMAD_imm, "IMAD (imm)", "0011 010- 0--- ----") +INST(IMAD32I, "IMAD32I", "1000 00-- ---- ----") +INST(IMADSP_reg, "IMADSP (reg)", "0101 1010 1--- ----") +INST(IMADSP_rc, "IMADSP (rc)", "0101 0010 1--- ----") +INST(IMADSP_cr, "IMADSP (cr)", "0100 1010 1--- ----") +INST(IMADSP_imm, "IMADSP (imm)", "0011 010- 1--- ----") +INST(IMNMX_reg, "IMNMX (reg)", 
"0101 1100 0010 0---") +INST(IMNMX_cbuf, "IMNMX (cbuf)", "0100 1100 0010 0---") +INST(IMNMX_imm, "IMNMX (imm)", "0011 100- 0010 0---") +INST(IMUL_reg, "IMUL (reg)", "0101 1100 0011 1---") +INST(IMUL_cbuf, "IMUL (cbuf)", "0100 1100 0011 1---") +INST(IMUL_imm, "IMUL (imm)", "0011 100- 0011 1---") +INST(IMUL32I, "IMUL32I", "0001 1111 ---- ----") +INST(IPA, "IPA", "1110 0000 ---- ----") +INST(ISBERD, "ISBERD", "1110 1111 1101 0---") +INST(ISCADD_reg, "ISCADD (reg)", "0101 1100 0001 1---") +INST(ISCADD_cbuf, "ISCADD (cbuf)", "0100 1100 0001 1---") +INST(ISCADD_imm, "ISCADD (imm)", "0011 100- 0001 1---") +INST(ISCADD32I, "ISCADD32I", "0001 01-- ---- ----") +INST(ISET_reg, "ISET (reg)", "0101 1011 0101 ----") +INST(ISET_cbuf, "ISET (cbuf)", "0100 1011 0101 ----") +INST(ISET_imm, "ISET (imm)", "0011 011- 0101 ----") +INST(ISETP_reg, "ISETP (reg)", "0101 1011 0110 ----") +INST(ISETP_cbuf, "ISETP (cbuf)", "0100 1011 0110 ----") +INST(ISETP_imm, "ISETP (imm)", "0011 011- 0110 ----") +INST(JCAL, "JCAL", "1110 0010 0010 ----") +INST(JMP, "JMP", "1110 0010 0001 ----") +INST(JMX, "JMX", "1110 0010 0000 ----") +INST(KIL, "KIL", "1110 0011 0011 ----") +INST(LD, "LD", "100- ---- ---- ----") +INST(LDC, "LDC", "1110 1111 1001 0---") +INST(LDG, "LDG", "1110 1110 1101 0---") +INST(LDL, "LDL", "1110 1111 0100 0---") +INST(LDS, "LDS", "1110 1111 0100 1---") +INST(LEA_hi_reg, "LEA (hi reg)", "0101 1011 1101 1---") +INST(LEA_hi_cbuf, "LEA (hi cbuf)", "0001 10-- ---- ----") +INST(LEA_lo_reg, "LEA (lo reg)", "0101 1011 1101 0---") +INST(LEA_lo_cbuf, "LEA (lo cbuf)", "0100 1011 1101 ----") +INST(LEA_lo_imm, "LEA (lo imm)", "0011 011- 1101 0---") +INST(LEPC, "LEPC", "0101 0000 1101 0---") +INST(LONGJMP, "LONGJMP", "1110 0011 0001 ----") +INST(LOP_reg, "LOP (reg)", "0101 1100 0100 0---") +INST(LOP_cbuf, "LOP (cbuf)", "0100 1100 0100 0---") +INST(LOP_imm, "LOP (imm)", "0011 100- 0100 0---") +INST(LOP3_reg, "LOP3 (reg)", "0101 1011 1110 0---") +INST(LOP3_cbuf, "LOP3 (cbuf)", "0000 001- ---- ----") +INST(LOP3_imm, "LOP3 (imm)", "0011 11-- ---- ----") +INST(LOP32I, "LOP32I", "0000 01-- ---- ----") +INST(MEMBAR, "MEMBAR", "1110 1111 1001 1---") +INST(MOV_reg, "MOV (reg)", "0101 1100 1001 1---") +INST(MOV_cbuf, "MOV (cbuf)", "0100 1100 1001 1---") +INST(MOV_imm, "MOV (imm)", "0011 100- 1001 1---") +INST(MOV32I, "MOV32I", "0000 0001 0000 ----") +INST(MUFU, "MUFU", "0101 0000 1000 0---") +INST(NOP, "NOP", "0101 0000 1011 0---") +INST(OUT_reg, "OUT (reg)", "1111 1011 1110 0---") +INST(OUT_cbuf, "OUT (cbuf)", "1110 1011 1110 0---") +INST(OUT_imm, "OUT (imm)", "1111 011- 1110 0---") +INST(P2R_reg, "P2R (reg)", "0101 1100 1110 1---") +INST(P2R_cbuf, "P2R (cbuf)", "0100 1100 1110 1---") +INST(P2R_imm, "P2R (imm)", "0011 1000 1110 1---") +INST(PBK, "PBK", "1110 0010 1010 ----") +INST(PCNT, "PCNT", "1110 0010 1011 ----") +INST(PEXIT, "PEXIT", "1110 0010 0011 ----") +INST(PIXLD, "PIXLD", "1110 1111 1110 1---") +INST(PLONGJMP, "PLONGJMP", "1110 0010 1000 ----") +INST(POPC_reg, "POPC (reg)", "0101 1100 0000 1---") +INST(POPC_cbuf, "POPC (cbuf)", "0100 1100 0000 1---") +INST(POPC_imm, "POPC (imm)", "0011 100- 0000 1---") +INST(PRET, "PRET", "1110 0010 0111 ----") +INST(PRMT_reg, "PRMT (reg)", "0101 1011 1100 ----") +INST(PRMT_rc, "PRMT (rc)", "0101 0011 1100 ----") +INST(PRMT_cr, "PRMT (cr)", "0100 1011 1100 ----") +INST(PRMT_imm, "PRMT (imm)", "0011 011- 1100 ----") +INST(PSET, "PSET", "0101 0000 1000 1---") +INST(PSETP, "PSETP", "0101 0000 1001 0---") +INST(R2B, "R2B", "1111 0000 1100 0---") +INST(R2P_reg, "R2P (reg)", "0101 1100 1111 
0---") +INST(R2P_cbuf, "R2P (cbuf)", "0100 1100 1111 0---") +INST(R2P_imm, "R2P (imm)", "0011 100- 1111 0---") +INST(RAM, "RAM", "1110 0011 1000 ----") +INST(RED, "RED", "1110 1011 1111 1---") +INST(RET, "RET", "1110 0011 0010 ----") +INST(RRO_reg, "RRO (reg)", "0101 1100 1001 0---") +INST(RRO_cbuf, "RRO (cbuf)", "0100 1100 1001 0---") +INST(RRO_imm, "RRO (imm)", "0011 100- 1001 0---") +INST(RTT, "RTT", "1110 0011 0110 ----") +INST(S2R, "S2R", "1111 0000 1100 1---") +INST(SAM, "SAM", "1110 0011 0111 ----") +INST(SEL_reg, "SEL (reg)", "0101 1100 1010 0---") +INST(SEL_cbuf, "SEL (cbuf)", "0100 1100 1010 0---") +INST(SEL_imm, "SEL (imm)", "0011 100- 1010 0---") +INST(SETCRSPTR, "SETCRSPTR", "1110 0010 1110 ----") +INST(SETLMEMBASE, "SETLMEMBASE", "1110 0010 1111 ----") +INST(SHF_l_reg, "SHF (l reg)", "0101 1011 1111 1---") +INST(SHF_l_imm, "SHF (l imm)", "0011 011- 1111 1---") +INST(SHF_r_reg, "SHF (r reg)", "0101 1100 1111 1---") +INST(SHF_r_imm, "SHF (r imm)", "0011 100- 1111 1---") +INST(SHFL, "SHFL", "1110 1111 0001 0---") +INST(SHL_reg, "SHL (reg)", "0101 1100 0100 1---") +INST(SHL_cbuf, "SHL (cbuf)", "0100 1100 0100 1---") +INST(SHL_imm, "SHL (imm)", "0011 100- 0100 1---") +INST(SHR_reg, "SHR (reg)", "0101 1100 0010 1---") +INST(SHR_cbuf, "SHR (cbuf)", "0100 1100 0010 1---") +INST(SHR_imm, "SHR (imm)", "0011 100- 0010 1---") +INST(SSY, "SSY", "1110 0010 1001 ----") +INST(ST, "ST", "101- ---- ---- ----") +INST(STG, "STG", "1110 1110 1101 1---") +INST(STL, "STL", "1110 1111 0101 0---") +INST(STP, "STP", "1110 1110 1010 0---") +INST(STS, "STS", "1110 1111 0101 1---") +INST(SUATOM, "SUATOM", "1110 1010 0--- ----") +INST(SUATOM_cas, "SUATOM_cas", "1110 1010 1--- ----") +INST(SULD, "SULD", "1110 1011 000- ----") +INST(SURED, "SURED", "1110 1011 010- ----") +INST(SUST, "SUST", "1110 1011 001- ----") +INST(SYNC, "SYNC", "1111 0000 1111 1---") +INST(TEX, "TEX", "1100 0--- ---- ----") +INST(TEX_b, "TEX (b)", "1101 1110 10-- ----") +INST(TEXS, "TEXS", "1101 -00- ---- ----") +INST(TLD, "TLD", "1101 1100 ---- ----") +INST(TLD_b, "TLD (b)", "1101 1101 ---- ----") +INST(TLD4, "TLD4", "1100 10-- ---- ----") +INST(TLD4_b, "TLD4 (b)", "1101 1110 11-- ----") +INST(TLD4S, "TLD4S", "1101 1111 -0-- ----") +INST(TLDS, "TLDS", "1101 -01- ---- ----") +INST(TMML, "TMML", "1101 1111 0101 1---") +INST(TMML_b, "TMML (b)", "1101 1111 0110 0---") +INST(TXA, "TXA", "1101 1111 0100 0---") +INST(TXD, "TXD", "1101 1110 00-- ----") +INST(TXD_b, "TXD (b)", "1101 1110 01-- ----") +INST(TXQ, "TXQ", "1101 1111 0100 1---") +INST(TXQ_b, "TXQ (b)", "1101 1111 0101 0---") +INST(VABSDIFF, "VABSDIFF", "0101 0100 ---- ----") +INST(VABSDIFF4, "VABSDIFF4", "0101 0000 0--- ----") +INST(VADD, "VADD", "0010 00-- ---- ----") +INST(VMAD, "VMAD", "0101 1111 ---- ----") +INST(VMNMX, "VMNMX", "0011 101- ---- ----") +INST(VOTE, "VOTE", "0101 0000 1101 1---") +INST(VOTE_vtg, "VOTE (vtg)", "0101 0000 1110 0---") +INST(VSET, "VSET", "0100 000- ---- ----") +INST(VSETP, "VSETP", "0101 0000 1111 0---") +INST(VSHL, "VSHL", "0101 0111 ---- ----") +INST(VSHR, "VSHR", "0101 0110 ---- ----") +INST(XMAD_reg, "XMAD (reg)", "0101 1011 00-- ----") +INST(XMAD_rc, "XMAD (rc)", "0101 0001 0--- ----") +INST(XMAD_cr, "XMAD (cr)", "0100 111- ---- ----") +INST(XMAD_imm, "XMAD (imm)", "0011 011- 00-- ----") + +// Removed due to its weird formatting making fast tables larger +// INST(CCTLT, "CCTLT", "1110 1011 1111 0--0") diff --git a/src/shader_recompiler/frontend/maxwell/opcodes.cpp b/src/shader_recompiler/frontend/maxwell/opcodes.cpp new file mode 100644 index 
000000000..ccc40c20c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/opcodes.cpp @@ -0,0 +1,26 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <array> + +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" + +namespace Shader::Maxwell { +namespace { +constexpr std::array NAME_TABLE{ +#define INST(name, cute, encode) cute, +#include "maxwell.inc" +#undef INST +}; +} // Anonymous namespace + +const char* NameOf(Opcode opcode) { + if (static_cast<size_t>(opcode) >= NAME_TABLE.size()) { + throw InvalidArgument("Invalid opcode with raw value {}", static_cast<int>(opcode)); + } + return NAME_TABLE[static_cast<size_t>(opcode)]; +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/opcodes.h b/src/shader_recompiler/frontend/maxwell/opcodes.h new file mode 100644 index 000000000..cd574f29d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/opcodes.h @@ -0,0 +1,30 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <fmt/format.h> + +namespace Shader::Maxwell { + +enum class Opcode { +#define INST(name, cute, encode) name, +#include "maxwell.inc" +#undef INST +}; + +const char* NameOf(Opcode opcode); + +} // namespace Shader::Maxwell + +template <> +struct fmt::formatter<Shader::Maxwell::Opcode> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(const Shader::Maxwell::Opcode& opcode, FormatContext& ctx) { + return format_to(ctx.out(), "{}", NameOf(opcode)); + } +}; diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp new file mode 100644 index 000000000..8b3e0a15c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -0,0 +1,883 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
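+// Structurizes the control flow graph: BuildTree emits a label, the block's code and explicit
+// goto statements for every flow block, then GotoPass removes each goto with outward/inward
+// movement and lifting transformations (essentially Erosa & Hendren style goto elimination)
+// until it is a sibling of its label, where it collapses into an if (forward branch) or a
+// do-while loop (backward branch). TranslatePass finally lowers the statement tree into the
+// IR abstract syntax list.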
+ +#include <algorithm> +#include <memory> +#include <string> +#include <unordered_map> +#include <utility> +#include <vector> +#include <version> + +#include <fmt/format.h> + +#include <boost/intrusive/list.hpp> + +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/maxwell/decode.h" +#include "shader_recompiler/frontend/maxwell/structured_control_flow.h" +#include "shader_recompiler/frontend/maxwell/translate/translate.h" +#include "shader_recompiler/object_pool.h" + +namespace Shader::Maxwell { +namespace { +struct Statement; + +// Use normal_link because we are not guaranteed to destroy the tree in order +using ListBaseHook = + boost::intrusive::list_base_hook<boost::intrusive::link_mode<boost::intrusive::normal_link>>; + +using Tree = boost::intrusive::list<Statement, + // Allow using Statement without a definition + boost::intrusive::base_hook<ListBaseHook>, + // Avoid linear complexity on splice, size is never called + boost::intrusive::constant_time_size<false>>; +using Node = Tree::iterator; + +enum class StatementType { + Code, + Goto, + Label, + If, + Loop, + Break, + Return, + Kill, + Unreachable, + Function, + Identity, + Not, + Or, + SetVariable, + SetIndirectBranchVariable, + Variable, + IndirectBranchCond, +}; + +bool HasChildren(StatementType type) { + switch (type) { + case StatementType::If: + case StatementType::Loop: + case StatementType::Function: + return true; + default: + return false; + } +} + +struct Goto {}; +struct Label {}; +struct If {}; +struct Loop {}; +struct Break {}; +struct Return {}; +struct Kill {}; +struct Unreachable {}; +struct FunctionTag {}; +struct Identity {}; +struct Not {}; +struct Or {}; +struct SetVariable {}; +struct SetIndirectBranchVariable {}; +struct Variable {}; +struct IndirectBranchCond {}; + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 26495) // Always initialize a member variable, expected in Statement +#endif +struct Statement : ListBaseHook { + Statement(const Flow::Block* block_, Statement* up_) + : block{block_}, up{up_}, type{StatementType::Code} {} + Statement(Goto, Statement* cond_, Node label_, Statement* up_) + : label{label_}, cond{cond_}, up{up_}, type{StatementType::Goto} {} + Statement(Label, u32 id_, Statement* up_) : id{id_}, up{up_}, type{StatementType::Label} {} + Statement(If, Statement* cond_, Tree&& children_, Statement* up_) + : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::If} {} + Statement(Loop, Statement* cond_, Tree&& children_, Statement* up_) + : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::Loop} {} + Statement(Break, Statement* cond_, Statement* up_) + : cond{cond_}, up{up_}, type{StatementType::Break} {} + Statement(Return, Statement* up_) : up{up_}, type{StatementType::Return} {} + Statement(Kill, Statement* up_) : up{up_}, type{StatementType::Kill} {} + Statement(Unreachable, Statement* up_) : up{up_}, type{StatementType::Unreachable} {} + Statement(FunctionTag) : children{}, type{StatementType::Function} {} + Statement(Identity, IR::Condition cond_, Statement* up_) + : guest_cond{cond_}, up{up_}, type{StatementType::Identity} {} + Statement(Not, Statement* op_, Statement* up_) : op{op_}, up{up_}, type{StatementType::Not} {} + Statement(Or, Statement* op_a_, Statement* op_b_, Statement* up_) + : op_a{op_a_}, op_b{op_b_}, up{up_}, type{StatementType::Or} {} + Statement(SetVariable, u32 
id_, Statement* op_, Statement* up_) + : op{op_}, id{id_}, up{up_}, type{StatementType::SetVariable} {} + Statement(SetIndirectBranchVariable, IR::Reg branch_reg_, s32 branch_offset_, Statement* up_) + : branch_offset{branch_offset_}, + branch_reg{branch_reg_}, up{up_}, type{StatementType::SetIndirectBranchVariable} {} + Statement(Variable, u32 id_, Statement* up_) + : id{id_}, up{up_}, type{StatementType::Variable} {} + Statement(IndirectBranchCond, u32 location_, Statement* up_) + : location{location_}, up{up_}, type{StatementType::IndirectBranchCond} {} + + ~Statement() { + if (HasChildren(type)) { + std::destroy_at(&children); + } + } + + union { + const Flow::Block* block; + Node label; + Tree children; + IR::Condition guest_cond; + Statement* op; + Statement* op_a; + u32 location; + s32 branch_offset; + }; + union { + Statement* cond; + Statement* op_b; + u32 id; + IR::Reg branch_reg; + }; + Statement* up{}; + StatementType type; +}; +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +std::string DumpExpr(const Statement* stmt) { + switch (stmt->type) { + case StatementType::Identity: + return fmt::format("{}", stmt->guest_cond); + case StatementType::Not: + return fmt::format("!{}", DumpExpr(stmt->op)); + case StatementType::Or: + return fmt::format("{} || {}", DumpExpr(stmt->op_a), DumpExpr(stmt->op_b)); + case StatementType::Variable: + return fmt::format("goto_L{}", stmt->id); + case StatementType::IndirectBranchCond: + return fmt::format("(indirect_branch == {:x})", stmt->location); + default: + return "<invalid type>"; + } +} + +[[maybe_unused]] std::string DumpTree(const Tree& tree, u32 indentation = 0) { + std::string ret; + std::string indent(indentation, ' '); + for (auto stmt = tree.begin(); stmt != tree.end(); ++stmt) { + switch (stmt->type) { + case StatementType::Code: + ret += fmt::format("{} Block {:04x} -> {:04x} (0x{:016x});\n", indent, + stmt->block->begin.Offset(), stmt->block->end.Offset(), + reinterpret_cast<uintptr_t>(stmt->block)); + break; + case StatementType::Goto: + ret += fmt::format("{} if ({}) goto L{};\n", indent, DumpExpr(stmt->cond), + stmt->label->id); + break; + case StatementType::Label: + ret += fmt::format("{}L{}:\n", indent, stmt->id); + break; + case StatementType::If: + ret += fmt::format("{} if ({}) {{\n", indent, DumpExpr(stmt->cond)); + ret += DumpTree(stmt->children, indentation + 4); + ret += fmt::format("{} }}\n", indent); + break; + case StatementType::Loop: + ret += fmt::format("{} do {{\n", indent); + ret += DumpTree(stmt->children, indentation + 4); + ret += fmt::format("{} }} while ({});\n", indent, DumpExpr(stmt->cond)); + break; + case StatementType::Break: + ret += fmt::format("{} if ({}) break;\n", indent, DumpExpr(stmt->cond)); + break; + case StatementType::Return: + ret += fmt::format("{} return;\n", indent); + break; + case StatementType::Kill: + ret += fmt::format("{} kill;\n", indent); + break; + case StatementType::Unreachable: + ret += fmt::format("{} unreachable;\n", indent); + break; + case StatementType::SetVariable: + ret += fmt::format("{} goto_L{} = {};\n", indent, stmt->id, DumpExpr(stmt->op)); + break; + case StatementType::SetIndirectBranchVariable: + ret += fmt::format("{} indirect_branch = {} + {};\n", indent, stmt->branch_reg, + stmt->branch_offset); + break; + case StatementType::Function: + case StatementType::Identity: + case StatementType::Not: + case StatementType::Or: + case StatementType::Variable: + case StatementType::IndirectBranchCond: + throw LogicError("Statement can't be printed"); + } + } + 
return ret; +} + +void SanitizeNoBreaks(const Tree& tree) { + if (std::ranges::find(tree, StatementType::Break, &Statement::type) != tree.end()) { + throw NotImplementedException("Capturing statement with break nodes"); + } +} + +size_t Level(Node stmt) { + size_t level{0}; + Statement* node{stmt->up}; + while (node) { + ++level; + node = node->up; + } + return level; +} + +bool IsDirectlyRelated(Node goto_stmt, Node label_stmt) { + const size_t goto_level{Level(goto_stmt)}; + const size_t label_level{Level(label_stmt)}; + size_t min_level; + size_t max_level; + Node min; + Node max; + if (label_level < goto_level) { + min_level = label_level; + max_level = goto_level; + min = label_stmt; + max = goto_stmt; + } else { // goto_level < label_level + min_level = goto_level; + max_level = label_level; + min = goto_stmt; + max = label_stmt; + } + while (max_level > min_level) { + --max_level; + max = max->up; + } + return min->up == max->up; +} + +bool IsIndirectlyRelated(Node goto_stmt, Node label_stmt) { + return goto_stmt->up != label_stmt->up && !IsDirectlyRelated(goto_stmt, label_stmt); +} + +[[maybe_unused]] bool AreSiblings(Node goto_stmt, Node label_stmt) noexcept { + Node it{goto_stmt}; + do { + if (it == label_stmt) { + return true; + } + --it; + } while (it != goto_stmt->up->children.begin()); + while (it != goto_stmt->up->children.end()) { + if (it == label_stmt) { + return true; + } + ++it; + } + return false; +} + +Node SiblingFromNephew(Node uncle, Node nephew) noexcept { + Statement* const parent{uncle->up}; + Statement* it{&*nephew}; + while (it->up != parent) { + it = it->up; + } + return Tree::s_iterator_to(*it); +} + +bool AreOrdered(Node left_sibling, Node right_sibling) noexcept { + const Node end{right_sibling->up->children.end()}; + for (auto it = right_sibling; it != end; ++it) { + if (it == left_sibling) { + return false; + } + } + return true; +} + +bool NeedsLift(Node goto_stmt, Node label_stmt) noexcept { + const Node sibling{SiblingFromNephew(goto_stmt, label_stmt)}; + return AreOrdered(sibling, goto_stmt); +} + +class GotoPass { +public: + explicit GotoPass(Flow::CFG& cfg, ObjectPool<Statement>& stmt_pool) : pool{stmt_pool} { + std::vector gotos{BuildTree(cfg)}; + const auto end{gotos.rend()}; + for (auto goto_stmt = gotos.rbegin(); goto_stmt != end; ++goto_stmt) { + RemoveGoto(*goto_stmt); + } + } + + Statement& RootStatement() noexcept { + return root_stmt; + } + +private: + void RemoveGoto(Node goto_stmt) { + // Force goto_stmt and label_stmt to be directly related + const Node label_stmt{goto_stmt->label}; + if (IsIndirectlyRelated(goto_stmt, label_stmt)) { + // Move goto_stmt out using outward-movement transformation until it becomes + // directly related to label_stmt + while (!IsDirectlyRelated(goto_stmt, label_stmt)) { + goto_stmt = MoveOutward(goto_stmt); + } + } + // Force goto_stmt and label_stmt to be siblings + if (IsDirectlyRelated(goto_stmt, label_stmt)) { + const size_t label_level{Level(label_stmt)}; + size_t goto_level{Level(goto_stmt)}; + if (goto_level > label_level) { + // Move goto_stmt out of its level using outward-movement transformations + while (goto_level > label_level) { + goto_stmt = MoveOutward(goto_stmt); + --goto_level; + } + } else { // Level(goto_stmt) < Level(label_stmt) + if (NeedsLift(goto_stmt, label_stmt)) { + // Lift goto_stmt to above stmt containing label_stmt using goto-lifting + // transformations + goto_stmt = Lift(goto_stmt); + } + // Move goto_stmt into label_stmt's level using inward-movement transformation + while 
(goto_level < label_level) { + goto_stmt = MoveInward(goto_stmt); + ++goto_level; + } + } + } + // Expensive operation: + // if (!AreSiblings(goto_stmt, label_stmt)) { + // throw LogicError("Goto is not a sibling with the label"); + // } + // goto_stmt and label_stmt are guaranteed to be siblings, eliminate + if (std::next(goto_stmt) == label_stmt) { + // Simply eliminate the goto if the label is next to it + goto_stmt->up->children.erase(goto_stmt); + } else if (AreOrdered(goto_stmt, label_stmt)) { + // Eliminate goto_stmt with a conditional + EliminateAsConditional(goto_stmt, label_stmt); + } else { + // Eliminate goto_stmt with a loop + EliminateAsLoop(goto_stmt, label_stmt); + } + } + + std::vector<Node> BuildTree(Flow::CFG& cfg) { + u32 label_id{0}; + std::vector<Node> gotos; + Flow::Function& first_function{cfg.Functions().front()}; + BuildTree(cfg, first_function, label_id, gotos, root_stmt.children.end(), std::nullopt); + return gotos; + } + + void BuildTree(Flow::CFG& cfg, Flow::Function& function, u32& label_id, + std::vector<Node>& gotos, Node function_insert_point, + std::optional<Node> return_label) { + Statement* const false_stmt{pool.Create(Identity{}, IR::Condition{false}, &root_stmt)}; + Tree& root{root_stmt.children}; + std::unordered_map<Flow::Block*, Node> local_labels; + local_labels.reserve(function.blocks.size()); + + for (Flow::Block& block : function.blocks) { + Statement* const label{pool.Create(Label{}, label_id, &root_stmt)}; + const Node label_it{root.insert(function_insert_point, *label)}; + local_labels.emplace(&block, label_it); + ++label_id; + } + for (Flow::Block& block : function.blocks) { + const Node label{local_labels.at(&block)}; + // Insertion point + const Node ip{std::next(label)}; + + // Reset goto variables before the first block and after its respective label + const auto make_reset_variable{[&]() -> Statement& { + return *pool.Create(SetVariable{}, label->id, false_stmt, &root_stmt); + }}; + root.push_front(make_reset_variable()); + root.insert(ip, make_reset_variable()); + root.insert(ip, *pool.Create(&block, &root_stmt)); + + switch (block.end_class) { + case Flow::EndClass::Branch: { + Statement* const always_cond{ + pool.Create(Identity{}, IR::Condition{true}, &root_stmt)}; + if (block.cond == IR::Condition{true}) { + const Node true_label{local_labels.at(block.branch_true)}; + gotos.push_back( + root.insert(ip, *pool.Create(Goto{}, always_cond, true_label, &root_stmt))); + } else if (block.cond == IR::Condition{false}) { + const Node false_label{local_labels.at(block.branch_false)}; + gotos.push_back(root.insert( + ip, *pool.Create(Goto{}, always_cond, false_label, &root_stmt))); + } else { + const Node true_label{local_labels.at(block.branch_true)}; + const Node false_label{local_labels.at(block.branch_false)}; + Statement* const true_cond{pool.Create(Identity{}, block.cond, &root_stmt)}; + gotos.push_back( + root.insert(ip, *pool.Create(Goto{}, true_cond, true_label, &root_stmt))); + gotos.push_back(root.insert( + ip, *pool.Create(Goto{}, always_cond, false_label, &root_stmt))); + } + break; + } + case Flow::EndClass::IndirectBranch: + root.insert(ip, *pool.Create(SetIndirectBranchVariable{}, block.branch_reg, + block.branch_offset, &root_stmt)); + for (const Flow::IndirectBranch& indirect : block.indirect_branches) { + const Node indirect_label{local_labels.at(indirect.block)}; + Statement* cond{ + pool.Create(IndirectBranchCond{}, indirect.address, &root_stmt)}; + Statement* goto_stmt{pool.Create(Goto{}, cond, indirect_label, 
&root_stmt)}; + gotos.push_back(root.insert(ip, *goto_stmt)); + } + root.insert(ip, *pool.Create(Unreachable{}, &root_stmt)); + break; + case Flow::EndClass::Call: { + Flow::Function& call{cfg.Functions()[block.function_call]}; + const Node call_return_label{local_labels.at(block.return_block)}; + BuildTree(cfg, call, label_id, gotos, ip, call_return_label); + break; + } + case Flow::EndClass::Exit: + root.insert(ip, *pool.Create(Return{}, &root_stmt)); + break; + case Flow::EndClass::Return: { + Statement* const always_cond{pool.Create(Identity{}, block.cond, &root_stmt)}; + auto goto_stmt{pool.Create(Goto{}, always_cond, return_label.value(), &root_stmt)}; + gotos.push_back(root.insert(ip, *goto_stmt)); + break; + } + case Flow::EndClass::Kill: + root.insert(ip, *pool.Create(Kill{}, &root_stmt)); + break; + } + } + } + + void UpdateTreeUp(Statement* tree) { + for (Statement& stmt : tree->children) { + stmt.up = tree; + } + } + + void EliminateAsConditional(Node goto_stmt, Node label_stmt) { + Tree& body{goto_stmt->up->children}; + Tree if_body; + if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_stmt); + Statement* const cond{pool.Create(Not{}, goto_stmt->cond, &root_stmt)}; + Statement* const if_stmt{pool.Create(If{}, cond, std::move(if_body), goto_stmt->up)}; + UpdateTreeUp(if_stmt); + body.insert(goto_stmt, *if_stmt); + body.erase(goto_stmt); + } + + void EliminateAsLoop(Node goto_stmt, Node label_stmt) { + Tree& body{goto_stmt->up->children}; + Tree loop_body; + loop_body.splice(loop_body.begin(), body, label_stmt, goto_stmt); + Statement* const cond{goto_stmt->cond}; + Statement* const loop{pool.Create(Loop{}, cond, std::move(loop_body), goto_stmt->up)}; + UpdateTreeUp(loop); + body.insert(goto_stmt, *loop); + body.erase(goto_stmt); + } + + [[nodiscard]] Node MoveOutward(Node goto_stmt) { + switch (goto_stmt->up->type) { + case StatementType::If: + return MoveOutwardIf(goto_stmt); + case StatementType::Loop: + return MoveOutwardLoop(goto_stmt); + default: + throw LogicError("Invalid outward movement"); + } + } + + [[nodiscard]] Node MoveInward(Node goto_stmt) { + Statement* const parent{goto_stmt->up}; + Tree& body{parent->children}; + const Node label{goto_stmt->label}; + const Node label_nested_stmt{SiblingFromNephew(goto_stmt, label)}; + const u32 label_id{label->id}; + + Statement* const goto_cond{goto_stmt->cond}; + Statement* const set_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)}; + body.insert(goto_stmt, *set_var); + + Tree if_body; + if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_nested_stmt); + Statement* const variable{pool.Create(Variable{}, label_id, &root_stmt)}; + Statement* const neg_var{pool.Create(Not{}, variable, &root_stmt)}; + if (!if_body.empty()) { + Statement* const if_stmt{pool.Create(If{}, neg_var, std::move(if_body), parent)}; + UpdateTreeUp(if_stmt); + body.insert(goto_stmt, *if_stmt); + } + body.erase(goto_stmt); + + switch (label_nested_stmt->type) { + case StatementType::If: + // Update nested if condition + label_nested_stmt->cond = + pool.Create(Or{}, variable, label_nested_stmt->cond, &root_stmt); + break; + case StatementType::Loop: + break; + default: + throw LogicError("Invalid inward movement"); + } + Tree& nested_tree{label_nested_stmt->children}; + Statement* const new_goto{pool.Create(Goto{}, variable, label, &*label_nested_stmt)}; + return nested_tree.insert(nested_tree.begin(), *new_goto); + } + + [[nodiscard]] Node Lift(Node goto_stmt) { + Statement* const parent{goto_stmt->up}; + Tree& 
body{parent->children}; + const Node label{goto_stmt->label}; + const u32 label_id{label->id}; + const Node label_nested_stmt{SiblingFromNephew(goto_stmt, label)}; + + Tree loop_body; + loop_body.splice(loop_body.begin(), body, label_nested_stmt, goto_stmt); + SanitizeNoBreaks(loop_body); + Statement* const variable{pool.Create(Variable{}, label_id, &root_stmt)}; + Statement* const loop_stmt{pool.Create(Loop{}, variable, std::move(loop_body), parent)}; + UpdateTreeUp(loop_stmt); + body.insert(goto_stmt, *loop_stmt); + + Statement* const new_goto{pool.Create(Goto{}, variable, label, loop_stmt)}; + loop_stmt->children.push_front(*new_goto); + const Node new_goto_node{loop_stmt->children.begin()}; + + Statement* const set_var{pool.Create(SetVariable{}, label_id, goto_stmt->cond, loop_stmt)}; + loop_stmt->children.push_back(*set_var); + + body.erase(goto_stmt); + return new_goto_node; + } + + Node MoveOutwardIf(Node goto_stmt) { + const Node parent{Tree::s_iterator_to(*goto_stmt->up)}; + Tree& body{parent->children}; + const u32 label_id{goto_stmt->label->id}; + Statement* const goto_cond{goto_stmt->cond}; + Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, &*parent)}; + body.insert(goto_stmt, *set_goto_var); + + Tree if_body; + if_body.splice(if_body.begin(), body, std::next(goto_stmt), body.end()); + if_body.pop_front(); + Statement* const cond{pool.Create(Variable{}, label_id, &root_stmt)}; + Statement* const neg_cond{pool.Create(Not{}, cond, &root_stmt)}; + Statement* const if_stmt{pool.Create(If{}, neg_cond, std::move(if_body), &*parent)}; + UpdateTreeUp(if_stmt); + body.insert(goto_stmt, *if_stmt); + + body.erase(goto_stmt); + + Statement* const new_cond{pool.Create(Variable{}, label_id, &root_stmt)}; + Statement* const new_goto{pool.Create(Goto{}, new_cond, goto_stmt->label, parent->up)}; + Tree& parent_tree{parent->up->children}; + return parent_tree.insert(std::next(parent), *new_goto); + } + + Node MoveOutwardLoop(Node goto_stmt) { + Statement* const parent{goto_stmt->up}; + Tree& body{parent->children}; + const u32 label_id{goto_stmt->label->id}; + Statement* const goto_cond{goto_stmt->cond}; + Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)}; + Statement* const cond{pool.Create(Variable{}, label_id, &root_stmt)}; + Statement* const break_stmt{pool.Create(Break{}, cond, parent)}; + body.insert(goto_stmt, *set_goto_var); + body.insert(goto_stmt, *break_stmt); + body.erase(goto_stmt); + + const Node loop{Tree::s_iterator_to(*goto_stmt->up)}; + Statement* const new_goto_cond{pool.Create(Variable{}, label_id, &root_stmt)}; + Statement* const new_goto{pool.Create(Goto{}, new_goto_cond, goto_stmt->label, loop->up)}; + Tree& parent_tree{loop->up->children}; + return parent_tree.insert(std::next(loop), *new_goto); + } + + ObjectPool<Statement>& pool; + Statement root_stmt{FunctionTag{}}; +}; + +[[nodiscard]] Statement* TryFindForwardBlock(Statement& stmt) { + Tree& tree{stmt.up->children}; + const Node end{tree.end()}; + Node forward_node{std::next(Tree::s_iterator_to(stmt))}; + while (forward_node != end && !HasChildren(forward_node->type)) { + if (forward_node->type == StatementType::Code) { + return &*forward_node; + } + ++forward_node; + } + return nullptr; +} + +[[nodiscard]] IR::U1 VisitExpr(IR::IREmitter& ir, const Statement& stmt) { + switch (stmt.type) { + case StatementType::Identity: + return ir.Condition(stmt.guest_cond); + case StatementType::Not: + return ir.LogicalNot(IR::U1{VisitExpr(ir, *stmt.op)}); + case 
StatementType::Or: + return ir.LogicalOr(VisitExpr(ir, *stmt.op_a), VisitExpr(ir, *stmt.op_b)); + case StatementType::Variable: + return ir.GetGotoVariable(stmt.id); + case StatementType::IndirectBranchCond: + return ir.IEqual(ir.GetIndirectBranchVariable(), ir.Imm32(stmt.location)); + default: + throw NotImplementedException("Statement type {}", stmt.type); + } +} + +class TranslatePass { +public: + TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_, + ObjectPool<Statement>& stmt_pool_, Environment& env_, Statement& root_stmt, + IR::AbstractSyntaxList& syntax_list_) + : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_}, + syntax_list{syntax_list_} { + Visit(root_stmt, nullptr, nullptr); + + IR::Block& first_block{*syntax_list.front().data.block}; + IR::IREmitter ir(first_block, first_block.begin()); + ir.Prologue(); + } + +private: + void Visit(Statement& parent, IR::Block* break_block, IR::Block* fallthrough_block) { + IR::Block* current_block{}; + const auto ensure_block{[&] { + if (current_block) { + return; + } + current_block = block_pool.Create(inst_pool); + auto& node{syntax_list.emplace_back()}; + node.type = IR::AbstractSyntaxNode::Type::Block; + node.data.block = current_block; + }}; + Tree& tree{parent.children}; + for (auto it = tree.begin(); it != tree.end(); ++it) { + Statement& stmt{*it}; + switch (stmt.type) { + case StatementType::Label: + // Labels can be ignored + break; + case StatementType::Code: { + ensure_block(); + Translate(env, current_block, stmt.block->begin.Offset(), stmt.block->end.Offset()); + break; + } + case StatementType::SetVariable: { + ensure_block(); + IR::IREmitter ir{*current_block}; + ir.SetGotoVariable(stmt.id, VisitExpr(ir, *stmt.op)); + break; + } + case StatementType::SetIndirectBranchVariable: { + ensure_block(); + IR::IREmitter ir{*current_block}; + IR::U32 address{ir.IAdd(ir.GetReg(stmt.branch_reg), ir.Imm32(stmt.branch_offset))}; + ir.SetIndirectBranchVariable(address); + break; + } + case StatementType::If: { + ensure_block(); + IR::Block* const merge_block{MergeBlock(parent, stmt)}; + + // Implement if header block + IR::IREmitter ir{*current_block}; + const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))}; + + const size_t if_node_index{syntax_list.size()}; + syntax_list.emplace_back(); + + // Visit children + const size_t then_block_index{syntax_list.size()}; + Visit(stmt, break_block, merge_block); + + IR::Block* const then_block{syntax_list.at(then_block_index).data.block}; + current_block->AddBranch(then_block); + current_block->AddBranch(merge_block); + current_block = merge_block; + + auto& if_node{syntax_list[if_node_index]}; + if_node.type = IR::AbstractSyntaxNode::Type::If; + if_node.data.if_node.cond = cond; + if_node.data.if_node.body = then_block; + if_node.data.if_node.merge = merge_block; + + auto& endif_node{syntax_list.emplace_back()}; + endif_node.type = IR::AbstractSyntaxNode::Type::EndIf; + endif_node.data.end_if.merge = merge_block; + + auto& merge{syntax_list.emplace_back()}; + merge.type = IR::AbstractSyntaxNode::Type::Block; + merge.data.block = merge_block; + break; + } + case StatementType::Loop: { + IR::Block* const loop_header_block{block_pool.Create(inst_pool)}; + if (current_block) { + current_block->AddBranch(loop_header_block); + } + auto& header_node{syntax_list.emplace_back()}; + header_node.type = IR::AbstractSyntaxNode::Type::Block; + header_node.data.block = loop_header_block; + + IR::Block* const 
continue_block{block_pool.Create(inst_pool)}; + IR::Block* const merge_block{MergeBlock(parent, stmt)}; + + const size_t loop_node_index{syntax_list.size()}; + syntax_list.emplace_back(); + + // Visit children + const size_t body_block_index{syntax_list.size()}; + Visit(stmt, merge_block, continue_block); + + // The continue block is located at the end of the loop + IR::IREmitter ir{*continue_block}; + const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))}; + + IR::Block* const body_block{syntax_list.at(body_block_index).data.block}; + loop_header_block->AddBranch(body_block); + + continue_block->AddBranch(loop_header_block); + continue_block->AddBranch(merge_block); + + current_block = merge_block; + + auto& loop{syntax_list[loop_node_index]}; + loop.type = IR::AbstractSyntaxNode::Type::Loop; + loop.data.loop.body = body_block; + loop.data.loop.continue_block = continue_block; + loop.data.loop.merge = merge_block; + + auto& continue_block_node{syntax_list.emplace_back()}; + continue_block_node.type = IR::AbstractSyntaxNode::Type::Block; + continue_block_node.data.block = continue_block; + + auto& repeat{syntax_list.emplace_back()}; + repeat.type = IR::AbstractSyntaxNode::Type::Repeat; + repeat.data.repeat.cond = cond; + repeat.data.repeat.loop_header = loop_header_block; + repeat.data.repeat.merge = merge_block; + + auto& merge{syntax_list.emplace_back()}; + merge.type = IR::AbstractSyntaxNode::Type::Block; + merge.data.block = merge_block; + break; + } + case StatementType::Break: { + ensure_block(); + IR::Block* const skip_block{MergeBlock(parent, stmt)}; + + IR::IREmitter ir{*current_block}; + const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))}; + current_block->AddBranch(break_block); + current_block->AddBranch(skip_block); + current_block = skip_block; + + auto& break_node{syntax_list.emplace_back()}; + break_node.type = IR::AbstractSyntaxNode::Type::Break; + break_node.data.break_node.cond = cond; + break_node.data.break_node.merge = break_block; + break_node.data.break_node.skip = skip_block; + + auto& merge{syntax_list.emplace_back()}; + merge.type = IR::AbstractSyntaxNode::Type::Block; + merge.data.block = skip_block; + break; + } + case StatementType::Return: { + ensure_block(); + IR::IREmitter{*current_block}.Epilogue(); + current_block = nullptr; + syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return; + break; + } + case StatementType::Kill: { + ensure_block(); + IR::Block* demote_block{MergeBlock(parent, stmt)}; + IR::IREmitter{*current_block}.DemoteToHelperInvocation(); + current_block->AddBranch(demote_block); + current_block = demote_block; + + auto& merge{syntax_list.emplace_back()}; + merge.type = IR::AbstractSyntaxNode::Type::Block; + merge.data.block = demote_block; + break; + } + case StatementType::Unreachable: { + ensure_block(); + current_block = nullptr; + syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Unreachable; + break; + } + default: + throw NotImplementedException("Statement type {}", stmt.type); + } + } + if (current_block) { + if (fallthrough_block) { + current_block->AddBranch(fallthrough_block); + } else { + syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Unreachable; + } + } + } + + IR::Block* MergeBlock(Statement& parent, Statement& stmt) { + Statement* merge_stmt{TryFindForwardBlock(stmt)}; + if (!merge_stmt) { + // Create a merge block we can visit later + merge_stmt = stmt_pool.Create(&dummy_flow_block, &parent); + parent.children.insert(std::next(Tree::s_iterator_to(stmt)), 
*merge_stmt); + } + return block_pool.Create(inst_pool); + } + + ObjectPool<Statement>& stmt_pool; + ObjectPool<IR::Inst>& inst_pool; + ObjectPool<IR::Block>& block_pool; + Environment& env; + IR::AbstractSyntaxList& syntax_list; + +// TODO: C++20 Remove this when all compilers support constexpr std::vector +#if __cpp_lib_constexpr_vector >= 201907 + static constexpr Flow::Block dummy_flow_block; +#else + const Flow::Block dummy_flow_block; +#endif +}; +} // Anonymous namespace + +IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, + Environment& env, Flow::CFG& cfg) { + ObjectPool<Statement> stmt_pool{64}; + GotoPass goto_pass{cfg, stmt_pool}; + Statement& root{goto_pass.RootStatement()}; + IR::AbstractSyntaxList syntax_list; + TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list}; + return syntax_list; +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h new file mode 100644 index 000000000..88b083649 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h @@ -0,0 +1,20 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/abstract_syntax_list.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/object_pool.h" + +namespace Shader::Maxwell { + +[[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, + ObjectPool<IR::Block>& block_pool, Environment& env, + Flow::CFG& cfg); + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp new file mode 100644 index 000000000..d9f999e05 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp @@ -0,0 +1,214 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
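// Editorial sketch (not part of the diff above): a minimal, standalone illustration of the
// node ordering TranslatePass emits for an `if` statement — the block holding the condition,
// an If node (filled in once the body has been visited), the body block(s), EndIf, and the
// merge block. The placeholder types below stand in for IR::AbstractSyntaxNode and IR::Block.
#include <cassert>
#include <vector>

namespace sketch {
enum class NodeType { Block, If, EndIf };

struct Node {
    NodeType type;
    int block_id{-1}; // stand-in for IR::Block*
};

std::vector<Node> EmitIf(int cond_block, int body_block, int merge_block) {
    std::vector<Node> list;
    list.push_back({NodeType::Block, cond_block});  // block that computed the condition
    list.push_back({NodeType::If});                 // placeholder, completed after the body visit
    list.push_back({NodeType::Block, body_block});  // then-body block(s)
    list.push_back({NodeType::EndIf});
    list.push_back({NodeType::Block, merge_block}); // execution resumes here
    return list;
}
} // namespace sketch

int main() {
    const std::vector<sketch::Node> list{sketch::EmitIf(0, 1, 2)};
    assert(list[1].type == sketch::NodeType::If);
    assert(list.back().block_id == 2);
}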
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class AtomOp : u64 { + ADD, + MIN, + MAX, + INC, + DEC, + AND, + OR, + XOR, + EXCH, + SAFEADD, +}; + +enum class AtomSize : u64 { + U32, + S32, + U64, + F32, + F16x2, + S64, +}; + +IR::U32U64 ApplyIntegerAtomOp(IR::IREmitter& ir, const IR::U32U64& offset, const IR::U32U64& op_b, + AtomOp op, bool is_signed) { + switch (op) { + case AtomOp::ADD: + return ir.GlobalAtomicIAdd(offset, op_b); + case AtomOp::MIN: + return ir.GlobalAtomicIMin(offset, op_b, is_signed); + case AtomOp::MAX: + return ir.GlobalAtomicIMax(offset, op_b, is_signed); + case AtomOp::INC: + return ir.GlobalAtomicInc(offset, op_b); + case AtomOp::DEC: + return ir.GlobalAtomicDec(offset, op_b); + case AtomOp::AND: + return ir.GlobalAtomicAnd(offset, op_b); + case AtomOp::OR: + return ir.GlobalAtomicOr(offset, op_b); + case AtomOp::XOR: + return ir.GlobalAtomicXor(offset, op_b); + case AtomOp::EXCH: + return ir.GlobalAtomicExchange(offset, op_b); + default: + throw NotImplementedException("Integer Atom Operation {}", op); + } +} + +IR::Value ApplyFpAtomOp(IR::IREmitter& ir, const IR::U64& offset, const IR::Value& op_b, AtomOp op, + AtomSize size) { + static constexpr IR::FpControl f16_control{ + .no_contraction = false, + .rounding = IR::FpRounding::RN, + .fmz_mode = IR::FmzMode::DontCare, + }; + static constexpr IR::FpControl f32_control{ + .no_contraction = false, + .rounding = IR::FpRounding::RN, + .fmz_mode = IR::FmzMode::FTZ, + }; + switch (op) { + case AtomOp::ADD: + return size == AtomSize::F32 ? ir.GlobalAtomicF32Add(offset, op_b, f32_control) + : ir.GlobalAtomicF16x2Add(offset, op_b, f16_control); + case AtomOp::MIN: + return ir.GlobalAtomicF16x2Min(offset, op_b, f16_control); + case AtomOp::MAX: + return ir.GlobalAtomicF16x2Max(offset, op_b, f16_control); + default: + throw NotImplementedException("FP Atom Operation {}", op); + } +} + +IR::U64 AtomOffset(TranslatorVisitor& v, u64 insn) { + union { + u64 raw; + BitField<8, 8, IR::Reg> addr_reg; + BitField<28, 20, s64> addr_offset; + BitField<28, 20, u64> rz_addr_offset; + BitField<48, 1, u64> e; + } const mem{insn}; + + const IR::U64 address{[&]() -> IR::U64 { + if (mem.e == 0) { + return v.ir.UConvert(64, v.X(mem.addr_reg)); + } + return v.L(mem.addr_reg); + }()}; + const u64 addr_offset{[&]() -> u64 { + if (mem.addr_reg == IR::Reg::RZ) { + // When RZ is used, the address is an absolute address + return static_cast<u64>(mem.rz_addr_offset.Value()); + } else { + return static_cast<u64>(mem.addr_offset.Value()); + } + }()}; + return v.ir.IAdd(address, v.ir.Imm64(addr_offset)); +} + +bool AtomOpNotApplicable(AtomSize size, AtomOp op) { + // TODO: SAFEADD + switch (size) { + case AtomSize::S32: + case AtomSize::U64: + return (op == AtomOp::INC || op == AtomOp::DEC); + case AtomSize::S64: + return !(op == AtomOp::MIN || op == AtomOp::MAX); + case AtomSize::F32: + return op != AtomOp::ADD; + case AtomSize::F16x2: + return !(op == AtomOp::ADD || op == AtomOp::MIN || op == AtomOp::MAX); + default: + return false; + } +} + +IR::U32U64 LoadGlobal(IR::IREmitter& ir, const IR::U64& offset, AtomSize size) { + switch (size) { + case AtomSize::U32: + case AtomSize::S32: + case AtomSize::F32: + case AtomSize::F16x2: + return ir.LoadGlobal32(offset); + case AtomSize::U64: + case AtomSize::S64: + return ir.PackUint2x32(ir.LoadGlobal64(offset)); + default: + throw NotImplementedException("Atom Size {}", 
size); + } +} + +void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomSize size) { + switch (size) { + case AtomSize::U32: + case AtomSize::S32: + case AtomSize::F16x2: + return v.X(dest_reg, IR::U32{result}); + case AtomSize::U64: + case AtomSize::S64: + return v.L(dest_reg, IR::U64{result}); + case AtomSize::F32: + return v.F(dest_reg, IR::F32{result}); + default: + break; + } +} + +IR::Value ApplyAtomOp(TranslatorVisitor& v, IR::Reg operand_reg, const IR::U64& offset, + AtomSize size, AtomOp op) { + switch (size) { + case AtomSize::U32: + case AtomSize::S32: + return ApplyIntegerAtomOp(v.ir, offset, v.X(operand_reg), op, size == AtomSize::S32); + case AtomSize::U64: + case AtomSize::S64: + return ApplyIntegerAtomOp(v.ir, offset, v.L(operand_reg), op, size == AtomSize::S64); + case AtomSize::F32: + return ApplyFpAtomOp(v.ir, offset, v.F(operand_reg), op, size); + case AtomSize::F16x2: { + return ApplyFpAtomOp(v.ir, offset, v.ir.UnpackFloat2x16(v.X(operand_reg)), op, size); + } + default: + throw NotImplementedException("Atom Size {}", size); + } +} + +void GlobalAtomic(TranslatorVisitor& v, IR::Reg dest_reg, IR::Reg operand_reg, + const IR::U64& offset, AtomSize size, AtomOp op, bool write_dest) { + IR::Value result; + if (AtomOpNotApplicable(size, op)) { + result = LoadGlobal(v.ir, offset, size); + } else { + result = ApplyAtomOp(v, operand_reg, offset, size, op); + } + if (write_dest) { + StoreResult(v, dest_reg, result, size); + } +} +} // Anonymous namespace + +void TranslatorVisitor::ATOM(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<20, 8, IR::Reg> operand_reg; + BitField<49, 3, AtomSize> size; + BitField<52, 4, AtomOp> op; + } const atom{insn}; + const IR::U64 offset{AtomOffset(*this, insn)}; + GlobalAtomic(*this, atom.dest_reg, atom.operand_reg, offset, atom.size, atom.op, true); +} + +void TranslatorVisitor::RED(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> operand_reg; + BitField<20, 3, AtomSize> size; + BitField<23, 3, AtomOp> op; + } const red{insn}; + const IR::U64 offset{AtomOffset(*this, insn)}; + GlobalAtomic(*this, IR::Reg::RZ, red.operand_reg, offset, red.size, red.op, true); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp new file mode 100644 index 000000000..8b974621e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp @@ -0,0 +1,110 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
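// Editorial sketch (not part of the diff above): a standalone model of the effective-address
// rule AtomOffset() implements for ATOM/RED. The 20-bit immediate is sign-extended and added
// to the address register; when that register is RZ the immediate is taken as an absolute
// address instead. The E-bit (32- vs 64-bit address register) handling is omitted, and the
// helper names below are hypothetical, operating on plain integers rather than IR values.
#include <cassert>
#include <cstdint>

namespace sketch {
constexpr int kRZ = 255; // RZ, the always-zero register, modelled as index 255

uint64_t AtomOffset(int addr_reg, uint64_t reg_value, uint64_t insn) {
    const uint64_t raw = (insn >> 28) & 0xFFFFF; // 20-bit immediate at bit 28
    if (addr_reg == kRZ) {
        return raw; // absolute address
    }
    // Sign-extend the 20-bit immediate before adding it to the register value.
    const int64_t offset = static_cast<int64_t>(raw) - ((raw & 0x80000) ? (1ll << 20) : 0);
    return reg_value + static_cast<uint64_t>(offset);
}
} // namespace sketch

int main() {
    assert(sketch::AtomOffset(0, 0x1000, 0xFFFFFull << 28) == 0xFFF); // reg + (-1)
    assert(sketch::AtomOffset(sketch::kRZ, 0, 0x12345ull << 28) == 0x12345);
}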
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class AtomOp : u64 { + ADD, + MIN, + MAX, + INC, + DEC, + AND, + OR, + XOR, + EXCH, +}; + +enum class AtomsSize : u64 { + U32, + S32, + U64, +}; + +IR::U32U64 ApplyAtomsOp(IR::IREmitter& ir, const IR::U32& offset, const IR::U32U64& op_b, AtomOp op, + bool is_signed) { + switch (op) { + case AtomOp::ADD: + return ir.SharedAtomicIAdd(offset, op_b); + case AtomOp::MIN: + return ir.SharedAtomicIMin(offset, op_b, is_signed); + case AtomOp::MAX: + return ir.SharedAtomicIMax(offset, op_b, is_signed); + case AtomOp::INC: + return ir.SharedAtomicInc(offset, op_b); + case AtomOp::DEC: + return ir.SharedAtomicDec(offset, op_b); + case AtomOp::AND: + return ir.SharedAtomicAnd(offset, op_b); + case AtomOp::OR: + return ir.SharedAtomicOr(offset, op_b); + case AtomOp::XOR: + return ir.SharedAtomicXor(offset, op_b); + case AtomOp::EXCH: + return ir.SharedAtomicExchange(offset, op_b); + default: + throw NotImplementedException("Integer Atoms Operation {}", op); + } +} + +IR::U32 AtomsOffset(TranslatorVisitor& v, u64 insn) { + union { + u64 raw; + BitField<8, 8, IR::Reg> offset_reg; + BitField<30, 22, u64> absolute_offset; + BitField<30, 22, s64> relative_offset; + } const encoding{insn}; + + if (encoding.offset_reg == IR::Reg::RZ) { + return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset << 2)); + } else { + const s32 relative{static_cast<s32>(encoding.relative_offset << 2)}; + return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative)); + } +} + +void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomsSize size) { + switch (size) { + case AtomsSize::U32: + case AtomsSize::S32: + return v.X(dest_reg, IR::U32{result}); + case AtomsSize::U64: + return v.L(dest_reg, IR::U64{result}); + default: + break; + } +} +} // Anonymous namespace + +void TranslatorVisitor::ATOMS(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> addr_reg; + BitField<20, 8, IR::Reg> src_reg_b; + BitField<28, 2, AtomsSize> size; + BitField<52, 4, AtomOp> op; + } const atoms{insn}; + + const bool size_64{atoms.size == AtomsSize::U64}; + if (size_64 && atoms.op != AtomOp::EXCH) { + throw NotImplementedException("64-bit Atoms Operation {}", atoms.op.Value()); + } + const bool is_signed{atoms.size == AtomsSize::S32}; + const IR::U32 offset{AtomsOffset(*this, insn)}; + + IR::Value result; + if (size_64) { + result = ApplyAtomsOp(ir, offset, L(atoms.src_reg_b), atoms.op, is_signed); + } else { + result = ApplyAtomsOp(ir, offset, X(atoms.src_reg_b), atoms.op, is_signed); + } + StoreResult(*this, atoms.dest_reg, result, atoms.size); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp new file mode 100644 index 000000000..fb3f00d3f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp @@ -0,0 +1,35 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
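// Editorial sketch (not part of the diff above): the ATOMS offset rule shown standalone. The
// 22-bit immediate addresses 32-bit words, so it is scaled by 4; with RZ as the offset
// register it is an absolute shared-memory offset, otherwise it is sign-extended and added
// to the register. Plain integers and a hypothetical helper stand in for the IR values.
#include <cassert>
#include <cstdint>

namespace sketch {
constexpr int kRZ = 255; // RZ, the always-zero register, modelled as index 255

uint32_t AtomsOffset(int offset_reg, uint32_t reg_value, uint64_t insn) {
    const uint32_t raw = static_cast<uint32_t>((insn >> 30) & 0x3FFFFF); // 22 bits at bit 30
    if (offset_reg == kRZ) {
        return raw << 2; // absolute shared-memory offset, scaled to bytes
    }
    // Sign-extend the 22-bit field, scale it to bytes, then add it to the register value.
    const int32_t displacement = static_cast<int32_t>(raw) - ((raw & 0x200000u) ? (1 << 22) : 0);
    return reg_value + static_cast<uint32_t>(displacement * 4);
}
} // namespace sketch

int main() {
    // An immediate of -1 words moves the register-relative offset back by 4 bytes.
    assert(sketch::AtomsOffset(0, 100, 0x3FFFFFull << 30) == 96);
    assert(sketch::AtomsOffset(sketch::kRZ, 0, 0x3FFFFFull << 30) == (0x3FFFFFu << 2));
}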
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { + +enum class BitSize : u64 { + B32, + B64, + B96, + B128, +}; + +void TranslatorVisitor::AL2P(u64 inst) { + union { + u64 raw; + BitField<0, 8, IR::Reg> result_register; + BitField<8, 8, IR::Reg> indexing_register; + BitField<20, 11, s64> offset; + BitField<47, 2, BitSize> bitsize; + } al2p{inst}; + if (al2p.bitsize != BitSize::B32) { + throw NotImplementedException("BitSize {}", al2p.bitsize.Value()); + } + const IR::U32 converted_offset{ir.Imm32(static_cast<u32>(al2p.offset.Value()))}; + const IR::U32 result{ir.IAdd(X(al2p.indexing_register), converted_offset)}; + X(al2p.result_register, result); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp new file mode 100644 index 000000000..86e433e41 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp @@ -0,0 +1,96 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +// Seems to be in CUDA terminology. +enum class LocalScope : u64 { + CTA, + GL, + SYS, + VC, +}; +} // Anonymous namespace + +void TranslatorVisitor::MEMBAR(u64 inst) { + union { + u64 raw; + BitField<8, 2, LocalScope> scope; + } const membar{inst}; + + if (membar.scope == LocalScope::CTA) { + ir.WorkgroupMemoryBarrier(); + } else { + ir.DeviceMemoryBarrier(); + } +} + +void TranslatorVisitor::DEPBAR() { + // DEPBAR is a no-op +} + +void TranslatorVisitor::BAR(u64 insn) { + enum class Mode { + RedPopc, + Scan, + RedAnd, + RedOr, + Sync, + Arrive, + }; + union { + u64 raw; + BitField<43, 1, u64> is_a_imm; + BitField<44, 1, u64> is_b_imm; + BitField<8, 8, u64> imm_a; + BitField<20, 12, u64> imm_b; + BitField<42, 1, u64> neg_pred; + BitField<39, 3, IR::Pred> pred; + } const bar{insn}; + + const Mode mode{[insn] { + switch (insn & 0x0000009B00000000ULL) { + case 0x0000000200000000ULL: + return Mode::RedPopc; + case 0x0000000300000000ULL: + return Mode::Scan; + case 0x0000000A00000000ULL: + return Mode::RedAnd; + case 0x0000001200000000ULL: + return Mode::RedOr; + case 0x0000008000000000ULL: + return Mode::Sync; + case 0x0000008100000000ULL: + return Mode::Arrive; + } + throw NotImplementedException("Invalid encoding"); + }()}; + if (mode != Mode::Sync) { + throw NotImplementedException("BAR mode {}", mode); + } + if (bar.is_a_imm == 0) { + throw NotImplementedException("Non-immediate input A"); + } + if (bar.imm_a != 0) { + throw NotImplementedException("Non-zero input A"); + } + if (bar.is_b_imm == 0) { + throw NotImplementedException("Non-immediate input B"); + } + if (bar.imm_b != 0) { + throw NotImplementedException("Non-zero input B"); + } + if (bar.pred != IR::Pred::PT && bar.neg_pred != 0) { + throw NotImplementedException("Non-true input predicate"); + } + ir.Barrier(); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp 
b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp new file mode 100644 index 000000000..9d5a87e52 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp @@ -0,0 +1,74 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void BFE(TranslatorVisitor& v, u64 insn, const IR::U32& src) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> offset_reg; + BitField<40, 1, u64> brev; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> is_signed; + } const bfe{insn}; + + const IR::U32 offset{v.ir.BitFieldExtract(src, v.ir.Imm32(0), v.ir.Imm32(8), false)}; + const IR::U32 count{v.ir.BitFieldExtract(src, v.ir.Imm32(8), v.ir.Imm32(8), false)}; + + // Common constants + const IR::U32 zero{v.ir.Imm32(0)}; + const IR::U32 one{v.ir.Imm32(1)}; + const IR::U32 max_size{v.ir.Imm32(32)}; + // Edge case conditions + const IR::U1 zero_count{v.ir.IEqual(count, zero)}; + const IR::U1 exceed_count{v.ir.IGreaterThanEqual(v.ir.IAdd(offset, count), max_size, false)}; + const IR::U1 replicate{v.ir.IGreaterThanEqual(offset, max_size, false)}; + + IR::U32 base{v.X(bfe.offset_reg)}; + if (bfe.brev != 0) { + base = v.ir.BitReverse(base); + } + IR::U32 result{v.ir.BitFieldExtract(base, offset, count, bfe.is_signed != 0)}; + if (bfe.is_signed != 0) { + const IR::U1 is_negative{v.ir.ILessThan(base, zero, true)}; + const IR::U32 replicated_bit{v.ir.Select(is_negative, v.ir.Imm32(-1), zero)}; + const IR::U32 exceed_bit{v.ir.BitFieldExtract(base, v.ir.Imm32(31), one, false)}; + // Replicate condition + result = IR::U32{v.ir.Select(replicate, replicated_bit, result)}; + // Exceeding condition + const IR::U32 exceed_result{v.ir.BitFieldInsert(result, exceed_bit, v.ir.Imm32(31), one)}; + result = IR::U32{v.ir.Select(exceed_count, exceed_result, result)}; + } + // Zero count condition + result = IR::U32{v.ir.Select(zero_count, zero, result)}; + + v.X(bfe.dest_reg, result); + + if (bfe.cc != 0) { + v.SetZFlag(v.ir.IEqual(result, zero)); + v.SetSFlag(v.ir.ILessThan(result, zero, true)); + v.ResetCFlag(); + v.ResetOFlag(); + } +} +} // Anonymous namespace + +void TranslatorVisitor::BFE_reg(u64 insn) { + BFE(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::BFE_cbuf(u64 insn) { + BFE(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::BFE_imm(u64 insn) { + BFE(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp new file mode 100644 index 000000000..1e1ec2119 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp @@ -0,0 +1,62 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
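// Editorial sketch (not part of the diff above): a simplified model of the BFE operand
// packing handled in bitfield_extract.cpp — the second source packs the bit offset in bits
// [0,8) and the field width in bits [8,16), and a zero width always produces zero. The
// signed replication and offset/width overflow cases from the translator are not modelled
// here and are simply reported as zero; BfeUnsigned is a hypothetical standalone helper.
#include <cassert>
#include <cstdint>

namespace sketch {
uint32_t BfeUnsigned(uint32_t base, uint32_t packed_offset_count) {
    const uint32_t offset = packed_offset_count & 0xFF;
    const uint32_t count = (packed_offset_count >> 8) & 0xFF;
    if (count == 0 || offset >= 32) {
        return 0;
    }
    const uint64_t shifted = static_cast<uint64_t>(base) >> offset;
    const uint64_t mask = (count >= 32) ? ~0ull : ((1ull << count) - 1);
    return static_cast<uint32_t>(shifted & mask);
}
} // namespace sketch

int main() {
    // Extract 8 bits starting at bit 4 of 0xABCD1234 -> 0x23.
    assert(sketch::BfeUnsigned(0xABCD1234u, (8u << 8) | 4u) == 0x23u);
}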
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void BFI(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& base) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> insert_reg; + BitField<47, 1, u64> cc; + } const bfi{insn}; + + const IR::U32 zero{v.ir.Imm32(0)}; + const IR::U32 offset{v.ir.BitFieldExtract(src_a, zero, v.ir.Imm32(8), false)}; + const IR::U32 unsafe_count{v.ir.BitFieldExtract(src_a, v.ir.Imm32(8), v.ir.Imm32(8), false)}; + const IR::U32 max_size{v.ir.Imm32(32)}; + + // Edge case conditions + const IR::U1 exceed_offset{v.ir.IGreaterThanEqual(offset, max_size, false)}; + const IR::U1 exceed_count{v.ir.IGreaterThan(unsafe_count, max_size, false)}; + + const IR::U32 remaining_size{v.ir.ISub(max_size, offset)}; + const IR::U32 safe_count{v.ir.Select(exceed_count, remaining_size, unsafe_count)}; + + const IR::U32 insert{v.X(bfi.insert_reg)}; + IR::U32 result{v.ir.BitFieldInsert(base, insert, offset, safe_count)}; + + result = IR::U32{v.ir.Select(exceed_offset, base, result)}; + + v.X(bfi.dest_reg, result); + if (bfi.cc != 0) { + v.SetZFlag(v.ir.IEqual(result, zero)); + v.SetSFlag(v.ir.ILessThan(result, zero, true)); + v.ResetCFlag(); + v.ResetOFlag(); + } +} +} // Anonymous namespace + +void TranslatorVisitor::BFI_reg(u64 insn) { + BFI(*this, insn, GetReg20(insn), GetReg39(insn)); +} + +void TranslatorVisitor::BFI_rc(u64 insn) { + BFI(*this, insn, GetReg39(insn), GetCbuf(insn)); +} + +void TranslatorVisitor::BFI_cr(u64 insn) { + BFI(*this, insn, GetCbuf(insn), GetReg39(insn)); +} + +void TranslatorVisitor::BFI_imm(u64 insn) { + BFI(*this, insn, GetImm20(insn), GetReg39(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp new file mode 100644 index 000000000..371c0e0f7 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp @@ -0,0 +1,36 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void Check(u64 insn) { + union { + u64 raw; + BitField<5, 1, u64> cbuf_mode; + BitField<6, 1, u64> lmt; + } const encoding{insn}; + + if (encoding.cbuf_mode != 0) { + throw NotImplementedException("Constant buffer mode"); + } + if (encoding.lmt != 0) { + throw NotImplementedException("LMT"); + } +} +} // Anonymous namespace + +void TranslatorVisitor::BRX(u64 insn) { + Check(insn); +} + +void TranslatorVisitor::JMX(u64 insn) { + Check(insn); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h new file mode 100644 index 000000000..fd73f656c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h @@ -0,0 +1,57 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
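// Editorial sketch (not part of the diff above): a standalone model of the BFI clamping rule
// from bitfield_insert.cpp — the insert width is clamped to the bits remaining after the
// offset, and an offset of 32 or more leaves the base value untouched. Bfi is a hypothetical
// helper operating on plain integers, not an engine API.
#include <cassert>
#include <cstdint>

namespace sketch {
uint32_t Bfi(uint32_t base, uint32_t insert, uint32_t packed_offset_count) {
    const uint32_t offset = packed_offset_count & 0xFF;
    uint32_t count = (packed_offset_count >> 8) & 0xFF;
    if (offset >= 32) {
        return base; // nothing can be inserted past the top bit
    }
    if (count > 32) {
        count = 32 - offset; // clamp to the bits remaining after the offset
    }
    if (count == 0) {
        return base;
    }
    const uint64_t field = (1ull << count) - 1; // count <= 32 here, so this shift is safe
    const uint32_t mask = static_cast<uint32_t>((field << offset) & 0xFFFFFFFFull);
    return (base & ~mask) | ((insert << offset) & mask);
}
} // namespace sketch

int main() {
    // Insert the low 8 bits of 0xAB at bit offset 4 into 0x0000FFFF -> 0x0000FABF.
    assert(sketch::Bfi(0x0000FFFFu, 0xABu, (8u << 8) | 4u) == 0x0000FABFu);
}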
+ +#pragma once + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" + +namespace Shader::Maxwell { + +enum class FpRounding : u64 { + RN, + RM, + RP, + RZ, +}; + +enum class FmzMode : u64 { + None, + FTZ, + FMZ, + INVALIDFMZ3, +}; + +inline IR::FpRounding CastFpRounding(FpRounding fp_rounding) { + switch (fp_rounding) { + case FpRounding::RN: + return IR::FpRounding::RN; + case FpRounding::RM: + return IR::FpRounding::RM; + case FpRounding::RP: + return IR::FpRounding::RP; + case FpRounding::RZ: + return IR::FpRounding::RZ; + } + throw NotImplementedException("Invalid floating-point rounding {}", fp_rounding); +} + +inline IR::FmzMode CastFmzMode(FmzMode fmz_mode) { + switch (fmz_mode) { + case FmzMode::None: + return IR::FmzMode::None; + case FmzMode::FTZ: + return IR::FmzMode::FTZ; + case FmzMode::FMZ: + // FMZ is manually handled in the instruction + return IR::FmzMode::FTZ; + case FmzMode::INVALIDFMZ3: + break; + } + throw NotImplementedException("Invalid FMZ mode {}", fmz_mode); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp new file mode 100644 index 000000000..20458d2ad --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp @@ -0,0 +1,153 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" + +namespace Shader::Maxwell { +IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, + CompareOp compare_op, bool is_signed) { + switch (compare_op) { + case CompareOp::False: + return ir.Imm1(false); + case CompareOp::LessThan: + return ir.ILessThan(operand_1, operand_2, is_signed); + case CompareOp::Equal: + return ir.IEqual(operand_1, operand_2); + case CompareOp::LessThanEqual: + return ir.ILessThanEqual(operand_1, operand_2, is_signed); + case CompareOp::GreaterThan: + return ir.IGreaterThan(operand_1, operand_2, is_signed); + case CompareOp::NotEqual: + return ir.INotEqual(operand_1, operand_2); + case CompareOp::GreaterThanEqual: + return ir.IGreaterThanEqual(operand_1, operand_2, is_signed); + case CompareOp::True: + return ir.Imm1(true); + default: + throw NotImplementedException("Invalid compare op {}", compare_op); + } +} + +IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, + CompareOp compare_op, bool is_signed) { + const IR::U32 zero{ir.Imm32(0)}; + const IR::U32 carry{ir.Select(ir.GetCFlag(), ir.Imm32(1), zero)}; + const IR::U1 z_flag{ir.GetZFlag()}; + const IR::U32 intermediate{ir.IAdd(ir.IAdd(operand_1, ir.BitwiseNot(operand_2)), carry)}; + const IR::U1 flip_logic{is_signed ? 
ir.Imm1(false) + : ir.LogicalXor(ir.ILessThan(operand_1, zero, true), + ir.ILessThan(operand_2, zero, true))}; + switch (compare_op) { + case CompareOp::False: + return ir.Imm1(false); + case CompareOp::LessThan: + return IR::U1{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true), + ir.ILessThan(intermediate, zero, true))}; + case CompareOp::Equal: + return ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag); + case CompareOp::LessThanEqual: { + const IR::U1 base_cmp{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true), + ir.ILessThan(intermediate, zero, true))}; + return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag)); + } + case CompareOp::GreaterThan: { + const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThanEqual(intermediate, zero, true), + ir.IGreaterThan(intermediate, zero, true))}; + const IR::U1 not_z{ir.LogicalNot(z_flag)}; + return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), not_z)); + } + case CompareOp::NotEqual: + return ir.LogicalOr(ir.INotEqual(intermediate, zero), + ir.LogicalAnd(ir.IEqual(intermediate, zero), ir.LogicalNot(z_flag))); + case CompareOp::GreaterThanEqual: { + const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThan(intermediate, zero, true), + ir.IGreaterThanEqual(intermediate, zero, true))}; + return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag)); + } + case CompareOp::True: + return ir.Imm1(true); + default: + throw NotImplementedException("Invalid compare op {}", compare_op); + } +} + +IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, const IR::U1& predicate_2, + BooleanOp bop) { + switch (bop) { + case BooleanOp::AND: + return ir.LogicalAnd(predicate_1, predicate_2); + case BooleanOp::OR: + return ir.LogicalOr(predicate_1, predicate_2); + case BooleanOp::XOR: + return ir.LogicalXor(predicate_1, predicate_2); + default: + throw NotImplementedException("Invalid bop {}", bop); + } +} + +IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op) { + switch (op) { + case PredicateOp::False: + return ir.Imm1(false); + case PredicateOp::True: + return ir.Imm1(true); + case PredicateOp::Zero: + return ir.IEqual(result, ir.Imm32(0)); + case PredicateOp::NonZero: + return ir.INotEqual(result, ir.Imm32(0)); + default: + throw NotImplementedException("Invalid Predicate operation {}", op); + } +} + +bool IsCompareOpOrdered(FPCompareOp op) { + switch (op) { + case FPCompareOp::LTU: + case FPCompareOp::EQU: + case FPCompareOp::LEU: + case FPCompareOp::GTU: + case FPCompareOp::NEU: + case FPCompareOp::GEU: + return false; + default: + return true; + } +} + +IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1, + const IR::F16F32F64& operand_2, FPCompareOp compare_op, + IR::FpControl control) { + const bool ordered{IsCompareOpOrdered(compare_op)}; + switch (compare_op) { + case FPCompareOp::F: + return ir.Imm1(false); + case FPCompareOp::LT: + case FPCompareOp::LTU: + return ir.FPLessThan(operand_1, operand_2, control, ordered); + case FPCompareOp::EQ: + case FPCompareOp::EQU: + return ir.FPEqual(operand_1, operand_2, control, ordered); + case FPCompareOp::LE: + case FPCompareOp::LEU: + return ir.FPLessThanEqual(operand_1, operand_2, control, ordered); + case FPCompareOp::GT: + case FPCompareOp::GTU: + return ir.FPGreaterThan(operand_1, operand_2, control, ordered); + case FPCompareOp::NE: + case FPCompareOp::NEU: + return ir.FPNotEqual(operand_1, operand_2, control, ordered); + case 
FPCompareOp::GE: + case FPCompareOp::GEU: + return ir.FPGreaterThanEqual(operand_1, operand_2, control, ordered); + case FPCompareOp::NUM: + return ir.FPOrdered(operand_1, operand_2); + case FPCompareOp::Nan: + return ir.FPUnordered(operand_1, operand_2); + case FPCompareOp::T: + return ir.Imm1(true); + default: + throw NotImplementedException("Invalid FP compare op {}", compare_op); + } +} +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h new file mode 100644 index 000000000..214d0af3c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h @@ -0,0 +1,28 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +[[nodiscard]] IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, + const IR::U32& operand_2, CompareOp compare_op, bool is_signed); + +[[nodiscard]] IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, + const IR::U32& operand_2, CompareOp compare_op, + bool is_signed); + +[[nodiscard]] IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, + const IR::U1& predicate_2, BooleanOp bop); + +[[nodiscard]] IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op); + +[[nodiscard]] bool IsCompareOpOrdered(FPCompareOp op); + +[[nodiscard]] IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1, + const IR::F16F32F64& operand_2, FPCompareOp compare_op, + IR::FpControl control = {}); +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp new file mode 100644 index 000000000..420f2fb94 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp @@ -0,0 +1,66 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { + +void TranslatorVisitor::CSET(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 5, IR::FlowTest> cc_test; + BitField<39, 3, IR::Pred> bop_pred; + BitField<42, 1, u64> neg_bop_pred; + BitField<44, 1, u64> bf; + BitField<45, 2, BooleanOp> bop; + BitField<47, 1, u64> cc; + } const cset{insn}; + + const IR::U32 one_mask{ir.Imm32(-1)}; + const IR::U32 fp_one{ir.Imm32(0x3f800000)}; + const IR::U32 zero{ir.Imm32(0)}; + const IR::U32 pass_result{cset.bf == 0 ? 
one_mask : fp_one}; + const IR::U1 cc_test_result{ir.GetFlowTestResult(cset.cc_test)}; + const IR::U1 bop_pred{ir.GetPred(cset.bop_pred, cset.neg_bop_pred != 0)}; + const IR::U1 pred_result{PredicateCombine(ir, cc_test_result, bop_pred, cset.bop)}; + const IR::U32 result{ir.Select(pred_result, pass_result, zero)}; + X(cset.dest_reg, result); + if (cset.cc != 0) { + const IR::U1 is_zero{ir.IEqual(result, zero)}; + SetZFlag(is_zero); + if (cset.bf != 0) { + ResetSFlag(); + } else { + SetSFlag(ir.LogicalNot(is_zero)); + } + ResetOFlag(); + ResetCFlag(); + } +} + +void TranslatorVisitor::CSETP(u64 insn) { + union { + u64 raw; + BitField<0, 3, IR::Pred> dest_pred_b; + BitField<3, 3, IR::Pred> dest_pred_a; + BitField<8, 5, IR::FlowTest> cc_test; + BitField<39, 3, IR::Pred> bop_pred; + BitField<42, 1, u64> neg_bop_pred; + BitField<45, 2, BooleanOp> bop; + } const csetp{insn}; + + const BooleanOp bop{csetp.bop}; + const IR::U1 bop_pred{ir.GetPred(csetp.bop_pred, csetp.neg_bop_pred != 0)}; + const IR::U1 cc_test_result{ir.GetFlowTestResult(csetp.cc_test)}; + const IR::U1 result_a{PredicateCombine(ir, cc_test_result, bop_pred, bop)}; + const IR::U1 result_b{PredicateCombine(ir, ir.LogicalNot(cc_test_result), bop_pred, bop)}; + ir.SetPred(csetp.dest_pred_a, result_a); + ir.SetPred(csetp.dest_pred_b, result_b); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp new file mode 100644 index 000000000..5a1b3a8fc --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp @@ -0,0 +1,55 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
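// Editorial sketch (not part of the diff above): the result encoding CSET (and the other
// *SET instructions in this series) selects — with the BF bit clear the "true" value is an
// all-ones integer mask, with BF set it is the bit pattern of 1.0f, and "false" is always
// zero. SetResult is a hypothetical standalone helper, not an engine API.
#include <cassert>
#include <cstdint>

namespace sketch {
constexpr uint32_t SetResult(bool passed, bool boolean_float) {
    if (!passed) {
        return 0u;
    }
    return boolean_float ? 0x3f800000u /* 1.0f */ : 0xFFFFFFFFu /* all-ones mask */;
}
} // namespace sketch

int main() {
    static_assert(sketch::SetResult(true, false) == 0xFFFFFFFFu);
    static_assert(sketch::SetResult(true, true) == 0x3f800000u);
    assert(sketch::SetResult(false, true) == 0u);
}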
+ +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +void DADD(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 2, FpRounding> fp_rounding; + BitField<45, 1, u64> neg_b; + BitField<46, 1, u64> abs_a; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> neg_a; + BitField<49, 1, u64> abs_b; + } const dadd{insn}; + if (dadd.cc != 0) { + throw NotImplementedException("DADD CC"); + } + + const IR::F64 src_a{v.D(dadd.src_a_reg)}; + const IR::F64 op_a{v.ir.FPAbsNeg(src_a, dadd.abs_a != 0, dadd.neg_a != 0)}; + const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dadd.abs_b != 0, dadd.neg_b != 0)}; + + const IR::FpControl control{ + .no_contraction = true, + .rounding = CastFpRounding(dadd.fp_rounding), + .fmz_mode = IR::FmzMode::None, + }; + + v.D(dadd.dest_reg, v.ir.FPAdd(op_a, op_b, control)); +} +} // Anonymous namespace + +void TranslatorVisitor::DADD_reg(u64 insn) { + DADD(*this, insn, GetDoubleReg20(insn)); +} + +void TranslatorVisitor::DADD_cbuf(u64 insn) { + DADD(*this, insn, GetDoubleCbuf(insn)); +} + +void TranslatorVisitor::DADD_imm(u64 insn) { + DADD(*this, insn, GetDoubleImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp new file mode 100644 index 000000000..1173192e4 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp @@ -0,0 +1,72 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void DSET(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<43, 1, u64> negate_a; + BitField<44, 1, u64> abs_b; + BitField<45, 2, BooleanOp> bop; + BitField<47, 1, u64> cc; + BitField<48, 4, FPCompareOp> compare_op; + BitField<52, 1, u64> bf; + BitField<53, 1, u64> negate_b; + BitField<54, 1, u64> abs_a; + } const dset{insn}; + + const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dset.src_a_reg), dset.abs_a != 0, dset.negate_a != 0)}; + const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dset.abs_b != 0, dset.negate_b != 0)}; + + IR::U1 pred{v.ir.GetPred(dset.pred)}; + if (dset.neg_pred != 0) { + pred = v.ir.LogicalNot(pred); + } + const IR::U1 cmp_result{FloatingPointCompare(v.ir, op_a, op_b, dset.compare_op)}; + const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, dset.bop)}; + + const IR::U32 one_mask{v.ir.Imm32(-1)}; + const IR::U32 fp_one{v.ir.Imm32(0x3f800000)}; + const IR::U32 zero{v.ir.Imm32(0)}; + const IR::U32 pass_result{dset.bf == 0 ? 
one_mask : fp_one}; + const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)}; + + v.X(dset.dest_reg, result); + if (dset.cc != 0) { + const IR::U1 is_zero{v.ir.IEqual(result, zero)}; + v.SetZFlag(is_zero); + if (dset.bf != 0) { + v.ResetSFlag(); + } else { + v.SetSFlag(v.ir.LogicalNot(is_zero)); + } + v.ResetCFlag(); + v.ResetOFlag(); + } +} +} // Anonymous namespace + +void TranslatorVisitor::DSET_reg(u64 insn) { + DSET(*this, insn, GetDoubleReg20(insn)); +} + +void TranslatorVisitor::DSET_cbuf(u64 insn) { + DSET(*this, insn, GetDoubleCbuf(insn)); +} + +void TranslatorVisitor::DSET_imm(u64 insn) { + DSET(*this, insn, GetDoubleImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp new file mode 100644 index 000000000..f66097014 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp @@ -0,0 +1,58 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +void DFMA(TranslatorVisitor& v, u64 insn, const IR::F64& src_b, const IR::F64& src_c) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<50, 2, FpRounding> fp_rounding; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> neg_b; + BitField<49, 1, u64> neg_c; + } const dfma{insn}; + + if (dfma.cc != 0) { + throw NotImplementedException("DFMA CC"); + } + + const IR::F64 src_a{v.D(dfma.src_a_reg)}; + const IR::F64 op_b{v.ir.FPAbsNeg(src_b, false, dfma.neg_b != 0)}; + const IR::F64 op_c{v.ir.FPAbsNeg(src_c, false, dfma.neg_c != 0)}; + + const IR::FpControl control{ + .no_contraction = true, + .rounding = CastFpRounding(dfma.fp_rounding), + .fmz_mode = IR::FmzMode::None, + }; + + v.D(dfma.dest_reg, v.ir.FPFma(src_a, op_b, op_c, control)); +} +} // Anonymous namespace + +void TranslatorVisitor::DFMA_reg(u64 insn) { + DFMA(*this, insn, GetDoubleReg20(insn), GetDoubleReg39(insn)); +} + +void TranslatorVisitor::DFMA_cr(u64 insn) { + DFMA(*this, insn, GetDoubleCbuf(insn), GetDoubleReg39(insn)); +} + +void TranslatorVisitor::DFMA_rc(u64 insn) { + DFMA(*this, insn, GetDoubleReg39(insn), GetDoubleCbuf(insn)); +} + +void TranslatorVisitor::DFMA_imm(u64 insn) { + DFMA(*this, insn, GetDoubleImm20(insn), GetDoubleReg39(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp new file mode 100644 index 000000000..6b551847c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp @@ -0,0 +1,55 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
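// Editorial sketch (not part of the diff above): the operand handling DFMA applies, shown
// with plain doubles — only sources B and C carry negate bits in this encoding, and the
// operation is a single-rounding fused multiply-add. The IR rounding mode and contraction
// controls are not modelled; Dfma is a hypothetical helper.
#include <cassert>
#include <cmath>

namespace sketch {
double Dfma(double a, double b, double c, bool neg_b, bool neg_c) {
    const double op_b = neg_b ? -b : b;
    const double op_c = neg_c ? -c : c;
    return std::fma(a, op_b, op_c); // fused: one rounding step, like the hardware FMA
}
} // namespace sketch

int main() {
    assert(sketch::Dfma(2.0, 3.0, 1.0, false, false) == 7.0);
    assert(sketch::Dfma(2.0, 3.0, 1.0, true, false) == -5.0);
}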
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void DMNMX(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<45, 1, u64> negate_b; + BitField<46, 1, u64> abs_a; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> negate_a; + BitField<49, 1, u64> abs_b; + } const dmnmx{insn}; + + if (dmnmx.cc != 0) { + throw NotImplementedException("DMNMX CC"); + } + + const IR::U1 pred{v.ir.GetPred(dmnmx.pred)}; + const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dmnmx.src_a_reg), dmnmx.abs_a != 0, dmnmx.negate_a != 0)}; + const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dmnmx.abs_b != 0, dmnmx.negate_b != 0)}; + + IR::F64 max{v.ir.FPMax(op_a, op_b)}; + IR::F64 min{v.ir.FPMin(op_a, op_b)}; + + if (dmnmx.neg_pred != 0) { + std::swap(min, max); + } + v.D(dmnmx.dest_reg, IR::F64{v.ir.Select(pred, min, max)}); +} +} // Anonymous namespace + +void TranslatorVisitor::DMNMX_reg(u64 insn) { + DMNMX(*this, insn, GetDoubleReg20(insn)); +} + +void TranslatorVisitor::DMNMX_cbuf(u64 insn) { + DMNMX(*this, insn, GetDoubleCbuf(insn)); +} + +void TranslatorVisitor::DMNMX_imm(u64 insn) { + DMNMX(*this, insn, GetDoubleImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp new file mode 100644 index 000000000..c0159fb65 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp @@ -0,0 +1,50 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
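// Editorial sketch (not part of the diff above): the selection DMNMX performs — the
// predicate picks between the minimum and the maximum of the two operands, and a negated
// predicate swaps which one is returned. NaN semantics of the real FPMin/FPMax are not
// modelled here; Dmnmx is a hypothetical helper over plain doubles.
#include <algorithm>
#include <cassert>
#include <utility>

namespace sketch {
double Dmnmx(double op_a, double op_b, bool pred, bool neg_pred) {
    double max = std::max(op_a, op_b);
    double min = std::min(op_a, op_b);
    if (neg_pred) {
        std::swap(min, max);
    }
    return pred ? min : max;
}
} // namespace sketch

int main() {
    assert(sketch::Dmnmx(1.0, 2.0, true, false) == 1.0);  // predicate true -> minimum
    assert(sketch::Dmnmx(1.0, 2.0, false, false) == 2.0); // predicate false -> maximum
}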
+ +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +void DMUL(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 2, FpRounding> fp_rounding; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> neg; + } const dmul{insn}; + + if (dmul.cc != 0) { + throw NotImplementedException("DMUL CC"); + } + + const IR::F64 src_a{v.ir.FPAbsNeg(v.D(dmul.src_a_reg), false, dmul.neg != 0)}; + const IR::FpControl control{ + .no_contraction = true, + .rounding = CastFpRounding(dmul.fp_rounding), + .fmz_mode = IR::FmzMode::None, + }; + + v.D(dmul.dest_reg, v.ir.FPMul(src_a, src_b, control)); +} +} // Anonymous namespace + +void TranslatorVisitor::DMUL_reg(u64 insn) { + DMUL(*this, insn, GetDoubleReg20(insn)); +} + +void TranslatorVisitor::DMUL_cbuf(u64 insn) { + DMUL(*this, insn, GetDoubleCbuf(insn)); +} + +void TranslatorVisitor::DMUL_imm(u64 insn) { + DMUL(*this, insn, GetDoubleImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp new file mode 100644 index 000000000..b8e74ee44 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp @@ -0,0 +1,54 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void DSETP(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { + union { + u64 insn; + BitField<0, 3, IR::Pred> dest_pred_b; + BitField<3, 3, IR::Pred> dest_pred_a; + BitField<6, 1, u64> negate_b; + BitField<7, 1, u64> abs_a; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 3, IR::Pred> bop_pred; + BitField<42, 1, u64> neg_bop_pred; + BitField<43, 1, u64> negate_a; + BitField<44, 1, u64> abs_b; + BitField<45, 2, BooleanOp> bop; + BitField<48, 4, FPCompareOp> compare_op; + } const dsetp{insn}; + + const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dsetp.src_a_reg), dsetp.abs_a != 0, dsetp.negate_a != 0)}; + const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dsetp.abs_b != 0, dsetp.negate_b != 0)}; + + const BooleanOp bop{dsetp.bop}; + const FPCompareOp compare_op{dsetp.compare_op}; + const IR::U1 comparison{FloatingPointCompare(v.ir, op_a, op_b, compare_op)}; + const IR::U1 bop_pred{v.ir.GetPred(dsetp.bop_pred, dsetp.neg_bop_pred != 0)}; + const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)}; + const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)}; + v.ir.SetPred(dsetp.dest_pred_a, result_a); + v.ir.SetPred(dsetp.dest_pred_b, result_b); +} +} // Anonymous namespace + +void TranslatorVisitor::DSETP_reg(u64 insn) { + DSETP(*this, insn, GetDoubleReg20(insn)); +} + +void TranslatorVisitor::DSETP_cbuf(u64 insn) { + DSETP(*this, insn, GetDoubleCbuf(insn)); +} + +void TranslatorVisitor::DSETP_imm(u64 insn) { + DSETP(*this, insn, GetDoubleImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git 
a/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp new file mode 100644 index 000000000..c2443c886 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp @@ -0,0 +1,43 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void ExitFragment(TranslatorVisitor& v) { + const ProgramHeader sph{v.env.SPH()}; + IR::Reg src_reg{IR::Reg::R0}; + for (u32 render_target = 0; render_target < 8; ++render_target) { + const std::array<bool, 4> mask{sph.ps.EnabledOutputComponents(render_target)}; + for (u32 component = 0; component < 4; ++component) { + if (!mask[component]) { + continue; + } + v.ir.SetFragColor(render_target, component, v.F(src_reg)); + ++src_reg; + } + } + if (sph.ps.omap.sample_mask != 0) { + v.ir.SetSampleMask(v.X(src_reg)); + } + if (sph.ps.omap.depth != 0) { + v.ir.SetFragDepth(v.F(src_reg + 1)); + } +} +} // Anonymous namespace + +void TranslatorVisitor::EXIT() { + switch (env.ShaderStage()) { + case Stage::Fragment: + ExitFragment(*this); + break; + default: + break; + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp new file mode 100644 index 000000000..f0cb25d61 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp @@ -0,0 +1,47 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void FLO(TranslatorVisitor& v, u64 insn, IR::U32 src) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<40, 1, u64> tilde; + BitField<41, 1, u64> shift; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> is_signed; + } const flo{insn}; + + if (flo.cc != 0) { + throw NotImplementedException("CC"); + } + if (flo.tilde != 0) { + src = v.ir.BitwiseNot(src); + } + IR::U32 result{flo.is_signed != 0 ? v.ir.FindSMsb(src) : v.ir.FindUMsb(src)}; + if (flo.shift != 0) { + const IR::U1 not_found{v.ir.IEqual(result, v.ir.Imm32(-1))}; + result = IR::U32{v.ir.Select(not_found, result, v.ir.BitwiseXor(result, v.ir.Imm32(31)))}; + } + v.X(flo.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::FLO_reg(u64 insn) { + FLO(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::FLO_cbuf(u64 insn) { + FLO(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::FLO_imm(u64 insn) { + FLO(*this, insn, GetImm20(insn)); +} +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp new file mode 100644 index 000000000..b8c89810c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp @@ -0,0 +1,82 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
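// Editorial sketch (not part of the diff above): the FLO result fixup shown standalone. The
// base operation yields the bit index of the most significant set bit (or -1 when none is
// set); with the SHIFT modifier that index is XORed with 31 to count from the top, while the
// -1 "not found" marker is passed through unchanged. FindMsb/Flo are hypothetical helpers.
#include <cassert>
#include <cstdint>

namespace sketch {
int32_t FindMsb(uint32_t value) {
    for (int32_t bit = 31; bit >= 0; --bit) {
        if ((value >> bit) & 1u) {
            return bit;
        }
    }
    return -1; // no bit set
}

int32_t Flo(uint32_t value, bool shift) {
    const int32_t msb = FindMsb(value);
    if (!shift || msb < 0) {
        return msb;
    }
    return msb ^ 31; // equivalent to the number of leading zero bits
}
} // namespace sketch

int main() {
    assert(sketch::Flo(0x00000001u, false) == 0);
    assert(sketch::Flo(0x00000001u, true) == 31); // 31 leading zeros
    assert(sketch::Flo(0u, true) == -1);          // not-found marker passes through
}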
+ +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRounding fp_rounding, + const IR::F32& src_b, bool abs_a, bool neg_a, bool abs_b, bool neg_b) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + } const fadd{insn}; + + if (cc) { + throw NotImplementedException("FADD CC"); + } + const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fadd.src_a), abs_a, neg_a)}; + const IR::F32 op_b{v.ir.FPAbsNeg(src_b, abs_b, neg_b)}; + IR::FpControl control{ + .no_contraction = true, + .rounding = CastFpRounding(fp_rounding), + .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None), + }; + IR::F32 value{v.ir.FPAdd(op_a, op_b, control)}; + if (sat) { + value = v.ir.FPSaturate(value); + } + v.F(fadd.dest_reg, value); +} + +void FADD(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { + union { + u64 raw; + BitField<39, 2, FpRounding> fp_rounding; + BitField<44, 1, u64> ftz; + BitField<45, 1, u64> neg_b; + BitField<46, 1, u64> abs_a; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> neg_a; + BitField<49, 1, u64> abs_b; + BitField<50, 1, u64> sat; + } const fadd{insn}; + + FADD(v, insn, fadd.sat != 0, fadd.cc != 0, fadd.ftz != 0, fadd.fp_rounding, src_b, + fadd.abs_a != 0, fadd.neg_a != 0, fadd.abs_b != 0, fadd.neg_b != 0); +} +} // Anonymous namespace + +void TranslatorVisitor::FADD_reg(u64 insn) { + FADD(*this, insn, GetFloatReg20(insn)); +} + +void TranslatorVisitor::FADD_cbuf(u64 insn) { + FADD(*this, insn, GetFloatCbuf(insn)); +} + +void TranslatorVisitor::FADD_imm(u64 insn) { + FADD(*this, insn, GetFloatImm20(insn)); +} + +void TranslatorVisitor::FADD32I(u64 insn) { + union { + u64 raw; + BitField<55, 1, u64> ftz; + BitField<56, 1, u64> neg_a; + BitField<54, 1, u64> abs_a; + BitField<52, 1, u64> cc; + BitField<53, 1, u64> neg_b; + BitField<57, 1, u64> abs_b; + } const fadd32i{insn}; + + FADD(*this, insn, false, fadd32i.cc != 0, fadd32i.ftz != 0, FpRounding::RN, GetFloatImm32(insn), + fadd32i.abs_a != 0, fadd32i.neg_a != 0, fadd32i.abs_b != 0, fadd32i.neg_b != 0); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp new file mode 100644 index 000000000..7127ebf54 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp @@ -0,0 +1,55 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void FCMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::F32& operand) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<47, 1, u64> ftz; + BitField<48, 4, FPCompareOp> compare_op; + } const fcmp{insn}; + + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const IR::FpControl control{.fmz_mode = (fcmp.ftz != 0 ? 
IR::FmzMode::FTZ : IR::FmzMode::None)}; + const IR::U1 cmp_result{FloatingPointCompare(v.ir, operand, zero, fcmp.compare_op, control)}; + const IR::U32 src_reg{v.X(fcmp.src_reg)}; + const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)}; + + v.X(fcmp.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::FCMP_reg(u64 insn) { + FCMP(*this, insn, GetReg20(insn), GetFloatReg39(insn)); +} + +void TranslatorVisitor::FCMP_rc(u64 insn) { + FCMP(*this, insn, GetReg39(insn), GetFloatCbuf(insn)); +} + +void TranslatorVisitor::FCMP_cr(u64 insn) { + FCMP(*this, insn, GetCbuf(insn), GetFloatReg39(insn)); +} + +void TranslatorVisitor::FCMP_imm(u64 insn) { + union { + u64 raw; + BitField<20, 19, u64> value; + BitField<56, 1, u64> is_negative; + } const fcmp{insn}; + const u32 sign_bit{fcmp.is_negative != 0 ? (1U << 31) : 0}; + const u32 value{static_cast<u32>(fcmp.value) << 12}; + + FCMP(*this, insn, ir.Imm32(value | sign_bit), GetFloatReg39(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp new file mode 100644 index 000000000..eece4f28f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp @@ -0,0 +1,78 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void FSET(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<43, 1, u64> negate_a; + BitField<44, 1, u64> abs_b; + BitField<45, 2, BooleanOp> bop; + BitField<47, 1, u64> cc; + BitField<48, 4, FPCompareOp> compare_op; + BitField<52, 1, u64> bf; + BitField<53, 1, u64> negate_b; + BitField<54, 1, u64> abs_a; + BitField<55, 1, u64> ftz; + } const fset{insn}; + + const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fset.src_a_reg), fset.abs_a != 0, fset.negate_a != 0)}; + const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fset.abs_b != 0, fset.negate_b != 0); + const IR::FpControl control{ + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (fset.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), + }; + + IR::U1 pred{v.ir.GetPred(fset.pred)}; + if (fset.neg_pred != 0) { + pred = v.ir.LogicalNot(pred); + } + const IR::U1 cmp_result{FloatingPointCompare(v.ir, op_a, op_b, fset.compare_op, control)}; + const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, fset.bop)}; + + const IR::U32 one_mask{v.ir.Imm32(-1)}; + const IR::U32 fp_one{v.ir.Imm32(0x3f800000)}; + const IR::U32 zero{v.ir.Imm32(0)}; + const IR::U32 pass_result{fset.bf == 0 ? 
one_mask : fp_one}; + const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)}; + + v.X(fset.dest_reg, result); + if (fset.cc != 0) { + const IR::U1 is_zero{v.ir.IEqual(result, zero)}; + v.SetZFlag(is_zero); + if (fset.bf != 0) { + v.ResetSFlag(); + } else { + v.SetSFlag(v.ir.LogicalNot(is_zero)); + } + v.ResetCFlag(); + v.ResetOFlag(); + } +} +} // Anonymous namespace + +void TranslatorVisitor::FSET_reg(u64 insn) { + FSET(*this, insn, GetFloatReg20(insn)); +} + +void TranslatorVisitor::FSET_cbuf(u64 insn) { + FSET(*this, insn, GetFloatCbuf(insn)); +} + +void TranslatorVisitor::FSET_imm(u64 insn) { + FSET(*this, insn, GetFloatImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp new file mode 100644 index 000000000..02ab023c1 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp @@ -0,0 +1,214 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" + +namespace Shader::Maxwell { +namespace { +enum class FloatFormat : u64 { + F16 = 1, + F32 = 2, + F64 = 3, +}; + +enum class RoundingOp : u64 { + None = 0, + Pass = 3, + Round = 8, + Floor = 9, + Ceil = 10, + Trunc = 11, +}; + +[[nodiscard]] u32 WidthSize(FloatFormat width) { + switch (width) { + case FloatFormat::F16: + return 16; + case FloatFormat::F32: + return 32; + case FloatFormat::F64: + return 64; + default: + throw NotImplementedException("Invalid width {}", width); + } +} + +void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<44, 1, u64> ftz; + BitField<45, 1, u64> neg; + BitField<47, 1, u64> cc; + BitField<50, 1, u64> sat; + BitField<39, 4, u64> rounding_op; + BitField<39, 2, FpRounding> rounding; + BitField<10, 2, FloatFormat> src_size; + BitField<8, 2, FloatFormat> dst_size; + + [[nodiscard]] RoundingOp RoundingOperation() const { + constexpr u64 rounding_mask = 0x0B; + return static_cast<RoundingOp>(rounding_op.Value() & rounding_mask); + } + } const f2f{insn}; + + if (f2f.cc != 0) { + throw NotImplementedException("F2F CC"); + } + + IR::F16F32F64 input{v.ir.FPAbsNeg(src_a, abs, f2f.neg != 0)}; + + const bool any_fp64{f2f.src_size == FloatFormat::F64 || f2f.dst_size == FloatFormat::F64}; + IR::FpControl fp_control{ + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (f2f.ftz != 0 && !any_fp64 ? 
IR::FmzMode::FTZ : IR::FmzMode::None), + }; + if (f2f.src_size != f2f.dst_size) { + fp_control.rounding = CastFpRounding(f2f.rounding); + input = v.ir.FPConvert(WidthSize(f2f.dst_size), input, fp_control); + } else { + switch (f2f.RoundingOperation()) { + case RoundingOp::None: + case RoundingOp::Pass: + // Make sure NANs are handled properly + switch (f2f.src_size) { + case FloatFormat::F16: + input = v.ir.FPAdd(input, v.ir.FPConvert(16, v.ir.Imm32(0.0f)), fp_control); + break; + case FloatFormat::F32: + input = v.ir.FPAdd(input, v.ir.Imm32(0.0f), fp_control); + break; + case FloatFormat::F64: + input = v.ir.FPAdd(input, v.ir.Imm64(0.0), fp_control); + break; + } + break; + case RoundingOp::Round: + input = v.ir.FPRoundEven(input, fp_control); + break; + case RoundingOp::Floor: + input = v.ir.FPFloor(input, fp_control); + break; + case RoundingOp::Ceil: + input = v.ir.FPCeil(input, fp_control); + break; + case RoundingOp::Trunc: + input = v.ir.FPTrunc(input, fp_control); + break; + default: + throw NotImplementedException("Unimplemented rounding mode {}", f2f.rounding.Value()); + } + } + if (f2f.sat != 0 && !any_fp64) { + input = v.ir.FPSaturate(input); + } + + switch (f2f.dst_size) { + case FloatFormat::F16: { + const IR::F16 imm{v.ir.FPConvert(16, v.ir.Imm32(0.0f))}; + v.X(f2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(input, imm))); + break; + } + case FloatFormat::F32: + v.F(f2f.dest_reg, input); + break; + case FloatFormat::F64: + v.D(f2f.dest_reg, input); + break; + default: + throw NotImplementedException("Invalid dest format {}", f2f.dst_size.Value()); + } +} +} // Anonymous namespace + +void TranslatorVisitor::F2F_reg(u64 insn) { + union { + u64 insn; + BitField<49, 1, u64> abs; + BitField<10, 2, FloatFormat> src_size; + BitField<41, 1, u64> selector; + } const f2f{insn}; + + IR::F16F32F64 src_a; + switch (f2f.src_size) { + case FloatFormat::F16: { + auto [lhs_a, rhs_a]{Extract(ir, GetReg20(insn), Swizzle::H1_H0)}; + src_a = f2f.selector != 0 ? rhs_a : lhs_a; + break; + } + case FloatFormat::F32: + src_a = GetFloatReg20(insn); + break; + case FloatFormat::F64: + src_a = GetDoubleReg20(insn); + break; + default: + throw NotImplementedException("Invalid dest format {}", f2f.src_size.Value()); + } + F2F(*this, insn, src_a, f2f.abs != 0); +} + +void TranslatorVisitor::F2F_cbuf(u64 insn) { + union { + u64 insn; + BitField<49, 1, u64> abs; + BitField<10, 2, FloatFormat> src_size; + BitField<41, 1, u64> selector; + } const f2f{insn}; + + IR::F16F32F64 src_a; + switch (f2f.src_size) { + case FloatFormat::F16: { + auto [lhs_a, rhs_a]{Extract(ir, GetCbuf(insn), Swizzle::H1_H0)}; + src_a = f2f.selector != 0 ? rhs_a : lhs_a; + break; + } + case FloatFormat::F32: + src_a = GetFloatCbuf(insn); + break; + case FloatFormat::F64: + src_a = GetDoubleCbuf(insn); + break; + default: + throw NotImplementedException("Invalid dest format {}", f2f.src_size.Value()); + } + F2F(*this, insn, src_a, f2f.abs != 0); +} + +void TranslatorVisitor::F2F_imm([[maybe_unused]] u64 insn) { + union { + u64 insn; + BitField<49, 1, u64> abs; + BitField<10, 2, FloatFormat> src_size; + BitField<41, 1, u64> selector; + BitField<20, 19, u64> imm; + BitField<56, 1, u64> imm_neg; + } const f2f{insn}; + + IR::F16F32F64 src_a; + switch (f2f.src_size) { + case FloatFormat::F16: { + const u32 imm{static_cast<u32>(f2f.imm & 0x0000ffff)}; + const IR::Value vector{ir.UnpackFloat2x16(ir.Imm32(imm | (imm << 16)))}; + src_a = IR::F16{ir.CompositeExtract(vector, f2f.selector != 0 ? 
0 : 1)}; + if (f2f.imm_neg != 0) { + throw NotImplementedException("Neg bit on F16"); + } + break; + } + case FloatFormat::F32: + src_a = GetFloatImm20(insn); + break; + case FloatFormat::F64: + src_a = GetDoubleImm20(insn); + break; + default: + throw NotImplementedException("Invalid dest format {}", f2f.src_size.Value()); + } + F2F(*this, insn, src_a, f2f.abs != 0); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp new file mode 100644 index 000000000..92b1ce015 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp @@ -0,0 +1,253 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <limits> + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class DestFormat : u64 { + Invalid, + I16, + I32, + I64, +}; +enum class SrcFormat : u64 { + Invalid, + F16, + F32, + F64, +}; +enum class Rounding : u64 { + Round, + Floor, + Ceil, + Trunc, +}; + +union F2I { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 2, DestFormat> dest_format; + BitField<10, 2, SrcFormat> src_format; + BitField<12, 1, u64> is_signed; + BitField<39, 2, Rounding> rounding; + BitField<41, 1, u64> half; + BitField<44, 1, u64> ftz; + BitField<45, 1, u64> abs; + BitField<47, 1, u64> cc; + BitField<49, 1, u64> neg; +}; + +size_t BitSize(DestFormat dest_format) { + switch (dest_format) { + case DestFormat::I16: + return 16; + case DestFormat::I32: + return 32; + case DestFormat::I64: + return 64; + default: + throw NotImplementedException("Invalid destination format {}", dest_format); + } +} + +std::pair<f64, f64> ClampBounds(DestFormat format, bool is_signed) { + if (is_signed) { + switch (format) { + case DestFormat::I16: + return {static_cast<f64>(std::numeric_limits<s16>::max()), + static_cast<f64>(std::numeric_limits<s16>::min())}; + case DestFormat::I32: + return {static_cast<f64>(std::numeric_limits<s32>::max()), + static_cast<f64>(std::numeric_limits<s32>::min())}; + case DestFormat::I64: + return {static_cast<f64>(std::numeric_limits<s64>::max()), + static_cast<f64>(std::numeric_limits<s64>::min())}; + default: + break; + } + } else { + switch (format) { + case DestFormat::I16: + return {static_cast<f64>(std::numeric_limits<u16>::max()), + static_cast<f64>(std::numeric_limits<u16>::min())}; + case DestFormat::I32: + return {static_cast<f64>(std::numeric_limits<u32>::max()), + static_cast<f64>(std::numeric_limits<u32>::min())}; + case DestFormat::I64: + return {static_cast<f64>(std::numeric_limits<u64>::max()), + static_cast<f64>(std::numeric_limits<u64>::min())}; + default: + break; + } + } + throw NotImplementedException("Invalid destination format {}", format); +} + +IR::F64 UnpackCbuf(TranslatorVisitor& v, u64 insn) { + union { + u64 raw; + BitField<20, 14, s64> offset; + BitField<34, 5, u64> binding; + } const cbuf{insn}; + if (cbuf.binding >= 18) { + throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding); + } + if (cbuf.offset >= 0x4'000 || cbuf.offset < 0) { + throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset * 
4); + } + if (cbuf.offset % 2 != 0) { + throw NotImplementedException("Unaligned F64 constant buffer offset {}", cbuf.offset * 4); + } + const IR::U32 binding{v.ir.Imm32(static_cast<u32>(cbuf.binding))}; + const IR::U32 byte_offset{v.ir.Imm32(static_cast<u32>(cbuf.offset) * 4 + 4)}; + const IR::U32 cbuf_data{v.ir.GetCbuf(binding, byte_offset)}; + const IR::Value vector{v.ir.CompositeConstruct(v.ir.Imm32(0U), cbuf_data)}; + return v.ir.PackDouble2x32(vector); +} + +void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { + // F2I is used to convert from a floating point value to an integer + const F2I f2i{insn}; + + const bool denorm_cares{f2i.src_format != SrcFormat::F16 && f2i.src_format != SrcFormat::F64 && + f2i.dest_format != DestFormat::I64}; + IR::FmzMode fmz_mode{IR::FmzMode::DontCare}; + if (denorm_cares) { + fmz_mode = f2i.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None; + } + const IR::FpControl fp_control{ + .no_contraction = true, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = fmz_mode, + }; + const IR::F16F32F64 op_a{v.ir.FPAbsNeg(src_a, f2i.abs != 0, f2i.neg != 0)}; + const IR::F16F32F64 rounded_value{[&] { + switch (f2i.rounding) { + case Rounding::Round: + return v.ir.FPRoundEven(op_a, fp_control); + case Rounding::Floor: + return v.ir.FPFloor(op_a, fp_control); + case Rounding::Ceil: + return v.ir.FPCeil(op_a, fp_control); + case Rounding::Trunc: + return v.ir.FPTrunc(op_a, fp_control); + default: + throw NotImplementedException("Invalid F2I rounding {}", f2i.rounding.Value()); + } + }()}; + const bool is_signed{f2i.is_signed != 0}; + const auto [max_bound, min_bound] = ClampBounds(f2i.dest_format, is_signed); + + IR::F16F32F64 intermediate; + switch (f2i.src_format) { + case SrcFormat::F16: { + const IR::F16 max_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast<f32>(max_bound)))}; + const IR::F16 min_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast<f32>(min_bound)))}; + intermediate = v.ir.FPClamp(rounded_value, min_val, max_val); + break; + } + case SrcFormat::F32: { + const IR::F32 max_val{v.ir.Imm32(static_cast<f32>(max_bound))}; + const IR::F32 min_val{v.ir.Imm32(static_cast<f32>(min_bound))}; + intermediate = v.ir.FPClamp(rounded_value, min_val, max_val); + break; + } + case SrcFormat::F64: { + const IR::F64 max_val{v.ir.Imm64(max_bound)}; + const IR::F64 min_val{v.ir.Imm64(min_bound)}; + intermediate = v.ir.FPClamp(rounded_value, min_val, max_val); + break; + } + default: + throw NotImplementedException("Invalid destination format {}", f2i.dest_format.Value()); + } + + const size_t bitsize{std::max<size_t>(32, BitSize(f2i.dest_format))}; + IR::U16U32U64 result{v.ir.ConvertFToI(bitsize, is_signed, intermediate)}; + + bool handled_special_case = false; + const bool special_nan_cases = + (f2i.src_format == SrcFormat::F64) != (f2i.dest_format == DestFormat::I64); + if (special_nan_cases) { + if (f2i.dest_format == DestFormat::I32) { + handled_special_case = true; + result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0x8000'0000U), result)}; + } else if (f2i.dest_format == DestFormat::I64) { + handled_special_case = true; + result = IR::U64{ + v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0x8000'0000'0000'0000UL), result)}; + } + } + if (!handled_special_case && is_signed) { + if (bitsize != 64) { + result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0U), result)}; + } else { + result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(u64{0}), result)}; + } + } + + if (bitsize == 64) { + v.L(f2i.dest_reg, result); + } else { + 
v.X(f2i.dest_reg, result); + } + + if (f2i.cc != 0) { + throw NotImplementedException("F2I CC"); + } +} +} // Anonymous namespace + +void TranslatorVisitor::F2I_reg(u64 insn) { + union { + u64 raw; + F2I base; + BitField<20, 8, IR::Reg> src_reg; + } const f2i{insn}; + + const IR::F16F32F64 op_a{[&]() -> IR::F16F32F64 { + switch (f2i.base.src_format) { + case SrcFormat::F16: + return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(X(f2i.src_reg)), f2i.base.half)}; + case SrcFormat::F32: + return F(f2i.src_reg); + case SrcFormat::F64: + return ir.PackDouble2x32(ir.CompositeConstruct(X(f2i.src_reg), X(f2i.src_reg + 1))); + default: + throw NotImplementedException("Invalid F2I source format {}", + f2i.base.src_format.Value()); + } + }()}; + TranslateF2I(*this, insn, op_a); +} + +void TranslatorVisitor::F2I_cbuf(u64 insn) { + const F2I f2i{insn}; + const IR::F16F32F64 op_a{[&]() -> IR::F16F32F64 { + switch (f2i.src_format) { + case SrcFormat::F16: + return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(GetCbuf(insn)), f2i.half)}; + case SrcFormat::F32: + return GetFloatCbuf(insn); + case SrcFormat::F64: { + return UnpackCbuf(*this, insn); + } + default: + throw NotImplementedException("Invalid F2I source format {}", f2i.src_format.Value()); + } + }()}; + TranslateF2I(*this, insn, op_a); +} + +void TranslatorVisitor::F2I_imm(u64) { + throw NotImplementedException("{}", Opcode::F2I_imm); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp new file mode 100644 index 000000000..fa2a7807b --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp @@ -0,0 +1,94 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c, bool neg_a, + bool neg_b, bool neg_c, bool sat, bool cc, FmzMode fmz_mode, FpRounding fp_rounding) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + } const ffma{insn}; + + if (cc) { + throw NotImplementedException("FFMA CC"); + } + const IR::F32 op_a{v.ir.FPAbsNeg(v.F(ffma.src_a), false, neg_a)}; + const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; + const IR::F32 op_c{v.ir.FPAbsNeg(src_c, false, neg_c)}; + const IR::FpControl fp_control{ + .no_contraction = true, + .rounding = CastFpRounding(fp_rounding), + .fmz_mode = CastFmzMode(fmz_mode), + }; + IR::F32 value{v.ir.FPFma(op_a, op_b, op_c, fp_control)}; + if (fmz_mode == FmzMode::FMZ && !sat) { + // Do not implement FMZ if SAT is enabled, as it does the logic for us. 
+ // On D3D9 mode, anything * 0 is zero, even NAN and infinity + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const IR::U1 zero_a{v.ir.FPEqual(op_a, zero)}; + const IR::U1 zero_b{v.ir.FPEqual(op_b, zero)}; + const IR::U1 any_zero{v.ir.LogicalOr(zero_a, zero_b)}; + value = IR::F32{v.ir.Select(any_zero, op_c, value)}; + } + if (sat) { + value = v.ir.FPSaturate(value); + } + v.F(ffma.dest_reg, value); +} + +void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c) { + union { + u64 raw; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> neg_b; + BitField<49, 1, u64> neg_c; + BitField<50, 1, u64> sat; + BitField<51, 2, FpRounding> fp_rounding; + BitField<53, 2, FmzMode> fmz_mode; + } const ffma{insn}; + + FFMA(v, insn, src_b, src_c, false, ffma.neg_b != 0, ffma.neg_c != 0, ffma.sat != 0, + ffma.cc != 0, ffma.fmz_mode, ffma.fp_rounding); +} +} // Anonymous namespace + +void TranslatorVisitor::FFMA_reg(u64 insn) { + FFMA(*this, insn, GetFloatReg20(insn), GetFloatReg39(insn)); +} + +void TranslatorVisitor::FFMA_rc(u64 insn) { + FFMA(*this, insn, GetFloatReg39(insn), GetFloatCbuf(insn)); +} + +void TranslatorVisitor::FFMA_cr(u64 insn) { + FFMA(*this, insn, GetFloatCbuf(insn), GetFloatReg39(insn)); +} + +void TranslatorVisitor::FFMA_imm(u64 insn) { + FFMA(*this, insn, GetFloatImm20(insn), GetFloatReg39(insn)); +} + +void TranslatorVisitor::FFMA32I(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> src_c; // FFMA32I mirrors the destination and addition register + BitField<52, 1, u64> cc; + BitField<53, 2, FmzMode> fmz_mode; + BitField<55, 1, u64> sat; + BitField<56, 1, u64> neg_a; + BitField<57, 1, u64> neg_c; + } const ffma32i{insn}; + + FFMA(*this, insn, GetFloatImm32(insn), F(ffma32i.src_c), ffma32i.neg_a != 0, false, + ffma32i.neg_c != 0, ffma32i.sat != 0, ffma32i.cc != 0, ffma32i.fmz_mode, FpRounding::RN); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp new file mode 100644 index 000000000..c0d6ee5af --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp @@ -0,0 +1,62 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void FMNMX(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<44, 1, u64> ftz; + BitField<45, 1, u64> negate_b; + BitField<46, 1, u64> abs_a; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> negate_a; + BitField<49, 1, u64> abs_b; + } const fmnmx{insn}; + + if (fmnmx.cc) { + throw NotImplementedException("FMNMX CC"); + } + + const IR::U1 pred{v.ir.GetPred(fmnmx.pred)}; + const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fmnmx.src_a_reg), fmnmx.abs_a != 0, fmnmx.negate_a != 0)}; + const IR::F32 op_b{v.ir.FPAbsNeg(src_b, fmnmx.abs_b != 0, fmnmx.negate_b != 0)}; + + const IR::FpControl control{ + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (fmnmx.ftz != 0 ? 
IR::FmzMode::FTZ : IR::FmzMode::None), + }; + IR::F32 max{v.ir.FPMax(op_a, op_b, control)}; + IR::F32 min{v.ir.FPMin(op_a, op_b, control)}; + + if (fmnmx.neg_pred != 0) { + std::swap(min, max); + } + + v.F(fmnmx.dest_reg, IR::F32{v.ir.Select(pred, min, max)}); +} +} // Anonymous namespace + +void TranslatorVisitor::FMNMX_reg(u64 insn) { + FMNMX(*this, insn, GetFloatReg20(insn)); +} + +void TranslatorVisitor::FMNMX_cbuf(u64 insn) { + FMNMX(*this, insn, GetFloatCbuf(insn)); +} + +void TranslatorVisitor::FMNMX_imm(u64 insn) { + FMNMX(*this, insn, GetFloatImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp new file mode 100644 index 000000000..2f8605619 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp @@ -0,0 +1,71 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Operation : u64 { + Cos = 0, + Sin = 1, + Ex2 = 2, // Base 2 exponent + Lg2 = 3, // Base 2 logarithm + Rcp = 4, // Reciprocal + Rsq = 5, // Reciprocal square root + Rcp64H = 6, // 64-bit reciprocal + Rsq64H = 7, // 64-bit reciprocal square root + Sqrt = 8, +}; +} // Anonymous namespace + +void TranslatorVisitor::MUFU(u64 insn) { + // MUFU is used to implement a bunch of special functions. See Operation. + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<20, 4, Operation> operation; + BitField<46, 1, u64> abs; + BitField<48, 1, u64> neg; + BitField<50, 1, u64> sat; + } const mufu{insn}; + + const IR::F32 op_a{ir.FPAbsNeg(F(mufu.src_reg), mufu.abs != 0, mufu.neg != 0)}; + IR::F32 value{[&]() -> IR::F32 { + switch (mufu.operation) { + case Operation::Cos: + return ir.FPCos(op_a); + case Operation::Sin: + return ir.FPSin(op_a); + case Operation::Ex2: + return ir.FPExp2(op_a); + case Operation::Lg2: + return ir.FPLog2(op_a); + case Operation::Rcp: + return ir.FPRecip(op_a); + case Operation::Rsq: + return ir.FPRecipSqrt(op_a); + case Operation::Rcp64H: + throw NotImplementedException("MUFU.RCP64H"); + case Operation::Rsq64H: + throw NotImplementedException("MUFU.RSQ64H"); + case Operation::Sqrt: + return ir.FPSqrt(op_a); + default: + throw NotImplementedException("Invalid MUFU operation {}", mufu.operation.Value()); + } + }()}; + + if (mufu.sat) { + value = ir.FPSaturate(value); + } + + F(mufu.dest_reg, value); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp new file mode 100644 index 000000000..06226b7ce --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp @@ -0,0 +1,127 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
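+// FMUL/FMUL32I: single-precision multiply with an optional power-of-two pre-scale (1/8 through 8),
+// D3D9-style FMZ zero handling, and optional saturation of the result.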
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Scale : u64 { + None, + D2, + D4, + D8, + M8, + M4, + M2, + INVALIDSCALE37, +}; + +float ScaleFactor(Scale scale) { + switch (scale) { + case Scale::None: + return 1.0f; + case Scale::D2: + return 1.0f / 2.0f; + case Scale::D4: + return 1.0f / 4.0f; + case Scale::D8: + return 1.0f / 8.0f; + case Scale::M8: + return 8.0f; + case Scale::M4: + return 4.0f; + case Scale::M2: + return 2.0f; + case Scale::INVALIDSCALE37: + break; + } + throw NotImplementedException("Invalid FMUL scale {}", scale); +} + +void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, FmzMode fmz_mode, + FpRounding fp_rounding, Scale scale, bool sat, bool cc, bool neg_b) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + } const fmul{insn}; + + if (cc) { + throw NotImplementedException("FMUL CC"); + } + IR::F32 op_a{v.F(fmul.src_a)}; + if (scale != Scale::None) { + if (fmz_mode != FmzMode::FTZ || fp_rounding != FpRounding::RN) { + throw NotImplementedException("FMUL scale with non-FMZ or non-RN modifiers"); + } + op_a = v.ir.FPMul(op_a, v.ir.Imm32(ScaleFactor(scale))); + } + const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; + const IR::FpControl fp_control{ + .no_contraction = true, + .rounding = CastFpRounding(fp_rounding), + .fmz_mode = CastFmzMode(fmz_mode), + }; + IR::F32 value{v.ir.FPMul(op_a, op_b, fp_control)}; + if (fmz_mode == FmzMode::FMZ && !sat) { + // Do not implement FMZ if SAT is enabled, as it does the logic for us. 
+ // On D3D9 mode, anything * 0 is zero, even NAN and infinity + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const IR::U1 zero_a{v.ir.FPEqual(op_a, zero)}; + const IR::U1 zero_b{v.ir.FPEqual(op_b, zero)}; + const IR::U1 any_zero{v.ir.LogicalOr(zero_a, zero_b)}; + value = IR::F32{v.ir.Select(any_zero, zero, value)}; + } + if (sat) { + value = v.ir.FPSaturate(value); + } + v.F(fmul.dest_reg, value); +} + +void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { + union { + u64 raw; + BitField<39, 2, FpRounding> fp_rounding; + BitField<41, 3, Scale> scale; + BitField<44, 2, FmzMode> fmz; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> neg_b; + BitField<50, 1, u64> sat; + } const fmul{insn}; + + FMUL(v, insn, src_b, fmul.fmz, fmul.fp_rounding, fmul.scale, fmul.sat != 0, fmul.cc != 0, + fmul.neg_b != 0); +} +} // Anonymous namespace + +void TranslatorVisitor::FMUL_reg(u64 insn) { + return FMUL(*this, insn, GetFloatReg20(insn)); +} + +void TranslatorVisitor::FMUL_cbuf(u64 insn) { + return FMUL(*this, insn, GetFloatCbuf(insn)); +} + +void TranslatorVisitor::FMUL_imm(u64 insn) { + return FMUL(*this, insn, GetFloatImm20(insn)); +} + +void TranslatorVisitor::FMUL32I(u64 insn) { + union { + u64 raw; + BitField<52, 1, u64> cc; + BitField<53, 2, FmzMode> fmz; + BitField<55, 1, u64> sat; + } const fmul32i{insn}; + + FMUL(*this, insn, GetFloatImm32(insn), fmul32i.fmz, FpRounding::RN, Scale::None, + fmul32i.sat != 0, fmul32i.cc != 0, false); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp new file mode 100644 index 000000000..f91b93fad --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp @@ -0,0 +1,41 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Mode : u64 { + SINCOS, + EX2, +}; + +void RRO(TranslatorVisitor& v, u64 insn, const IR::F32& src) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<39, 1, Mode> mode; + BitField<45, 1, u64> neg; + BitField<49, 1, u64> abs; + } const rro{insn}; + + v.F(rro.dest_reg, v.ir.FPAbsNeg(src, rro.abs != 0, rro.neg != 0)); +} +} // Anonymous namespace + +void TranslatorVisitor::RRO_reg(u64 insn) { + RRO(*this, insn, GetFloatReg20(insn)); +} + +void TranslatorVisitor::RRO_cbuf(u64 insn) { + RRO(*this, insn, GetFloatCbuf(insn)); +} + +void TranslatorVisitor::RRO_imm(u64) { + throw NotImplementedException("RRO (imm)"); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp new file mode 100644 index 000000000..5f93a1513 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp @@ -0,0 +1,60 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
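+// FSETP: floating-point comparison whose result (and its negation) is combined with a source
+// predicate through a boolean op, producing a pair of destination predicates.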
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void FSETP(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { + union { + u64 insn; + BitField<0, 3, IR::Pred> dest_pred_b; + BitField<3, 3, IR::Pred> dest_pred_a; + BitField<6, 1, u64> negate_b; + BitField<7, 1, u64> abs_a; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 3, IR::Pred> bop_pred; + BitField<42, 1, u64> neg_bop_pred; + BitField<43, 1, u64> negate_a; + BitField<44, 1, u64> abs_b; + BitField<45, 2, BooleanOp> bop; + BitField<47, 1, u64> ftz; + BitField<48, 4, FPCompareOp> compare_op; + } const fsetp{insn}; + + const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fsetp.src_a_reg), fsetp.abs_a != 0, fsetp.negate_a != 0)}; + const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fsetp.abs_b != 0, fsetp.negate_b != 0); + const IR::FpControl control{ + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (fsetp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), + }; + + const BooleanOp bop{fsetp.bop}; + const FPCompareOp compare_op{fsetp.compare_op}; + const IR::U1 comparison{FloatingPointCompare(v.ir, op_a, op_b, compare_op, control)}; + const IR::U1 bop_pred{v.ir.GetPred(fsetp.bop_pred, fsetp.neg_bop_pred != 0)}; + const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)}; + const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)}; + v.ir.SetPred(fsetp.dest_pred_a, result_a); + v.ir.SetPred(fsetp.dest_pred_b, result_b); +} +} // Anonymous namespace + +void TranslatorVisitor::FSETP_reg(u64 insn) { + FSETP(*this, insn, GetFloatReg20(insn)); +} + +void TranslatorVisitor::FSETP_cbuf(u64 insn) { + FSETP(*this, insn, GetFloatCbuf(insn)); +} + +void TranslatorVisitor::FSETP_imm(u64 insn) { + FSETP(*this, insn, GetFloatImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp new file mode 100644 index 000000000..7550a8d4c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp @@ -0,0 +1,44 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +void TranslatorVisitor::FSWZADD(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<28, 8, u64> swizzle; + BitField<38, 1, u64> ndv; + BitField<39, 2, FpRounding> round; + BitField<44, 1, u64> ftz; + BitField<47, 1, u64> cc; + } const fswzadd{insn}; + + if (fswzadd.ndv != 0) { + throw NotImplementedException("FSWZADD NDV"); + } + + const IR::F32 src_a{GetFloatReg8(insn)}; + const IR::F32 src_b{GetFloatReg20(insn)}; + const IR::U32 swizzle{ir.Imm32(static_cast<u32>(fswzadd.swizzle))}; + + const IR::FpControl fp_control{ + .no_contraction = false, + .rounding = CastFpRounding(fswzadd.round), + .fmz_mode = (fswzadd.ftz != 0 ? 
IR::FmzMode::FTZ : IR::FmzMode::None), + }; + + const IR::F32 result{ir.FSwizzleAdd(src_a, src_b, swizzle, fp_control)}; + F(fswzadd.dest_reg, result); + + if (fswzadd.cc != 0) { + throw NotImplementedException("FSWZADD CC"); + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp new file mode 100644 index 000000000..f2738a93b --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp @@ -0,0 +1,125 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" + +namespace Shader::Maxwell { +namespace { +void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool abs_a, bool neg_a, + Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + } const hadd2{insn}; + + auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hadd2.src_a), swizzle_a)}; + auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; + const bool promotion{lhs_a.Type() != lhs_b.Type()}; + if (promotion) { + if (lhs_a.Type() == IR::Type::F16) { + lhs_a = v.ir.FPConvert(32, lhs_a); + rhs_a = v.ir.FPConvert(32, rhs_a); + } + if (lhs_b.Type() == IR::Type::F16) { + lhs_b = v.ir.FPConvert(32, lhs_b); + rhs_b = v.ir.FPConvert(32, rhs_b); + } + } + lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a); + rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a); + + lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b); + rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); + + const IR::FpControl fp_control{ + .no_contraction = true, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (ftz ? 
IR::FmzMode::FTZ : IR::FmzMode::None), + }; + IR::F16F32F64 lhs{v.ir.FPAdd(lhs_a, lhs_b, fp_control)}; + IR::F16F32F64 rhs{v.ir.FPAdd(rhs_a, rhs_b, fp_control)}; + if (sat) { + lhs = v.ir.FPSaturate(lhs); + rhs = v.ir.FPSaturate(rhs); + } + if (promotion) { + lhs = v.ir.FPConvert(16, lhs); + rhs = v.ir.FPConvert(16, rhs); + } + v.X(hadd2.dest_reg, MergeResult(v.ir, hadd2.dest_reg, lhs, rhs, merge)); +} + +void HADD2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_b, bool neg_b, Swizzle swizzle_b, + const IR::U32& src_b) { + union { + u64 raw; + BitField<49, 2, Merge> merge; + BitField<39, 1, u64> ftz; + BitField<43, 1, u64> neg_a; + BitField<44, 1, u64> abs_a; + BitField<47, 2, Swizzle> swizzle_a; + } const hadd2{insn}; + + HADD2(v, insn, hadd2.merge, hadd2.ftz != 0, sat, hadd2.abs_a != 0, hadd2.neg_a != 0, + hadd2.swizzle_a, abs_b, neg_b, swizzle_b, src_b); +} +} // Anonymous namespace + +void TranslatorVisitor::HADD2_reg(u64 insn) { + union { + u64 raw; + BitField<32, 1, u64> sat; + BitField<31, 1, u64> neg_b; + BitField<30, 1, u64> abs_b; + BitField<28, 2, Swizzle> swizzle_b; + } const hadd2{insn}; + + HADD2(*this, insn, hadd2.sat != 0, hadd2.abs_b != 0, hadd2.neg_b != 0, hadd2.swizzle_b, + GetReg20(insn)); +} + +void TranslatorVisitor::HADD2_cbuf(u64 insn) { + union { + u64 raw; + BitField<52, 1, u64> sat; + BitField<56, 1, u64> neg_b; + BitField<54, 1, u64> abs_b; + } const hadd2{insn}; + + HADD2(*this, insn, hadd2.sat != 0, hadd2.abs_b != 0, hadd2.neg_b != 0, Swizzle::F32, + GetCbuf(insn)); +} + +void TranslatorVisitor::HADD2_imm(u64 insn) { + union { + u64 raw; + BitField<52, 1, u64> sat; + BitField<56, 1, u64> neg_high; + BitField<30, 9, u64> high; + BitField<29, 1, u64> neg_low; + BitField<20, 9, u64> low; + } const hadd2{insn}; + + const u32 imm{ + static_cast<u32>(hadd2.low << 6) | static_cast<u32>((hadd2.neg_low != 0 ? 1 : 0) << 15) | + static_cast<u32>(hadd2.high << 22) | static_cast<u32>((hadd2.neg_high != 0 ? 1 : 0) << 31)}; + HADD2(*this, insn, hadd2.sat != 0, false, false, Swizzle::H1_H0, ir.Imm32(imm)); +} + +void TranslatorVisitor::HADD2_32I(u64 insn) { + union { + u64 raw; + BitField<55, 1, u64> ftz; + BitField<52, 1, u64> sat; + BitField<56, 1, u64> neg_a; + BitField<53, 2, Swizzle> swizzle_a; + BitField<20, 32, u64> imm32; + } const hadd2{insn}; + + const u32 imm{static_cast<u32>(hadd2.imm32)}; + HADD2(*this, insn, Merge::H1_H0, hadd2.ftz != 0, hadd2.sat != 0, false, hadd2.neg_a != 0, + hadd2.swizzle_a, false, false, Swizzle::H1_H0, ir.Imm32(imm)); +} +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp new file mode 100644 index 000000000..fd7986701 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp @@ -0,0 +1,169 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
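+// HFMA2: fused multiply-add on packed half-precision pairs. Operands are extracted per swizzle,
+// promoted to F32 when lane types differ, and the two results are merged back into the destination.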
+ +#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" + +namespace Shader::Maxwell { +namespace { +void HFMA2(TranslatorVisitor& v, u64 insn, Merge merge, Swizzle swizzle_a, bool neg_b, bool neg_c, + Swizzle swizzle_b, Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c, + bool sat, HalfPrecision precision) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + } const hfma2{insn}; + + auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hfma2.src_a), swizzle_a)}; + auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; + auto [lhs_c, rhs_c]{Extract(v.ir, src_c, swizzle_c)}; + const bool promotion{lhs_a.Type() != lhs_b.Type() || lhs_a.Type() != lhs_c.Type()}; + if (promotion) { + if (lhs_a.Type() == IR::Type::F16) { + lhs_a = v.ir.FPConvert(32, lhs_a); + rhs_a = v.ir.FPConvert(32, rhs_a); + } + if (lhs_b.Type() == IR::Type::F16) { + lhs_b = v.ir.FPConvert(32, lhs_b); + rhs_b = v.ir.FPConvert(32, rhs_b); + } + if (lhs_c.Type() == IR::Type::F16) { + lhs_c = v.ir.FPConvert(32, lhs_c); + rhs_c = v.ir.FPConvert(32, rhs_c); + } + } + + lhs_b = v.ir.FPAbsNeg(lhs_b, false, neg_b); + rhs_b = v.ir.FPAbsNeg(rhs_b, false, neg_b); + + lhs_c = v.ir.FPAbsNeg(lhs_c, false, neg_c); + rhs_c = v.ir.FPAbsNeg(rhs_c, false, neg_c); + + const IR::FpControl fp_control{ + .no_contraction = true, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = HalfPrecision2FmzMode(precision), + }; + IR::F16F32F64 lhs{v.ir.FPFma(lhs_a, lhs_b, lhs_c, fp_control)}; + IR::F16F32F64 rhs{v.ir.FPFma(rhs_a, rhs_b, rhs_c, fp_control)}; + if (precision == HalfPrecision::FMZ && !sat) { + // Do not implement FMZ if SAT is enabled, as it does the logic for us. + // On D3D9 mode, anything * 0 is zero, even NAN and infinity + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const IR::U1 lhs_zero_a{v.ir.FPEqual(lhs_a, zero)}; + const IR::U1 lhs_zero_b{v.ir.FPEqual(lhs_b, zero)}; + const IR::U1 lhs_any_zero{v.ir.LogicalOr(lhs_zero_a, lhs_zero_b)}; + lhs = IR::F16F32F64{v.ir.Select(lhs_any_zero, lhs_c, lhs)}; + + const IR::U1 rhs_zero_a{v.ir.FPEqual(rhs_a, zero)}; + const IR::U1 rhs_zero_b{v.ir.FPEqual(rhs_b, zero)}; + const IR::U1 rhs_any_zero{v.ir.LogicalOr(rhs_zero_a, rhs_zero_b)}; + rhs = IR::F16F32F64{v.ir.Select(rhs_any_zero, rhs_c, rhs)}; + } + if (sat) { + lhs = v.ir.FPSaturate(lhs); + rhs = v.ir.FPSaturate(rhs); + } + if (promotion) { + lhs = v.ir.FPConvert(16, lhs); + rhs = v.ir.FPConvert(16, rhs); + } + v.X(hfma2.dest_reg, MergeResult(v.ir, hfma2.dest_reg, lhs, rhs, merge)); +} + +void HFMA2(TranslatorVisitor& v, u64 insn, bool neg_b, bool neg_c, Swizzle swizzle_b, + Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c, bool sat, + HalfPrecision precision) { + union { + u64 raw; + BitField<47, 2, Swizzle> swizzle_a; + BitField<49, 2, Merge> merge; + } const hfma2{insn}; + + HFMA2(v, insn, hfma2.merge, hfma2.swizzle_a, neg_b, neg_c, swizzle_b, swizzle_c, src_b, src_c, + sat, precision); +} +} // Anonymous namespace + +void TranslatorVisitor::HFMA2_reg(u64 insn) { + union { + u64 raw; + BitField<28, 2, Swizzle> swizzle_b; + BitField<32, 1, u64> saturate; + BitField<31, 1, u64> neg_b; + BitField<30, 1, u64> neg_c; + BitField<35, 2, Swizzle> swizzle_c; + BitField<37, 2, HalfPrecision> precision; + } const hfma2{insn}; + + HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, hfma2.swizzle_b, hfma2.swizzle_c, + GetReg20(insn), GetReg39(insn), hfma2.saturate != 0, hfma2.precision); +} + +void TranslatorVisitor::HFMA2_rc(u64 insn) { + union { + u64 raw; + 
BitField<51, 1, u64> neg_c; + BitField<52, 1, u64> saturate; + BitField<53, 2, Swizzle> swizzle_b; + BitField<56, 1, u64> neg_b; + BitField<57, 2, HalfPrecision> precision; + } const hfma2{insn}; + + HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, hfma2.swizzle_b, Swizzle::F32, + GetReg39(insn), GetCbuf(insn), hfma2.saturate != 0, hfma2.precision); +} + +void TranslatorVisitor::HFMA2_cr(u64 insn) { + union { + u64 raw; + BitField<51, 1, u64> neg_c; + BitField<52, 1, u64> saturate; + BitField<53, 2, Swizzle> swizzle_c; + BitField<56, 1, u64> neg_b; + BitField<57, 2, HalfPrecision> precision; + } const hfma2{insn}; + + HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, Swizzle::F32, hfma2.swizzle_c, + GetCbuf(insn), GetReg39(insn), hfma2.saturate != 0, hfma2.precision); +} + +void TranslatorVisitor::HFMA2_imm(u64 insn) { + union { + u64 raw; + BitField<51, 1, u64> neg_c; + BitField<52, 1, u64> saturate; + BitField<53, 2, Swizzle> swizzle_c; + + BitField<56, 1, u64> neg_high; + BitField<30, 9, u64> high; + BitField<29, 1, u64> neg_low; + BitField<20, 9, u64> low; + BitField<57, 2, HalfPrecision> precision; + } const hfma2{insn}; + + const u32 imm{ + static_cast<u32>(hfma2.low << 6) | static_cast<u32>((hfma2.neg_low != 0 ? 1 : 0) << 15) | + static_cast<u32>(hfma2.high << 22) | static_cast<u32>((hfma2.neg_high != 0 ? 1 : 0) << 31)}; + + HFMA2(*this, insn, false, hfma2.neg_c != 0, Swizzle::H1_H0, hfma2.swizzle_c, ir.Imm32(imm), + GetReg39(insn), hfma2.saturate != 0, hfma2.precision); +} + +void TranslatorVisitor::HFMA2_32I(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> src_c; + BitField<20, 32, u64> imm32; + BitField<52, 1, u64> neg_c; + BitField<53, 2, Swizzle> swizzle_a; + BitField<55, 2, HalfPrecision> precision; + } const hfma2{insn}; + + const u32 imm{static_cast<u32>(hfma2.imm32)}; + HFMA2(*this, insn, Merge::H1_H0, hfma2.swizzle_a, false, hfma2.neg_c != 0, Swizzle::H1_H0, + Swizzle::H1_H0, ir.Imm32(imm), X(hfma2.src_c), false, hfma2.precision); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp new file mode 100644 index 000000000..0dbeb7f56 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp @@ -0,0 +1,62 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
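+// Shared helpers for the packed half-precision translators: mapping the precision field to an
+// FMZ mode, extracting swizzled operand pairs, and merging per-lane results into a register.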
+ +#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" + +namespace Shader::Maxwell { + +IR::FmzMode HalfPrecision2FmzMode(HalfPrecision precision) { + switch (precision) { + case HalfPrecision::None: + return IR::FmzMode::None; + case HalfPrecision::FTZ: + return IR::FmzMode::FTZ; + case HalfPrecision::FMZ: + return IR::FmzMode::FMZ; + default: + return IR::FmzMode::DontCare; + } +} + +std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle) { + switch (swizzle) { + case Swizzle::H1_H0: { + const IR::Value vector{ir.UnpackFloat2x16(value)}; + return {IR::F16{ir.CompositeExtract(vector, 0)}, IR::F16{ir.CompositeExtract(vector, 1)}}; + } + case Swizzle::H0_H0: { + const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 0)}; + return {scalar, scalar}; + } + case Swizzle::H1_H1: { + const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 1)}; + return {scalar, scalar}; + } + case Swizzle::F32: { + const IR::F32 scalar{ir.BitCast<IR::F32>(value)}; + return {scalar, scalar}; + } + } + throw InvalidArgument("Invalid swizzle {}", swizzle); +} + +IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs, + Merge merge) { + switch (merge) { + case Merge::H1_H0: + return ir.PackFloat2x16(ir.CompositeConstruct(lhs, rhs)); + case Merge::F32: + return ir.BitCast<IR::U32, IR::F32>(ir.FPConvert(32, lhs)); + case Merge::MRG_H0: + case Merge::MRG_H1: { + const IR::Value vector{ir.UnpackFloat2x16(ir.GetReg(dest))}; + const bool is_h0{merge == Merge::MRG_H0}; + const IR::F16 insert{ir.FPConvert(16, is_h0 ? lhs : rhs)}; + return ir.PackFloat2x16(ir.CompositeInsert(vector, insert, is_h0 ? 0 : 1)); + } + } + throw InvalidArgument("Invalid merge {}", merge); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h new file mode 100644 index 000000000..59da56a7e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h @@ -0,0 +1,42 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
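+// Merge, Swizzle and HalfPrecision encodings plus helper declarations used by the packed
+// half-precision translators (HADD2, HMUL2, HFMA2, HSET2, HSETP2).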
+ +#pragma once + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { + +enum class Merge : u64 { + H1_H0, + F32, + MRG_H0, + MRG_H1, +}; + +enum class Swizzle : u64 { + H1_H0, + F32, + H0_H0, + H1_H1, +}; + +enum class HalfPrecision : u64 { + None = 0, + FTZ = 1, + FMZ = 2, +}; + +IR::FmzMode HalfPrecision2FmzMode(HalfPrecision precision); + +std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle); + +IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs, + Merge merge); + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp new file mode 100644 index 000000000..3f548ce76 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp @@ -0,0 +1,143 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" + +namespace Shader::Maxwell { +namespace { +void HMUL2(TranslatorVisitor& v, u64 insn, Merge merge, bool sat, bool abs_a, bool neg_a, + Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b, + HalfPrecision precision) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + } const hmul2{insn}; + + auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hmul2.src_a), swizzle_a)}; + auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; + const bool promotion{lhs_a.Type() != lhs_b.Type()}; + if (promotion) { + if (lhs_a.Type() == IR::Type::F16) { + lhs_a = v.ir.FPConvert(32, lhs_a); + rhs_a = v.ir.FPConvert(32, rhs_a); + } + if (lhs_b.Type() == IR::Type::F16) { + lhs_b = v.ir.FPConvert(32, lhs_b); + rhs_b = v.ir.FPConvert(32, rhs_b); + } + } + lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a); + rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a); + + lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b); + rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); + + const IR::FpControl fp_control{ + .no_contraction = true, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = HalfPrecision2FmzMode(precision), + }; + IR::F16F32F64 lhs{v.ir.FPMul(lhs_a, lhs_b, fp_control)}; + IR::F16F32F64 rhs{v.ir.FPMul(rhs_a, rhs_b, fp_control)}; + if (precision == HalfPrecision::FMZ && !sat) { + // Do not implement FMZ if SAT is enabled, as it does the logic for us. 
+ // On D3D9 mode, anything * 0 is zero, even NAN and infinity + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const IR::U1 lhs_zero_a{v.ir.FPEqual(lhs_a, zero)}; + const IR::U1 lhs_zero_b{v.ir.FPEqual(lhs_b, zero)}; + const IR::U1 lhs_any_zero{v.ir.LogicalOr(lhs_zero_a, lhs_zero_b)}; + lhs = IR::F16F32F64{v.ir.Select(lhs_any_zero, zero, lhs)}; + + const IR::U1 rhs_zero_a{v.ir.FPEqual(rhs_a, zero)}; + const IR::U1 rhs_zero_b{v.ir.FPEqual(rhs_b, zero)}; + const IR::U1 rhs_any_zero{v.ir.LogicalOr(rhs_zero_a, rhs_zero_b)}; + rhs = IR::F16F32F64{v.ir.Select(rhs_any_zero, zero, rhs)}; + } + if (sat) { + lhs = v.ir.FPSaturate(lhs); + rhs = v.ir.FPSaturate(rhs); + } + if (promotion) { + lhs = v.ir.FPConvert(16, lhs); + rhs = v.ir.FPConvert(16, rhs); + } + v.X(hmul2.dest_reg, MergeResult(v.ir, hmul2.dest_reg, lhs, rhs, merge)); +} + +void HMUL2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_a, bool neg_a, bool abs_b, bool neg_b, + Swizzle swizzle_b, const IR::U32& src_b) { + union { + u64 raw; + BitField<49, 2, Merge> merge; + BitField<47, 2, Swizzle> swizzle_a; + BitField<39, 2, HalfPrecision> precision; + } const hmul2{insn}; + + HMUL2(v, insn, hmul2.merge, sat, abs_a, neg_a, hmul2.swizzle_a, abs_b, neg_b, swizzle_b, src_b, + hmul2.precision); +} +} // Anonymous namespace + +void TranslatorVisitor::HMUL2_reg(u64 insn) { + union { + u64 raw; + BitField<32, 1, u64> sat; + BitField<31, 1, u64> neg_b; + BitField<30, 1, u64> abs_b; + BitField<44, 1, u64> abs_a; + BitField<28, 2, Swizzle> swizzle_b; + } const hmul2{insn}; + + HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, false, hmul2.abs_b != 0, hmul2.neg_b != 0, + hmul2.swizzle_b, GetReg20(insn)); +} + +void TranslatorVisitor::HMUL2_cbuf(u64 insn) { + union { + u64 raw; + BitField<52, 1, u64> sat; + BitField<54, 1, u64> abs_b; + BitField<43, 1, u64> neg_a; + BitField<44, 1, u64> abs_a; + } const hmul2{insn}; + + HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, hmul2.abs_b != 0, false, + Swizzle::F32, GetCbuf(insn)); +} + +void TranslatorVisitor::HMUL2_imm(u64 insn) { + union { + u64 raw; + BitField<52, 1, u64> sat; + BitField<56, 1, u64> neg_high; + BitField<30, 9, u64> high; + BitField<29, 1, u64> neg_low; + BitField<20, 9, u64> low; + BitField<43, 1, u64> neg_a; + BitField<44, 1, u64> abs_a; + } const hmul2{insn}; + + const u32 imm{ + static_cast<u32>(hmul2.low << 6) | static_cast<u32>((hmul2.neg_low != 0 ? 1 : 0) << 15) | + static_cast<u32>(hmul2.high << 22) | static_cast<u32>((hmul2.neg_high != 0 ? 
1 : 0) << 31)}; + HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, false, false, + Swizzle::H1_H0, ir.Imm32(imm)); +} + +void TranslatorVisitor::HMUL2_32I(u64 insn) { + union { + u64 raw; + BitField<55, 2, HalfPrecision> precision; + BitField<52, 1, u64> sat; + BitField<53, 2, Swizzle> swizzle_a; + BitField<20, 32, u64> imm32; + } const hmul2{insn}; + + const u32 imm{static_cast<u32>(hmul2.imm32)}; + HMUL2(*this, insn, Merge::H1_H0, hmul2.sat != 0, false, false, hmul2.swizzle_a, false, false, + Swizzle::H1_H0, ir.Imm32(imm), hmul2.precision); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp new file mode 100644 index 000000000..cca5b831f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp @@ -0,0 +1,117 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" + +namespace Shader::Maxwell { +namespace { +void HSET2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool bf, bool ftz, bool neg_b, + bool abs_b, FPCompareOp compare_op, Swizzle swizzle_b) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<43, 1, u64> neg_a; + BitField<45, 2, BooleanOp> bop; + BitField<44, 1, u64> abs_a; + BitField<47, 2, Swizzle> swizzle_a; + } const hset2{insn}; + + auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hset2.src_a_reg), hset2.swizzle_a)}; + auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; + + if (lhs_a.Type() != lhs_b.Type()) { + if (lhs_a.Type() == IR::Type::F16) { + lhs_a = v.ir.FPConvert(32, lhs_a); + rhs_a = v.ir.FPConvert(32, rhs_a); + } + if (lhs_b.Type() == IR::Type::F16) { + lhs_b = v.ir.FPConvert(32, lhs_b); + rhs_b = v.ir.FPConvert(32, rhs_b); + } + } + + lhs_a = v.ir.FPAbsNeg(lhs_a, hset2.abs_a != 0, hset2.neg_a != 0); + rhs_a = v.ir.FPAbsNeg(rhs_a, hset2.abs_a != 0, hset2.neg_a != 0); + + lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b); + rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); + + const IR::FpControl control{ + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None), + }; + + IR::U1 pred{v.ir.GetPred(hset2.pred)}; + if (hset2.neg_pred != 0) { + pred = v.ir.LogicalNot(pred); + } + const IR::U1 cmp_result_lhs{FloatingPointCompare(v.ir, lhs_a, lhs_b, compare_op, control)}; + const IR::U1 cmp_result_rhs{FloatingPointCompare(v.ir, rhs_a, rhs_b, compare_op, control)}; + const IR::U1 bop_result_lhs{PredicateCombine(v.ir, cmp_result_lhs, pred, hset2.bop)}; + const IR::U1 bop_result_rhs{PredicateCombine(v.ir, cmp_result_rhs, pred, hset2.bop)}; + + const u32 true_value = bf ? 
0x3c00 : 0xffff; + const IR::U32 true_val_lhs{v.ir.Imm32(true_value)}; + const IR::U32 true_val_rhs{v.ir.Imm32(true_value << 16)}; + const IR::U32 fail_result{v.ir.Imm32(0)}; + const IR::U32 result_lhs{v.ir.Select(bop_result_lhs, true_val_lhs, fail_result)}; + const IR::U32 result_rhs{v.ir.Select(bop_result_rhs, true_val_rhs, fail_result)}; + + v.X(hset2.dest_reg, IR::U32{v.ir.BitwiseOr(result_lhs, result_rhs)}); +} +} // Anonymous namespace + +void TranslatorVisitor::HSET2_reg(u64 insn) { + union { + u64 insn; + BitField<30, 1, u64> abs_b; + BitField<49, 1, u64> bf; + BitField<31, 1, u64> neg_b; + BitField<50, 1, u64> ftz; + BitField<35, 4, FPCompareOp> compare_op; + BitField<28, 2, Swizzle> swizzle_b; + } const hset2{insn}; + + HSET2(*this, insn, GetReg20(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0, + hset2.abs_b != 0, hset2.compare_op, hset2.swizzle_b); +} + +void TranslatorVisitor::HSET2_cbuf(u64 insn) { + union { + u64 insn; + BitField<53, 1, u64> bf; + BitField<56, 1, u64> neg_b; + BitField<54, 1, u64> ftz; + BitField<49, 4, FPCompareOp> compare_op; + } const hset2{insn}; + + HSET2(*this, insn, GetCbuf(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0, false, + hset2.compare_op, Swizzle::F32); +} + +void TranslatorVisitor::HSET2_imm(u64 insn) { + union { + u64 insn; + BitField<53, 1, u64> bf; + BitField<54, 1, u64> ftz; + BitField<49, 4, FPCompareOp> compare_op; + BitField<56, 1, u64> neg_high; + BitField<30, 9, u64> high; + BitField<29, 1, u64> neg_low; + BitField<20, 9, u64> low; + } const hset2{insn}; + + const u32 imm{ + static_cast<u32>(hset2.low << 6) | static_cast<u32>((hset2.neg_low != 0 ? 1 : 0) << 15) | + static_cast<u32>(hset2.high << 22) | static_cast<u32>((hset2.neg_high != 0 ? 1 : 0) << 31)}; + + HSET2(*this, insn, ir.Imm32(imm), hset2.bf != 0, hset2.ftz != 0, false, false, hset2.compare_op, + Swizzle::H1_H0); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp new file mode 100644 index 000000000..b3931dae3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp @@ -0,0 +1,118 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
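+// HSETP2: packed half-precision comparison; writes one predicate per lane, or, with H_AND set,
+// the AND of both lanes and its complement.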
+ +#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" + +namespace Shader::Maxwell { +namespace { +void HSETP2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool neg_b, bool abs_b, + Swizzle swizzle_b, FPCompareOp compare_op, bool h_and) { + union { + u64 insn; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<3, 3, IR::Pred> dest_pred_a; + BitField<0, 3, IR::Pred> dest_pred_b; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<43, 1, u64> neg_a; + BitField<45, 2, BooleanOp> bop; + BitField<44, 1, u64> abs_a; + BitField<6, 1, u64> ftz; + BitField<47, 2, Swizzle> swizzle_a; + } const hsetp2{insn}; + + auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hsetp2.src_a_reg), hsetp2.swizzle_a)}; + auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; + + if (lhs_a.Type() != lhs_b.Type()) { + if (lhs_a.Type() == IR::Type::F16) { + lhs_a = v.ir.FPConvert(32, lhs_a); + rhs_a = v.ir.FPConvert(32, rhs_a); + } + if (lhs_b.Type() == IR::Type::F16) { + lhs_b = v.ir.FPConvert(32, lhs_b); + rhs_b = v.ir.FPConvert(32, rhs_b); + } + } + + lhs_a = v.ir.FPAbsNeg(lhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0); + rhs_a = v.ir.FPAbsNeg(rhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0); + + lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b); + rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); + + const IR::FpControl control{ + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (hsetp2.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), + }; + + IR::U1 pred{v.ir.GetPred(hsetp2.pred)}; + if (hsetp2.neg_pred != 0) { + pred = v.ir.LogicalNot(pred); + } + const IR::U1 cmp_result_lhs{FloatingPointCompare(v.ir, lhs_a, lhs_b, compare_op, control)}; + const IR::U1 cmp_result_rhs{FloatingPointCompare(v.ir, rhs_a, rhs_b, compare_op, control)}; + const IR::U1 bop_result_lhs{PredicateCombine(v.ir, cmp_result_lhs, pred, hsetp2.bop)}; + const IR::U1 bop_result_rhs{PredicateCombine(v.ir, cmp_result_rhs, pred, hsetp2.bop)}; + + if (h_and) { + auto result = v.ir.LogicalAnd(bop_result_lhs, bop_result_rhs); + v.ir.SetPred(hsetp2.dest_pred_a, result); + v.ir.SetPred(hsetp2.dest_pred_b, v.ir.LogicalNot(result)); + } else { + v.ir.SetPred(hsetp2.dest_pred_a, bop_result_lhs); + v.ir.SetPred(hsetp2.dest_pred_b, bop_result_rhs); + } +} +} // Anonymous namespace + +void TranslatorVisitor::HSETP2_reg(u64 insn) { + union { + u64 insn; + BitField<30, 1, u64> abs_b; + BitField<49, 1, u64> h_and; + BitField<31, 1, u64> neg_b; + BitField<35, 4, FPCompareOp> compare_op; + BitField<28, 2, Swizzle> swizzle_b; + } const hsetp2{insn}; + HSETP2(*this, insn, GetReg20(insn), hsetp2.neg_b != 0, hsetp2.abs_b != 0, hsetp2.swizzle_b, + hsetp2.compare_op, hsetp2.h_and != 0); +} + +void TranslatorVisitor::HSETP2_cbuf(u64 insn) { + union { + u64 insn; + BitField<53, 1, u64> h_and; + BitField<54, 1, u64> abs_b; + BitField<56, 1, u64> neg_b; + BitField<49, 4, FPCompareOp> compare_op; + } const hsetp2{insn}; + + HSETP2(*this, insn, GetCbuf(insn), hsetp2.neg_b != 0, hsetp2.abs_b != 0, Swizzle::F32, + hsetp2.compare_op, hsetp2.h_and != 0); +} + +void TranslatorVisitor::HSETP2_imm(u64 insn) { + union { + u64 insn; + BitField<53, 1, u64> h_and; + BitField<54, 1, u64> ftz; + BitField<49, 4, FPCompareOp> compare_op; + BitField<56, 1, u64> neg_high; + BitField<30, 9, u64> high; + BitField<29, 1, u64> neg_low; + BitField<20, 9, u64> low; + } const hsetp2{insn}; + + const u32 imm{static_cast<u32>(hsetp2.low << 6) | + static_cast<u32>((hsetp2.neg_low != 0 ? 
1 : 0) << 15) | + static_cast<u32>(hsetp2.high << 22) | + static_cast<u32>((hsetp2.neg_high != 0 ? 1 : 0) << 31)}; + + HSETP2(*this, insn, ir.Imm32(imm), false, false, Swizzle::H1_H0, hsetp2.compare_op, + hsetp2.h_and != 0); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp new file mode 100644 index 000000000..b446aae0e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -0,0 +1,272 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +[[nodiscard]] IR::U32 CbufLowerBits(IR::IREmitter& ir, bool unaligned, const IR::U32& binding, + u32 offset) { + if (unaligned) { + return ir.Imm32(0); + } + return ir.GetCbuf(binding, IR::U32{IR::Value{offset}}); +} +} // Anonymous namespace + +IR::U32 TranslatorVisitor::X(IR::Reg reg) { + return ir.GetReg(reg); +} + +IR::U64 TranslatorVisitor::L(IR::Reg reg) { + if (!IR::IsAligned(reg, 2)) { + throw NotImplementedException("Unaligned source register {}", reg); + } + return IR::U64{ir.PackUint2x32(ir.CompositeConstruct(X(reg), X(reg + 1)))}; +} + +IR::F32 TranslatorVisitor::F(IR::Reg reg) { + return ir.BitCast<IR::F32>(X(reg)); +} + +IR::F64 TranslatorVisitor::D(IR::Reg reg) { + if (!IR::IsAligned(reg, 2)) { + throw NotImplementedException("Unaligned source register {}", reg); + } + return IR::F64{ir.PackDouble2x32(ir.CompositeConstruct(X(reg), X(reg + 1)))}; +} + +void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) { + ir.SetReg(dest_reg, value); +} + +void TranslatorVisitor::L(IR::Reg dest_reg, const IR::U64& value) { + if (!IR::IsAligned(dest_reg, 2)) { + throw NotImplementedException("Unaligned destination register {}", dest_reg); + } + const IR::Value result{ir.UnpackUint2x32(value)}; + for (int i = 0; i < 2; i++) { + X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))}); + } +} + +void TranslatorVisitor::F(IR::Reg dest_reg, const IR::F32& value) { + X(dest_reg, ir.BitCast<IR::U32>(value)); +} + +void TranslatorVisitor::D(IR::Reg dest_reg, const IR::F64& value) { + if (!IR::IsAligned(dest_reg, 2)) { + throw NotImplementedException("Unaligned destination register {}", dest_reg); + } + const IR::Value result{ir.UnpackDouble2x32(value)}; + for (int i = 0; i < 2; i++) { + X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))}); + } +} + +IR::U32 TranslatorVisitor::GetReg8(u64 insn) { + union { + u64 raw; + BitField<8, 8, IR::Reg> index; + } const reg{insn}; + return X(reg.index); +} + +IR::U32 TranslatorVisitor::GetReg20(u64 insn) { + union { + u64 raw; + BitField<20, 8, IR::Reg> index; + } const reg{insn}; + return X(reg.index); +} + +IR::U32 TranslatorVisitor::GetReg39(u64 insn) { + union { + u64 raw; + BitField<39, 8, IR::Reg> index; + } const reg{insn}; + return X(reg.index); +} + +IR::F32 TranslatorVisitor::GetFloatReg8(u64 insn) { + return ir.BitCast<IR::F32>(GetReg8(insn)); +} + +IR::F32 TranslatorVisitor::GetFloatReg20(u64 insn) { + return ir.BitCast<IR::F32>(GetReg20(insn)); +} + +IR::F32 TranslatorVisitor::GetFloatReg39(u64 insn) { + return ir.BitCast<IR::F32>(GetReg39(insn)); +} + +IR::F64 TranslatorVisitor::GetDoubleReg20(u64 insn) { + union { + u64 
raw; + BitField<20, 8, IR::Reg> index; + } const reg{insn}; + return D(reg.index); +} + +IR::F64 TranslatorVisitor::GetDoubleReg39(u64 insn) { + union { + u64 raw; + BitField<39, 8, IR::Reg> index; + } const reg{insn}; + return D(reg.index); +} + +static std::pair<IR::U32, IR::U32> CbufAddr(u64 insn) { + union { + u64 raw; + BitField<20, 14, u64> offset; + BitField<34, 5, u64> binding; + } const cbuf{insn}; + + if (cbuf.binding >= 18) { + throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding); + } + if (cbuf.offset >= 0x10'000) { + throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset); + } + const IR::Value binding{static_cast<u32>(cbuf.binding)}; + const IR::Value byte_offset{static_cast<u32>(cbuf.offset) * 4}; + return {IR::U32{binding}, IR::U32{byte_offset}}; +} + +IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { + const auto [binding, byte_offset]{CbufAddr(insn)}; + return ir.GetCbuf(binding, byte_offset); +} + +IR::F32 TranslatorVisitor::GetFloatCbuf(u64 insn) { + const auto [binding, byte_offset]{CbufAddr(insn)}; + return ir.GetFloatCbuf(binding, byte_offset); +} + +IR::F64 TranslatorVisitor::GetDoubleCbuf(u64 insn) { + union { + u64 raw; + BitField<20, 1, u64> unaligned; + } const cbuf{insn}; + + const auto [binding, offset_value]{CbufAddr(insn)}; + const bool unaligned{cbuf.unaligned != 0}; + const u32 offset{offset_value.U32()}; + const IR::Value addr{unaligned ? offset | 4u : (offset & ~7u) | 4u}; + + const IR::U32 value{ir.GetCbuf(binding, IR::U32{addr})}; + const IR::U32 lower_bits{CbufLowerBits(ir, unaligned, binding, offset)}; + return ir.PackDouble2x32(ir.CompositeConstruct(lower_bits, value)); +} + +IR::U64 TranslatorVisitor::GetPackedCbuf(u64 insn) { + union { + u64 raw; + BitField<20, 1, u64> unaligned; + } const cbuf{insn}; + + if (cbuf.unaligned != 0) { + throw NotImplementedException("Unaligned packed constant buffer read"); + } + const auto [binding, lower_offset]{CbufAddr(insn)}; + const IR::U32 upper_offset{ir.Imm32(lower_offset.U32() + 4)}; + const IR::U32 lower_value{ir.GetCbuf(binding, lower_offset)}; + const IR::U32 upper_value{ir.GetCbuf(binding, upper_offset)}; + return ir.PackUint2x32(ir.CompositeConstruct(lower_value, upper_value)); +} + +IR::U32 TranslatorVisitor::GetImm20(u64 insn) { + union { + u64 raw; + BitField<20, 19, u64> value; + BitField<56, 1, u64> is_negative; + } const imm{insn}; + + if (imm.is_negative != 0) { + const s64 raw{static_cast<s64>(imm.value)}; + return ir.Imm32(static_cast<s32>(-(1LL << 19) + raw)); + } else { + return ir.Imm32(static_cast<u32>(imm.value)); + } +} + +IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) { + union { + u64 raw; + BitField<20, 19, u64> value; + BitField<56, 1, u64> is_negative; + } const imm{insn}; + const u32 sign_bit{static_cast<u32>(imm.is_negative != 0 ? (1ULL << 31) : 0)}; + const u32 value{static_cast<u32>(imm.value) << 12}; + return ir.Imm32(Common::BitCast<f32>(value | sign_bit)); +} + +IR::F64 TranslatorVisitor::GetDoubleImm20(u64 insn) { + union { + u64 raw; + BitField<20, 19, u64> value; + BitField<56, 1, u64> is_negative; + } const imm{insn}; + const u64 sign_bit{imm.is_negative != 0 ? 
(1ULL << 63) : 0}; + const u64 value{imm.value << 44}; + return ir.Imm64(Common::BitCast<f64>(value | sign_bit)); +} + +IR::U64 TranslatorVisitor::GetPackedImm20(u64 insn) { + const s64 value{GetImm20(insn).U32()}; + return ir.Imm64(static_cast<u64>(static_cast<s64>(value) << 32)); +} + +IR::U32 TranslatorVisitor::GetImm32(u64 insn) { + union { + u64 raw; + BitField<20, 32, u64> value; + } const imm{insn}; + return ir.Imm32(static_cast<u32>(imm.value)); +} + +IR::F32 TranslatorVisitor::GetFloatImm32(u64 insn) { + union { + u64 raw; + BitField<20, 32, u64> value; + } const imm{insn}; + return ir.Imm32(Common::BitCast<f32>(static_cast<u32>(imm.value))); +} + +void TranslatorVisitor::SetZFlag(const IR::U1& value) { + ir.SetZFlag(value); +} + +void TranslatorVisitor::SetSFlag(const IR::U1& value) { + ir.SetSFlag(value); +} + +void TranslatorVisitor::SetCFlag(const IR::U1& value) { + ir.SetCFlag(value); +} + +void TranslatorVisitor::SetOFlag(const IR::U1& value) { + ir.SetOFlag(value); +} + +void TranslatorVisitor::ResetZero() { + SetZFlag(ir.Imm1(false)); +} + +void TranslatorVisitor::ResetSFlag() { + SetSFlag(ir.Imm1(false)); +} + +void TranslatorVisitor::ResetCFlag() { + SetCFlag(ir.Imm1(false)); +} + +void TranslatorVisitor::ResetOFlag() { + SetOFlag(ir.Imm1(false)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h new file mode 100644 index 000000000..335e4f24f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -0,0 +1,387 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/maxwell/instruction.h" + +namespace Shader::Maxwell { + +enum class CompareOp : u64 { + False, + LessThan, + Equal, + LessThanEqual, + GreaterThan, + NotEqual, + GreaterThanEqual, + True, +}; + +enum class BooleanOp : u64 { + AND, + OR, + XOR, +}; + +enum class PredicateOp : u64 { + False, + True, + Zero, + NonZero, +}; + +enum class FPCompareOp : u64 { + F, + LT, + EQ, + LE, + GT, + NE, + GE, + NUM, + Nan, + LTU, + EQU, + LEU, + GTU, + NEU, + GEU, + T, +}; + +class TranslatorVisitor { +public: + explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_}, ir(block) {} + + Environment& env; + IR::IREmitter ir; + + void AL2P(u64 insn); + void ALD(u64 insn); + void AST(u64 insn); + void ATOM_cas(u64 insn); + void ATOM(u64 insn); + void ATOMS_cas(u64 insn); + void ATOMS(u64 insn); + void B2R(u64 insn); + void BAR(u64 insn); + void BFE_reg(u64 insn); + void BFE_cbuf(u64 insn); + void BFE_imm(u64 insn); + void BFI_reg(u64 insn); + void BFI_rc(u64 insn); + void BFI_cr(u64 insn); + void BFI_imm(u64 insn); + void BPT(u64 insn); + void BRA(u64 insn); + void BRK(u64 insn); + void BRX(u64 insn); + void CAL(); + void CCTL(u64 insn); + void CCTLL(u64 insn); + void CONT(u64 insn); + void CS2R(u64 insn); + void CSET(u64 insn); + void CSETP(u64 insn); + void DADD_reg(u64 insn); + void DADD_cbuf(u64 insn); + void DADD_imm(u64 insn); + void DEPBAR(); + void DFMA_reg(u64 insn); + void DFMA_rc(u64 insn); + void DFMA_cr(u64 insn); + void DFMA_imm(u64 insn); + void DMNMX_reg(u64 insn); + void DMNMX_cbuf(u64 insn); + void DMNMX_imm(u64 insn); + void DMUL_reg(u64 insn); + void 
DMUL_cbuf(u64 insn); + void DMUL_imm(u64 insn); + void DSET_reg(u64 insn); + void DSET_cbuf(u64 insn); + void DSET_imm(u64 insn); + void DSETP_reg(u64 insn); + void DSETP_cbuf(u64 insn); + void DSETP_imm(u64 insn); + void EXIT(); + void F2F_reg(u64 insn); + void F2F_cbuf(u64 insn); + void F2F_imm(u64 insn); + void F2I_reg(u64 insn); + void F2I_cbuf(u64 insn); + void F2I_imm(u64 insn); + void FADD_reg(u64 insn); + void FADD_cbuf(u64 insn); + void FADD_imm(u64 insn); + void FADD32I(u64 insn); + void FCHK_reg(u64 insn); + void FCHK_cbuf(u64 insn); + void FCHK_imm(u64 insn); + void FCMP_reg(u64 insn); + void FCMP_rc(u64 insn); + void FCMP_cr(u64 insn); + void FCMP_imm(u64 insn); + void FFMA_reg(u64 insn); + void FFMA_rc(u64 insn); + void FFMA_cr(u64 insn); + void FFMA_imm(u64 insn); + void FFMA32I(u64 insn); + void FLO_reg(u64 insn); + void FLO_cbuf(u64 insn); + void FLO_imm(u64 insn); + void FMNMX_reg(u64 insn); + void FMNMX_cbuf(u64 insn); + void FMNMX_imm(u64 insn); + void FMUL_reg(u64 insn); + void FMUL_cbuf(u64 insn); + void FMUL_imm(u64 insn); + void FMUL32I(u64 insn); + void FSET_reg(u64 insn); + void FSET_cbuf(u64 insn); + void FSET_imm(u64 insn); + void FSETP_reg(u64 insn); + void FSETP_cbuf(u64 insn); + void FSETP_imm(u64 insn); + void FSWZADD(u64 insn); + void GETCRSPTR(u64 insn); + void GETLMEMBASE(u64 insn); + void HADD2_reg(u64 insn); + void HADD2_cbuf(u64 insn); + void HADD2_imm(u64 insn); + void HADD2_32I(u64 insn); + void HFMA2_reg(u64 insn); + void HFMA2_rc(u64 insn); + void HFMA2_cr(u64 insn); + void HFMA2_imm(u64 insn); + void HFMA2_32I(u64 insn); + void HMUL2_reg(u64 insn); + void HMUL2_cbuf(u64 insn); + void HMUL2_imm(u64 insn); + void HMUL2_32I(u64 insn); + void HSET2_reg(u64 insn); + void HSET2_cbuf(u64 insn); + void HSET2_imm(u64 insn); + void HSETP2_reg(u64 insn); + void HSETP2_cbuf(u64 insn); + void HSETP2_imm(u64 insn); + void I2F_reg(u64 insn); + void I2F_cbuf(u64 insn); + void I2F_imm(u64 insn); + void I2I_reg(u64 insn); + void I2I_cbuf(u64 insn); + void I2I_imm(u64 insn); + void IADD_reg(u64 insn); + void IADD_cbuf(u64 insn); + void IADD_imm(u64 insn); + void IADD3_reg(u64 insn); + void IADD3_cbuf(u64 insn); + void IADD3_imm(u64 insn); + void IADD32I(u64 insn); + void ICMP_reg(u64 insn); + void ICMP_rc(u64 insn); + void ICMP_cr(u64 insn); + void ICMP_imm(u64 insn); + void IDE(u64 insn); + void IDP_reg(u64 insn); + void IDP_imm(u64 insn); + void IMAD_reg(u64 insn); + void IMAD_rc(u64 insn); + void IMAD_cr(u64 insn); + void IMAD_imm(u64 insn); + void IMAD32I(u64 insn); + void IMADSP_reg(u64 insn); + void IMADSP_rc(u64 insn); + void IMADSP_cr(u64 insn); + void IMADSP_imm(u64 insn); + void IMNMX_reg(u64 insn); + void IMNMX_cbuf(u64 insn); + void IMNMX_imm(u64 insn); + void IMUL_reg(u64 insn); + void IMUL_cbuf(u64 insn); + void IMUL_imm(u64 insn); + void IMUL32I(u64 insn); + void IPA(u64 insn); + void ISBERD(u64 insn); + void ISCADD_reg(u64 insn); + void ISCADD_cbuf(u64 insn); + void ISCADD_imm(u64 insn); + void ISCADD32I(u64 insn); + void ISET_reg(u64 insn); + void ISET_cbuf(u64 insn); + void ISET_imm(u64 insn); + void ISETP_reg(u64 insn); + void ISETP_cbuf(u64 insn); + void ISETP_imm(u64 insn); + void JCAL(u64 insn); + void JMP(u64 insn); + void JMX(u64 insn); + void KIL(); + void LD(u64 insn); + void LDC(u64 insn); + void LDG(u64 insn); + void LDL(u64 insn); + void LDS(u64 insn); + void LEA_hi_reg(u64 insn); + void LEA_hi_cbuf(u64 insn); + void LEA_lo_reg(u64 insn); + void LEA_lo_cbuf(u64 insn); + void LEA_lo_imm(u64 insn); + void LEPC(u64 insn); + void LONGJMP(u64 
insn); + void LOP_reg(u64 insn); + void LOP_cbuf(u64 insn); + void LOP_imm(u64 insn); + void LOP3_reg(u64 insn); + void LOP3_cbuf(u64 insn); + void LOP3_imm(u64 insn); + void LOP32I(u64 insn); + void MEMBAR(u64 insn); + void MOV_reg(u64 insn); + void MOV_cbuf(u64 insn); + void MOV_imm(u64 insn); + void MOV32I(u64 insn); + void MUFU(u64 insn); + void NOP(u64 insn); + void OUT_reg(u64 insn); + void OUT_cbuf(u64 insn); + void OUT_imm(u64 insn); + void P2R_reg(u64 insn); + void P2R_cbuf(u64 insn); + void P2R_imm(u64 insn); + void PBK(); + void PCNT(); + void PEXIT(u64 insn); + void PIXLD(u64 insn); + void PLONGJMP(u64 insn); + void POPC_reg(u64 insn); + void POPC_cbuf(u64 insn); + void POPC_imm(u64 insn); + void PRET(u64 insn); + void PRMT_reg(u64 insn); + void PRMT_rc(u64 insn); + void PRMT_cr(u64 insn); + void PRMT_imm(u64 insn); + void PSET(u64 insn); + void PSETP(u64 insn); + void R2B(u64 insn); + void R2P_reg(u64 insn); + void R2P_cbuf(u64 insn); + void R2P_imm(u64 insn); + void RAM(u64 insn); + void RED(u64 insn); + void RET(u64 insn); + void RRO_reg(u64 insn); + void RRO_cbuf(u64 insn); + void RRO_imm(u64 insn); + void RTT(u64 insn); + void S2R(u64 insn); + void SAM(u64 insn); + void SEL_reg(u64 insn); + void SEL_cbuf(u64 insn); + void SEL_imm(u64 insn); + void SETCRSPTR(u64 insn); + void SETLMEMBASE(u64 insn); + void SHF_l_reg(u64 insn); + void SHF_l_imm(u64 insn); + void SHF_r_reg(u64 insn); + void SHF_r_imm(u64 insn); + void SHFL(u64 insn); + void SHL_reg(u64 insn); + void SHL_cbuf(u64 insn); + void SHL_imm(u64 insn); + void SHR_reg(u64 insn); + void SHR_cbuf(u64 insn); + void SHR_imm(u64 insn); + void SSY(); + void ST(u64 insn); + void STG(u64 insn); + void STL(u64 insn); + void STP(u64 insn); + void STS(u64 insn); + void SUATOM(u64 insn); + void SUATOM_cas(u64 insn); + void SULD(u64 insn); + void SURED(u64 insn); + void SUST(u64 insn); + void SYNC(u64 insn); + void TEX(u64 insn); + void TEX_b(u64 insn); + void TEXS(u64 insn); + void TLD(u64 insn); + void TLD_b(u64 insn); + void TLD4(u64 insn); + void TLD4_b(u64 insn); + void TLD4S(u64 insn); + void TLDS(u64 insn); + void TMML(u64 insn); + void TMML_b(u64 insn); + void TXA(u64 insn); + void TXD(u64 insn); + void TXD_b(u64 insn); + void TXQ(u64 insn); + void TXQ_b(u64 insn); + void VABSDIFF(u64 insn); + void VABSDIFF4(u64 insn); + void VADD(u64 insn); + void VMAD(u64 insn); + void VMNMX(u64 insn); + void VOTE(u64 insn); + void VOTE_vtg(u64 insn); + void VSET(u64 insn); + void VSETP(u64 insn); + void VSHL(u64 insn); + void VSHR(u64 insn); + void XMAD_reg(u64 insn); + void XMAD_rc(u64 insn); + void XMAD_cr(u64 insn); + void XMAD_imm(u64 insn); + + [[nodiscard]] IR::U32 X(IR::Reg reg); + [[nodiscard]] IR::U64 L(IR::Reg reg); + [[nodiscard]] IR::F32 F(IR::Reg reg); + [[nodiscard]] IR::F64 D(IR::Reg reg); + + void X(IR::Reg dest_reg, const IR::U32& value); + void L(IR::Reg dest_reg, const IR::U64& value); + void F(IR::Reg dest_reg, const IR::F32& value); + void D(IR::Reg dest_reg, const IR::F64& value); + + [[nodiscard]] IR::U32 GetReg8(u64 insn); + [[nodiscard]] IR::U32 GetReg20(u64 insn); + [[nodiscard]] IR::U32 GetReg39(u64 insn); + [[nodiscard]] IR::F32 GetFloatReg8(u64 insn); + [[nodiscard]] IR::F32 GetFloatReg20(u64 insn); + [[nodiscard]] IR::F32 GetFloatReg39(u64 insn); + [[nodiscard]] IR::F64 GetDoubleReg20(u64 insn); + [[nodiscard]] IR::F64 GetDoubleReg39(u64 insn); + + [[nodiscard]] IR::U32 GetCbuf(u64 insn); + [[nodiscard]] IR::F32 GetFloatCbuf(u64 insn); + [[nodiscard]] IR::F64 GetDoubleCbuf(u64 insn); + [[nodiscard]] IR::U64 
GetPackedCbuf(u64 insn); + + [[nodiscard]] IR::U32 GetImm20(u64 insn); + [[nodiscard]] IR::F32 GetFloatImm20(u64 insn); + [[nodiscard]] IR::F64 GetDoubleImm20(u64 insn); + [[nodiscard]] IR::U64 GetPackedImm20(u64 insn); + + [[nodiscard]] IR::U32 GetImm32(u64 insn); + [[nodiscard]] IR::F32 GetFloatImm32(u64 insn); + + void SetZFlag(const IR::U1& value); + void SetSFlag(const IR::U1& value); + void SetCFlag(const IR::U1& value); + void SetOFlag(const IR::U1& value); + + void ResetZero(); + void ResetSFlag(); + void ResetCFlag(); + void ResetOFlag(); +}; + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp new file mode 100644 index 000000000..8ffd84867 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp @@ -0,0 +1,105 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void IADD(TranslatorVisitor& v, u64 insn, const IR::U32 op_b, bool neg_a, bool po, bool sat, bool x, + bool cc) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + } const iadd{insn}; + + if (sat) { + throw NotImplementedException("IADD SAT"); + } + if (x && po) { + throw NotImplementedException("IADD X+PO"); + } + // Operand A is always read from here, negated if needed + IR::U32 op_a{v.X(iadd.src_a)}; + if (neg_a) { + op_a = v.ir.INeg(op_a); + } + // Add both operands + IR::U32 result{v.ir.IAdd(op_a, op_b)}; + if (x) { + const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))}; + result = v.ir.IAdd(result, carry); + } + if (po) { + // .PO adds one to the result + result = v.ir.IAdd(result, v.ir.Imm32(1)); + } + if (cc) { + // Store flags + // TODO: Does this grab the result pre-PO or after? + if (po) { + throw NotImplementedException("IADD CC+PO"); + } + // TODO: How does CC behave when X is set? 
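+        // X combined with CC is rejected below; otherwise Z, S, C and O are taken directly from
+        // the final 32-bit sum.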
+ if (x) { + throw NotImplementedException("IADD X+CC"); + } + v.SetZFlag(v.ir.GetZeroFromOp(result)); + v.SetSFlag(v.ir.GetSignFromOp(result)); + v.SetCFlag(v.ir.GetCarryFromOp(result)); + v.SetOFlag(v.ir.GetOverflowFromOp(result)); + } + // Store result + v.X(iadd.dest_reg, result); +} + +void IADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { + union { + u64 insn; + BitField<43, 1, u64> x; + BitField<47, 1, u64> cc; + BitField<48, 2, u64> three_for_po; + BitField<48, 1, u64> neg_b; + BitField<49, 1, u64> neg_a; + BitField<50, 1, u64> sat; + } const iadd{insn}; + + const bool po{iadd.three_for_po == 3}; + if (!po && iadd.neg_b != 0) { + op_b = v.ir.INeg(op_b); + } + IADD(v, insn, op_b, iadd.neg_a != 0, po, iadd.sat != 0, iadd.x != 0, iadd.cc != 0); +} +} // Anonymous namespace + +void TranslatorVisitor::IADD_reg(u64 insn) { + IADD(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::IADD_cbuf(u64 insn) { + IADD(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::IADD_imm(u64 insn) { + IADD(*this, insn, GetImm20(insn)); +} + +void TranslatorVisitor::IADD32I(u64 insn) { + union { + u64 raw; + BitField<52, 1, u64> cc; + BitField<53, 1, u64> x; + BitField<54, 1, u64> sat; + BitField<55, 2, u64> three_for_po; + BitField<56, 1, u64> neg_a; + } const iadd32i{insn}; + + const bool po{iadd32i.three_for_po == 3}; + const bool neg_a{!po && iadd32i.neg_a != 0}; + IADD(*this, insn, GetImm32(insn), neg_a, po, iadd32i.sat != 0, iadd32i.x != 0, iadd32i.cc != 0); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp new file mode 100644 index 000000000..040cfc10f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp @@ -0,0 +1,122 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
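+// IADD3: three-input integer add. Optional half selectors take the low or high 16 bits of each
+// source, and the intermediate A+B sum may be shifted left or right by 16 before C is added.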
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Shift : u64 { + None, + Right, + Left, +}; +enum class Half : u64 { + All, + Lower, + Upper, +}; + +[[nodiscard]] IR::U32 IntegerHalf(IR::IREmitter& ir, const IR::U32& value, Half half) { + constexpr bool is_signed{false}; + switch (half) { + case Half::All: + return value; + case Half::Lower: + return ir.BitFieldExtract(value, ir.Imm32(0), ir.Imm32(16), is_signed); + case Half::Upper: + return ir.BitFieldExtract(value, ir.Imm32(16), ir.Imm32(16), is_signed); + } + throw NotImplementedException("Invalid half"); +} + +[[nodiscard]] IR::U32 IntegerShift(IR::IREmitter& ir, const IR::U32& value, Shift shift) { + switch (shift) { + case Shift::None: + return value; + case Shift::Right: { + // 33-bit RS IADD3 edge case + const IR::U1 edge_case{ir.GetCarryFromOp(value)}; + const IR::U32 shifted{ir.ShiftRightLogical(value, ir.Imm32(16))}; + return IR::U32{ir.Select(edge_case, ir.IAdd(shifted, ir.Imm32(0x10000)), shifted)}; + } + case Shift::Left: + return ir.ShiftLeftLogical(value, ir.Imm32(16)); + } + throw NotImplementedException("Invalid shift"); +} + +void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_a, IR::U32 op_b, IR::U32 op_c, + Shift shift = Shift::None) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> x; + BitField<49, 1, u64> neg_c; + BitField<50, 1, u64> neg_b; + BitField<51, 1, u64> neg_a; + } iadd3{insn}; + + if (iadd3.neg_a != 0) { + op_a = v.ir.INeg(op_a); + } + if (iadd3.neg_b != 0) { + op_b = v.ir.INeg(op_b); + } + if (iadd3.neg_c != 0) { + op_c = v.ir.INeg(op_c); + } + IR::U32 lhs_1{v.ir.IAdd(op_a, op_b)}; + if (iadd3.x != 0) { + // TODO: How does RS behave when X is set? + if (shift == Shift::Right) { + throw NotImplementedException("IADD3 X+RS"); + } + const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))}; + lhs_1 = v.ir.IAdd(lhs_1, carry); + } + const IR::U32 lhs_2{IntegerShift(v.ir, lhs_1, shift)}; + const IR::U32 result{v.ir.IAdd(lhs_2, op_c)}; + + v.X(iadd3.dest_reg, result); + if (iadd3.cc != 0) { + // TODO: How does CC behave when X is set? 
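+        // As in IADD, X combined with CC is rejected. The overflow flag additionally ORs in
+        // wraparound of the intermediate A+B sum (of_1).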
+ if (iadd3.x != 0) { + throw NotImplementedException("IADD3 X+CC"); + } + v.SetZFlag(v.ir.GetZeroFromOp(result)); + v.SetSFlag(v.ir.GetSignFromOp(result)); + v.SetCFlag(v.ir.GetCarryFromOp(result)); + const IR::U1 of_1{v.ir.ILessThan(lhs_1, op_a, false)}; + v.SetOFlag(v.ir.LogicalOr(v.ir.GetOverflowFromOp(result), of_1)); + } +} +} // Anonymous namespace + +void TranslatorVisitor::IADD3_reg(u64 insn) { + union { + u64 insn; + BitField<37, 2, Shift> shift; + BitField<35, 2, Half> half_a; + BitField<33, 2, Half> half_b; + BitField<31, 2, Half> half_c; + } const iadd3{insn}; + + const auto op_a{IntegerHalf(ir, GetReg8(insn), iadd3.half_a)}; + const auto op_b{IntegerHalf(ir, GetReg20(insn), iadd3.half_b)}; + const auto op_c{IntegerHalf(ir, GetReg39(insn), iadd3.half_c)}; + IADD3(*this, insn, op_a, op_b, op_c, iadd3.shift); +} + +void TranslatorVisitor::IADD3_cbuf(u64 insn) { + IADD3(*this, insn, GetReg8(insn), GetCbuf(insn), GetReg39(insn)); +} + +void TranslatorVisitor::IADD3_imm(u64 insn) { + IADD3(*this, insn, GetReg8(insn), GetImm20(insn), GetReg39(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp new file mode 100644 index 000000000..ba6e01926 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp @@ -0,0 +1,48 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void ICMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& operand) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<48, 1, u64> is_signed; + BitField<49, 3, CompareOp> compare_op; + } const icmp{insn}; + + const IR::U32 zero{v.ir.Imm32(0)}; + const bool is_signed{icmp.is_signed != 0}; + const IR::U1 cmp_result{IntegerCompare(v.ir, operand, zero, icmp.compare_op, is_signed)}; + + const IR::U32 src_reg{v.X(icmp.src_reg)}; + const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)}; + + v.X(icmp.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::ICMP_reg(u64 insn) { + ICMP(*this, insn, GetReg20(insn), GetReg39(insn)); +} + +void TranslatorVisitor::ICMP_rc(u64 insn) { + ICMP(*this, insn, GetReg39(insn), GetCbuf(insn)); +} + +void TranslatorVisitor::ICMP_cr(u64 insn) { + ICMP(*this, insn, GetCbuf(insn), GetReg39(insn)); +} + +void TranslatorVisitor::ICMP_imm(u64 insn) { + ICMP(*this, insn, GetImm20(insn), GetReg39(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp new file mode 100644 index 000000000..8ce1aee04 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp @@ -0,0 +1,80 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
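+// ISET: compares two integers, combines the comparison with a predicate through a boolean op, and
+// writes either all ones (1.0f with .BF) or zero to the destination register.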
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +IR::U1 IsetCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, + CompareOp compare_op, bool is_signed, bool x) { + return x ? ExtendedIntegerCompare(ir, operand_1, operand_2, compare_op, is_signed) + : IntegerCompare(ir, operand_1, operand_2, compare_op, is_signed); +} + +void ISET(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<43, 1, u64> x; + BitField<44, 1, u64> bf; + BitField<45, 2, BooleanOp> bop; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> is_signed; + BitField<49, 3, CompareOp> compare_op; + } const iset{insn}; + + const IR::U32 src_a{v.X(iset.src_reg)}; + const bool is_signed{iset.is_signed != 0}; + const IR::U32 zero{v.ir.Imm32(0)}; + const bool x{iset.x != 0}; + const IR::U1 cmp_result{IsetCompare(v.ir, src_a, src_b, iset.compare_op, is_signed, x)}; + + IR::U1 pred{v.ir.GetPred(iset.pred)}; + if (iset.neg_pred != 0) { + pred = v.ir.LogicalNot(pred); + } + const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, iset.bop)}; + + const IR::U32 one_mask{v.ir.Imm32(-1)}; + const IR::U32 fp_one{v.ir.Imm32(0x3f800000)}; + const IR::U32 pass_result{iset.bf == 0 ? one_mask : fp_one}; + const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)}; + + v.X(iset.dest_reg, result); + if (iset.cc != 0) { + if (x) { + throw NotImplementedException("ISET.CC + X"); + } + const IR::U1 is_zero{v.ir.IEqual(result, zero)}; + v.SetZFlag(is_zero); + if (iset.bf != 0) { + v.ResetSFlag(); + } else { + v.SetSFlag(v.ir.LogicalNot(is_zero)); + } + v.ResetCFlag(); + v.ResetOFlag(); + } +} +} // Anonymous namespace + +void TranslatorVisitor::ISET_reg(u64 insn) { + ISET(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::ISET_cbuf(u64 insn) { + ISET(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::ISET_imm(u64 insn) { + ISET(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp new file mode 100644 index 000000000..0b8119ddd --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp @@ -0,0 +1,182 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
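+// I2F: converts an integer, or a selected 8/16-bit slice of it, to a 16, 32 or 64-bit float,
+// honouring .ABS, .NEG and the encoded rounding mode.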
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class FloatFormat : u64 { + F16 = 1, + F32 = 2, + F64 = 3, +}; + +enum class IntFormat : u64 { + U8 = 0, + U16 = 1, + U32 = 2, + U64 = 3, +}; + +union Encoding { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 2, FloatFormat> float_format; + BitField<10, 2, IntFormat> int_format; + BitField<13, 1, u64> is_signed; + BitField<39, 2, FpRounding> fp_rounding; + BitField<41, 2, u64> selector; + BitField<47, 1, u64> cc; + BitField<45, 1, u64> neg; + BitField<49, 1, u64> abs; +}; + +bool Is64(u64 insn) { + return Encoding{insn}.int_format == IntFormat::U64; +} + +int BitSize(FloatFormat format) { + switch (format) { + case FloatFormat::F16: + return 16; + case FloatFormat::F32: + return 32; + case FloatFormat::F64: + return 64; + } + throw NotImplementedException("Invalid float format {}", format); +} + +IR::U32 SmallAbs(TranslatorVisitor& v, const IR::U32& value, int bitsize) { + const IR::U32 least_value{v.ir.Imm32(-(1 << (bitsize - 1)))}; + const IR::U32 mask{v.ir.ShiftRightArithmetic(value, v.ir.Imm32(bitsize - 1))}; + const IR::U32 absolute{v.ir.BitwiseXor(v.ir.IAdd(value, mask), mask)}; + const IR::U1 is_least{v.ir.IEqual(value, least_value)}; + return IR::U32{v.ir.Select(is_least, value, absolute)}; +} + +void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) { + const Encoding i2f{insn}; + if (i2f.cc != 0) { + throw NotImplementedException("I2F CC"); + } + const bool is_signed{i2f.is_signed != 0}; + int src_bitsize{}; + switch (i2f.int_format) { + case IntFormat::U8: + src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast<u32>(i2f.selector) * 8), + v.ir.Imm32(8), is_signed); + if (i2f.abs != 0) { + src = SmallAbs(v, src, 8); + } + src_bitsize = 8; + break; + case IntFormat::U16: + if (i2f.selector == 1 || i2f.selector == 3) { + throw NotImplementedException("Invalid U16 selector {}", i2f.selector.Value()); + } + src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast<u32>(i2f.selector) * 8), + v.ir.Imm32(16), is_signed); + if (i2f.abs != 0) { + src = SmallAbs(v, src, 16); + } + src_bitsize = 16; + break; + case IntFormat::U32: + case IntFormat::U64: + if (i2f.selector != 0) { + throw NotImplementedException("Unexpected selector {}", i2f.selector.Value()); + } + if (i2f.abs != 0 && is_signed) { + src = v.ir.IAbs(src); + } + src_bitsize = i2f.int_format == IntFormat::U64 ? 64 : 32; + break; + } + const int conversion_src_bitsize{i2f.int_format == IntFormat::U64 ? 
64 : 32}; + const int dst_bitsize{BitSize(i2f.float_format)}; + const IR::FpControl fp_control{ + .no_contraction = false, + .rounding = CastFpRounding(i2f.fp_rounding), + .fmz_mode = IR::FmzMode::DontCare, + }; + auto value{v.ir.ConvertIToF(static_cast<size_t>(dst_bitsize), + static_cast<size_t>(conversion_src_bitsize), is_signed, src, + fp_control)}; + if (i2f.neg != 0) { + if (i2f.abs != 0 || !is_signed) { + // We know the value is positive + value = v.ir.FPNeg(value); + } else { + // Only negate if the input isn't the lowest value + IR::U1 is_least; + if (src_bitsize == 64) { + is_least = v.ir.IEqual(src, v.ir.Imm64(std::numeric_limits<s64>::min())); + } else if (src_bitsize == 32) { + is_least = v.ir.IEqual(src, v.ir.Imm32(std::numeric_limits<s32>::min())); + } else { + const IR::U32 least_value{v.ir.Imm32(-(1 << (src_bitsize - 1)))}; + is_least = v.ir.IEqual(src, least_value); + } + value = IR::F16F32F64{v.ir.Select(is_least, value, v.ir.FPNeg(value))}; + } + } + switch (i2f.float_format) { + case FloatFormat::F16: { + const IR::F16 zero{v.ir.FPConvert(16, v.ir.Imm32(0.0f))}; + v.X(i2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(value, zero))); + break; + } + case FloatFormat::F32: + v.F(i2f.dest_reg, value); + break; + case FloatFormat::F64: { + if (!IR::IsAligned(i2f.dest_reg, 2)) { + throw NotImplementedException("Unaligned destination {}", i2f.dest_reg.Value()); + } + const IR::Value vector{v.ir.UnpackDouble2x32(value)}; + for (int i = 0; i < 2; ++i) { + v.X(i2f.dest_reg + i, IR::U32{v.ir.CompositeExtract(vector, static_cast<size_t>(i))}); + } + break; + } + default: + throw NotImplementedException("Invalid float format {}", i2f.float_format.Value()); + } +} +} // Anonymous namespace + +void TranslatorVisitor::I2F_reg(u64 insn) { + if (Is64(insn)) { + union { + u64 raw; + BitField<20, 8, IR::Reg> reg; + } const value{insn}; + const IR::Value regs{ir.CompositeConstruct(ir.GetReg(value.reg), ir.GetReg(value.reg + 1))}; + I2F(*this, insn, ir.PackUint2x32(regs)); + } else { + I2F(*this, insn, GetReg20(insn)); + } +} + +void TranslatorVisitor::I2F_cbuf(u64 insn) { + if (Is64(insn)) { + I2F(*this, insn, GetPackedCbuf(insn)); + } else { + I2F(*this, insn, GetCbuf(insn)); + } +} + +void TranslatorVisitor::I2F_imm(u64 insn) { + if (Is64(insn)) { + I2F(*this, insn, GetPackedImm20(insn)); + } else { + I2F(*this, insn, GetImm20(insn)); + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp new file mode 100644 index 000000000..5feefc0ce --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp @@ -0,0 +1,82 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
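+// SHF: funnel shift. The low and high 32-bit sources are packed into a 64-bit value and shifted by
+// a wrapped or clamped amount; the low word (right shifts) or high word (left shifts) is written.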
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class MaxShift : u64 { + U32, + Undefined, + U64, + S64, +}; + +IR::U64 PackedShift(IR::IREmitter& ir, const IR::U64& packed_int, const IR::U32& safe_shift, + bool right_shift, bool is_signed) { + if (!right_shift) { + return ir.ShiftLeftLogical(packed_int, safe_shift); + } + if (is_signed) { + return ir.ShiftRightArithmetic(packed_int, safe_shift); + } + return ir.ShiftRightLogical(packed_int, safe_shift); +} + +void SHF(TranslatorVisitor& v, u64 insn, const IR::U32& shift, const IR::U32& high_bits, + bool right_shift) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<0, 8, IR::Reg> lo_bits_reg; + BitField<37, 2, MaxShift> max_shift; + BitField<47, 1, u64> cc; + BitField<48, 2, u64> x_mode; + BitField<50, 1, u64> wrap; + } const shf{insn}; + + if (shf.cc != 0) { + throw NotImplementedException("SHF CC"); + } + if (shf.x_mode != 0) { + throw NotImplementedException("SHF X Mode"); + } + if (shf.max_shift == MaxShift::Undefined) { + throw NotImplementedException("SHF Use of undefined MaxShift value"); + } + const IR::U32 low_bits{v.X(shf.lo_bits_reg)}; + const IR::U64 packed_int{v.ir.PackUint2x32(v.ir.CompositeConstruct(low_bits, high_bits))}; + const IR::U32 max_shift{shf.max_shift == MaxShift::U32 ? v.ir.Imm32(32) : v.ir.Imm32(63)}; + const IR::U32 safe_shift{shf.wrap != 0 + ? v.ir.BitwiseAnd(shift, v.ir.ISub(max_shift, v.ir.Imm32(1))) + : v.ir.UMin(shift, max_shift)}; + + const bool is_signed{shf.max_shift == MaxShift::S64}; + const IR::U64 shifted_value{PackedShift(v.ir, packed_int, safe_shift, right_shift, is_signed)}; + const IR::Value unpacked_value{v.ir.UnpackUint2x32(shifted_value)}; + + const IR::U32 result{v.ir.CompositeExtract(unpacked_value, right_shift ? 0 : 1)}; + v.X(shf.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::SHF_l_reg(u64 insn) { + SHF(*this, insn, GetReg20(insn), GetReg39(insn), false); +} + +void TranslatorVisitor::SHF_l_imm(u64 insn) { + SHF(*this, insn, GetImm20(insn), GetReg39(insn), false); +} + +void TranslatorVisitor::SHF_r_reg(u64 insn) { + SHF(*this, insn, GetReg20(insn), GetReg39(insn), true); +} + +void TranslatorVisitor::SHF_r_imm(u64 insn) { + SHF(*this, insn, GetImm20(insn), GetReg39(insn), true); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp new file mode 100644 index 000000000..1badbacc4 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp @@ -0,0 +1,64 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
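+// IMNMX: computes both the signed or unsigned minimum and maximum of the operands and selects
+// which one to write based on the source predicate.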
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void IMNMX(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<43, 2, u64> mode; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> is_signed; + } const imnmx{insn}; + + if (imnmx.cc != 0) { + throw NotImplementedException("IMNMX CC"); + } + + if (imnmx.mode != 0) { + throw NotImplementedException("IMNMX.MODE"); + } + + const IR::U1 pred{v.ir.GetPred(imnmx.pred)}; + const IR::U32 op_a{v.X(imnmx.src_reg)}; + IR::U32 min; + IR::U32 max; + + if (imnmx.is_signed != 0) { + min = IR::U32{v.ir.SMin(op_a, op_b)}; + max = IR::U32{v.ir.SMax(op_a, op_b)}; + } else { + min = IR::U32{v.ir.UMin(op_a, op_b)}; + max = IR::U32{v.ir.UMax(op_a, op_b)}; + } + if (imnmx.neg_pred != 0) { + std::swap(min, max); + } + + const IR::U32 result{v.ir.Select(pred, min, max)}; + v.X(imnmx.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::IMNMX_reg(u64 insn) { + IMNMX(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::IMNMX_cbuf(u64 insn) { + IMNMX(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::IMNMX_imm(u64 insn) { + IMNMX(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp new file mode 100644 index 000000000..5ece7678d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp @@ -0,0 +1,36 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void POPC(TranslatorVisitor& v, u64 insn, const IR::U32& src) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<40, 1, u64> tilde; + } const popc{insn}; + + const IR::U32 operand = popc.tilde == 0 ? src : v.ir.BitwiseNot(src); + const IR::U32 result = v.ir.BitCount(operand); + v.X(popc.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::POPC_reg(u64 insn) { + POPC(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::POPC_cbuf(u64 insn) { + POPC(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::POPC_imm(u64 insn) { + POPC(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp new file mode 100644 index 000000000..044671943 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp @@ -0,0 +1,86 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
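+// ISCADD: shifts operand A left by an immediate scale and adds operand B. Both negation bits set
+// together encode .PO, which adds one instead of negating either operand.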
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool cc, bool neg_a, bool neg_b, + u64 scale_imm) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> op_a; + } const iscadd{insn}; + + const bool po{neg_a && neg_b}; + IR::U32 op_a{v.X(iscadd.op_a)}; + if (po) { + // When PO is present, add one + op_b = v.ir.IAdd(op_b, v.ir.Imm32(1)); + } else { + // When PO is not present, the bits are interpreted as negation + if (neg_a) { + op_a = v.ir.INeg(op_a); + } + if (neg_b) { + op_b = v.ir.INeg(op_b); + } + } + // With the operands already processed, scale A + const IR::U32 scale{v.ir.Imm32(static_cast<u32>(scale_imm))}; + const IR::U32 scaled_a{v.ir.ShiftLeftLogical(op_a, scale)}; + + const IR::U32 result{v.ir.IAdd(scaled_a, op_b)}; + v.X(iscadd.dest_reg, result); + + if (cc) { + v.SetZFlag(v.ir.GetZeroFromOp(result)); + v.SetSFlag(v.ir.GetSignFromOp(result)); + const IR::U1 carry{v.ir.GetCarryFromOp(result)}; + const IR::U1 overflow{v.ir.GetOverflowFromOp(result)}; + v.SetCFlag(po ? v.ir.LogicalOr(carry, v.ir.GetCarryFromOp(op_b)) : carry); + v.SetOFlag(po ? v.ir.LogicalOr(overflow, v.ir.GetOverflowFromOp(op_b)) : overflow); + } +} + +void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { + union { + u64 raw; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> neg_b; + BitField<49, 1, u64> neg_a; + BitField<39, 5, u64> scale; + } const iscadd{insn}; + + ISCADD(v, insn, op_b, iscadd.cc != 0, iscadd.neg_a != 0, iscadd.neg_b != 0, iscadd.scale); +} + +} // Anonymous namespace + +void TranslatorVisitor::ISCADD_reg(u64 insn) { + ISCADD(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::ISCADD_cbuf(u64 insn) { + ISCADD(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::ISCADD_imm(u64 insn) { + ISCADD(*this, insn, GetImm20(insn)); +} + +void TranslatorVisitor::ISCADD32I(u64 insn) { + union { + u64 raw; + BitField<52, 1, u64> cc; + BitField<53, 5, u64> scale; + } const iscadd{insn}; + + return ISCADD(*this, insn, GetImm32(insn), iscadd.cc != 0, false, false, iscadd.scale); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp new file mode 100644 index 000000000..bee10e5b9 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp @@ -0,0 +1,58 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +IR::U1 IsetpCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, + CompareOp compare_op, bool is_signed, bool x) { + return x ? 
ExtendedIntegerCompare(ir, operand_1, operand_2, compare_op, is_signed) + : IntegerCompare(ir, operand_1, operand_2, compare_op, is_signed); +} + +void ISETP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { + union { + u64 raw; + BitField<0, 3, IR::Pred> dest_pred_b; + BitField<3, 3, IR::Pred> dest_pred_a; + BitField<8, 8, IR::Reg> src_reg_a; + BitField<39, 3, IR::Pred> bop_pred; + BitField<42, 1, u64> neg_bop_pred; + BitField<43, 1, u64> x; + BitField<45, 2, BooleanOp> bop; + BitField<48, 1, u64> is_signed; + BitField<49, 3, CompareOp> compare_op; + } const isetp{insn}; + + const bool is_signed{isetp.is_signed != 0}; + const bool x{isetp.x != 0}; + const BooleanOp bop{isetp.bop}; + const CompareOp compare_op{isetp.compare_op}; + const IR::U32 op_a{v.X(isetp.src_reg_a)}; + const IR::U1 comparison{IsetpCompare(v.ir, op_a, op_b, compare_op, is_signed, x)}; + const IR::U1 bop_pred{v.ir.GetPred(isetp.bop_pred, isetp.neg_bop_pred != 0)}; + const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)}; + const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)}; + v.ir.SetPred(isetp.dest_pred_a, result_a); + v.ir.SetPred(isetp.dest_pred_b, result_b); +} +} // Anonymous namespace + +void TranslatorVisitor::ISETP_reg(u64 insn) { + ISETP(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::ISETP_cbuf(u64 insn) { + ISETP(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::ISETP_imm(u64 insn) { + ISETP(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp new file mode 100644 index 000000000..20af68852 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp @@ -0,0 +1,71 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void SHL(TranslatorVisitor& v, u64 insn, const IR::U32& unsafe_shift) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg_a; + BitField<39, 1, u64> w; + BitField<43, 1, u64> x; + BitField<47, 1, u64> cc; + } const shl{insn}; + + if (shl.x != 0) { + throw NotImplementedException("SHL.X"); + } + if (shl.cc != 0) { + throw NotImplementedException("SHL.CC"); + } + const IR::U32 base{v.X(shl.src_reg_a)}; + IR::U32 result; + if (shl.w != 0) { + // When .W is set, the shift value is wrapped + // To emulate this we just have to wrap it ourselves. + const IR::U32 shift{v.ir.BitwiseAnd(unsafe_shift, v.ir.Imm32(31))}; + result = v.ir.ShiftLeftLogical(base, shift); + } else { + // When .W is not set, the shift value is clamped between 0 and 32. + // To emulate this we have to have in mind the special shift of 32, that evaluates as 0. + // We can safely evaluate an out of bounds shift according to the SPIR-V specification: + // + // https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#OpShiftLeftLogical + // "Shift is treated as unsigned. The resulting value is undefined if Shift is greater than + // or equal to the bit width of the components of Base." 
+ // + // And on the GLASM specification it is also safe to evaluate out of bounds: + // + // https://www.khronos.org/registry/OpenGL/extensions/NV/NV_gpu_program4.txt + // "The results of a shift operation ("<<") are undefined if the value of the second operand + // is negative, or greater than or equal to the number of bits in the first operand." + // + // Emphasis on undefined results in contrast to undefined behavior. + // + const IR::U1 is_safe{v.ir.ILessThan(unsafe_shift, v.ir.Imm32(32), false)}; + const IR::U32 unsafe_result{v.ir.ShiftLeftLogical(base, unsafe_shift)}; + result = IR::U32{v.ir.Select(is_safe, unsafe_result, v.ir.Imm32(0))}; + } + v.X(shl.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::SHL_reg(u64 insn) { + SHL(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::SHL_cbuf(u64 insn) { + SHL(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::SHL_imm(u64 insn) { + SHL(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp new file mode 100644 index 000000000..be00bb605 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp @@ -0,0 +1,66 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void SHR(TranslatorVisitor& v, u64 insn, const IR::U32& shift) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg_a; + BitField<39, 1, u64> is_wrapped; + BitField<40, 1, u64> brev; + BitField<43, 1, u64> xmode; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> is_signed; + } const shr{insn}; + + if (shr.xmode != 0) { + throw NotImplementedException("SHR.XMODE"); + } + if (shr.cc != 0) { + throw NotImplementedException("SHR.CC"); + } + + IR::U32 base{v.X(shr.src_reg_a)}; + if (shr.brev == 1) { + base = v.ir.BitReverse(base); + } + IR::U32 result; + const IR::U32 safe_shift = shr.is_wrapped == 0 ? 
shift : v.ir.BitwiseAnd(shift, v.ir.Imm32(31)); + if (shr.is_signed == 1) { + result = IR::U32{v.ir.ShiftRightArithmetic(base, safe_shift)}; + } else { + result = IR::U32{v.ir.ShiftRightLogical(base, safe_shift)}; + } + + if (shr.is_wrapped == 0) { + const IR::U32 zero{v.ir.Imm32(0)}; + const IR::U32 safe_bits{v.ir.Imm32(32)}; + + const IR::U1 is_negative{v.ir.ILessThan(result, zero, true)}; + const IR::U1 is_safe{v.ir.ILessThan(shift, safe_bits, false)}; + const IR::U32 clamped_value{v.ir.Select(is_negative, v.ir.Imm32(-1), zero)}; + result = IR::U32{v.ir.Select(is_safe, result, clamped_value)}; + } + v.X(shr.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::SHR_reg(u64 insn) { + SHR(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::SHR_cbuf(u64 insn) { + SHR(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::SHR_imm(u64 insn) { + SHR(*this, insn, GetImm20(insn)); +} +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp new file mode 100644 index 000000000..2932cdc42 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp @@ -0,0 +1,135 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class SelectMode : u64 { + Default, + CLO, + CHI, + CSFU, + CBCC, +}; + +enum class Half : u64 { + H0, // Least-significant bits (15:0) + H1, // Most-significant bits (31:16) +}; + +IR::U32 ExtractHalf(TranslatorVisitor& v, const IR::U32& src, Half half, bool is_signed) { + const IR::U32 offset{v.ir.Imm32(half == Half::H1 ? 16 : 0)}; + return v.ir.BitFieldExtract(src, offset, v.ir.Imm32(16), is_signed); +} + +void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c, + SelectMode select_mode, Half half_b, bool psl, bool mrg, bool x) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg_a; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> is_a_signed; + BitField<49, 1, u64> is_b_signed; + BitField<53, 1, Half> half_a; + } const xmad{insn}; + + if (x) { + throw NotImplementedException("XMAD X"); + } + const IR::U32 op_a{ExtractHalf(v, v.X(xmad.src_reg_a), xmad.half_a, xmad.is_a_signed != 0)}; + const IR::U32 op_b{ExtractHalf(v, src_b, half_b, xmad.is_b_signed != 0)}; + + IR::U32 product{v.ir.IMul(op_a, op_b)}; + if (psl) { + // .PSL shifts the product 16 bits + product = v.ir.ShiftLeftLogical(product, v.ir.Imm32(16)); + } + const IR::U32 op_c{[&]() -> IR::U32 { + switch (select_mode) { + case SelectMode::Default: + return src_c; + case SelectMode::CLO: + return ExtractHalf(v, src_c, Half::H0, false); + case SelectMode::CHI: + return ExtractHalf(v, src_c, Half::H1, false); + case SelectMode::CBCC: + return v.ir.IAdd(v.ir.ShiftLeftLogical(src_b, v.ir.Imm32(16)), src_c); + case SelectMode::CSFU: + throw NotImplementedException("XMAD CSFU"); + } + throw NotImplementedException("Invalid XMAD select mode {}", select_mode); + }()}; + IR::U32 result{v.ir.IAdd(product, op_c)}; + if (mrg) { + // .MRG inserts src_b [15:0] into result's [31:16]. 
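+        // i.e. result = ((src_b & 0xffff) << 16) | (result & 0xffff)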
+ const IR::U32 lsb_b{ExtractHalf(v, src_b, Half::H0, false)}; + result = v.ir.BitFieldInsert(result, lsb_b, v.ir.Imm32(16), v.ir.Imm32(16)); + } + if (xmad.cc) { + throw NotImplementedException("XMAD CC"); + } + // Store result + v.X(xmad.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::XMAD_reg(u64 insn) { + union { + u64 raw; + BitField<35, 1, Half> half_b; + BitField<36, 1, u64> psl; + BitField<37, 1, u64> mrg; + BitField<38, 1, u64> x; + BitField<50, 3, SelectMode> select_mode; + } const xmad{insn}; + + XMAD(*this, insn, GetReg20(insn), GetReg39(insn), xmad.select_mode, xmad.half_b, xmad.psl != 0, + xmad.mrg != 0, xmad.x != 0); +} + +void TranslatorVisitor::XMAD_rc(u64 insn) { + union { + u64 raw; + BitField<50, 2, SelectMode> select_mode; + BitField<52, 1, Half> half_b; + BitField<54, 1, u64> x; + } const xmad{insn}; + + XMAD(*this, insn, GetReg39(insn), GetCbuf(insn), xmad.select_mode, xmad.half_b, false, false, + xmad.x != 0); +} + +void TranslatorVisitor::XMAD_cr(u64 insn) { + union { + u64 raw; + BitField<50, 2, SelectMode> select_mode; + BitField<52, 1, Half> half_b; + BitField<54, 1, u64> x; + BitField<55, 1, u64> psl; + BitField<56, 1, u64> mrg; + } const xmad{insn}; + + XMAD(*this, insn, GetCbuf(insn), GetReg39(insn), xmad.select_mode, xmad.half_b, xmad.psl != 0, + xmad.mrg != 0, xmad.x != 0); +} + +void TranslatorVisitor::XMAD_imm(u64 insn) { + union { + u64 raw; + BitField<20, 16, u64> src_b; + BitField<36, 1, u64> psl; + BitField<37, 1, u64> mrg; + BitField<38, 1, u64> x; + BitField<50, 3, SelectMode> select_mode; + } const xmad{insn}; + + XMAD(*this, insn, ir.Imm32(static_cast<u32>(xmad.src_b)), GetReg39(insn), xmad.select_mode, + Half::H0, xmad.psl != 0, xmad.mrg != 0, xmad.x != 0); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp new file mode 100644 index 000000000..53e8d8923 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp @@ -0,0 +1,126 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class IntegerWidth : u64 { + Byte, + Short, + Word, +}; + +[[nodiscard]] IR::U32 WidthSize(IR::IREmitter& ir, IntegerWidth width) { + switch (width) { + case IntegerWidth::Byte: + return ir.Imm32(8); + case IntegerWidth::Short: + return ir.Imm32(16); + case IntegerWidth::Word: + return ir.Imm32(32); + default: + throw NotImplementedException("Invalid width {}", width); + } +} + +[[nodiscard]] IR::U32 ConvertInteger(IR::IREmitter& ir, const IR::U32& src, + IntegerWidth dst_width) { + const IR::U32 zero{ir.Imm32(0)}; + const IR::U32 count{WidthSize(ir, dst_width)}; + return ir.BitFieldExtract(src, zero, count, false); +} + +[[nodiscard]] IR::U32 SaturateInteger(IR::IREmitter& ir, const IR::U32& src, IntegerWidth dst_width, + bool dst_signed, bool src_signed) { + IR::U32 min{}; + IR::U32 max{}; + const IR::U32 zero{ir.Imm32(0)}; + switch (dst_width) { + case IntegerWidth::Byte: + min = dst_signed && src_signed ? ir.Imm32(0xffffff80) : zero; + max = dst_signed ? ir.Imm32(0x7f) : ir.Imm32(0xff); + break; + case IntegerWidth::Short: + min = dst_signed && src_signed ? 
ir.Imm32(0xffff8000) : zero; + max = dst_signed ? ir.Imm32(0x7fff) : ir.Imm32(0xffff); + break; + case IntegerWidth::Word: + min = dst_signed && src_signed ? ir.Imm32(0x80000000) : zero; + max = dst_signed ? ir.Imm32(0x7fffffff) : ir.Imm32(0xffffffff); + break; + default: + throw NotImplementedException("Invalid width {}", dst_width); + } + const IR::U32 value{!dst_signed && src_signed ? ir.SMax(zero, src) : src}; + return dst_signed && src_signed ? ir.SClamp(value, min, max) : ir.UClamp(value, min, max); +} + +void I2I(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 2, IntegerWidth> dst_fmt; + BitField<12, 1, u64> dst_fmt_sign; + BitField<10, 2, IntegerWidth> src_fmt; + BitField<13, 1, u64> src_fmt_sign; + BitField<41, 3, u64> selector; + BitField<45, 1, u64> neg; + BitField<47, 1, u64> cc; + BitField<49, 1, u64> abs; + BitField<50, 1, u64> sat; + } const i2i{insn}; + + if (i2i.src_fmt == IntegerWidth::Short && (i2i.selector == 1 || i2i.selector == 3)) { + throw NotImplementedException("16-bit source format incompatible with selector {}", + i2i.selector); + } + if (i2i.src_fmt == IntegerWidth::Word && i2i.selector != 0) { + throw NotImplementedException("32-bit source format incompatible with selector {}", + i2i.selector); + } + + const s32 selector{static_cast<s32>(i2i.selector)}; + const IR::U32 offset{v.ir.Imm32(selector * 8)}; + const IR::U32 count{WidthSize(v.ir, i2i.src_fmt)}; + const bool src_signed{i2i.src_fmt_sign != 0}; + const bool dst_signed{i2i.dst_fmt_sign != 0}; + const bool sat{i2i.sat != 0}; + + IR::U32 src_values{v.ir.BitFieldExtract(src_a, offset, count, src_signed)}; + if (i2i.abs != 0) { + src_values = v.ir.IAbs(src_values); + } + if (i2i.neg != 0) { + src_values = v.ir.INeg(src_values); + } + const IR::U32 result{ + sat ? SaturateInteger(v.ir, src_values, i2i.dst_fmt, dst_signed, src_signed) + : ConvertInteger(v.ir, src_values, i2i.dst_fmt)}; + + v.X(i2i.dest_reg, result); + if (i2i.cc != 0) { + v.SetZFlag(v.ir.GetZeroFromOp(result)); + v.SetSFlag(v.ir.GetSignFromOp(result)); + v.ResetCFlag(); + v.ResetOFlag(); + } +} +} // Anonymous namespace + +void TranslatorVisitor::I2I_reg(u64 insn) { + I2I(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::I2I_cbuf(u64 insn) { + I2I(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::I2I_imm(u64 insn) { + I2I(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp new file mode 100644 index 000000000..9b85f8059 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp @@ -0,0 +1,53 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
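+// ISBERD: internal stage buffer entry read. Only the default mode is accepted and the instruction
+// is currently stubbed as a register-to-register move.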
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Mode : u64 { + Default, + Patch, + Prim, + Attr, +}; + +enum class Shift : u64 { + Default, + U16, + B32, +}; + +} // Anonymous namespace + +void TranslatorVisitor::ISBERD(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<31, 1, u64> skew; + BitField<32, 1, u64> o; + BitField<33, 2, Mode> mode; + BitField<47, 2, Shift> shift; + } const isberd{insn}; + + if (isberd.skew != 0) { + throw NotImplementedException("SKEW"); + } + if (isberd.o != 0) { + throw NotImplementedException("O"); + } + if (isberd.mode != Mode::Default) { + throw NotImplementedException("Mode {}", isberd.mode.Value()); + } + if (isberd.shift != Shift::Default) { + throw NotImplementedException("Shift {}", isberd.shift.Value()); + } + LOG_WARNING(Shader, "(STUBBED) called"); + X(isberd.dest_reg, X(isberd.src_reg)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp new file mode 100644 index 000000000..2300088e3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp @@ -0,0 +1,62 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/load_constant.h" + +namespace Shader::Maxwell { +using namespace LDC; +namespace { +std::pair<IR::U32, IR::U32> Slot(IR::IREmitter& ir, Mode mode, const IR::U32& imm_index, + const IR::U32& reg, const IR::U32& imm) { + switch (mode) { + case Mode::Default: + return {imm_index, ir.IAdd(reg, imm)}; + default: + break; + } + throw NotImplementedException("Mode {}", mode); +} +} // Anonymous namespace + +void TranslatorVisitor::LDC(u64 insn) { + const Encoding ldc{insn}; + const IR::U32 imm_index{ir.Imm32(static_cast<u32>(ldc.index))}; + const IR::U32 reg{X(ldc.src_reg)}; + const IR::U32 imm{ir.Imm32(static_cast<s32>(ldc.offset))}; + const auto [index, offset]{Slot(ir, ldc.mode, imm_index, reg, imm)}; + switch (ldc.size) { + case Size::U8: + X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 8, false)}); + break; + case Size::S8: + X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 8, true)}); + break; + case Size::U16: + X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 16, false)}); + break; + case Size::S16: + X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 16, true)}); + break; + case Size::B32: + X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 32, false)}); + break; + case Size::B64: { + if (!IR::IsAligned(ldc.dest_reg, 2)) { + throw NotImplementedException("Unaligned destination register"); + } + const IR::Value vector{ir.GetCbuf(index, offset, 64, false)}; + for (int i = 0; i < 2; ++i) { + X(ldc.dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))}); + } + break; + } + default: + throw NotImplementedException("Invalid size {}", ldc.size.Value()); + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h new file mode 100644 index 
000000000..3074ea0e3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h @@ -0,0 +1,39 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/reg.h" + +namespace Shader::Maxwell::LDC { + +enum class Mode : u64 { + Default, + IL, + IS, + ISL, +}; + +enum class Size : u64 { + U8, + S8, + U16, + S16, + B32, + B64, +}; + +union Encoding { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<20, 16, s64> offset; + BitField<36, 5, u64> index; + BitField<44, 2, Mode> mode; + BitField<48, 3, Size> size; +}; + +} // namespace Shader::Maxwell::LDC diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp new file mode 100644 index 000000000..4a0f04e47 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp @@ -0,0 +1,108 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void LEA_hi(TranslatorVisitor& v, u64 insn, const IR::U32& base, IR::U32 offset_hi, u64 scale, + bool neg, bool x) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> offset_lo_reg; + BitField<47, 1, u64> cc; + BitField<48, 3, IR::Pred> pred; + } const lea{insn}; + + if (x) { + throw NotImplementedException("LEA.HI X"); + } + if (lea.pred != IR::Pred::PT) { + throw NotImplementedException("LEA.HI Pred"); + } + if (lea.cc != 0) { + throw NotImplementedException("LEA.HI CC"); + } + + const IR::U32 offset_lo{v.X(lea.offset_lo_reg)}; + const IR::U64 packed_offset{v.ir.PackUint2x32(v.ir.CompositeConstruct(offset_lo, offset_hi))}; + const IR::U64 offset{neg ? IR::U64{v.ir.INeg(packed_offset)} : packed_offset}; + + const s32 hi_scale{32 - static_cast<s32>(scale)}; + const IR::U64 scaled_offset{v.ir.ShiftRightLogical(offset, v.ir.Imm32(hi_scale))}; + const IR::U32 scaled_offset_w0{v.ir.CompositeExtract(v.ir.UnpackUint2x32(scaled_offset), 0)}; + + IR::U32 result{v.ir.IAdd(base, scaled_offset_w0)}; + v.X(lea.dest_reg, result); +} + +void LEA_lo(TranslatorVisitor& v, u64 insn, const IR::U32& base) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> offset_lo_reg; + BitField<39, 5, u64> scale; + BitField<45, 1, u64> neg; + BitField<46, 1, u64> x; + BitField<47, 1, u64> cc; + BitField<48, 3, IR::Pred> pred; + } const lea{insn}; + if (lea.x != 0) { + throw NotImplementedException("LEA.LO X"); + } + if (lea.pred != IR::Pred::PT) { + throw NotImplementedException("LEA.LO Pred"); + } + if (lea.cc != 0) { + throw NotImplementedException("LEA.LO CC"); + } + + const IR::U32 offset_lo{v.X(lea.offset_lo_reg)}; + const s32 scale{static_cast<s32>(lea.scale)}; + const IR::U32 offset{lea.neg != 0 ? 
IR::U32{v.ir.INeg(offset_lo)} : offset_lo}; + const IR::U32 scaled_offset{v.ir.ShiftLeftLogical(offset, v.ir.Imm32(scale))}; + + IR::U32 result{v.ir.IAdd(base, scaled_offset)}; + v.X(lea.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::LEA_hi_reg(u64 insn) { + union { + u64 insn; + BitField<28, 5, u64> scale; + BitField<37, 1, u64> neg; + BitField<38, 1, u64> x; + } const lea{insn}; + + LEA_hi(*this, insn, GetReg20(insn), GetReg39(insn), lea.scale, lea.neg != 0, lea.x != 0); +} + +void TranslatorVisitor::LEA_hi_cbuf(u64 insn) { + union { + u64 insn; + BitField<51, 5, u64> scale; + BitField<56, 1, u64> neg; + BitField<57, 1, u64> x; + } const lea{insn}; + + LEA_hi(*this, insn, GetCbuf(insn), GetReg39(insn), lea.scale, lea.neg != 0, lea.x != 0); +} + +void TranslatorVisitor::LEA_lo_reg(u64 insn) { + LEA_lo(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::LEA_lo_cbuf(u64 insn) { + LEA_lo(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::LEA_lo_imm(u64 insn) { + LEA_lo(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp new file mode 100644 index 000000000..924fb7a40 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp @@ -0,0 +1,196 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Size : u64 { + B32, + B64, + B96, + B128, +}; + +enum class InterpolationMode : u64 { + Pass, + Multiply, + Constant, + Sc, +}; + +enum class SampleMode : u64 { + Default, + Centroid, + Offset, +}; + +u32 NumElements(Size size) { + switch (size) { + case Size::B32: + return 1; + case Size::B64: + return 2; + case Size::B96: + return 3; + case Size::B128: + return 4; + } + throw InvalidArgument("Invalid size {}", size); +} + +template <typename F> +void HandleIndexed(TranslatorVisitor& v, IR::Reg index_reg, u32 num_elements, F&& f) { + const IR::U32 index_value{v.X(index_reg)}; + for (u32 element = 0; element < num_elements; ++element) { + const IR::U32 final_offset{ + element == 0 ? 
index_value : IR::U32{v.ir.IAdd(index_value, v.ir.Imm32(element * 4U))}}; + f(element, final_offset); + } +} + +} // Anonymous namespace + +void TranslatorVisitor::ALD(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> index_reg; + BitField<20, 10, u64> absolute_offset; + BitField<20, 11, s64> relative_offset; + BitField<39, 8, IR::Reg> vertex_reg; + BitField<32, 1, u64> o; + BitField<31, 1, u64> patch; + BitField<47, 2, Size> size; + } const ald{insn}; + + const u64 offset{ald.absolute_offset.Value()}; + if (offset % 4 != 0) { + throw NotImplementedException("Unaligned absolute offset {}", offset); + } + const IR::U32 vertex{X(ald.vertex_reg)}; + const u32 num_elements{NumElements(ald.size)}; + if (ald.index_reg == IR::Reg::RZ) { + for (u32 element = 0; element < num_elements; ++element) { + if (ald.patch != 0) { + const IR::Patch patch{offset / 4 + element}; + F(ald.dest_reg + static_cast<int>(element), ir.GetPatch(patch)); + } else { + const IR::Attribute attr{offset / 4 + element}; + F(ald.dest_reg + static_cast<int>(element), ir.GetAttribute(attr, vertex)); + } + } + return; + } + if (ald.patch != 0) { + throw NotImplementedException("Indirect patch read"); + } + HandleIndexed(*this, ald.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { + F(ald.dest_reg + static_cast<int>(element), ir.GetAttributeIndexed(final_offset, vertex)); + }); +} + +void TranslatorVisitor::AST(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> src_reg; + BitField<8, 8, IR::Reg> index_reg; + BitField<20, 10, u64> absolute_offset; + BitField<20, 11, s64> relative_offset; + BitField<31, 1, u64> patch; + BitField<39, 8, IR::Reg> vertex_reg; + BitField<47, 2, Size> size; + } const ast{insn}; + + if (ast.index_reg != IR::Reg::RZ) { + throw NotImplementedException("Indexed store"); + } + const u64 offset{ast.absolute_offset.Value()}; + if (offset % 4 != 0) { + throw NotImplementedException("Unaligned absolute offset {}", offset); + } + const IR::U32 vertex{X(ast.vertex_reg)}; + const u32 num_elements{NumElements(ast.size)}; + if (ast.index_reg == IR::Reg::RZ) { + for (u32 element = 0; element < num_elements; ++element) { + if (ast.patch != 0) { + const IR::Patch patch{offset / 4 + element}; + ir.SetPatch(patch, F(ast.src_reg + static_cast<int>(element))); + } else { + const IR::Attribute attr{offset / 4 + element}; + ir.SetAttribute(attr, F(ast.src_reg + static_cast<int>(element)), vertex); + } + } + return; + } + if (ast.patch != 0) { + throw NotImplementedException("Indexed tessellation patch store"); + } + HandleIndexed(*this, ast.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { + ir.SetAttributeIndexed(final_offset, F(ast.src_reg + static_cast<int>(element)), vertex); + }); +} + +void TranslatorVisitor::IPA(u64 insn) { + // IPA is the instruction used to read varyings from a fragment shader. + // gl_FragCoord is mapped to the gl_Position attribute. + // It yields unknown results when used outside of the fragment shader stage. + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> index_reg; + BitField<20, 8, IR::Reg> multiplier; + BitField<30, 8, IR::Attribute> attribute; + BitField<38, 1, u64> idx; + BitField<51, 1, u64> sat; + BitField<52, 2, SampleMode> sample_mode; + BitField<54, 2, InterpolationMode> interpolation_mode; + } const ipa{insn}; + + // Indexed IPAs are used for indexed varyings. 
+ // For example: + // + // in vec4 colors[4]; + // uniform int idx; + // void main() { + // gl_FragColor = colors[idx]; + // } + const bool is_indexed{ipa.idx != 0 && ipa.index_reg != IR::Reg::RZ}; + const IR::Attribute attribute{ipa.attribute}; + IR::F32 value{is_indexed ? ir.GetAttributeIndexed(X(ipa.index_reg)) + : ir.GetAttribute(attribute)}; + if (IR::IsGeneric(attribute)) { + const ProgramHeader& sph{env.SPH()}; + const u32 attr_index{IR::GenericAttributeIndex(attribute)}; + const u32 element{static_cast<u32>(attribute) % 4}; + const std::array input_map{sph.ps.GenericInputMap(attr_index)}; + const bool is_perspective{input_map[element] == Shader::PixelImap::Perspective}; + if (is_perspective) { + const IR::F32 position_w{ir.GetAttribute(IR::Attribute::PositionW)}; + value = ir.FPMul(value, position_w); + } + } + if (ipa.interpolation_mode == InterpolationMode::Multiply) { + value = ir.FPMul(value, F(ipa.multiplier)); + } + + // Saturated IPAs are generally generated out of clamped varyings. + // For example: clamp(some_varying, 0.0, 1.0) + const bool is_saturated{ipa.sat != 0}; + if (is_saturated) { + if (attribute == IR::Attribute::FrontFace) { + throw NotImplementedException("IPA.SAT on FrontFace"); + } + value = ir.FPSaturate(value); + } + + F(ipa.dest_reg, value); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp new file mode 100644 index 000000000..d2a1dbf61 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp @@ -0,0 +1,218 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
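The IPA lowering above applies its modifiers in a fixed order: perspective-mapped generic attributes are re-multiplied by the position W attribute, the Multiply interpolation mode scales by the multiplier register, and .SAT clamps to [0, 1]. A minimal scalar sketch of that ordering (the helper name and parameters are illustrative, not part of the commit):

#include <algorithm>

// Scalar model of the IPA modifier order used above.
float ModelIpa(float attribute, float position_w, float multiplier,
               bool is_perspective, bool multiply_mode, bool saturate) {
    float value = attribute;
    if (is_perspective) {
        value *= position_w; // perspective inputs are re-scaled by the W attribute
    }
    if (multiply_mode) {
        value *= multiplier; // InterpolationMode::Multiply applies the multiplier register
    }
    if (saturate) {
        value = std::clamp(value, 0.0f, 1.0f); // IPA.SAT, e.g. clamp(varying, 0.0, 1.0)
    }
    return value;
}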
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Size : u64 { + U8, + S8, + U16, + S16, + B32, + B64, + B128, +}; + +IR::U32 Offset(TranslatorVisitor& v, u64 insn) { + union { + u64 raw; + BitField<8, 8, IR::Reg> offset_reg; + BitField<20, 24, u64> absolute_offset; + BitField<20, 24, s64> relative_offset; + } const encoding{insn}; + + if (encoding.offset_reg == IR::Reg::RZ) { + return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset)); + } else { + const s32 relative{static_cast<s32>(encoding.relative_offset.Value())}; + return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative)); + } +} + +std::pair<IR::U32, IR::U32> WordOffset(TranslatorVisitor& v, u64 insn) { + const IR::U32 offset{Offset(v, insn)}; + if (offset.IsImmediate()) { + return {v.ir.Imm32(offset.U32() / 4), offset}; + } else { + return {v.ir.ShiftRightArithmetic(offset, v.ir.Imm32(2)), offset}; + } +} + +std::pair<int, bool> GetSize(u64 insn) { + union { + u64 raw; + BitField<48, 3, Size> size; + } const encoding{insn}; + + switch (encoding.size) { + case Size::U8: + return {8, false}; + case Size::S8: + return {8, true}; + case Size::U16: + return {16, false}; + case Size::S16: + return {16, true}; + case Size::B32: + return {32, false}; + case Size::B64: + return {64, false}; + case Size::B128: + return {128, false}; + default: + throw NotImplementedException("Invalid size {}", encoding.size.Value()); + } +} + +IR::Reg Reg(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> reg; + } const encoding{insn}; + + return encoding.reg; +} + +IR::U32 ByteOffset(IR::IREmitter& ir, const IR::U32& offset) { + return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(24)); +} + +IR::U32 ShortOffset(IR::IREmitter& ir, const IR::U32& offset) { + return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(16)); +} + +IR::U32 LoadLocal(TranslatorVisitor& v, const IR::U32& word_offset, const IR::U32& offset) { + const IR::U32 local_memory_size{v.ir.Imm32(v.env.LocalMemorySize())}; + const IR::U1 in_bounds{v.ir.ILessThan(offset, local_memory_size, false)}; + return IR::U32{v.ir.Select(in_bounds, v.ir.LoadLocal(word_offset), v.ir.Imm32(0))}; +} +} // Anonymous namespace + +void TranslatorVisitor::LDL(u64 insn) { + const auto [word_offset, offset]{WordOffset(*this, insn)}; + const IR::U32 word{LoadLocal(*this, word_offset, offset)}; + const IR::Reg dest{Reg(insn)}; + const auto [bit_size, is_signed]{GetSize(insn)}; + switch (bit_size) { + case 8: { + const IR::U32 bit{ByteOffset(ir, offset)}; + X(dest, ir.BitFieldExtract(word, bit, ir.Imm32(8), is_signed)); + break; + } + case 16: { + const IR::U32 bit{ShortOffset(ir, offset)}; + X(dest, ir.BitFieldExtract(word, bit, ir.Imm32(16), is_signed)); + break; + } + case 32: + case 64: + case 128: + if (!IR::IsAligned(dest, static_cast<size_t>(bit_size / 32))) { + throw NotImplementedException("Unaligned destination register {}", dest); + } + X(dest, word); + for (int i = 1; i < bit_size / 32; ++i) { + const IR::U32 sub_word_offset{ir.IAdd(word_offset, ir.Imm32(i))}; + const IR::U32 sub_offset{ir.IAdd(offset, ir.Imm32(i * 4))}; + X(dest + i, LoadLocal(*this, sub_word_offset, sub_offset)); + } + break; + } +} + +void TranslatorVisitor::LDS(u64 insn) { + const IR::U32 offset{Offset(*this, insn)}; + const IR::Reg dest{Reg(insn)}; + const auto [bit_size, is_signed]{GetSize(insn)}; + const IR::Value 
value{ir.LoadShared(bit_size, is_signed, offset)}; + switch (bit_size) { + case 8: + case 16: + case 32: + X(dest, IR::U32{value}); + break; + case 64: + case 128: + if (!IR::IsAligned(dest, static_cast<size_t>(bit_size / 32))) { + throw NotImplementedException("Unaligned destination register {}", dest); + } + for (int element = 0; element < bit_size / 32; ++element) { + X(dest + element, IR::U32{ir.CompositeExtract(value, static_cast<size_t>(element))}); + } + break; + } +} + +void TranslatorVisitor::STL(u64 insn) { + const auto [word_offset, offset]{WordOffset(*this, insn)}; + if (offset.IsImmediate()) { + // TODO: Support storing out of bounds at runtime + if (offset.U32() >= env.LocalMemorySize()) { + LOG_WARNING(Shader, "Storing local memory at 0x{:x} with a size of 0x{:x}, dropping", + offset.U32(), env.LocalMemorySize()); + return; + } + } + const IR::Reg reg{Reg(insn)}; + const IR::U32 src{X(reg)}; + const int bit_size{GetSize(insn).first}; + switch (bit_size) { + case 8: { + const IR::U32 bit{ByteOffset(ir, offset)}; + const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(8))}; + ir.WriteLocal(word_offset, value); + break; + } + case 16: { + const IR::U32 bit{ShortOffset(ir, offset)}; + const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(16))}; + ir.WriteLocal(word_offset, value); + break; + } + case 32: + case 64: + case 128: + if (!IR::IsAligned(reg, static_cast<size_t>(bit_size / 32))) { + throw NotImplementedException("Unaligned source register"); + } + ir.WriteLocal(word_offset, src); + for (int i = 1; i < bit_size / 32; ++i) { + ir.WriteLocal(ir.IAdd(word_offset, ir.Imm32(i)), X(reg + i)); + } + break; + } +} + +void TranslatorVisitor::STS(u64 insn) { + const IR::U32 offset{Offset(*this, insn)}; + const IR::Reg reg{Reg(insn)}; + const int bit_size{GetSize(insn).first}; + switch (bit_size) { + case 8: + case 16: + case 32: + ir.WriteShared(bit_size, offset, X(reg)); + break; + case 64: + if (!IR::IsAligned(reg, 2)) { + throw NotImplementedException("Unaligned source register {}", reg); + } + ir.WriteShared(64, offset, ir.CompositeConstruct(X(reg), X(reg + 1))); + break; + case 128: { + if (!IR::IsAligned(reg, 2)) { + throw NotImplementedException("Unaligned source register {}", reg); + } + const IR::Value vector{ir.CompositeConstruct(X(reg), X(reg + 1), X(reg + 2), X(reg + 3))}; + ir.WriteShared(128, offset, vector); + break; + } + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp new file mode 100644 index 000000000..36c5cff2f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp @@ -0,0 +1,184 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class LoadSize : u64 { + U8, // Zero-extend + S8, // Sign-extend + U16, // Zero-extend + S16, // Sign-extend + B32, + B64, + B128, + U128, // ??? 
+}; + +enum class StoreSize : u64 { + U8, // Zero-extend + S8, // Sign-extend + U16, // Zero-extend + S16, // Sign-extend + B32, + B64, + B128, +}; + +// See Table 27 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html +enum class LoadCache : u64 { + CA, // Cache at all levels, likely to be accessed again + CG, // Cache at global level (cache in L2 and below, not L1) + CI, // ??? + CV, // Don't cache and fetch again (consider cached system memory lines stale, fetch again) +}; + +// See Table 28 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html +enum class StoreCache : u64 { + WB, // Cache write-back all coherent levels + CG, // Cache at global level + CS, // Cache streaming, likely to be accessed once + WT, // Cache write-through (to system memory) +}; + +IR::U64 Address(TranslatorVisitor& v, u64 insn) { + union { + u64 raw; + BitField<8, 8, IR::Reg> addr_reg; + BitField<20, 24, s64> addr_offset; + BitField<20, 24, u64> rz_addr_offset; + BitField<45, 1, u64> e; + } const mem{insn}; + + const IR::U64 address{[&]() -> IR::U64 { + if (mem.e == 0) { + // LDG/STG without .E uses a 32-bit pointer, zero-extend it + return v.ir.UConvert(64, v.X(mem.addr_reg)); + } + if (!IR::IsAligned(mem.addr_reg, 2)) { + throw NotImplementedException("Unaligned address register"); + } + // Pack two registers to build the 64-bit address + return v.ir.PackUint2x32(v.ir.CompositeConstruct(v.X(mem.addr_reg), v.X(mem.addr_reg + 1))); + }()}; + const u64 addr_offset{[&]() -> u64 { + if (mem.addr_reg == IR::Reg::RZ) { + // When RZ is used, the address is an absolute address + return static_cast<u64>(mem.rz_addr_offset.Value()); + } else { + return static_cast<u64>(mem.addr_offset.Value()); + } + }()}; + // Apply the offset + return v.ir.IAdd(address, v.ir.Imm64(addr_offset)); +} +} // Anonymous namespace + +void TranslatorVisitor::LDG(u64 insn) { + // LDG loads global memory into registers + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<46, 2, LoadCache> cache; + BitField<48, 3, LoadSize> size; + } const ldg{insn}; + + // Pointer to load data from + const IR::U64 address{Address(*this, insn)}; + const IR::Reg dest_reg{ldg.dest_reg}; + switch (ldg.size) { + case LoadSize::U8: + X(dest_reg, ir.LoadGlobalU8(address)); + break; + case LoadSize::S8: + X(dest_reg, ir.LoadGlobalS8(address)); + break; + case LoadSize::U16: + X(dest_reg, ir.LoadGlobalU16(address)); + break; + case LoadSize::S16: + X(dest_reg, ir.LoadGlobalS16(address)); + break; + case LoadSize::B32: + X(dest_reg, ir.LoadGlobal32(address)); + break; + case LoadSize::B64: { + if (!IR::IsAligned(dest_reg, 2)) { + throw NotImplementedException("Unaligned data registers"); + } + const IR::Value vector{ir.LoadGlobal64(address)}; + for (int i = 0; i < 2; ++i) { + X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))}); + } + break; + } + case LoadSize::B128: + case LoadSize::U128: { + if (!IR::IsAligned(dest_reg, 4)) { + throw NotImplementedException("Unaligned data registers"); + } + const IR::Value vector{ir.LoadGlobal128(address)}; + for (int i = 0; i < 4; ++i) { + X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))}); + } + break; + } + default: + throw NotImplementedException("Invalid LDG size {}", ldg.size.Value()); + } +} + +void TranslatorVisitor::STG(u64 insn) { + // STG stores registers into global memory. 
+ union { + u64 raw; + BitField<0, 8, IR::Reg> data_reg; + BitField<46, 2, StoreCache> cache; + BitField<48, 3, StoreSize> size; + } const stg{insn}; + + // Pointer to store data into + const IR::U64 address{Address(*this, insn)}; + const IR::Reg data_reg{stg.data_reg}; + switch (stg.size) { + case StoreSize::U8: + ir.WriteGlobalU8(address, X(data_reg)); + break; + case StoreSize::S8: + ir.WriteGlobalS8(address, X(data_reg)); + break; + case StoreSize::U16: + ir.WriteGlobalU16(address, X(data_reg)); + break; + case StoreSize::S16: + ir.WriteGlobalS16(address, X(data_reg)); + break; + case StoreSize::B32: + ir.WriteGlobal32(address, X(data_reg)); + break; + case StoreSize::B64: { + if (!IR::IsAligned(data_reg, 2)) { + throw NotImplementedException("Unaligned data registers"); + } + const IR::Value vector{ir.CompositeConstruct(X(data_reg), X(data_reg + 1))}; + ir.WriteGlobal64(address, vector); + break; + } + case StoreSize::B128: + if (!IR::IsAligned(data_reg, 4)) { + throw NotImplementedException("Unaligned data registers"); + } + const IR::Value vector{ + ir.CompositeConstruct(X(data_reg), X(data_reg + 1), X(data_reg + 2), X(data_reg + 3))}; + ir.WriteGlobal128(address, vector); + break; + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp new file mode 100644 index 000000000..92cd27ed4 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp @@ -0,0 +1,116 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class LogicalOp : u64 { + AND, + OR, + XOR, + PASS_B, +}; + +[[nodiscard]] IR::U32 LogicalOperation(IR::IREmitter& ir, const IR::U32& operand_1, + const IR::U32& operand_2, LogicalOp op) { + switch (op) { + case LogicalOp::AND: + return ir.BitwiseAnd(operand_1, operand_2); + case LogicalOp::OR: + return ir.BitwiseOr(operand_1, operand_2); + case LogicalOp::XOR: + return ir.BitwiseXor(operand_1, operand_2); + case LogicalOp::PASS_B: + return operand_2; + default: + throw NotImplementedException("Invalid Logical operation {}", op); + } +} + +void LOP(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool x, bool cc, bool inv_a, bool inv_b, + LogicalOp bit_op, std::optional<PredicateOp> pred_op = std::nullopt, + IR::Pred dest_pred = IR::Pred::PT) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + } const lop{insn}; + + if (x) { + throw NotImplementedException("X"); + } + IR::U32 op_a{v.X(lop.src_reg)}; + if (inv_a != 0) { + op_a = v.ir.BitwiseNot(op_a); + } + if (inv_b != 0) { + op_b = v.ir.BitwiseNot(op_b); + } + + const IR::U32 result{LogicalOperation(v.ir, op_a, op_b, bit_op)}; + if (pred_op) { + const IR::U1 pred_result{PredicateOperation(v.ir, result, *pred_op)}; + v.ir.SetPred(dest_pred, pred_result); + } + if (cc) { + if (bit_op == LogicalOp::PASS_B) { + v.SetZFlag(v.ir.IEqual(result, v.ir.Imm32(0))); + v.SetSFlag(v.ir.ILessThan(result, v.ir.Imm32(0), true)); + } else { + v.SetZFlag(v.ir.GetZeroFromOp(result)); + v.SetSFlag(v.ir.GetSignFromOp(result)); + } + v.ResetCFlag(); + v.ResetOFlag(); + } + v.X(lop.dest_reg, 
result); +} + +void LOP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { + union { + u64 insn; + BitField<39, 1, u64> inv_a; + BitField<40, 1, u64> inv_b; + BitField<41, 2, LogicalOp> bit_op; + BitField<43, 1, u64> x; + BitField<44, 2, PredicateOp> pred_op; + BitField<47, 1, u64> cc; + BitField<48, 3, IR::Pred> dest_pred; + } const lop{insn}; + + LOP(v, insn, op_b, lop.x != 0, lop.cc != 0, lop.inv_a != 0, lop.inv_b != 0, lop.bit_op, + lop.pred_op, lop.dest_pred); +} +} // Anonymous namespace + +void TranslatorVisitor::LOP_reg(u64 insn) { + LOP(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::LOP_cbuf(u64 insn) { + LOP(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::LOP_imm(u64 insn) { + LOP(*this, insn, GetImm20(insn)); +} + +void TranslatorVisitor::LOP32I(u64 insn) { + union { + u64 raw; + BitField<53, 2, LogicalOp> bit_op; + BitField<57, 1, u64> x; + BitField<52, 1, u64> cc; + BitField<55, 1, u64> inv_a; + BitField<56, 1, u64> inv_b; + } const lop32i{insn}; + + LOP(*this, insn, GetImm32(insn), lop32i.x != 0, lop32i.cc != 0, lop32i.inv_a != 0, + lop32i.inv_b != 0, lop32i.bit_op); +} +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp new file mode 100644 index 000000000..e0fe47912 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp @@ -0,0 +1,122 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +// https://forums.developer.nvidia.com/t/reverse-lut-for-lop3-lut/110651 +// Emulate GPU's LOP3.LUT (three-input logic op with 8-bit truth table) +IR::U32 ApplyLUT(IR::IREmitter& ir, const IR::U32& a, const IR::U32& b, const IR::U32& c, + u64 ttbl) { + IR::U32 r{ir.Imm32(0)}; + const IR::U32 not_a{ir.BitwiseNot(a)}; + const IR::U32 not_b{ir.BitwiseNot(b)}; + const IR::U32 not_c{ir.BitwiseNot(c)}; + if (ttbl & 0x01) { + // r |= ~a & ~b & ~c; + const auto lhs{ir.BitwiseAnd(not_a, not_b)}; + const auto rhs{ir.BitwiseAnd(lhs, not_c)}; + r = ir.BitwiseOr(r, rhs); + } + if (ttbl & 0x02) { + // r |= ~a & ~b & c; + const auto lhs{ir.BitwiseAnd(not_a, not_b)}; + const auto rhs{ir.BitwiseAnd(lhs, c)}; + r = ir.BitwiseOr(r, rhs); + } + if (ttbl & 0x04) { + // r |= ~a & b & ~c; + const auto lhs{ir.BitwiseAnd(not_a, b)}; + const auto rhs{ir.BitwiseAnd(lhs, not_c)}; + r = ir.BitwiseOr(r, rhs); + } + if (ttbl & 0x08) { + // r |= ~a & b & c; + const auto lhs{ir.BitwiseAnd(not_a, b)}; + const auto rhs{ir.BitwiseAnd(lhs, c)}; + r = ir.BitwiseOr(r, rhs); + } + if (ttbl & 0x10) { + // r |= a & ~b & ~c; + const auto lhs{ir.BitwiseAnd(a, not_b)}; + const auto rhs{ir.BitwiseAnd(lhs, not_c)}; + r = ir.BitwiseOr(r, rhs); + } + if (ttbl & 0x20) { + // r |= a & ~b & c; + const auto lhs{ir.BitwiseAnd(a, not_b)}; + const auto rhs{ir.BitwiseAnd(lhs, c)}; + r = ir.BitwiseOr(r, rhs); + } + if (ttbl & 0x40) { + // r |= a & b & ~c; + const auto lhs{ir.BitwiseAnd(a, b)}; + const auto rhs{ir.BitwiseAnd(lhs, not_c)}; + r = ir.BitwiseOr(r, rhs); + } + if (ttbl & 0x80) { + // r |= a & b & c; + const auto lhs{ir.BitwiseAnd(a, b)}; + const auto 
rhs{ir.BitwiseAnd(lhs, c)}; + r = ir.BitwiseOr(r, rhs); + } + return r; +} + +IR::U32 LOP3(TranslatorVisitor& v, u64 insn, const IR::U32& op_b, const IR::U32& op_c, u64 lut) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<47, 1, u64> cc; + } const lop3{insn}; + + if (lop3.cc != 0) { + throw NotImplementedException("LOP3 CC"); + } + + const IR::U32 op_a{v.X(lop3.src_reg)}; + const IR::U32 result{ApplyLUT(v.ir, op_a, op_b, op_c, lut)}; + v.X(lop3.dest_reg, result); + return result; +} + +u64 GetLut48(u64 insn) { + union { + u64 raw; + BitField<48, 8, u64> lut; + } const lut{insn}; + return lut.lut; +} +} // Anonymous namespace + +void TranslatorVisitor::LOP3_reg(u64 insn) { + union { + u64 insn; + BitField<28, 8, u64> lut; + BitField<38, 1, u64> x; + BitField<36, 2, PredicateOp> pred_op; + BitField<48, 3, IR::Pred> pred; + } const lop3{insn}; + + if (lop3.x != 0) { + throw NotImplementedException("LOP3 X"); + } + const IR::U32 result{LOP3(*this, insn, GetReg20(insn), GetReg39(insn), lop3.lut)}; + const IR::U1 pred_result{PredicateOperation(ir, result, lop3.pred_op)}; + ir.SetPred(lop3.pred, pred_result); +} + +void TranslatorVisitor::LOP3_cbuf(u64 insn) { + LOP3(*this, insn, GetCbuf(insn), GetReg39(insn), GetLut48(insn)); +} + +void TranslatorVisitor::LOP3_imm(u64 insn) { + LOP3(*this, insn, GetImm20(insn), GetReg39(insn), GetLut48(insn)); +} +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp new file mode 100644 index 000000000..4324fd443 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp @@ -0,0 +1,66 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Mode : u64 { + PR, + CC, +}; +} // Anonymous namespace + +void TranslatorVisitor::P2R_reg(u64) { + throw NotImplementedException("P2R (reg)"); +} + +void TranslatorVisitor::P2R_cbuf(u64) { + throw NotImplementedException("P2R (cbuf)"); +} + +void TranslatorVisitor::P2R_imm(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src; + BitField<40, 1, Mode> mode; + BitField<41, 2, u64> byte_selector; + } const p2r{insn}; + + const u32 mask{GetImm20(insn).U32()}; + const bool pr_mode{p2r.mode == Mode::PR}; + const u32 num_items{pr_mode ? 
7U : 4U}; + const u32 offset{static_cast<u32>(p2r.byte_selector) * 8}; + IR::U32 insert{ir.Imm32(0)}; + for (u32 index = 0; index < num_items; ++index) { + if (((mask >> index) & 1) == 0) { + continue; + } + const IR::U1 cond{[this, index, pr_mode] { + if (pr_mode) { + return ir.GetPred(IR::Pred{index}); + } + switch (index) { + case 0: + return ir.GetZFlag(); + case 1: + return ir.GetSFlag(); + case 2: + return ir.GetCFlag(); + case 3: + return ir.GetOFlag(); + } + throw LogicError("Unreachable P2R index"); + }()}; + const IR::U32 bit{ir.Select(cond, ir.Imm32(1U << (index + offset)), ir.Imm32(0))}; + insert = ir.BitwiseOr(insert, bit); + } + const IR::U32 masked_out{ir.BitwiseAnd(X(p2r.src), ir.Imm32(~(mask << offset)))}; + X(p2r.dest_reg, ir.BitwiseOr(masked_out, insert)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp new file mode 100644 index 000000000..6bb08db8a --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp @@ -0,0 +1,44 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void MOV(TranslatorVisitor& v, u64 insn, const IR::U32& src, bool is_mov32i = false) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<39, 4, u64> mask; + BitField<12, 4, u64> mov32i_mask; + } const mov{insn}; + + if ((is_mov32i ? mov.mov32i_mask : mov.mask) != 0xf) { + throw NotImplementedException("Non-full move mask"); + } + v.X(mov.dest_reg, src); +} +} // Anonymous namespace + +void TranslatorVisitor::MOV_reg(u64 insn) { + MOV(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::MOV_cbuf(u64 insn) { + MOV(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::MOV_imm(u64 insn) { + MOV(*this, insn, GetImm20(insn)); +} + +void TranslatorVisitor::MOV32I(u64 insn) { + MOV(*this, insn, GetImm32(insn), true); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp new file mode 100644 index 000000000..eda5f177b --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp @@ -0,0 +1,71 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
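P2R_imm above packs predicate (or condition-code) bits into one byte of the destination while preserving the bits outside the immediate mask. A host-side sketch of that packing, with an illustrative helper name and the predicates passed in as plain booleans:

#include <cstdint>

// Scalar model of P2R in PR mode: mask selects which predicate bits are written,
// byte_selector picks the destination byte, all other bits keep the source value.
std::uint32_t PackPredicates(std::uint32_t src, std::uint32_t mask, unsigned byte_selector,
                             const bool (&preds)[7]) {
    const unsigned offset = byte_selector * 8;
    std::uint32_t insert = 0;
    for (unsigned index = 0; index < 7; ++index) {
        if (((mask >> index) & 1) != 0 && preds[index]) {
            insert |= 1u << (index + offset);
        }
    }
    return (src & ~(mask << offset)) | insert;
}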
+ +#include "common/bit_field.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Mode : u64 { + PR, + CC, +}; + +void SetFlag(IR::IREmitter& ir, const IR::U1& inv_mask_bit, const IR::U1& src_bit, u32 index) { + switch (index) { + case 0: + return ir.SetZFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetZFlag(), src_bit)}); + case 1: + return ir.SetSFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetSFlag(), src_bit)}); + case 2: + return ir.SetCFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetCFlag(), src_bit)}); + case 3: + return ir.SetOFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetOFlag(), src_bit)}); + default: + throw LogicError("Unreachable R2P index"); + } +} + +void R2P(TranslatorVisitor& v, u64 insn, const IR::U32& mask) { + union { + u64 raw; + BitField<8, 8, IR::Reg> src_reg; + BitField<40, 1, Mode> mode; + BitField<41, 2, u64> byte_selector; + } const r2p{insn}; + const IR::U32 src{v.X(r2p.src_reg)}; + const IR::U32 count{v.ir.Imm32(1)}; + const bool pr_mode{r2p.mode == Mode::PR}; + const u32 num_items{pr_mode ? 7U : 4U}; + const u32 offset_base{static_cast<u32>(r2p.byte_selector) * 8}; + for (u32 index = 0; index < num_items; ++index) { + const IR::U32 offset{v.ir.Imm32(offset_base + index)}; + const IR::U1 src_zero{v.ir.GetZeroFromOp(v.ir.BitFieldExtract(src, offset, count, false))}; + const IR::U1 src_bit{v.ir.LogicalNot(src_zero)}; + const IR::U32 mask_bfe{v.ir.BitFieldExtract(mask, v.ir.Imm32(index), count, false)}; + const IR::U1 inv_mask_bit{v.ir.GetZeroFromOp(mask_bfe)}; + if (pr_mode) { + const IR::Pred pred{index}; + v.ir.SetPred(pred, IR::U1{v.ir.Select(inv_mask_bit, v.ir.GetPred(pred), src_bit)}); + } else { + SetFlag(v.ir, inv_mask_bit, src_bit, index); + } + } +} +} // Anonymous namespace + +void TranslatorVisitor::R2P_reg(u64 insn) { + R2P(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::R2P_cbuf(u64 insn) { + R2P(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::R2P_imm(u64 insn) { + R2P(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp new file mode 100644 index 000000000..20cb2674e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp @@ -0,0 +1,181 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class SpecialRegister : u64 { + SR_LANEID = 0, + SR_CLOCK = 1, + SR_VIRTCFG = 2, + SR_VIRTID = 3, + SR_PM0 = 4, + SR_PM1 = 5, + SR_PM2 = 6, + SR_PM3 = 7, + SR_PM4 = 8, + SR_PM5 = 9, + SR_PM6 = 10, + SR_PM7 = 11, + SR12 = 12, + SR13 = 13, + SR14 = 14, + SR_ORDERING_TICKET = 15, + SR_PRIM_TYPE = 16, + SR_INVOCATION_ID = 17, + SR_Y_DIRECTION = 18, + SR_THREAD_KILL = 19, + SM_SHADER_TYPE = 20, + SR_DIRECTCBEWRITEADDRESSLOW = 21, + SR_DIRECTCBEWRITEADDRESSHIGH = 22, + SR_DIRECTCBEWRITEENABLE = 23, + SR_MACHINE_ID_0 = 24, + SR_MACHINE_ID_1 = 25, + SR_MACHINE_ID_2 = 26, + SR_MACHINE_ID_3 = 27, + SR_AFFINITY = 28, + SR_INVOCATION_INFO = 29, + SR_WSCALEFACTOR_XY = 30, + SR_WSCALEFACTOR_Z = 31, + SR_TID = 32, + SR_TID_X = 33, + SR_TID_Y = 34, + SR_TID_Z = 35, + SR_CTA_PARAM = 36, + SR_CTAID_X = 37, + SR_CTAID_Y = 38, + SR_CTAID_Z = 39, + SR_NTID = 40, + SR_CirQueueIncrMinusOne = 41, + SR_NLATC = 42, + SR43 = 43, + SR_SM_SPA_VERSION = 44, + SR_MULTIPASSSHADERINFO = 45, + SR_LWINHI = 46, + SR_SWINHI = 47, + SR_SWINLO = 48, + SR_SWINSZ = 49, + SR_SMEMSZ = 50, + SR_SMEMBANKS = 51, + SR_LWINLO = 52, + SR_LWINSZ = 53, + SR_LMEMLOSZ = 54, + SR_LMEMHIOFF = 55, + SR_EQMASK = 56, + SR_LTMASK = 57, + SR_LEMASK = 58, + SR_GTMASK = 59, + SR_GEMASK = 60, + SR_REGALLOC = 61, + SR_BARRIERALLOC = 62, + SR63 = 63, + SR_GLOBALERRORSTATUS = 64, + SR65 = 65, + SR_WARPERRORSTATUS = 66, + SR_WARPERRORSTATUSCLEAR = 67, + SR68 = 68, + SR69 = 69, + SR70 = 70, + SR71 = 71, + SR_PM_HI0 = 72, + SR_PM_HI1 = 73, + SR_PM_HI2 = 74, + SR_PM_HI3 = 75, + SR_PM_HI4 = 76, + SR_PM_HI5 = 77, + SR_PM_HI6 = 78, + SR_PM_HI7 = 79, + SR_CLOCKLO = 80, + SR_CLOCKHI = 81, + SR_GLOBALTIMERLO = 82, + SR_GLOBALTIMERHI = 83, + SR84 = 84, + SR85 = 85, + SR86 = 86, + SR87 = 87, + SR88 = 88, + SR89 = 89, + SR90 = 90, + SR91 = 91, + SR92 = 92, + SR93 = 93, + SR94 = 94, + SR95 = 95, + SR_HWTASKID = 96, + SR_CIRCULARQUEUEENTRYINDEX = 97, + SR_CIRCULARQUEUEENTRYADDRESSLOW = 98, + SR_CIRCULARQUEUEENTRYADDRESSHIGH = 99, +}; + +[[nodiscard]] IR::U32 Read(IR::IREmitter& ir, SpecialRegister special_register) { + switch (special_register) { + case SpecialRegister::SR_INVOCATION_ID: + return ir.InvocationId(); + case SpecialRegister::SR_THREAD_KILL: + return IR::U32{ir.Select(ir.IsHelperInvocation(), ir.Imm32(-1), ir.Imm32(0))}; + case SpecialRegister::SR_INVOCATION_INFO: + LOG_WARNING(Shader, "(STUBBED) SR_INVOCATION_INFO"); + return ir.Imm32(0x00ff'0000); + case SpecialRegister::SR_TID: { + const IR::Value tid{ir.LocalInvocationId()}; + return ir.BitFieldInsert(ir.BitFieldInsert(IR::U32{ir.CompositeExtract(tid, 0)}, + IR::U32{ir.CompositeExtract(tid, 1)}, + ir.Imm32(16), ir.Imm32(8)), + IR::U32{ir.CompositeExtract(tid, 2)}, ir.Imm32(26), ir.Imm32(6)); + } + case SpecialRegister::SR_TID_X: + return ir.LocalInvocationIdX(); + case SpecialRegister::SR_TID_Y: + return ir.LocalInvocationIdY(); + case SpecialRegister::SR_TID_Z: + return ir.LocalInvocationIdZ(); + case SpecialRegister::SR_CTAID_X: + return ir.WorkgroupIdX(); + case SpecialRegister::SR_CTAID_Y: + return ir.WorkgroupIdY(); + case SpecialRegister::SR_CTAID_Z: + return ir.WorkgroupIdZ(); + case SpecialRegister::SR_WSCALEFACTOR_XY: + LOG_WARNING(Shader, "(STUBBED) SR_WSCALEFACTOR_XY"); + return ir.Imm32(Common::BitCast<u32>(1.0f)); + case SpecialRegister::SR_WSCALEFACTOR_Z: + LOG_WARNING(Shader, "(STUBBED) 
SR_WSCALEFACTOR_Z"); + return ir.Imm32(Common::BitCast<u32>(1.0f)); + case SpecialRegister::SR_LANEID: + return ir.LaneId(); + case SpecialRegister::SR_EQMASK: + return ir.SubgroupEqMask(); + case SpecialRegister::SR_LTMASK: + return ir.SubgroupLtMask(); + case SpecialRegister::SR_LEMASK: + return ir.SubgroupLeMask(); + case SpecialRegister::SR_GTMASK: + return ir.SubgroupGtMask(); + case SpecialRegister::SR_GEMASK: + return ir.SubgroupGeMask(); + case SpecialRegister::SR_Y_DIRECTION: + return ir.BitCast<IR::U32>(ir.YDirection()); + case SpecialRegister::SR_AFFINITY: + LOG_WARNING(Shader, "(STUBBED) SR_AFFINITY"); + return ir.Imm32(0); // This is the default value hardware returns. + default: + throw NotImplementedException("S2R special register {}", special_register); + } +} +} // Anonymous namespace + +void TranslatorVisitor::S2R(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<20, 8, SpecialRegister> src_reg; + } const s2r{insn}; + + X(s2r.dest_reg, Read(ir, s2r.src_reg)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp new file mode 100644 index 000000000..7e26ab359 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -0,0 +1,283 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { + +[[noreturn]] static void ThrowNotImplemented(Opcode opcode) { + throw NotImplementedException("Instruction {} is not implemented", opcode); +} + +void TranslatorVisitor::ATOM_cas(u64) { + ThrowNotImplemented(Opcode::ATOM_cas); +} + +void TranslatorVisitor::ATOMS_cas(u64) { + ThrowNotImplemented(Opcode::ATOMS_cas); +} + +void TranslatorVisitor::B2R(u64) { + ThrowNotImplemented(Opcode::B2R); +} + +void TranslatorVisitor::BPT(u64) { + ThrowNotImplemented(Opcode::BPT); +} + +void TranslatorVisitor::BRA(u64) { + ThrowNotImplemented(Opcode::BRA); +} + +void TranslatorVisitor::BRK(u64) { + ThrowNotImplemented(Opcode::BRK); +} + +void TranslatorVisitor::CAL() { + // CAL is a no-op +} + +void TranslatorVisitor::CCTL(u64) { + ThrowNotImplemented(Opcode::CCTL); +} + +void TranslatorVisitor::CCTLL(u64) { + ThrowNotImplemented(Opcode::CCTLL); +} + +void TranslatorVisitor::CONT(u64) { + ThrowNotImplemented(Opcode::CONT); +} + +void TranslatorVisitor::CS2R(u64) { + ThrowNotImplemented(Opcode::CS2R); +} + +void TranslatorVisitor::FCHK_reg(u64) { + ThrowNotImplemented(Opcode::FCHK_reg); +} + +void TranslatorVisitor::FCHK_cbuf(u64) { + ThrowNotImplemented(Opcode::FCHK_cbuf); +} + +void TranslatorVisitor::FCHK_imm(u64) { + ThrowNotImplemented(Opcode::FCHK_imm); +} + +void TranslatorVisitor::GETCRSPTR(u64) { + ThrowNotImplemented(Opcode::GETCRSPTR); +} + +void TranslatorVisitor::GETLMEMBASE(u64) { + ThrowNotImplemented(Opcode::GETLMEMBASE); +} + +void TranslatorVisitor::IDE(u64) { + ThrowNotImplemented(Opcode::IDE); +} + +void TranslatorVisitor::IDP_reg(u64) { + ThrowNotImplemented(Opcode::IDP_reg); +} + +void TranslatorVisitor::IDP_imm(u64) { + ThrowNotImplemented(Opcode::IDP_imm); +} + +void TranslatorVisitor::IMAD_reg(u64) { + ThrowNotImplemented(Opcode::IMAD_reg); +} + +void 
TranslatorVisitor::IMAD_rc(u64) { + ThrowNotImplemented(Opcode::IMAD_rc); +} + +void TranslatorVisitor::IMAD_cr(u64) { + ThrowNotImplemented(Opcode::IMAD_cr); +} + +void TranslatorVisitor::IMAD_imm(u64) { + ThrowNotImplemented(Opcode::IMAD_imm); +} + +void TranslatorVisitor::IMAD32I(u64) { + ThrowNotImplemented(Opcode::IMAD32I); +} + +void TranslatorVisitor::IMADSP_reg(u64) { + ThrowNotImplemented(Opcode::IMADSP_reg); +} + +void TranslatorVisitor::IMADSP_rc(u64) { + ThrowNotImplemented(Opcode::IMADSP_rc); +} + +void TranslatorVisitor::IMADSP_cr(u64) { + ThrowNotImplemented(Opcode::IMADSP_cr); +} + +void TranslatorVisitor::IMADSP_imm(u64) { + ThrowNotImplemented(Opcode::IMADSP_imm); +} + +void TranslatorVisitor::IMUL_reg(u64) { + ThrowNotImplemented(Opcode::IMUL_reg); +} + +void TranslatorVisitor::IMUL_cbuf(u64) { + ThrowNotImplemented(Opcode::IMUL_cbuf); +} + +void TranslatorVisitor::IMUL_imm(u64) { + ThrowNotImplemented(Opcode::IMUL_imm); +} + +void TranslatorVisitor::IMUL32I(u64) { + ThrowNotImplemented(Opcode::IMUL32I); +} + +void TranslatorVisitor::JCAL(u64) { + ThrowNotImplemented(Opcode::JCAL); +} + +void TranslatorVisitor::JMP(u64) { + ThrowNotImplemented(Opcode::JMP); +} + +void TranslatorVisitor::KIL() { + // KIL is a no-op +} + +void TranslatorVisitor::LD(u64) { + ThrowNotImplemented(Opcode::LD); +} + +void TranslatorVisitor::LEPC(u64) { + ThrowNotImplemented(Opcode::LEPC); +} + +void TranslatorVisitor::LONGJMP(u64) { + ThrowNotImplemented(Opcode::LONGJMP); +} + +void TranslatorVisitor::NOP(u64) { + // NOP is No-Op. +} + +void TranslatorVisitor::PBK() { + // PBK is a no-op +} + +void TranslatorVisitor::PCNT() { + // PCNT is a no-op +} + +void TranslatorVisitor::PEXIT(u64) { + ThrowNotImplemented(Opcode::PEXIT); +} + +void TranslatorVisitor::PLONGJMP(u64) { + ThrowNotImplemented(Opcode::PLONGJMP); +} + +void TranslatorVisitor::PRET(u64) { + ThrowNotImplemented(Opcode::PRET); +} + +void TranslatorVisitor::PRMT_reg(u64) { + ThrowNotImplemented(Opcode::PRMT_reg); +} + +void TranslatorVisitor::PRMT_rc(u64) { + ThrowNotImplemented(Opcode::PRMT_rc); +} + +void TranslatorVisitor::PRMT_cr(u64) { + ThrowNotImplemented(Opcode::PRMT_cr); +} + +void TranslatorVisitor::PRMT_imm(u64) { + ThrowNotImplemented(Opcode::PRMT_imm); +} + +void TranslatorVisitor::R2B(u64) { + ThrowNotImplemented(Opcode::R2B); +} + +void TranslatorVisitor::RAM(u64) { + ThrowNotImplemented(Opcode::RAM); +} + +void TranslatorVisitor::RET(u64) { + ThrowNotImplemented(Opcode::RET); +} + +void TranslatorVisitor::RTT(u64) { + ThrowNotImplemented(Opcode::RTT); +} + +void TranslatorVisitor::SAM(u64) { + ThrowNotImplemented(Opcode::SAM); +} + +void TranslatorVisitor::SETCRSPTR(u64) { + ThrowNotImplemented(Opcode::SETCRSPTR); +} + +void TranslatorVisitor::SETLMEMBASE(u64) { + ThrowNotImplemented(Opcode::SETLMEMBASE); +} + +void TranslatorVisitor::SSY() { + // SSY is a no-op +} + +void TranslatorVisitor::ST(u64) { + ThrowNotImplemented(Opcode::ST); +} + +void TranslatorVisitor::STP(u64) { + ThrowNotImplemented(Opcode::STP); +} + +void TranslatorVisitor::SUATOM_cas(u64) { + ThrowNotImplemented(Opcode::SUATOM_cas); +} + +void TranslatorVisitor::SYNC(u64) { + ThrowNotImplemented(Opcode::SYNC); +} + +void TranslatorVisitor::TXA(u64) { + ThrowNotImplemented(Opcode::TXA); +} + +void TranslatorVisitor::VABSDIFF(u64) { + ThrowNotImplemented(Opcode::VABSDIFF); +} + +void TranslatorVisitor::VABSDIFF4(u64) { + ThrowNotImplemented(Opcode::VABSDIFF4); +} + +void TranslatorVisitor::VADD(u64) { + ThrowNotImplemented(Opcode::VADD); +} + +void 
TranslatorVisitor::VSET(u64) { + ThrowNotImplemented(Opcode::VSET); +} +void TranslatorVisitor::VSHL(u64) { + ThrowNotImplemented(Opcode::VSHL); +} + +void TranslatorVisitor::VSHR(u64) { + ThrowNotImplemented(Opcode::VSHR); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp new file mode 100644 index 000000000..01cfad88d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp @@ -0,0 +1,45 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void OUT(TranslatorVisitor& v, u64 insn, IR::U32 stream_index) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> output_reg; // Not needed on host + BitField<39, 1, u64> emit; + BitField<40, 1, u64> cut; + } const out{insn}; + + stream_index = v.ir.BitwiseAnd(stream_index, v.ir.Imm32(0b11)); + + if (out.emit != 0) { + v.ir.EmitVertex(stream_index); + } + if (out.cut != 0) { + v.ir.EndPrimitive(stream_index); + } + // Host doesn't need the output register, but we can write to it to avoid undefined reads + v.X(out.dest_reg, v.ir.Imm32(0)); +} +} // Anonymous namespace + +void TranslatorVisitor::OUT_reg(u64 insn) { + OUT(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::OUT_cbuf(u64 insn) { + OUT(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::OUT_imm(u64 insn) { + OUT(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp new file mode 100644 index 000000000..b4767afb5 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp @@ -0,0 +1,46 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
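The OUT translation above masks the stream index to the four geometry streams, emits a vertex and/or ends the primitive on that stream, and zeroes the destination register so later reads are defined. A scalar sketch of that control flow (the callbacks stand in for ir.EmitVertex and ir.EndPrimitive and are illustrative):

#include <functional>

// Scalar model of the OUT lowering above.
void ModelOut(unsigned stream_index, bool emit, bool cut,
              const std::function<void(unsigned)>& emit_vertex,
              const std::function<void(unsigned)>& end_primitive,
              unsigned& dest_reg) {
    stream_index &= 0b11; // only streams 0-3 exist
    if (emit) {
        emit_vertex(stream_index);
    }
    if (cut) {
        end_primitive(stream_index);
    }
    dest_reg = 0; // the host does not need the output register; write 0 to avoid undefined reads
}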
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Mode : u64 { + Default, + CovMask, + Covered, + Offset, + CentroidOffset, + MyIndex, +}; +} // Anonymous namespace + +void TranslatorVisitor::PIXLD(u64 insn) { + union { + u64 raw; + BitField<31, 3, Mode> mode; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> addr_reg; + BitField<20, 8, s64> addr_offset; + BitField<45, 3, IR::Pred> dest_pred; + } const pixld{insn}; + + if (pixld.dest_pred != IR::Pred::PT) { + throw NotImplementedException("Destination predicate"); + } + if (pixld.addr_reg != IR::Reg::RZ || pixld.addr_offset != 0) { + throw NotImplementedException("Non-zero source register"); + } + switch (pixld.mode) { + case Mode::MyIndex: + X(pixld.dest_reg, ir.SampleId()); + break; + default: + throw NotImplementedException("Mode {}", pixld.mode.Value()); + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp new file mode 100644 index 000000000..75d1fa8c1 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp @@ -0,0 +1,38 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +void TranslatorVisitor::PSETP(u64 insn) { + union { + u64 raw; + BitField<0, 3, IR::Pred> dest_pred_b; + BitField<3, 3, IR::Pred> dest_pred_a; + BitField<12, 3, IR::Pred> pred_a; + BitField<15, 1, u64> neg_pred_a; + BitField<24, 2, BooleanOp> bop_1; + BitField<29, 3, IR::Pred> pred_b; + BitField<32, 1, u64> neg_pred_b; + BitField<39, 3, IR::Pred> pred_c; + BitField<42, 1, u64> neg_pred_c; + BitField<45, 2, BooleanOp> bop_2; + } const pset{insn}; + + const IR::U1 pred_a{ir.GetPred(pset.pred_a, pset.neg_pred_a != 0)}; + const IR::U1 pred_b{ir.GetPred(pset.pred_b, pset.neg_pred_b != 0)}; + const IR::U1 pred_c{ir.GetPred(pset.pred_c, pset.neg_pred_c != 0)}; + + const IR::U1 lhs_a{PredicateCombine(ir, pred_a, pred_b, pset.bop_1)}; + const IR::U1 lhs_b{PredicateCombine(ir, ir.LogicalNot(pred_a), pred_b, pset.bop_1)}; + const IR::U1 result_a{PredicateCombine(ir, lhs_a, pred_c, pset.bop_2)}; + const IR::U1 result_b{PredicateCombine(ir, lhs_b, pred_c, pset.bop_2)}; + + ir.SetPred(pset.dest_pred_a, result_a); + ir.SetPred(pset.dest_pred_b, result_b); +} +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp new file mode 100644 index 000000000..b02789874 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp @@ -0,0 +1,53 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +void TranslatorVisitor::PSET(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<12, 3, IR::Pred> pred_a; + BitField<15, 1, u64> neg_pred_a; + BitField<24, 2, BooleanOp> bop_1; + BitField<29, 3, IR::Pred> pred_b; + BitField<32, 1, u64> neg_pred_b; + BitField<39, 3, IR::Pred> pred_c; + BitField<42, 1, u64> neg_pred_c; + BitField<44, 1, u64> bf; + BitField<45, 2, BooleanOp> bop_2; + BitField<47, 1, u64> cc; + } const pset{insn}; + + const IR::U1 pred_a{ir.GetPred(pset.pred_a, pset.neg_pred_a != 0)}; + const IR::U1 pred_b{ir.GetPred(pset.pred_b, pset.neg_pred_b != 0)}; + const IR::U1 pred_c{ir.GetPred(pset.pred_c, pset.neg_pred_c != 0)}; + + const IR::U1 res_1{PredicateCombine(ir, pred_a, pred_b, pset.bop_1)}; + const IR::U1 res_2{PredicateCombine(ir, res_1, pred_c, pset.bop_2)}; + + const IR::U32 true_result{pset.bf != 0 ? ir.Imm32(0x3f800000) : ir.Imm32(-1)}; + const IR::U32 zero{ir.Imm32(0)}; + + const IR::U32 result{ir.Select(res_2, true_result, zero)}; + + X(pset.dest_reg, result); + if (pset.cc != 0) { + const IR::U1 is_zero{ir.IEqual(result, zero)}; + SetZFlag(is_zero); + if (pset.bf != 0) { + ResetSFlag(); + } else { + SetSFlag(ir.LogicalNot(is_zero)); + } + ResetOFlag(); + ResetCFlag(); + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp new file mode 100644 index 000000000..93baa75a9 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp @@ -0,0 +1,44 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +void SEL(TranslatorVisitor& v, u64 insn, const IR::U32& src) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + } const sel{insn}; + + const IR::U1 pred = v.ir.GetPred(sel.pred); + IR::U32 op_a{v.X(sel.src_reg)}; + IR::U32 op_b{src}; + if (sel.neg_pred != 0) { + std::swap(op_a, op_b); + } + const IR::U32 result{v.ir.Select(pred, op_a, op_b)}; + + v.X(sel.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::SEL_reg(u64 insn) { + SEL(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::SEL_cbuf(u64 insn) { + SEL(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::SEL_imm(u64 insn) { + SEL(*this, insn, GetImm20(insn)); +} +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp new file mode 100644 index 000000000..63b588ad4 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp @@ -0,0 +1,205 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <array> +#include <bit> + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Type : u64 { + _1D, + BUFFER_1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, +}; + +enum class Size : u64 { + U32, + S32, + U64, + S64, + F32FTZRN, + F16x2FTZRN, + SD32, + SD64, +}; + +enum class AtomicOp : u64 { + ADD, + MIN, + MAX, + INC, + DEC, + AND, + OR, + XOR, + EXCH, +}; + +enum class Clamp : u64 { + IGN, + Default, + TRAP, +}; + +TextureType GetType(Type type) { + switch (type) { + case Type::_1D: + return TextureType::Color1D; + case Type::BUFFER_1D: + return TextureType::Buffer; + case Type::ARRAY_1D: + return TextureType::ColorArray1D; + case Type::_2D: + return TextureType::Color2D; + case Type::ARRAY_2D: + return TextureType::ColorArray2D; + case Type::_3D: + return TextureType::Color3D; + } + throw NotImplementedException("Invalid type {}", type); +} + +IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) { + switch (type) { + case Type::_1D: + case Type::BUFFER_1D: + return v.X(reg); + case Type::_2D: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1)); + case Type::_3D: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2)); + default: + break; + } + throw NotImplementedException("Invalid type {}", type); +} + +IR::Value ApplyAtomicOp(IR::IREmitter& ir, const IR::U32& handle, const IR::Value& coords, + const IR::Value& op_b, IR::TextureInstInfo info, AtomicOp op, + bool is_signed) { + switch (op) { + case AtomicOp::ADD: + return ir.ImageAtomicIAdd(handle, coords, op_b, info); + case AtomicOp::MIN: + return ir.ImageAtomicIMin(handle, coords, op_b, is_signed, info); + case AtomicOp::MAX: + return ir.ImageAtomicIMax(handle, coords, op_b, is_signed, info); + case AtomicOp::INC: + return ir.ImageAtomicInc(handle, coords, op_b, info); + case AtomicOp::DEC: + return ir.ImageAtomicDec(handle, coords, op_b, info); + case AtomicOp::AND: + return ir.ImageAtomicAnd(handle, coords, op_b, info); + case AtomicOp::OR: + return ir.ImageAtomicOr(handle, coords, op_b, info); + case AtomicOp::XOR: + return ir.ImageAtomicXor(handle, coords, op_b, info); + case AtomicOp::EXCH: + return ir.ImageAtomicExchange(handle, coords, op_b, info); + default: + throw NotImplementedException("Atomic Operation {}", op); + } +} + +ImageFormat Format(Size size) { + switch (size) { + case Size::U32: + case Size::S32: + case Size::SD32: + return ImageFormat::R32_UINT; + default: + break; + } + throw NotImplementedException("Invalid size {}", size); +} + +bool IsSizeInt32(Size size) { + switch (size) { + case Size::U32: + case Size::S32: + case Size::SD32: + return true; + default: + return false; + } +} + +void ImageAtomOp(TranslatorVisitor& v, IR::Reg dest_reg, IR::Reg operand_reg, IR::Reg coord_reg, + IR::Reg bindless_reg, AtomicOp op, Clamp clamp, Size size, Type type, + u64 bound_offset, bool is_bindless, bool write_result) { + if (clamp != Clamp::IGN) { + throw NotImplementedException("Clamp {}", clamp); + } + if (!IsSizeInt32(size)) { + throw NotImplementedException("Size {}", size); + } + const bool is_signed{size == Size::S32}; + const ImageFormat format{Format(size)}; + const TextureType tex_type{GetType(type)}; + const IR::Value coords{MakeCoords(v, coord_reg, type)}; + + const IR::U32 handle{is_bindless != 0 ? 
v.X(bindless_reg) + : v.ir.Imm32(static_cast<u32>(bound_offset * 4))}; + IR::TextureInstInfo info{}; + info.type.Assign(tex_type); + info.image_format.Assign(format); + + // TODO: float/64-bit operand + const IR::Value op_b{v.X(operand_reg)}; + const IR::Value color{ApplyAtomicOp(v.ir, handle, coords, op_b, info, op, is_signed)}; + + if (write_result) { + v.X(dest_reg, IR::U32{color}); + } +} +} // Anonymous namespace + +void TranslatorVisitor::SUATOM(u64 insn) { + union { + u64 raw; + BitField<54, 1, u64> is_bindless; + BitField<29, 4, AtomicOp> op; + BitField<33, 3, Type> type; + BitField<51, 3, Size> size; + BitField<49, 2, Clamp> clamp; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<20, 8, IR::Reg> operand_reg; + BitField<36, 13, u64> bound_offset; // !is_bindless + BitField<39, 8, IR::Reg> bindless_reg; // is_bindless + } const suatom{insn}; + + ImageAtomOp(*this, suatom.dest_reg, suatom.operand_reg, suatom.coord_reg, suatom.bindless_reg, + suatom.op, suatom.clamp, suatom.size, suatom.type, suatom.bound_offset, + suatom.is_bindless != 0, true); +} + +void TranslatorVisitor::SURED(u64 insn) { + // TODO: confirm offsets + union { + u64 raw; + BitField<51, 1, u64> is_bound; + BitField<21, 3, AtomicOp> op; + BitField<33, 3, Type> type; + BitField<20, 3, Size> size; + BitField<49, 2, Clamp> clamp; + BitField<0, 8, IR::Reg> operand_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<36, 13, u64> bound_offset; // is_bound + BitField<39, 8, IR::Reg> bindless_reg; // !is_bound + } const sured{insn}; + ImageAtomOp(*this, IR::Reg::RZ, sured.operand_reg, sured.coord_reg, sured.bindless_reg, + sured.op, sured.clamp, sured.size, sured.type, sured.bound_offset, + sured.is_bound == 0, false); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp new file mode 100644 index 000000000..681220a8d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp @@ -0,0 +1,281 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <array> +#include <bit> + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Type : u64 { + _1D, + BUFFER_1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, +}; + +constexpr unsigned R = 1 << 0; +constexpr unsigned G = 1 << 1; +constexpr unsigned B = 1 << 2; +constexpr unsigned A = 1 << 3; + +constexpr std::array MASK{ + 0U, // + R, // + G, // + R | G, // + B, // + R | B, // + G | B, // + R | G | B, // + A, // + R | A, // + G | A, // + R | G | A, // + B | A, // + R | B | A, // + G | B | A, // + R | G | B | A, // +}; + +enum class Size : u64 { + U8, + S8, + U16, + S16, + B32, + B64, + B128, +}; + +enum class Clamp : u64 { + IGN, + Default, + TRAP, +}; + +// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#cache-operators +enum class LoadCache : u64 { + CA, // Cache at all levels, likely to be accessed again + CG, // Cache at global level (L2 and below, not L1) + CI, // ??? 
+ CV, // Don't cache and fetch again (volatile) +}; + +enum class StoreCache : u64 { + WB, // Cache write-back all coherent levels + CG, // Cache at global level (L2 and below, not L1) + CS, // Cache streaming, likely to be accessed once + WT, // Cache write-through (to system memory, volatile?) +}; + +ImageFormat Format(Size size) { + switch (size) { + case Size::U8: + return ImageFormat::R8_UINT; + case Size::S8: + return ImageFormat::R8_SINT; + case Size::U16: + return ImageFormat::R16_UINT; + case Size::S16: + return ImageFormat::R16_SINT; + case Size::B32: + return ImageFormat::R32_UINT; + case Size::B64: + return ImageFormat::R32G32_UINT; + case Size::B128: + return ImageFormat::R32G32B32A32_UINT; + } + throw NotImplementedException("Invalid size {}", size); +} + +int SizeInRegs(Size size) { + switch (size) { + case Size::U8: + case Size::S8: + case Size::U16: + case Size::S16: + case Size::B32: + return 1; + case Size::B64: + return 2; + case Size::B128: + return 4; + } + throw NotImplementedException("Invalid size {}", size); +} + +TextureType GetType(Type type) { + switch (type) { + case Type::_1D: + return TextureType::Color1D; + case Type::BUFFER_1D: + return TextureType::Buffer; + case Type::ARRAY_1D: + return TextureType::ColorArray1D; + case Type::_2D: + return TextureType::Color2D; + case Type::ARRAY_2D: + return TextureType::ColorArray2D; + case Type::_3D: + return TextureType::Color3D; + } + throw NotImplementedException("Invalid type {}", type); +} + +IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) { + const auto array{[&](int index) { + return v.ir.BitFieldExtract(v.X(reg + index), v.ir.Imm32(0), v.ir.Imm32(16)); + }}; + switch (type) { + case Type::_1D: + case Type::BUFFER_1D: + return v.X(reg); + case Type::ARRAY_1D: + return v.ir.CompositeConstruct(v.X(reg), array(1)); + case Type::_2D: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1)); + case Type::ARRAY_2D: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), array(2)); + case Type::_3D: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2)); + } + throw NotImplementedException("Invalid type {}", type); +} + +unsigned SwizzleMask(u64 swizzle) { + if (swizzle == 0 || swizzle >= MASK.size()) { + throw NotImplementedException("Invalid swizzle {}", swizzle); + } + return MASK[swizzle]; +} + +IR::Value MakeColor(IR::IREmitter& ir, IR::Reg reg, int num_regs) { + std::array<IR::U32, 4> colors; + for (int i = 0; i < num_regs; ++i) { + colors[static_cast<size_t>(i)] = ir.GetReg(reg + i); + } + for (int i = num_regs; i < 4; ++i) { + colors[static_cast<size_t>(i)] = ir.Imm32(0); + } + return ir.CompositeConstruct(colors[0], colors[1], colors[2], colors[3]); +} +} // Anonymous namespace + +void TranslatorVisitor::SULD(u64 insn) { + union { + u64 raw; + BitField<51, 1, u64> is_bound; + BitField<52, 1, u64> d; + BitField<23, 1, u64> ba; + BitField<33, 3, Type> type; + BitField<24, 2, LoadCache> cache; + BitField<20, 3, Size> size; // .D + BitField<20, 4, u64> swizzle; // .P + BitField<49, 2, Clamp> clamp; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<36, 13, u64> bound_offset; // is_bound + BitField<39, 8, IR::Reg> bindless_reg; // !is_bound + } const suld{insn}; + + if (suld.clamp != Clamp::IGN) { + throw NotImplementedException("Clamp {}", suld.clamp.Value()); + } + if (suld.cache != LoadCache::CA && suld.cache != LoadCache::CG) { + throw NotImplementedException("Cache {}", suld.cache.Value()); + } + const bool is_typed{suld.d != 0}; + if 
(is_typed && suld.ba != 0) { + throw NotImplementedException("BA"); + } + + const ImageFormat format{is_typed ? Format(suld.size) : ImageFormat::Typeless}; + const TextureType type{GetType(suld.type)}; + const IR::Value coords{MakeCoords(*this, suld.coord_reg, suld.type)}; + const IR::U32 handle{suld.is_bound != 0 ? ir.Imm32(static_cast<u32>(suld.bound_offset * 4)) + : X(suld.bindless_reg)}; + IR::TextureInstInfo info{}; + info.type.Assign(type); + info.image_format.Assign(format); + + const IR::Value result{ir.ImageRead(handle, coords, info)}; + IR::Reg dest_reg{suld.dest_reg}; + if (is_typed) { + const int num_regs{SizeInRegs(suld.size)}; + for (int i = 0; i < num_regs; ++i) { + X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))}); + } + } else { + const unsigned mask{SwizzleMask(suld.swizzle)}; + const int bits{std::popcount(mask)}; + if (!IR::IsAligned(dest_reg, bits == 3 ? 4 : static_cast<size_t>(bits))) { + throw NotImplementedException("Unaligned destination register"); + } + for (unsigned component = 0; component < 4; ++component) { + if (((mask >> component) & 1) == 0) { + continue; + } + X(dest_reg, IR::U32{ir.CompositeExtract(result, component)}); + ++dest_reg; + } + } +} + +void TranslatorVisitor::SUST(u64 insn) { + union { + u64 raw; + BitField<51, 1, u64> is_bound; + BitField<52, 1, u64> d; + BitField<23, 1, u64> ba; + BitField<33, 3, Type> type; + BitField<24, 2, StoreCache> cache; + BitField<20, 3, Size> size; // .D + BitField<20, 4, u64> swizzle; // .P + BitField<49, 2, Clamp> clamp; + BitField<0, 8, IR::Reg> data_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<36, 13, u64> bound_offset; // is_bound + BitField<39, 8, IR::Reg> bindless_reg; // !is_bound + } const sust{insn}; + + if (sust.clamp != Clamp::IGN) { + throw NotImplementedException("Clamp {}", sust.clamp.Value()); + } + if (sust.cache != StoreCache::WB && sust.cache != StoreCache::CG) { + throw NotImplementedException("Cache {}", sust.cache.Value()); + } + const bool is_typed{sust.d != 0}; + if (is_typed && sust.ba != 0) { + throw NotImplementedException("BA"); + } + const ImageFormat format{is_typed ? Format(sust.size) : ImageFormat::Typeless}; + const TextureType type{GetType(sust.type)}; + const IR::Value coords{MakeCoords(*this, sust.coord_reg, sust.type)}; + const IR::U32 handle{sust.is_bound != 0 ? ir.Imm32(static_cast<u32>(sust.bound_offset * 4)) + : X(sust.bindless_reg)}; + IR::TextureInstInfo info{}; + info.type.Assign(type); + info.image_format.Assign(format); + + IR::Value color; + if (is_typed) { + color = MakeColor(ir, sust.data_reg, SizeInRegs(sust.size)); + } else { + const unsigned mask{SwizzleMask(sust.swizzle)}; + if (mask != 0xf) { + throw NotImplementedException("Non-full mask"); + } + color = MakeColor(ir, sust.data_reg, 4); + } + ir.ImageWrite(handle, coords, color, info); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp new file mode 100644 index 000000000..0046b5edd --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp @@ -0,0 +1,236 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
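+// TEX samples a texture with an optional LOD or bias (Blod), AOFFI offset and depth compare +// (.DC); the TEX.B variant reads the texture handle from a register instead of a constant buffer +// offset.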
+ +#include <optional> + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Blod : u64 { + None, + LZ, + LB, + LL, + INVALIDBLOD4, + INVALIDBLOD5, + LBA, + LLA, +}; + +enum class TextureType : u64 { + _1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, + ARRAY_3D, + CUBE, + ARRAY_CUBE, +}; + +Shader::TextureType GetType(TextureType type) { + switch (type) { + case TextureType::_1D: + return Shader::TextureType::Color1D; + case TextureType::ARRAY_1D: + return Shader::TextureType::ColorArray1D; + case TextureType::_2D: + return Shader::TextureType::Color2D; + case TextureType::ARRAY_2D: + return Shader::TextureType::ColorArray2D; + case TextureType::_3D: + return Shader::TextureType::Color3D; + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return Shader::TextureType::ColorCube; + case TextureType::ARRAY_CUBE: + return Shader::TextureType::ColorArrayCube; + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { + const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }}; + switch (type) { + case TextureType::_1D: + return v.F(reg); + case TextureType::ARRAY_1D: + return v.ir.CompositeConstruct(v.F(reg + 1), read_array()); + case TextureType::_2D: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), read_array()); + case TextureType::_3D: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); + case TextureType::ARRAY_CUBE: + return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3), read_array()); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::F32 MakeLod(TranslatorVisitor& v, IR::Reg& reg, Blod blod) { + switch (blod) { + case Blod::None: + return v.ir.Imm32(0.0f); + case Blod::LZ: + return v.ir.Imm32(0.0f); + case Blod::LB: + case Blod::LL: + case Blod::LBA: + case Blod::LLA: + return v.F(reg++); + case Blod::INVALIDBLOD4: + case Blod::INVALIDBLOD5: + break; + } + throw NotImplementedException("Invalid blod {}", blod); +} + +IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { + const IR::U32 value{v.X(reg++)}; + switch (type) { + case TextureType::_1D: + case TextureType::ARRAY_1D: + return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true); + case TextureType::_2D: + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true)); + case TextureType::_3D: + case TextureType::ARRAY_3D: + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4), true)); + case TextureType::CUBE: + case TextureType::ARRAY_CUBE: + throw NotImplementedException("Illegal offset on CUBE sample"); + } + throw NotImplementedException("Invalid texture type {}", type); +} + 
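+// Example of the AOFFI packing handled by MakeOffset above: for a 2D access, an operand of 0x21 +// decodes to offsets (x=+1, y=+2); each 4-bit field is sign-extended, so 0xF encodes -1.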
+bool HasExplicitLod(Blod blod) { + switch (blod) { + case Blod::LL: + case Blod::LLA: + case Blod::LZ: + return true; + default: + return false; + } +} + +void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc, + std::optional<u32> cbuf_offset) { + union { + u64 raw; + BitField<35, 1, u64> ndv; + BitField<49, 1, u64> nodep; + BitField<50, 1, u64> dc; + BitField<51, 3, IR::Pred> sparse_pred; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<20, 8, IR::Reg> meta_reg; + BitField<28, 3, TextureType> type; + BitField<31, 4, u64> mask; + } const tex{insn}; + + if (lc) { + throw NotImplementedException("LC"); + } + const IR::Value coords{MakeCoords(v, tex.coord_reg, tex.type)}; + + IR::Reg meta_reg{tex.meta_reg}; + IR::Value handle; + IR::Value offset; + IR::F32 dref; + IR::F32 lod_clamp; + if (cbuf_offset) { + handle = v.ir.Imm32(*cbuf_offset); + } else { + handle = v.X(meta_reg++); + } + const IR::F32 lod{MakeLod(v, meta_reg, blod)}; + if (aoffi) { + offset = MakeOffset(v, meta_reg, tex.type); + } + if (tex.dc != 0) { + dref = v.F(meta_reg++); + } + IR::TextureInstInfo info{}; + info.type.Assign(GetType(tex.type)); + info.is_depth.Assign(tex.dc != 0 ? 1 : 0); + info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0); + info.has_lod_clamp.Assign(lc ? 1 : 0); + + const IR::Value sample{[&]() -> IR::Value { + if (tex.dc == 0) { + if (HasExplicitLod(blod)) { + return v.ir.ImageSampleExplicitLod(handle, coords, lod, offset, info); + } else { + return v.ir.ImageSampleImplicitLod(handle, coords, lod, offset, lod_clamp, info); + } + } + if (HasExplicitLod(blod)) { + return v.ir.ImageSampleDrefExplicitLod(handle, coords, dref, lod, offset, info); + } else { + return v.ir.ImageSampleDrefImplicitLod(handle, coords, dref, lod, offset, lod_clamp, + info); + } + }()}; + + IR::Reg dest_reg{tex.dest_reg}; + for (int element = 0; element < 4; ++element) { + if (((tex.mask >> element) & 1) == 0) { + continue; + } + IR::F32 value; + if (tex.dc != 0) { + value = element < 3 ? IR::F32{sample} : v.ir.Imm32(1.0f); + } else { + value = IR::F32{v.ir.CompositeExtract(sample, static_cast<size_t>(element))}; + } + v.F(dest_reg, value); + ++dest_reg; + } + if (tex.sparse_pred != IR::Pred::PT) { + v.ir.SetPred(tex.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); + } +} +} // Anonymous namespace + +void TranslatorVisitor::TEX(u64 insn) { + union { + u64 raw; + BitField<54, 1, u64> aoffi; + BitField<55, 3, Blod> blod; + BitField<58, 1, u64> lc; + BitField<36, 13, u64> cbuf_offset; + } const tex{insn}; + + Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast<u32>(tex.cbuf_offset * 4)); +} + +void TranslatorVisitor::TEX_b(u64 insn) { + union { + u64 raw; + BitField<36, 1, u64> aoffi; + BitField<37, 3, Blod> blod; + BitField<40, 1, u64> lc; + } const tex{insn}; + + Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, std::nullopt); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp new file mode 100644 index 000000000..154e7f1a1 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp @@ -0,0 +1,266 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
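+// TEXS is the swizzled form of TEX: addressing is selected by a small encoding field, and the +// sampled components chosen through RG_LUT/RGBA_LUT are stored either as 32-bit floats or packed +// into half-float pairs when the F16 precision bit is set.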
+ +#include <utility> + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Precision : u64 { + F16, + F32, +}; + +union Encoding { + u64 raw; + BitField<59, 1, Precision> precision; + BitField<53, 4, u64> encoding; + BitField<49, 1, u64> nodep; + BitField<28, 8, IR::Reg> dest_reg_b; + BitField<0, 8, IR::Reg> dest_reg_a; + BitField<8, 8, IR::Reg> src_reg_a; + BitField<20, 8, IR::Reg> src_reg_b; + BitField<36, 13, u64> cbuf_offset; + BitField<50, 3, u64> swizzle; +}; + +constexpr unsigned R = 1; +constexpr unsigned G = 2; +constexpr unsigned B = 4; +constexpr unsigned A = 8; + +constexpr std::array RG_LUT{ + R, // + G, // + B, // + A, // + R | G, // + R | A, // + G | A, // + B | A, // +}; + +constexpr std::array RGBA_LUT{ + R | G | B, // + R | G | A, // + R | B | A, // + G | B | A, // + R | G | B | A, // +}; + +void CheckAlignment(IR::Reg reg, size_t alignment) { + if (!IR::IsAligned(reg, alignment)) { + throw NotImplementedException("Unaligned source register {}", reg); + } +} + +template <typename... Args> +IR::Value Composite(TranslatorVisitor& v, Args... regs) { + return v.ir.CompositeConstruct(v.F(regs)...); +} + +IR::F32 ReadArray(TranslatorVisitor& v, const IR::U32& value) { + return v.ir.ConvertUToF(32, 16, v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(16))); +} + +IR::Value Sample(TranslatorVisitor& v, u64 insn) { + const Encoding texs{insn}; + const IR::U32 handle{v.ir.Imm32(static_cast<u32>(texs.cbuf_offset * 4))}; + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const IR::Reg reg_a{texs.src_reg_a}; + const IR::Reg reg_b{texs.src_reg_b}; + IR::TextureInstInfo info{}; + if (texs.precision == Precision::F16) { + info.relaxed_precision.Assign(1); + } + switch (texs.encoding) { + case 0: // 1D.LZ + info.type.Assign(TextureType::Color1D); + return v.ir.ImageSampleExplicitLod(handle, v.F(reg_a), zero, {}, info); + case 1: // 2D + info.type.Assign(TextureType::Color2D); + return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_b), {}, {}, {}, info); + case 2: // 2D.LZ + info.type.Assign(TextureType::Color2D); + return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_b), zero, {}, info); + case 3: // 2D.LL + CheckAlignment(reg_a, 2); + info.type.Assign(TextureType::Color2D); + return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), {}, + info); + case 4: // 2D.DC + CheckAlignment(reg_a, 2); + info.type.Assign(TextureType::Color2D); + info.is_depth.Assign(1); + return v.ir.ImageSampleDrefImplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), + {}, {}, {}, info); + case 5: // 2D.LL.DC + CheckAlignment(reg_a, 2); + CheckAlignment(reg_b, 2); + info.type.Assign(TextureType::Color2D); + info.is_depth.Assign(1); + return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1), + v.F(reg_b + 1), v.F(reg_b), {}, info); + case 6: // 2D.LZ.DC + CheckAlignment(reg_a, 2); + info.type.Assign(TextureType::Color2D); + info.is_depth.Assign(1); + return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), + zero, {}, info); + case 7: // ARRAY_2D + CheckAlignment(reg_a, 2); + info.type.Assign(TextureType::ColorArray2D); + return v.ir.ImageSampleImplicitLod( + handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))), + {}, {}, {}, info); + case 8: // ARRAY_2D.LZ + 
CheckAlignment(reg_a, 2); + info.type.Assign(TextureType::ColorArray2D); + return v.ir.ImageSampleExplicitLod( + handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))), + zero, {}, info); + case 9: // ARRAY_2D.LZ.DC + CheckAlignment(reg_a, 2); + CheckAlignment(reg_b, 2); + info.type.Assign(TextureType::ColorArray2D); + info.is_depth.Assign(1); + return v.ir.ImageSampleDrefExplicitLod( + handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))), + v.F(reg_b + 1), zero, {}, info); + case 10: // 3D + CheckAlignment(reg_a, 2); + info.type.Assign(TextureType::Color3D); + return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), {}, {}, + {}, info); + case 11: // 3D.LZ + CheckAlignment(reg_a, 2); + info.type.Assign(TextureType::Color3D); + return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), zero, {}, + info); + case 12: // CUBE + CheckAlignment(reg_a, 2); + info.type.Assign(TextureType::ColorCube); + return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), {}, {}, + {}, info); + case 13: // CUBE.LL + CheckAlignment(reg_a, 2); + CheckAlignment(reg_b, 2); + info.type.Assign(TextureType::ColorCube); + return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), + v.F(reg_b + 1), {}, info); + default: + throw NotImplementedException("Illegal encoding {}", texs.encoding.Value()); + } +} + +unsigned Swizzle(u64 insn) { + const Encoding texs{insn}; + const size_t encoding{texs.swizzle}; + if (texs.dest_reg_b == IR::Reg::RZ) { + if (encoding >= RG_LUT.size()) { + throw NotImplementedException("Illegal RG encoding {}", encoding); + } + return RG_LUT[encoding]; + } else { + if (encoding >= RGBA_LUT.size()) { + throw NotImplementedException("Illegal RGBA encoding {}", encoding); + } + return RGBA_LUT[encoding]; + } +} + +IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) { + const bool is_shadow{sample.Type() == IR::Type::F32}; + if (is_shadow) { + const bool is_alpha{component == 3}; + return is_alpha ? 
v.ir.Imm32(1.0f) : IR::F32{sample}; + } else { + return IR::F32{v.ir.CompositeExtract(sample, component)}; + } +} + +IR::Reg RegStoreComponent32(u64 insn, unsigned index) { + const Encoding texs{insn}; + switch (index) { + case 0: + return texs.dest_reg_a; + case 1: + CheckAlignment(texs.dest_reg_a, 2); + return texs.dest_reg_a + 1; + case 2: + return texs.dest_reg_b; + case 3: + CheckAlignment(texs.dest_reg_b, 2); + return texs.dest_reg_b + 1; + } + throw LogicError("Invalid store index {}", index); +} + +void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { + const unsigned swizzle{Swizzle(insn)}; + unsigned store_index{0}; + for (unsigned component = 0; component < 4; ++component) { + if (((swizzle >> component) & 1) == 0) { + continue; + } + const IR::Reg dest{RegStoreComponent32(insn, store_index)}; + v.F(dest, Extract(v, sample, component)); + ++store_index; + } +} + +IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) { + return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs)); +} + +void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { + const unsigned swizzle{Swizzle(insn)}; + unsigned store_index{0}; + std::array<IR::F32, 4> swizzled; + for (unsigned component = 0; component < 4; ++component) { + if (((swizzle >> component) & 1) == 0) { + continue; + } + swizzled[store_index] = Extract(v, sample, component); + ++store_index; + } + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const Encoding texs{insn}; + switch (store_index) { + case 1: + v.X(texs.dest_reg_a, Pack(v, swizzled[0], zero)); + break; + case 2: + case 3: + case 4: + v.X(texs.dest_reg_a, Pack(v, swizzled[0], swizzled[1])); + switch (store_index) { + case 2: + break; + case 3: + v.X(texs.dest_reg_b, Pack(v, swizzled[2], zero)); + break; + case 4: + v.X(texs.dest_reg_b, Pack(v, swizzled[2], swizzled[3])); + break; + } + break; + } +} +} // Anonymous namespace + +void TranslatorVisitor::TEXS(u64 insn) { + const IR::Value sample{Sample(*this, insn)}; + if (Encoding{insn}.precision == Precision::F32) { + Store32(*this, insn, sample); + } else { + Store16(*this, insn, sample); + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp new file mode 100644 index 000000000..218cbc1a8 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp @@ -0,0 +1,208 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
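+// TLD4 is a texture gather: it fetches the component selected by ComponentType (R/G/B/A) through +// ImageGather, with optional AOFFI or per-texel (PTP) offsets and depth compare; TLD4.B takes a +// bindless handle from a register.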
+ +#include <optional> + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +enum class TextureType : u64 { + _1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, + ARRAY_3D, + CUBE, + ARRAY_CUBE, +}; + +enum class OffsetType : u64 { + None = 0, + AOFFI, + PTP, + Invalid, +}; + +enum class ComponentType : u64 { + R = 0, + G = 1, + B = 2, + A = 3, +}; + +Shader::TextureType GetType(TextureType type) { + switch (type) { + case TextureType::_1D: + return Shader::TextureType::Color1D; + case TextureType::ARRAY_1D: + return Shader::TextureType::ColorArray1D; + case TextureType::_2D: + return Shader::TextureType::Color2D; + case TextureType::ARRAY_2D: + return Shader::TextureType::ColorArray2D; + case TextureType::_3D: + return Shader::TextureType::Color3D; + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return Shader::TextureType::ColorCube; + case TextureType::ARRAY_CUBE: + return Shader::TextureType::ColorArrayCube; + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { + const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }}; + switch (type) { + case TextureType::_1D: + return v.F(reg); + case TextureType::ARRAY_1D: + return v.ir.CompositeConstruct(v.F(reg + 1), read_array()); + case TextureType::_2D: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), read_array()); + case TextureType::_3D: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); + case TextureType::ARRAY_CUBE: + return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3), read_array()); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { + const IR::U32 value{v.X(reg++)}; + switch (type) { + case TextureType::_1D: + case TextureType::ARRAY_1D: + return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true); + case TextureType::_2D: + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true)); + case TextureType::_3D: + case TextureType::ARRAY_3D: + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(16), v.ir.Imm32(6), true)); + case TextureType::CUBE: + case TextureType::ARRAY_CUBE: + throw NotImplementedException("Illegal offset on CUBE sample"); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +std::pair<IR::Value, IR::Value> MakeOffsetPTP(TranslatorVisitor& v, IR::Reg& reg) { + const IR::U32 value1{v.X(reg++)}; + const IR::U32 value2{v.X(reg++)}; + const IR::U32 bitsize{v.ir.Imm32(6)}; + const auto make_vector{[&v, &bitsize](const IR::U32& value) { + return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), bitsize, 
true), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), bitsize, true), + v.ir.BitFieldExtract(value, v.ir.Imm32(16), bitsize, true), + v.ir.BitFieldExtract(value, v.ir.Imm32(24), bitsize, true)); + }}; + return {make_vector(value1), make_vector(value2)}; +} + +void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetType offset_type, + bool is_bindless) { + union { + u64 raw; + BitField<35, 1, u64> ndv; + BitField<49, 1, u64> nodep; + BitField<50, 1, u64> dc; + BitField<51, 3, IR::Pred> sparse_pred; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<20, 8, IR::Reg> meta_reg; + BitField<28, 3, TextureType> type; + BitField<31, 4, u64> mask; + BitField<36, 13, u64> cbuf_offset; + } const tld4{insn}; + + const IR::Value coords{MakeCoords(v, tld4.coord_reg, tld4.type)}; + + IR::Reg meta_reg{tld4.meta_reg}; + IR::Value handle; + IR::Value offset; + IR::Value offset2; + IR::F32 dref; + if (!is_bindless) { + handle = v.ir.Imm32(static_cast<u32>(tld4.cbuf_offset.Value() * 4)); + } else { + handle = v.X(meta_reg++); + } + switch (offset_type) { + case OffsetType::None: + break; + case OffsetType::AOFFI: + offset = MakeOffset(v, meta_reg, tld4.type); + break; + case OffsetType::PTP: + std::tie(offset, offset2) = MakeOffsetPTP(v, meta_reg); + break; + default: + throw NotImplementedException("Invalid offset type {}", offset_type); + } + if (tld4.dc != 0) { + dref = v.F(meta_reg++); + } + IR::TextureInstInfo info{}; + info.type.Assign(GetType(tld4.type)); + info.is_depth.Assign(tld4.dc != 0 ? 1 : 0); + info.gather_component.Assign(static_cast<u32>(component_type)); + const IR::Value sample{[&] { + if (tld4.dc == 0) { + return v.ir.ImageGather(handle, coords, offset, offset2, info); + } + return v.ir.ImageGatherDref(handle, coords, offset, offset2, dref, info); + }()}; + + IR::Reg dest_reg{tld4.dest_reg}; + for (size_t element = 0; element < 4; ++element) { + if (((tld4.mask >> element) & 1) == 0) { + continue; + } + v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)}); + ++dest_reg; + } + if (tld4.sparse_pred != IR::Pred::PT) { + v.ir.SetPred(tld4.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); + } +} +} // Anonymous namespace + +void TranslatorVisitor::TLD4(u64 insn) { + union { + u64 raw; + BitField<56, 2, ComponentType> component; + BitField<54, 2, OffsetType> offset; + } const tld4{insn}; + Impl(*this, insn, tld4.component, tld4.offset, false); +} + +void TranslatorVisitor::TLD4_b(u64 insn) { + union { + u64 raw; + BitField<38, 2, ComponentType> component; + BitField<36, 2, OffsetType> offset; + } const tld4{insn}; + Impl(*this, insn, tld4.component, tld4.offset, true); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp new file mode 100644 index 000000000..34efa2d50 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp @@ -0,0 +1,134 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
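+// TLD4S is the swizzled form of TLD4: a 2D gather through a constant-buffer handle, with +// optional AOFFI offset and depth compare, storing the result as floats or half-float pairs.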
+ +#include <utility> + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Precision : u64 { + F32, + F16, +}; + +enum class ComponentType : u64 { + R = 0, + G = 1, + B = 2, + A = 3, +}; + +union Encoding { + u64 raw; + BitField<55, 1, Precision> precision; + BitField<52, 2, ComponentType> component_type; + BitField<51, 1, u64> aoffi; + BitField<50, 1, u64> dc; + BitField<49, 1, u64> nodep; + BitField<28, 8, IR::Reg> dest_reg_b; + BitField<0, 8, IR::Reg> dest_reg_a; + BitField<8, 8, IR::Reg> src_reg_a; + BitField<20, 8, IR::Reg> src_reg_b; + BitField<36, 13, u64> cbuf_offset; +}; + +void CheckAlignment(IR::Reg reg, size_t alignment) { + if (!IR::IsAligned(reg, alignment)) { + throw NotImplementedException("Unaligned source register {}", reg); + } +} + +IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) { + const IR::U32 value{v.X(reg)}; + return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true)); +} + +IR::Value Sample(TranslatorVisitor& v, u64 insn) { + const Encoding tld4s{insn}; + const IR::U32 handle{v.ir.Imm32(static_cast<u32>(tld4s.cbuf_offset * 4))}; + const IR::Reg reg_a{tld4s.src_reg_a}; + const IR::Reg reg_b{tld4s.src_reg_b}; + IR::TextureInstInfo info{}; + if (tld4s.precision == Precision::F16) { + info.relaxed_precision.Assign(1); + } + info.gather_component.Assign(static_cast<u32>(tld4s.component_type.Value())); + info.type.Assign(Shader::TextureType::Color2D); + info.is_depth.Assign(tld4s.dc != 0 ? 1 : 0); + IR::Value coords; + if (tld4s.aoffi != 0) { + CheckAlignment(reg_a, 2); + coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_a + 1)); + IR::Value offset = MakeOffset(v, reg_b); + if (tld4s.dc != 0) { + CheckAlignment(reg_b, 2); + IR::F32 dref = v.F(reg_b + 1); + return v.ir.ImageGatherDref(handle, coords, offset, {}, dref, info); + } + return v.ir.ImageGather(handle, coords, offset, {}, info); + } + if (tld4s.dc != 0) { + CheckAlignment(reg_a, 2); + coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_a + 1)); + IR::F32 dref = v.F(reg_b); + return v.ir.ImageGatherDref(handle, coords, {}, {}, dref, info); + } + coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_b)); + return v.ir.ImageGather(handle, coords, {}, {}, info); +} + +IR::Reg RegStoreComponent32(u64 insn, size_t index) { + const Encoding tlds4{insn}; + switch (index) { + case 0: + return tlds4.dest_reg_a; + case 1: + CheckAlignment(tlds4.dest_reg_a, 2); + return tlds4.dest_reg_a + 1; + case 2: + return tlds4.dest_reg_b; + case 3: + CheckAlignment(tlds4.dest_reg_b, 2); + return tlds4.dest_reg_b + 1; + } + throw LogicError("Invalid store index {}", index); +} + +void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { + for (size_t component = 0; component < 4; ++component) { + const IR::Reg dest{RegStoreComponent32(insn, component)}; + v.F(dest, IR::F32{v.ir.CompositeExtract(sample, component)}); + } +} + +IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) { + return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs)); +} + +void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { + std::array<IR::F32, 4> swizzled; + for (size_t component = 0; component < 4; ++component) { + swizzled[component] = IR::F32{v.ir.CompositeExtract(sample, component)}; + } + const 
Encoding tld4s{insn}; + v.X(tld4s.dest_reg_a, Pack(v, swizzled[0], swizzled[1])); + v.X(tld4s.dest_reg_b, Pack(v, swizzled[2], swizzled[3])); +} +} // Anonymous namespace + +void TranslatorVisitor::TLD4S(u64 insn) { + const IR::Value sample{Sample(*this, insn)}; + if (Encoding{insn}.precision == Precision::F32) { + Store32(*this, insn, sample); + } else { + Store16(*this, insn, sample); + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp new file mode 100644 index 000000000..c3fe3ffda --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp @@ -0,0 +1,182 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <optional> + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +enum class TextureType : u64 { + _1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, + ARRAY_3D, + CUBE, + ARRAY_CUBE, +}; + +Shader::TextureType GetType(TextureType type) { + switch (type) { + case TextureType::_1D: + return Shader::TextureType::Color1D; + case TextureType::ARRAY_1D: + return Shader::TextureType::ColorArray1D; + case TextureType::_2D: + return Shader::TextureType::Color2D; + case TextureType::ARRAY_2D: + return Shader::TextureType::ColorArray2D; + case TextureType::_3D: + return Shader::TextureType::Color3D; + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return Shader::TextureType::ColorCube; + case TextureType::ARRAY_CUBE: + return Shader::TextureType::ColorArrayCube; + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg, bool has_lod_clamp) { + const IR::U32 value{v.X(reg)}; + const u32 base{has_lod_clamp ? 12U : 16U}; + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(base), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(base + 4), v.ir.Imm32(4), true)); +} + +void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { + union { + u64 raw; + BitField<49, 1, u64> nodep; + BitField<35, 1, u64> aoffi; + BitField<50, 1, u64> lc; + BitField<51, 3, IR::Pred> sparse_pred; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<20, 8, IR::Reg> derivate_reg; + BitField<28, 3, TextureType> type; + BitField<31, 4, u64> mask; + BitField<36, 13, u64> cbuf_offset; + } const txd{insn}; + + const bool has_lod_clamp = txd.lc != 0; + if (has_lod_clamp) { + throw NotImplementedException("TXD.LC - CLAMP is not implemented"); + } + + IR::Value coords; + u32 num_derivates{}; + IR::Reg base_reg{txd.coord_reg}; + IR::Reg last_reg; + IR::Value handle; + if (is_bindless) { + handle = v.X(base_reg++); + } else { + handle = v.ir.Imm32(static_cast<u32>(txd.cbuf_offset.Value() * 4)); + } + + const auto read_array{[&]() -> IR::F32 { + const IR::U32 base{v.ir.Imm32(0)}; + const IR::U32 count{v.ir.Imm32(has_lod_clamp ? 
12 : 16)}; + const IR::U32 array_index{v.ir.BitFieldExtract(v.X(last_reg), base, count)}; + return v.ir.ConvertUToF(32, 16, array_index); + }}; + switch (txd.type) { + case TextureType::_1D: { + coords = v.F(base_reg); + num_derivates = 1; + last_reg = base_reg + 1; + break; + } + case TextureType::ARRAY_1D: { + last_reg = base_reg + 1; + coords = v.ir.CompositeConstruct(v.F(base_reg), read_array()); + num_derivates = 1; + break; + } + case TextureType::_2D: { + last_reg = base_reg + 2; + coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1)); + num_derivates = 2; + break; + } + case TextureType::ARRAY_2D: { + last_reg = base_reg + 2; + coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1), read_array()); + num_derivates = 2; + break; + } + default: + throw NotImplementedException("Invalid texture type"); + } + + const IR::Reg derivate_reg{txd.derivate_reg}; + IR::Value derivates; + switch (num_derivates) { + case 1: { + derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1)); + break; + } + case 2: { + derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1), + v.F(derivate_reg + 2), v.F(derivate_reg + 3)); + break; + } + default: + throw NotImplementedException("Invalid texture type"); + } + + IR::Value offset; + if (txd.aoffi != 0) { + offset = MakeOffset(v, last_reg, has_lod_clamp); + } + + IR::F32 lod_clamp; + if (has_lod_clamp) { + // Lod Clamp is a Fixed Point 4.8, we need to transform it to float. + // to convert a fixed point, float(value) / float(1 << fixed_point) + // in this case the fixed_point is 8. + const IR::F32 conv4_8fixp_f{v.ir.Imm32(static_cast<f32>(1U << 8))}; + const IR::F32 fixp_lc{v.ir.ConvertUToF( + 32, 16, v.ir.BitFieldExtract(v.X(last_reg), v.ir.Imm32(20), v.ir.Imm32(12)))}; + lod_clamp = v.ir.FPMul(fixp_lc, conv4_8fixp_f); + } + + IR::TextureInstInfo info{}; + info.type.Assign(GetType(txd.type)); + info.num_derivates.Assign(num_derivates); + info.has_lod_clamp.Assign(has_lod_clamp ? 1 : 0); + const IR::Value sample{v.ir.ImageGradient(handle, coords, derivates, offset, lod_clamp, info)}; + + IR::Reg dest_reg{txd.dest_reg}; + for (size_t element = 0; element < 4; ++element) { + if (((txd.mask >> element) & 1) == 0) { + continue; + } + v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)}); + ++dest_reg; + } + if (txd.sparse_pred != IR::Pred::PT) { + v.ir.SetPred(txd.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); + } +} +} // Anonymous namespace + +void TranslatorVisitor::TXD(u64 insn) { + Impl(*this, insn, false); +} + +void TranslatorVisitor::TXD_b(u64 insn) { + Impl(*this, insn, true); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp new file mode 100644 index 000000000..983058303 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp @@ -0,0 +1,165 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
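+// TLD fetches a texel with integer coordinates through ImageFetch, taking an optional LOD, AOFFI +// offset and multisample index from the meta register; TLD.CL (clamp) is not implemented.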
+ +#include <optional> + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +enum class TextureType : u64 { + _1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, + ARRAY_3D, + CUBE, + ARRAY_CUBE, +}; + +Shader::TextureType GetType(TextureType type) { + switch (type) { + case TextureType::_1D: + return Shader::TextureType::Color1D; + case TextureType::ARRAY_1D: + return Shader::TextureType::ColorArray1D; + case TextureType::_2D: + return Shader::TextureType::Color2D; + case TextureType::ARRAY_2D: + return Shader::TextureType::ColorArray2D; + case TextureType::_3D: + return Shader::TextureType::Color3D; + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return Shader::TextureType::ColorCube; + case TextureType::ARRAY_CUBE: + return Shader::TextureType::ColorArrayCube; + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { + const auto read_array{ + [&]() -> IR::U32 { return v.ir.BitFieldExtract(v.X(reg), v.ir.Imm32(0), v.ir.Imm32(16)); }}; + switch (type) { + case TextureType::_1D: + return v.X(reg); + case TextureType::ARRAY_1D: + return v.ir.CompositeConstruct(v.X(reg + 1), read_array()); + case TextureType::_2D: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1)); + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct(v.X(reg + 1), v.X(reg + 2), read_array()); + case TextureType::_3D: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2)); + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2)); + case TextureType::ARRAY_CUBE: + return v.ir.CompositeConstruct(v.X(reg + 1), v.X(reg + 2), v.X(reg + 3), read_array()); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { + const IR::U32 value{v.X(reg++)}; + switch (type) { + case TextureType::_1D: + case TextureType::ARRAY_1D: + return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true); + case TextureType::_2D: + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true)); + case TextureType::_3D: + case TextureType::ARRAY_3D: + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4), true)); + case TextureType::CUBE: + case TextureType::ARRAY_CUBE: + throw NotImplementedException("Illegal offset on CUBE sample"); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { + union { + u64 raw; + BitField<49, 1, u64> nodep; + BitField<55, 1, u64> lod; + BitField<50, 1, u64> multisample; + BitField<35, 1, u64> aoffi; + BitField<54, 1, u64> clamp; + BitField<51, 3, IR::Pred> sparse_pred; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<20, 8, IR::Reg> meta_reg; + BitField<28, 3, TextureType> type; + BitField<31, 4, u64> mask; + BitField<36, 
13, u64> cbuf_offset; + } const tld{insn}; + + const IR::Value coords{MakeCoords(v, tld.coord_reg, tld.type)}; + + IR::Reg meta_reg{tld.meta_reg}; + IR::Value handle; + IR::Value offset; + IR::U32 lod; + IR::U32 multisample; + if (is_bindless) { + handle = v.X(meta_reg++); + } else { + handle = v.ir.Imm32(static_cast<u32>(tld.cbuf_offset.Value() * 4)); + } + if (tld.lod != 0) { + lod = v.X(meta_reg++); + } else { + lod = v.ir.Imm32(0U); + } + if (tld.aoffi != 0) { + offset = MakeOffset(v, meta_reg, tld.type); + } + if (tld.multisample != 0) { + multisample = v.X(meta_reg++); + } + if (tld.clamp != 0) { + throw NotImplementedException("TLD.CL - CLAMP is not implemented"); + } + IR::TextureInstInfo info{}; + info.type.Assign(GetType(tld.type)); + const IR::Value sample{v.ir.ImageFetch(handle, coords, offset, lod, multisample, info)}; + + IR::Reg dest_reg{tld.dest_reg}; + for (size_t element = 0; element < 4; ++element) { + if (((tld.mask >> element) & 1) == 0) { + continue; + } + v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)}); + ++dest_reg; + } + if (tld.sparse_pred != IR::Pred::PT) { + v.ir.SetPred(tld.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); + } +} +} // Anonymous namespace + +void TranslatorVisitor::TLD(u64 insn) { + Impl(*this, insn, false); +} + +void TranslatorVisitor::TLD_b(u64 insn) { + Impl(*this, insn, true); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp new file mode 100644 index 000000000..5dd7e31b2 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp @@ -0,0 +1,242 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included.
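+// TLDS is the swizzled form of TLD: the addressing mode is selected by a 4-bit encoding field and +// the fetched components are stored either as 32-bit floats or packed into half-float pairs.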
+ +#include <array> + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Precision : u64 { + F16, + F32, +}; + +constexpr unsigned R = 1; +constexpr unsigned G = 2; +constexpr unsigned B = 4; +constexpr unsigned A = 8; + +constexpr std::array RG_LUT{ + R, // + G, // + B, // + A, // + R | G, // + R | A, // + G | A, // + B | A, // +}; + +constexpr std::array RGBA_LUT{ + R | G | B, // + R | G | A, // + R | B | A, // + G | B | A, // + R | G | B | A, // +}; + +union Encoding { + u64 raw; + BitField<59, 1, Precision> precision; + BitField<54, 1, u64> aoffi; + BitField<53, 1, u64> lod; + BitField<55, 1, u64> ms; + BitField<49, 1, u64> nodep; + BitField<28, 8, IR::Reg> dest_reg_b; + BitField<0, 8, IR::Reg> dest_reg_a; + BitField<8, 8, IR::Reg> src_reg_a; + BitField<20, 8, IR::Reg> src_reg_b; + BitField<36, 13, u64> cbuf_offset; + BitField<50, 3, u64> swizzle; + BitField<53, 4, u64> encoding; +}; + +void CheckAlignment(IR::Reg reg, size_t alignment) { + if (!IR::IsAligned(reg, alignment)) { + throw NotImplementedException("Unaligned source register {}", reg); + } +} + +IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) { + const IR::U32 value{v.X(reg)}; + return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true)); +} + +IR::Value Sample(TranslatorVisitor& v, u64 insn) { + const Encoding tlds{insn}; + const IR::U32 handle{v.ir.Imm32(static_cast<u32>(tlds.cbuf_offset * 4))}; + const IR::Reg reg_a{tlds.src_reg_a}; + const IR::Reg reg_b{tlds.src_reg_b}; + IR::Value coords; + IR::U32 lod{v.ir.Imm32(0U)}; + IR::Value offsets; + IR::U32 multisample; + Shader::TextureType texture_type{}; + switch (tlds.encoding) { + case 0: + texture_type = Shader::TextureType::Color1D; + coords = v.X(reg_a); + break; + case 1: + texture_type = Shader::TextureType::Color1D; + coords = v.X(reg_a); + lod = v.X(reg_b); + break; + case 2: + texture_type = Shader::TextureType::Color2D; + coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_b)); + break; + case 4: + CheckAlignment(reg_a, 2); + texture_type = Shader::TextureType::Color2D; + coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); + offsets = MakeOffset(v, reg_b); + break; + case 5: + CheckAlignment(reg_a, 2); + texture_type = Shader::TextureType::Color2D; + coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); + lod = v.X(reg_b); + break; + case 6: + CheckAlignment(reg_a, 2); + texture_type = Shader::TextureType::Color2D; + coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); + multisample = v.X(reg_b); + break; + case 7: + CheckAlignment(reg_a, 2); + texture_type = Shader::TextureType::Color3D; + coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1), v.X(reg_b)); + break; + case 8: { + CheckAlignment(reg_b, 2); + const IR::U32 array{v.ir.BitFieldExtract(v.X(reg_a), v.ir.Imm32(0), v.ir.Imm32(16))}; + texture_type = Shader::TextureType::ColorArray2D; + coords = v.ir.CompositeConstruct(v.X(reg_b), v.X(reg_b + 1), array); + break; + } + case 12: + CheckAlignment(reg_a, 2); + CheckAlignment(reg_b, 2); + texture_type = Shader::TextureType::Color2D; + coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); + lod = v.X(reg_b); + offsets = MakeOffset(v, reg_b + 1); + break; + default: + throw NotImplementedException("Illegal encoding {}", 
tlds.encoding.Value()); + } + IR::TextureInstInfo info{}; + if (tlds.precision == Precision::F16) { + info.relaxed_precision.Assign(1); + } + info.type.Assign(texture_type); + return v.ir.ImageFetch(handle, coords, offsets, lod, multisample, info); +} + +unsigned Swizzle(u64 insn) { + const Encoding tlds{insn}; + const size_t encoding{tlds.swizzle}; + if (tlds.dest_reg_b == IR::Reg::RZ) { + if (encoding >= RG_LUT.size()) { + throw NotImplementedException("Illegal RG encoding {}", encoding); + } + return RG_LUT[encoding]; + } else { + if (encoding >= RGBA_LUT.size()) { + throw NotImplementedException("Illegal RGBA encoding {}", encoding); + } + return RGBA_LUT[encoding]; + } +} + +IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) { + return IR::F32{v.ir.CompositeExtract(sample, component)}; +} + +IR::Reg RegStoreComponent32(u64 insn, unsigned index) { + const Encoding tlds{insn}; + switch (index) { + case 0: + return tlds.dest_reg_a; + case 1: + CheckAlignment(tlds.dest_reg_a, 2); + return tlds.dest_reg_a + 1; + case 2: + return tlds.dest_reg_b; + case 3: + CheckAlignment(tlds.dest_reg_b, 2); + return tlds.dest_reg_b + 1; + } + throw LogicError("Invalid store index {}", index); +} + +void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { + const unsigned swizzle{Swizzle(insn)}; + unsigned store_index{0}; + for (unsigned component = 0; component < 4; ++component) { + if (((swizzle >> component) & 1) == 0) { + continue; + } + const IR::Reg dest{RegStoreComponent32(insn, store_index)}; + v.F(dest, Extract(v, sample, component)); + ++store_index; + } +} + +IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) { + return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs)); +} + +void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { + const unsigned swizzle{Swizzle(insn)}; + unsigned store_index{0}; + std::array<IR::F32, 4> swizzled; + for (unsigned component = 0; component < 4; ++component) { + if (((swizzle >> component) & 1) == 0) { + continue; + } + swizzled[store_index] = Extract(v, sample, component); + ++store_index; + } + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const Encoding tlds{insn}; + switch (store_index) { + case 1: + v.X(tlds.dest_reg_a, Pack(v, swizzled[0], zero)); + break; + case 2: + case 3: + case 4: + v.X(tlds.dest_reg_a, Pack(v, swizzled[0], swizzled[1])); + switch (store_index) { + case 2: + break; + case 3: + v.X(tlds.dest_reg_b, Pack(v, swizzled[2], zero)); + break; + case 4: + v.X(tlds.dest_reg_b, Pack(v, swizzled[2], swizzled[3])); + break; + } + break; + } +} +} // Anonymous namespace + +void TranslatorVisitor::TLDS(u64 insn) { + const IR::Value sample{Sample(*this, insn)}; + if (Encoding{insn}.precision == Precision::F32) { + Store32(*this, insn, sample); + } else { + Store16(*this, insn, sample); + } +} +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp new file mode 100644 index 000000000..aea3c0e62 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp @@ -0,0 +1,131 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
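+// TMML queries the computed LOD through ImageQueryLod; the first two result components are +// converted to integers and shifted left by 8 before being stored, the remaining ones are written +// as floats.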
+ +#include <optional> + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +enum class TextureType : u64 { + _1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, + ARRAY_3D, + CUBE, + ARRAY_CUBE, +}; + +Shader::TextureType GetType(TextureType type) { + switch (type) { + case TextureType::_1D: + return Shader::TextureType::Color1D; + case TextureType::ARRAY_1D: + return Shader::TextureType::ColorArray1D; + case TextureType::_2D: + return Shader::TextureType::Color2D; + case TextureType::ARRAY_2D: + return Shader::TextureType::ColorArray2D; + case TextureType::_3D: + return Shader::TextureType::Color3D; + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return Shader::TextureType::ColorCube; + case TextureType::ARRAY_CUBE: + return Shader::TextureType::ColorArrayCube; + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { + // The ISA reads an array component here, but this is not needed on high level shading languages + // We are dropping this information. + switch (type) { + case TextureType::_1D: + return v.F(reg); + case TextureType::ARRAY_1D: + return v.F(reg + 1); + case TextureType::_2D: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2)); + case TextureType::_3D: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); + case TextureType::ARRAY_CUBE: + return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3)); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { + union { + u64 raw; + BitField<49, 1, u64> nodep; + BitField<35, 1, u64> ndv; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<20, 8, IR::Reg> meta_reg; + BitField<28, 3, TextureType> type; + BitField<31, 4, u64> mask; + BitField<36, 13, u64> cbuf_offset; + } const tmml{insn}; + + if ((tmml.mask & 0b1100) != 0) { + throw NotImplementedException("TMML BA results are not implmented"); + } + const IR::Value coords{MakeCoords(v, tmml.coord_reg, tmml.type)}; + + IR::U32 handle; + IR::Reg meta_reg{tmml.meta_reg}; + if (is_bindless) { + handle = v.X(meta_reg++); + } else { + handle = v.ir.Imm32(static_cast<u32>(tmml.cbuf_offset.Value() * 4)); + } + IR::TextureInstInfo info{}; + info.type.Assign(GetType(tmml.type)); + const IR::Value sample{v.ir.ImageQueryLod(handle, coords, info)}; + + IR::Reg dest_reg{tmml.dest_reg}; + for (size_t element = 0; element < 4; ++element) { + if (((tmml.mask >> element) & 1) == 0) { + continue; + } + IR::F32 value{v.ir.CompositeExtract(sample, element)}; + if (element < 2) { + IR::U32 casted_value; + if (element == 0) { + casted_value = v.ir.ConvertFToU(32, value); + } else { + casted_value = v.ir.ConvertFToS(16, value); + } + v.X(dest_reg, v.ir.ShiftLeftLogical(casted_value, v.ir.Imm32(8))); + } else { + v.F(dest_reg, value); + } + ++dest_reg; + } +} +} // Anonymous namespace + +void TranslatorVisitor::TMML(u64 insn) { + Impl(*this, insn, false); 
+} + +void TranslatorVisitor::TMML_b(u64 insn) { + Impl(*this, insn, true); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp new file mode 100644 index 000000000..0459e5473 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp @@ -0,0 +1,76 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <optional> + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Mode : u64 { + Dimension = 1, + TextureType = 2, + SamplePos = 5, +}; + +IR::Value Query(TranslatorVisitor& v, const IR::U32& handle, Mode mode, IR::Reg src_reg) { + switch (mode) { + case Mode::Dimension: { + const IR::U32 lod{v.X(src_reg)}; + return v.ir.ImageQueryDimension(handle, lod); + } + case Mode::TextureType: + case Mode::SamplePos: + default: + throw NotImplementedException("Mode {}", mode); + } +} + +void Impl(TranslatorVisitor& v, u64 insn, std::optional<u32> cbuf_offset) { + union { + u64 raw; + BitField<49, 1, u64> nodep; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<22, 3, Mode> mode; + BitField<31, 4, u64> mask; + } const txq{insn}; + + IR::Reg src_reg{txq.src_reg}; + IR::U32 handle; + if (cbuf_offset) { + handle = v.ir.Imm32(*cbuf_offset); + } else { + handle = v.X(src_reg); + ++src_reg; + } + const IR::Value query{Query(v, handle, txq.mode, src_reg)}; + IR::Reg dest_reg{txq.dest_reg}; + for (int element = 0; element < 4; ++element) { + if (((txq.mask >> element) & 1) == 0) { + continue; + } + v.X(dest_reg, IR::U32{v.ir.CompositeExtract(query, static_cast<size_t>(element))}); + ++dest_reg; + } +} +} // Anonymous namespace + +void TranslatorVisitor::TXQ(u64 insn) { + union { + u64 raw; + BitField<36, 13, u64> cbuf_offset; + } const txq{insn}; + + Impl(*this, insn, static_cast<u32>(txq.cbuf_offset * 4)); +} + +void TranslatorVisitor::TXQ_b(u64 insn) { + Impl(*this, insn, std::nullopt); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp new file mode 100644 index 000000000..e1f4174cf --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp @@ -0,0 +1,30 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
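// [Editor's note] Hedged sketch, not part of this patch: TXQ and TMML above share the same
// writeback pattern - for every set bit in the 4-bit mask, one component of the query result
// is stored and the destination register advances by one. Plain integers stand in for IR
// registers here; WriteBackMasked is a hypothetical name.
#include <array>
#include <cstddef>

void WriteBackMasked(unsigned mask, const std::array<int, 4>& query, std::array<int, 256>& regs,
                     std::size_t dest_reg) {
    for (std::size_t element = 0; element < 4; ++element) {
        if (((mask >> element) & 1) == 0) {
            continue; // disabled components do not consume a destination register
        }
        regs[dest_reg] = query[element];
        ++dest_reg;
    }
}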
+ +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h" + +namespace Shader::Maxwell { + +IR::U32 ExtractVideoOperandValue(IR::IREmitter& ir, const IR::U32& value, VideoWidth width, + u32 selector, bool is_signed) { + switch (width) { + case VideoWidth::Byte: + case VideoWidth::Unknown: + return ir.BitFieldExtract(value, ir.Imm32(selector * 8), ir.Imm32(8), is_signed); + case VideoWidth::Short: + return ir.BitFieldExtract(value, ir.Imm32(selector * 16), ir.Imm32(16), is_signed); + case VideoWidth::Word: + return value; + default: + throw NotImplementedException("Unknown VideoWidth {}", width); + } +} + +VideoWidth GetVideoSourceWidth(VideoWidth width, bool is_immediate) { + // immediates must be 16-bit format. + return is_immediate ? VideoWidth::Short : width; +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h new file mode 100644 index 000000000..40c0b907c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h @@ -0,0 +1,23 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +enum class VideoWidth : u64 { + Byte, + Unknown, + Short, + Word, +}; + +[[nodiscard]] IR::U32 ExtractVideoOperandValue(IR::IREmitter& ir, const IR::U32& value, + VideoWidth width, u32 selector, bool is_signed); + +[[nodiscard]] VideoWidth GetVideoSourceWidth(VideoWidth width, bool is_immediate); + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp new file mode 100644 index 000000000..78869601f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp @@ -0,0 +1,92 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
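// [Editor's note] Standalone sketch, not part of this patch: the arithmetic that
// ExtractVideoOperandValue above expresses through BitFieldExtract. A byte (8-bit) or short
// (16-bit) lane is selected and then zero- or sign-extended to 32 bits; the Word case returns
// the value unchanged. ExtractLane is a hypothetical name. Selectors are 0-3 for byte lanes
// and 0-1 for short lanes; larger values would shift out of range.
#include <cstdint>

std::uint32_t ExtractLane(std::uint32_t value, unsigned width_bits, unsigned selector,
                          bool is_signed) {
    if (width_bits >= 32) {
        return value; // VideoWidth::Word
    }
    const std::uint32_t lane = (value >> (selector * width_bits)) & ((1u << width_bits) - 1u);
    if (!is_signed) {
        return lane;
    }
    const std::uint32_t sign_bit = 1u << (width_bits - 1u);
    return (lane ^ sign_bit) - sign_bit; // two's complement sign extension
}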
+ +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h" + +namespace Shader::Maxwell { +namespace { +enum class VideoMinMaxOps : u64 { + MRG_16H, + MRG_16L, + MRG_8B0, + MRG_8B2, + ACC, + MIN, + MAX, +}; + +[[nodiscard]] IR::U32 ApplyVideoMinMaxOp(IR::IREmitter& ir, const IR::U32& lhs, const IR::U32& rhs, + VideoMinMaxOps op, bool is_signed) { + switch (op) { + case VideoMinMaxOps::MIN: + return ir.IMin(lhs, rhs, is_signed); + case VideoMinMaxOps::MAX: + return ir.IMax(lhs, rhs, is_signed); + default: + throw NotImplementedException("VMNMX op {}", op); + } +} +} // Anonymous namespace + +void TranslatorVisitor::VMNMX(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<20, 16, u64> src_b_imm; + BitField<28, 2, u64> src_b_selector; + BitField<29, 2, VideoWidth> src_b_width; + BitField<36, 2, u64> src_a_selector; + BitField<37, 2, VideoWidth> src_a_width; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> src_a_sign; + BitField<49, 1, u64> src_b_sign; + BitField<50, 1, u64> is_src_b_reg; + BitField<51, 3, VideoMinMaxOps> op; + BitField<54, 1, u64> dest_sign; + BitField<55, 1, u64> sat; + BitField<56, 1, u64> mx; + } const vmnmx{insn}; + + if (vmnmx.cc != 0) { + throw NotImplementedException("VMNMX CC"); + } + if (vmnmx.sat != 0) { + throw NotImplementedException("VMNMX SAT"); + } + // Selectors were shown to default to 2 in unit tests + if (vmnmx.src_a_selector != 2) { + throw NotImplementedException("VMNMX Selector {}", vmnmx.src_a_selector.Value()); + } + if (vmnmx.src_b_selector != 2) { + throw NotImplementedException("VMNMX Selector {}", vmnmx.src_b_selector.Value()); + } + if (vmnmx.src_a_width != VideoWidth::Word) { + throw NotImplementedException("VMNMX Source Width {}", vmnmx.src_a_width.Value()); + } + + const bool is_b_imm{vmnmx.is_src_b_reg == 0}; + const IR::U32 src_a{GetReg8(insn)}; + const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vmnmx.src_b_imm)) : GetReg20(insn)}; + const IR::U32 src_c{GetReg39(insn)}; + + const VideoWidth a_width{vmnmx.src_a_width}; + const VideoWidth b_width{GetVideoSourceWidth(vmnmx.src_b_width, is_b_imm)}; + + const bool src_a_signed{vmnmx.src_a_sign != 0}; + const bool src_b_signed{vmnmx.src_b_sign != 0}; + const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, 0, src_a_signed)}; + const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, 0, src_b_signed)}; + + // First operation's sign is only dependent on operand b's sign + const bool op_1_signed{src_b_signed}; + + const IR::U32 lhs{vmnmx.mx != 0 ? ir.IMax(op_a, op_b, op_1_signed) + : ir.IMin(op_a, op_b, op_1_signed)}; + X(vmnmx.dest_reg, ApplyVideoMinMaxOp(ir, lhs, src_c, vmnmx.op, vmnmx.dest_sign != 0)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp new file mode 100644 index 000000000..cc2e6d6e6 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp @@ -0,0 +1,64 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
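// [Editor's note] Illustrative scalar sketch, not part of this patch: the VMNMX data path
// above is a first min/max between the two extracted video lanes followed by a second min/max
// against the third source register. Signed 64-bit values are used only so one function can
// stand in for both signed and unsigned 32-bit inputs; Vmnmx is a hypothetical name.
#include <algorithm>
#include <cstdint>

std::int64_t MinOrMax(std::int64_t a, std::int64_t b, bool take_max) {
    return take_max ? std::max(a, b) : std::min(a, b);
}

std::int64_t Vmnmx(std::int64_t lane_a, std::int64_t lane_b, std::int64_t src_c, bool mx,
                   bool outer_is_max) {
    const std::int64_t first = MinOrMax(lane_a, lane_b, mx); // IMin/IMax on the lanes
    return MinOrMax(first, src_c, outer_is_max);             // VideoMinMaxOps::MIN or ::MAX
}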
+ +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h" + +namespace Shader::Maxwell { +void TranslatorVisitor::VMAD(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<20, 16, u64> src_b_imm; + BitField<28, 2, u64> src_b_selector; + BitField<29, 2, VideoWidth> src_b_width; + BitField<36, 2, u64> src_a_selector; + BitField<37, 2, VideoWidth> src_a_width; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> src_a_sign; + BitField<49, 1, u64> src_b_sign; + BitField<50, 1, u64> is_src_b_reg; + BitField<51, 2, u64> scale; + BitField<53, 1, u64> src_c_neg; + BitField<54, 1, u64> src_a_neg; + BitField<55, 1, u64> sat; + } const vmad{insn}; + + if (vmad.cc != 0) { + throw NotImplementedException("VMAD CC"); + } + if (vmad.sat != 0) { + throw NotImplementedException("VMAD SAT"); + } + if (vmad.scale != 0) { + throw NotImplementedException("VMAD SCALE"); + } + if (vmad.src_a_neg != 0 && vmad.src_c_neg != 0) { + throw NotImplementedException("VMAD PO"); + } + if (vmad.src_a_neg != 0 || vmad.src_c_neg != 0) { + throw NotImplementedException("VMAD NEG"); + } + const bool is_b_imm{vmad.is_src_b_reg == 0}; + const IR::U32 src_a{GetReg8(insn)}; + const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vmad.src_b_imm)) : GetReg20(insn)}; + const IR::U32 src_c{GetReg39(insn)}; + + const u32 a_selector{static_cast<u32>(vmad.src_a_selector)}; + // Immediate values can't have a selector + const u32 b_selector{is_b_imm ? 0U : static_cast<u32>(vmad.src_b_selector)}; + const VideoWidth a_width{vmad.src_a_width}; + const VideoWidth b_width{GetVideoSourceWidth(vmad.src_b_width, is_b_imm)}; + + const bool src_a_signed{vmad.src_a_sign != 0}; + const bool src_b_signed{vmad.src_b_sign != 0}; + const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, a_selector, src_a_signed)}; + const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, b_selector, src_b_signed)}; + + X(vmad.dest_reg, ir.IAdd(ir.IMul(op_a, op_b), src_c)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp new file mode 100644 index 000000000..1b66abc33 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp @@ -0,0 +1,92 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h" + +namespace Shader::Maxwell { +namespace { +enum class VsetpCompareOp : u64 { + False = 0, + LessThan, + Equal, + LessThanEqual, + GreaterThan = 16, + NotEqual, + GreaterThanEqual, + True, +}; + +CompareOp VsetpToShaderCompareOp(VsetpCompareOp op) { + switch (op) { + case VsetpCompareOp::False: + return CompareOp::False; + case VsetpCompareOp::LessThan: + return CompareOp::LessThan; + case VsetpCompareOp::Equal: + return CompareOp::Equal; + case VsetpCompareOp::LessThanEqual: + return CompareOp::LessThanEqual; + case VsetpCompareOp::GreaterThan: + return CompareOp::GreaterThan; + case VsetpCompareOp::NotEqual: + return CompareOp::NotEqual; + case VsetpCompareOp::GreaterThanEqual: + return CompareOp::GreaterThanEqual; + case VsetpCompareOp::True: + return CompareOp::True; + default: + throw NotImplementedException("Invalid compare op {}", op); + } +} +} // Anonymous namespace + +void TranslatorVisitor::VSETP(u64 insn) { + union { + u64 raw; + BitField<0, 3, IR::Pred> dest_pred_b; + BitField<3, 3, IR::Pred> dest_pred_a; + BitField<20, 16, u64> src_b_imm; + BitField<28, 2, u64> src_b_selector; + BitField<29, 2, VideoWidth> src_b_width; + BitField<36, 2, u64> src_a_selector; + BitField<37, 2, VideoWidth> src_a_width; + BitField<39, 3, IR::Pred> bop_pred; + BitField<42, 1, u64> neg_bop_pred; + BitField<43, 5, VsetpCompareOp> compare_op; + BitField<45, 2, BooleanOp> bop; + BitField<48, 1, u64> src_a_sign; + BitField<49, 1, u64> src_b_sign; + BitField<50, 1, u64> is_src_b_reg; + } const vsetp{insn}; + + const bool is_b_imm{vsetp.is_src_b_reg == 0}; + const IR::U32 src_a{GetReg8(insn)}; + const IR::U32 src_b{is_b_imm ? 
ir.Imm32(static_cast<u32>(vsetp.src_b_imm)) : GetReg20(insn)}; + + const u32 a_selector{static_cast<u32>(vsetp.src_a_selector)}; + const u32 b_selector{static_cast<u32>(vsetp.src_b_selector)}; + const VideoWidth a_width{vsetp.src_a_width}; + const VideoWidth b_width{GetVideoSourceWidth(vsetp.src_b_width, is_b_imm)}; + + const bool src_a_signed{vsetp.src_a_sign != 0}; + const bool src_b_signed{vsetp.src_b_sign != 0}; + const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, a_selector, src_a_signed)}; + const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, b_selector, src_b_signed)}; + + // Compare operation's sign is only dependent on operand b's sign + const bool compare_signed{src_b_signed}; + const CompareOp compare_op{VsetpToShaderCompareOp(vsetp.compare_op)}; + const IR::U1 comparison{IntegerCompare(ir, op_a, op_b, compare_op, compare_signed)}; + const IR::U1 bop_pred{ir.GetPred(vsetp.bop_pred, vsetp.neg_bop_pred != 0)}; + const IR::U1 result_a{PredicateCombine(ir, comparison, bop_pred, vsetp.bop)}; + const IR::U1 result_b{PredicateCombine(ir, ir.LogicalNot(comparison), bop_pred, vsetp.bop)}; + ir.SetPred(vsetp.dest_pred_a, result_a); + ir.SetPred(vsetp.dest_pred_b, result_b); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp new file mode 100644 index 000000000..7ce370f09 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp @@ -0,0 +1,54 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class VoteOp : u64 { + ALL, + ANY, + EQ, +}; + +[[nodiscard]] IR::U1 VoteOperation(IR::IREmitter& ir, const IR::U1& pred, VoteOp vote_op) { + switch (vote_op) { + case VoteOp::ALL: + return ir.VoteAll(pred); + case VoteOp::ANY: + return ir.VoteAny(pred); + case VoteOp::EQ: + return ir.VoteEqual(pred); + default: + throw NotImplementedException("Invalid VOTE op {}", vote_op); + } +} + +void Vote(TranslatorVisitor& v, u64 insn) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<39, 3, IR::Pred> pred_a; + BitField<42, 1, u64> neg_pred_a; + BitField<45, 3, IR::Pred> pred_b; + BitField<48, 2, VoteOp> vote_op; + } const vote{insn}; + + const IR::U1 vote_pred{v.ir.GetPred(vote.pred_a, vote.neg_pred_a != 0)}; + v.ir.SetPred(vote.pred_b, VoteOperation(v.ir, vote_pred, vote.vote_op)); + v.X(vote.dest_reg, v.ir.SubgroupBallot(vote_pred)); +} +} // Anonymous namespace + +void TranslatorVisitor::VOTE(u64 insn) { + Vote(*this, insn); +} + +void TranslatorVisitor::VOTE_vtg(u64) { + LOG_WARNING(Shader, "(STUBBED) called"); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp new file mode 100644 index 000000000..550fed55c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp @@ -0,0 +1,69 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
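// [Editor's note] Sketch only, not part of this patch: VSETP above derives two predicates from
// a single integer comparison - the boolean op combines the comparison with the bop predicate
// for the first result and the negated comparison for the second. The AND/OR/XOR ordering
// below is assumed purely for illustration; the real encoding is BooleanOp in common_funcs.h.
#include <utility>

enum class Bop { AND, OR, XOR };

bool Combine(bool a, bool b, Bop op) {
    switch (op) {
    case Bop::AND:
        return a && b;
    case Bop::OR:
        return a || b;
    default:
        return a != b;
    }
}

std::pair<bool, bool> VsetpResults(bool comparison, bool bop_pred, Bop op) {
    return {Combine(comparison, bop_pred, op), Combine(!comparison, bop_pred, op)};
}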
+ +#include <optional> + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class ShuffleMode : u64 { + IDX, + UP, + DOWN, + BFLY, +}; + +[[nodiscard]] IR::U32 ShuffleOperation(IR::IREmitter& ir, const IR::U32& value, + const IR::U32& index, const IR::U32& mask, + ShuffleMode shfl_op) { + const IR::U32 clamp{ir.BitFieldExtract(mask, ir.Imm32(0), ir.Imm32(5))}; + const IR::U32 seg_mask{ir.BitFieldExtract(mask, ir.Imm32(8), ir.Imm32(5))}; + switch (shfl_op) { + case ShuffleMode::IDX: + return ir.ShuffleIndex(value, index, clamp, seg_mask); + case ShuffleMode::UP: + return ir.ShuffleUp(value, index, clamp, seg_mask); + case ShuffleMode::DOWN: + return ir.ShuffleDown(value, index, clamp, seg_mask); + case ShuffleMode::BFLY: + return ir.ShuffleButterfly(value, index, clamp, seg_mask); + default: + throw NotImplementedException("Invalid SHFL op {}", shfl_op); + } +} + +void Shuffle(TranslatorVisitor& v, u64 insn, const IR::U32& index, const IR::U32& mask) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<30, 2, ShuffleMode> mode; + BitField<48, 3, IR::Pred> pred; + } const shfl{insn}; + + const IR::U32 result{ShuffleOperation(v.ir, v.X(shfl.src_reg), index, mask, shfl.mode)}; + v.ir.SetPred(shfl.pred, v.ir.GetInBoundsFromOp(result)); + v.X(shfl.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::SHFL(u64 insn) { + union { + u64 insn; + BitField<20, 5, u64> src_a_imm; + BitField<28, 1, u64> src_a_flag; + BitField<29, 1, u64> src_b_flag; + BitField<34, 13, u64> src_b_imm; + } const flags{insn}; + const IR::U32 src_a{flags.src_a_flag != 0 ? ir.Imm32(static_cast<u32>(flags.src_a_imm)) + : GetReg20(insn)}; + const IR::U32 src_b{flags.src_b_flag != 0 ? ir.Imm32(static_cast<u32>(flags.src_b_imm)) + : GetReg39(insn)}; + Shuffle(*this, insn, src_a, src_b); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp new file mode 100644 index 000000000..8e3c4c5d5 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp @@ -0,0 +1,52 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
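// [Editor's note] Standalone sketch, not part of this patch: how the SHFL mask operand above is
// unpacked before reaching the shuffle IR ops - bits [4:0] carry the clamp value and bits
// [12:8] the segmentation mask, exactly as the two BitFieldExtract calls encode it.
// UnpackShflMask is a hypothetical name.
#include <cstdint>

struct ShflMask {
    std::uint32_t clamp;
    std::uint32_t segmentation;
};

constexpr ShflMask UnpackShflMask(std::uint32_t mask) {
    return {mask & 0x1fu, (mask >> 8) & 0x1fu};
}

static_assert(UnpackShflMask(0x1f1f).clamp == 0x1f);
static_assert(UnpackShflMask(0x1f1f).segmentation == 0x1f);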
+ +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/maxwell/decode.h" +#include "shader_recompiler/frontend/maxwell/location.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" +#include "shader_recompiler/frontend/maxwell/translate/translate.h" + +namespace Shader::Maxwell { + +template <auto method> +static void Invoke(TranslatorVisitor& visitor, Location pc, u64 insn) { + using MethodType = decltype(method); + if constexpr (std::is_invocable_r_v<void, MethodType, TranslatorVisitor&, Location, u64>) { + (visitor.*method)(pc, insn); + } else if constexpr (std::is_invocable_r_v<void, MethodType, TranslatorVisitor&, u64>) { + (visitor.*method)(insn); + } else { + (visitor.*method)(); + } +} + +void Translate(Environment& env, IR::Block* block, u32 location_begin, u32 location_end) { + if (location_begin == location_end) { + return; + } + TranslatorVisitor visitor{env, *block}; + for (Location pc = location_begin; pc != location_end; ++pc) { + const u64 insn{env.ReadInstruction(pc.Offset())}; + try { + const Opcode opcode{Decode(insn)}; + switch (opcode) { +#define INST(name, cute, mask) \ + case Opcode::name: \ + Invoke<&TranslatorVisitor::name>(visitor, pc, insn); \ + break; +#include "shader_recompiler/frontend/maxwell/maxwell.inc" +#undef OPCODE + default: + throw LogicError("Invalid opcode {}", opcode); + } + } catch (Exception& exception) { + exception.Prepend(fmt::format("Translate {}: ", Decode(insn))); + throw; + } + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.h b/src/shader_recompiler/frontend/maxwell/translate/translate.h new file mode 100644 index 000000000..a3edd2e46 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/translate.h @@ -0,0 +1,14 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/basic_block.h" + +namespace Shader::Maxwell { + +void Translate(Environment& env, IR::Block* block, u32 location_begin, u32 location_end); + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp new file mode 100644 index 000000000..c067d459c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -0,0 +1,223 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
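// [Editor's note] Minimal sketch, not part of this patch, of the X-macro dispatch that
// Translate() above builds by defining INST and including maxwell.inc: the instruction table
// expands to one switch case per opcode. The two-entry table below is entirely hypothetical and
// simplified to a single macro argument (the real table also carries a name string and a mask).
#define INSTRUCTION_TABLE(X) \
    X(MOV)                   \
    X(IADD)

enum class Opcode {
#define INST(name) name,
    INSTRUCTION_TABLE(INST)
#undef INST
};

const char* OpcodeName(Opcode op) {
    switch (op) {
#define INST(name)     \
    case Opcode::name: \
        return #name;
        INSTRUCTION_TABLE(INST)
#undef INST
    }
    return "invalid";
}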
+ +#include <algorithm> +#include <memory> +#include <vector> + +#include "common/settings.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/post_order.h" +#include "shader_recompiler/frontend/maxwell/structured_control_flow.h" +#include "shader_recompiler/frontend/maxwell/translate/translate.h" +#include "shader_recompiler/frontend/maxwell/translate_program.h" +#include "shader_recompiler/host_translate_info.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Maxwell { +namespace { +IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) { + size_t num_syntax_blocks{}; + for (const auto& node : syntax_list) { + if (node.type == IR::AbstractSyntaxNode::Type::Block) { + ++num_syntax_blocks; + } + } + IR::BlockList blocks; + blocks.reserve(num_syntax_blocks); + for (const auto& node : syntax_list) { + if (node.type == IR::AbstractSyntaxNode::Type::Block) { + blocks.push_back(node.data.block); + } + } + return blocks; +} + +void RemoveUnreachableBlocks(IR::Program& program) { + // Some blocks might be unreachable if a function call exists unconditionally + // If this happens the number of blocks and post order blocks will mismatch + if (program.blocks.size() == program.post_order_blocks.size()) { + return; + } + const auto begin{program.blocks.begin() + 1}; + const auto end{program.blocks.end()}; + const auto pred{[](IR::Block* block) { return block->ImmPredecessors().empty(); }}; + program.blocks.erase(std::remove_if(begin, end, pred), end); +} + +void CollectInterpolationInfo(Environment& env, IR::Program& program) { + if (program.stage != Stage::Fragment) { + return; + } + const ProgramHeader& sph{env.SPH()}; + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + std::optional<PixelImap> imap; + for (const PixelImap value : sph.ps.GenericInputMap(static_cast<u32>(index))) { + if (value == PixelImap::Unused) { + continue; + } + if (imap && imap != value) { + throw NotImplementedException("Per component interpolation"); + } + imap = value; + } + if (!imap) { + continue; + } + program.info.interpolation[index] = [&] { + switch (*imap) { + case PixelImap::Unused: + case PixelImap::Perspective: + return Interpolation::Smooth; + case PixelImap::Constant: + return Interpolation::Flat; + case PixelImap::ScreenLinear: + return Interpolation::NoPerspective; + } + throw NotImplementedException("Unknown interpolation {}", *imap); + }(); + } +} + +void AddNVNStorageBuffers(IR::Program& program) { + if (!program.info.uses_global_memory) { + return; + } + const u32 driver_cbuf{0}; + const u32 descriptor_size{0x10}; + const u32 num_buffers{16}; + const u32 base{[&] { + switch (program.stage) { + case Stage::VertexA: + case Stage::VertexB: + return 0x110u; + case Stage::TessellationControl: + return 0x210u; + case Stage::TessellationEval: + return 0x310u; + case Stage::Geometry: + return 0x410u; + case Stage::Fragment: + return 0x510u; + case Stage::Compute: + return 0x310u; + } + throw InvalidArgument("Invalid stage {}", program.stage); + }()}; + auto& descs{program.info.storage_buffers_descriptors}; + for (u32 index = 0; index < num_buffers; ++index) { + if (!program.info.nvn_buffer_used[index]) { + continue; + } + const u32 offset{base + index * descriptor_size}; + const auto it{std::ranges::find(descs, offset, &StorageBufferDescriptor::cbuf_offset)}; + if (it != descs.end()) { + it->is_written |= program.info.stores_global_memory; + continue; + } + descs.push_back({ + 
.cbuf_index = driver_cbuf, + .cbuf_offset = offset, + .count = 1, + .is_written = program.info.stores_global_memory, + }); + } +} +} // Anonymous namespace + +IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, + Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) { + IR::Program program; + program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg); + program.blocks = GenerateBlocks(program.syntax_list); + program.post_order_blocks = PostOrder(program.syntax_list.front()); + program.stage = env.ShaderStage(); + program.local_memory_size = env.LocalMemorySize(); + switch (program.stage) { + case Stage::TessellationControl: { + const ProgramHeader& sph{env.SPH()}; + program.invocations = sph.common2.threads_per_input_primitive; + break; + } + case Stage::Geometry: { + const ProgramHeader& sph{env.SPH()}; + program.output_topology = sph.common3.output_topology; + program.output_vertices = sph.common4.max_output_vertices; + program.invocations = sph.common2.threads_per_input_primitive; + program.is_geometry_passthrough = sph.common0.geometry_passthrough != 0; + if (program.is_geometry_passthrough) { + const auto& mask{env.GpPassthroughMask()}; + for (size_t i = 0; i < program.info.passthrough.mask.size(); ++i) { + program.info.passthrough.mask[i] = ((mask[i / 32] >> (i % 32)) & 1) == 0; + } + } + break; + } + case Stage::Compute: + program.workgroup_size = env.WorkgroupSize(); + program.shared_memory_size = env.SharedMemorySize(); + break; + default: + break; + } + RemoveUnreachableBlocks(program); + + // Replace instructions before the SSA rewrite + if (!host_info.support_float16) { + Optimization::LowerFp16ToFp32(program); + } + if (!host_info.support_int64) { + Optimization::LowerInt64ToInt32(program); + } + Optimization::SsaRewritePass(program); + + Optimization::GlobalMemoryToStorageBufferPass(program); + Optimization::TexturePass(env, program); + + Optimization::ConstantPropagationPass(program); + Optimization::DeadCodeEliminationPass(program); + if (Settings::values.renderer_debug) { + Optimization::VerificationPass(program); + } + Optimization::CollectShaderInfoPass(env, program); + CollectInterpolationInfo(env, program); + AddNVNStorageBuffers(program); + return program; +} + +IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, + Environment& env_vertex_b) { + IR::Program result{}; + Optimization::VertexATransformPass(vertex_a); + Optimization::VertexBTransformPass(vertex_b); + for (const auto& term : vertex_a.syntax_list) { + if (term.type != IR::AbstractSyntaxNode::Type::Return) { + result.syntax_list.push_back(term); + } + } + result.syntax_list.insert(result.syntax_list.end(), vertex_b.syntax_list.begin(), + vertex_b.syntax_list.end()); + result.blocks = GenerateBlocks(result.syntax_list); + result.post_order_blocks = vertex_b.post_order_blocks; + for (const auto& block : vertex_a.post_order_blocks) { + result.post_order_blocks.push_back(block); + } + result.stage = Stage::VertexB; + result.info = vertex_a.info; + result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size); + result.info.loads.mask |= vertex_b.info.loads.mask; + result.info.stores.mask |= vertex_b.info.stores.mask; + + Optimization::JoinTextureInfo(result.info, vertex_b.info); + Optimization::JoinStorageInfo(result.info, vertex_b.info); + Optimization::DeadCodeEliminationPass(result); + if (Settings::values.renderer_debug) { + Optimization::VerificationPass(result); + } + 
Optimization::CollectShaderInfoPass(env_vertex_b, result); + return result; +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.h b/src/shader_recompiler/frontend/maxwell/translate_program.h new file mode 100644 index 000000000..a84814811 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate_program.h @@ -0,0 +1,23 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/host_translate_info.h" +#include "shader_recompiler/object_pool.h" + +namespace Shader::Maxwell { + +[[nodiscard]] IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, + ObjectPool<IR::Block>& block_pool, Environment& env, + Flow::CFG& cfg, const HostTranslateInfo& host_info); + +[[nodiscard]] IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, + Environment& env_vertex_b); + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h new file mode 100644 index 000000000..94a584219 --- /dev/null +++ b/src/shader_recompiler/host_translate_info.h @@ -0,0 +1,18 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +namespace Shader { + +// Try to keep entries here to a minimum +// They can accidentally change the cached information in a shader + +/// Misc information about the host +struct HostTranslateInfo { + bool support_float16{}; ///< True when the device supports 16-bit floats + bool support_int64{}; ///< True when the device supports 64-bit integers +}; + +} // namespace Shader diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp new file mode 100644 index 000000000..5ead930f1 --- /dev/null +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -0,0 +1,928 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
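// [Editor's note] Illustrative arithmetic only, not part of this patch: AddNVNStorageBuffers
// above (and CheckCBufNVN in the pass that follows) treat constant buffer 0 as holding 16
// storage-buffer descriptors of 0x10 bytes each, starting at a per-stage base such as 0x110 for
// vertex shaders or 0x510 for fragment shaders. DescriptorOffset is a hypothetical name.
#include <cstdint>

constexpr std::uint32_t DescriptorOffset(std::uint32_t stage_base, std::uint32_t index) {
    return stage_base + index * 0x10u;
}

static_assert(DescriptorOffset(0x110u, 0) == 0x110u);  // first vertex descriptor
static_assert(DescriptorOffset(0x510u, 15) == 0x600u); // last fragment descriptor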
+ +#include "common/alignment.h" +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/ir_opt/passes.h" +#include "shader_recompiler/shader_info.h" + +namespace Shader::Optimization { +namespace { +void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) { + if (count != 1) { + throw NotImplementedException("Constant buffer descriptor indexing"); + } + if ((info.constant_buffer_mask & (1U << index)) != 0) { + return; + } + info.constant_buffer_mask |= 1U << index; + + auto& cbufs{info.constant_buffer_descriptors}; + cbufs.insert(std::ranges::lower_bound(cbufs, index, {}, &ConstantBufferDescriptor::index), + ConstantBufferDescriptor{ + .index = index, + .count = 1, + }); +} + +void GetPatch(Info& info, IR::Patch patch) { + if (!IR::IsGeneric(patch)) { + throw NotImplementedException("Reading non-generic patch {}", patch); + } + info.uses_patches.at(IR::GenericPatchIndex(patch)) = true; +} + +void SetPatch(Info& info, IR::Patch patch) { + if (IR::IsGeneric(patch)) { + info.uses_patches.at(IR::GenericPatchIndex(patch)) = true; + return; + } + switch (patch) { + case IR::Patch::TessellationLodLeft: + case IR::Patch::TessellationLodTop: + case IR::Patch::TessellationLodRight: + case IR::Patch::TessellationLodBottom: + info.stores_tess_level_outer = true; + break; + case IR::Patch::TessellationLodInteriorU: + case IR::Patch::TessellationLodInteriorV: + info.stores_tess_level_inner = true; + break; + default: + throw NotImplementedException("Set patch {}", patch); + } +} + +void CheckCBufNVN(Info& info, IR::Inst& inst) { + const IR::Value cbuf_index{inst.Arg(0)}; + if (!cbuf_index.IsImmediate()) { + info.nvn_buffer_used.set(); + return; + } + const u32 index{cbuf_index.U32()}; + if (index != 0) { + return; + } + const IR::Value cbuf_offset{inst.Arg(1)}; + if (!cbuf_offset.IsImmediate()) { + info.nvn_buffer_used.set(); + return; + } + const u32 offset{cbuf_offset.U32()}; + const u32 descriptor_size{0x10}; + const u32 upper_limit{info.nvn_buffer_base + descriptor_size * 16}; + if (offset >= info.nvn_buffer_base && offset < upper_limit) { + const std::size_t nvn_index{(offset - info.nvn_buffer_base) / descriptor_size}; + info.nvn_buffer_used.set(nvn_index, true); + } +} + +void VisitUsages(Info& info, IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::CompositeConstructF16x2: + case IR::Opcode::CompositeConstructF16x3: + case IR::Opcode::CompositeConstructF16x4: + case IR::Opcode::CompositeExtractF16x2: + case IR::Opcode::CompositeExtractF16x3: + case IR::Opcode::CompositeExtractF16x4: + case IR::Opcode::CompositeInsertF16x2: + case IR::Opcode::CompositeInsertF16x3: + case IR::Opcode::CompositeInsertF16x4: + case IR::Opcode::SelectF16: + case IR::Opcode::BitCastU16F16: + case IR::Opcode::BitCastF16U16: + case IR::Opcode::PackFloat2x16: + case IR::Opcode::UnpackFloat2x16: + case IR::Opcode::ConvertS16F16: + case IR::Opcode::ConvertS32F16: + case IR::Opcode::ConvertS64F16: + case IR::Opcode::ConvertU16F16: + case IR::Opcode::ConvertU32F16: + case IR::Opcode::ConvertU64F16: + case IR::Opcode::ConvertF16S8: + case IR::Opcode::ConvertF16S16: + case IR::Opcode::ConvertF16S32: + case IR::Opcode::ConvertF16S64: + case IR::Opcode::ConvertF16U8: + case IR::Opcode::ConvertF16U16: + case IR::Opcode::ConvertF16U32: + case IR::Opcode::ConvertF16U64: + case IR::Opcode::FPAbs16: + case IR::Opcode::FPAdd16: + case 
IR::Opcode::FPCeil16: + case IR::Opcode::FPFloor16: + case IR::Opcode::FPFma16: + case IR::Opcode::FPMul16: + case IR::Opcode::FPNeg16: + case IR::Opcode::FPRoundEven16: + case IR::Opcode::FPSaturate16: + case IR::Opcode::FPClamp16: + case IR::Opcode::FPTrunc16: + case IR::Opcode::FPOrdEqual16: + case IR::Opcode::FPUnordEqual16: + case IR::Opcode::FPOrdNotEqual16: + case IR::Opcode::FPUnordNotEqual16: + case IR::Opcode::FPOrdLessThan16: + case IR::Opcode::FPUnordLessThan16: + case IR::Opcode::FPOrdGreaterThan16: + case IR::Opcode::FPUnordGreaterThan16: + case IR::Opcode::FPOrdLessThanEqual16: + case IR::Opcode::FPUnordLessThanEqual16: + case IR::Opcode::FPOrdGreaterThanEqual16: + case IR::Opcode::FPUnordGreaterThanEqual16: + case IR::Opcode::FPIsNan16: + case IR::Opcode::GlobalAtomicAddF16x2: + case IR::Opcode::GlobalAtomicMinF16x2: + case IR::Opcode::GlobalAtomicMaxF16x2: + case IR::Opcode::StorageAtomicAddF16x2: + case IR::Opcode::StorageAtomicMinF16x2: + case IR::Opcode::StorageAtomicMaxF16x2: + info.uses_fp16 = true; + break; + case IR::Opcode::CompositeConstructF64x2: + case IR::Opcode::CompositeConstructF64x3: + case IR::Opcode::CompositeConstructF64x4: + case IR::Opcode::CompositeExtractF64x2: + case IR::Opcode::CompositeExtractF64x3: + case IR::Opcode::CompositeExtractF64x4: + case IR::Opcode::CompositeInsertF64x2: + case IR::Opcode::CompositeInsertF64x3: + case IR::Opcode::CompositeInsertF64x4: + case IR::Opcode::SelectF64: + case IR::Opcode::BitCastU64F64: + case IR::Opcode::BitCastF64U64: + case IR::Opcode::PackDouble2x32: + case IR::Opcode::UnpackDouble2x32: + case IR::Opcode::FPAbs64: + case IR::Opcode::FPAdd64: + case IR::Opcode::FPCeil64: + case IR::Opcode::FPFloor64: + case IR::Opcode::FPFma64: + case IR::Opcode::FPMax64: + case IR::Opcode::FPMin64: + case IR::Opcode::FPMul64: + case IR::Opcode::FPNeg64: + case IR::Opcode::FPRecip64: + case IR::Opcode::FPRecipSqrt64: + case IR::Opcode::FPRoundEven64: + case IR::Opcode::FPSaturate64: + case IR::Opcode::FPClamp64: + case IR::Opcode::FPTrunc64: + case IR::Opcode::FPOrdEqual64: + case IR::Opcode::FPUnordEqual64: + case IR::Opcode::FPOrdNotEqual64: + case IR::Opcode::FPUnordNotEqual64: + case IR::Opcode::FPOrdLessThan64: + case IR::Opcode::FPUnordLessThan64: + case IR::Opcode::FPOrdGreaterThan64: + case IR::Opcode::FPUnordGreaterThan64: + case IR::Opcode::FPOrdLessThanEqual64: + case IR::Opcode::FPUnordLessThanEqual64: + case IR::Opcode::FPOrdGreaterThanEqual64: + case IR::Opcode::FPUnordGreaterThanEqual64: + case IR::Opcode::FPIsNan64: + case IR::Opcode::ConvertS16F64: + case IR::Opcode::ConvertS32F64: + case IR::Opcode::ConvertS64F64: + case IR::Opcode::ConvertU16F64: + case IR::Opcode::ConvertU32F64: + case IR::Opcode::ConvertU64F64: + case IR::Opcode::ConvertF32F64: + case IR::Opcode::ConvertF64F32: + case IR::Opcode::ConvertF64S8: + case IR::Opcode::ConvertF64S16: + case IR::Opcode::ConvertF64S32: + case IR::Opcode::ConvertF64S64: + case IR::Opcode::ConvertF64U8: + case IR::Opcode::ConvertF64U16: + case IR::Opcode::ConvertF64U32: + case IR::Opcode::ConvertF64U64: + info.uses_fp64 = true; + break; + default: + break; + } + switch (inst.GetOpcode()) { + case IR::Opcode::GetCbufU8: + case IR::Opcode::GetCbufS8: + case IR::Opcode::UndefU8: + case IR::Opcode::LoadGlobalU8: + case IR::Opcode::LoadGlobalS8: + case IR::Opcode::WriteGlobalU8: + case IR::Opcode::WriteGlobalS8: + case IR::Opcode::LoadStorageU8: + case IR::Opcode::LoadStorageS8: + case IR::Opcode::WriteStorageU8: + case IR::Opcode::WriteStorageS8: + case 
IR::Opcode::LoadSharedU8: + case IR::Opcode::LoadSharedS8: + case IR::Opcode::WriteSharedU8: + case IR::Opcode::SelectU8: + case IR::Opcode::ConvertF16S8: + case IR::Opcode::ConvertF16U8: + case IR::Opcode::ConvertF32S8: + case IR::Opcode::ConvertF32U8: + case IR::Opcode::ConvertF64S8: + case IR::Opcode::ConvertF64U8: + info.uses_int8 = true; + break; + default: + break; + } + switch (inst.GetOpcode()) { + case IR::Opcode::GetCbufU16: + case IR::Opcode::GetCbufS16: + case IR::Opcode::UndefU16: + case IR::Opcode::LoadGlobalU16: + case IR::Opcode::LoadGlobalS16: + case IR::Opcode::WriteGlobalU16: + case IR::Opcode::WriteGlobalS16: + case IR::Opcode::LoadStorageU16: + case IR::Opcode::LoadStorageS16: + case IR::Opcode::WriteStorageU16: + case IR::Opcode::WriteStorageS16: + case IR::Opcode::LoadSharedU16: + case IR::Opcode::LoadSharedS16: + case IR::Opcode::WriteSharedU16: + case IR::Opcode::SelectU16: + case IR::Opcode::BitCastU16F16: + case IR::Opcode::BitCastF16U16: + case IR::Opcode::ConvertS16F16: + case IR::Opcode::ConvertS16F32: + case IR::Opcode::ConvertS16F64: + case IR::Opcode::ConvertU16F16: + case IR::Opcode::ConvertU16F32: + case IR::Opcode::ConvertU16F64: + case IR::Opcode::ConvertF16S16: + case IR::Opcode::ConvertF16U16: + case IR::Opcode::ConvertF32S16: + case IR::Opcode::ConvertF32U16: + case IR::Opcode::ConvertF64S16: + case IR::Opcode::ConvertF64U16: + info.uses_int16 = true; + break; + default: + break; + } + switch (inst.GetOpcode()) { + case IR::Opcode::UndefU64: + case IR::Opcode::LoadGlobalU8: + case IR::Opcode::LoadGlobalS8: + case IR::Opcode::LoadGlobalU16: + case IR::Opcode::LoadGlobalS16: + case IR::Opcode::LoadGlobal32: + case IR::Opcode::LoadGlobal64: + case IR::Opcode::LoadGlobal128: + case IR::Opcode::WriteGlobalU8: + case IR::Opcode::WriteGlobalS8: + case IR::Opcode::WriteGlobalU16: + case IR::Opcode::WriteGlobalS16: + case IR::Opcode::WriteGlobal32: + case IR::Opcode::WriteGlobal64: + case IR::Opcode::WriteGlobal128: + case IR::Opcode::SelectU64: + case IR::Opcode::BitCastU64F64: + case IR::Opcode::BitCastF64U64: + case IR::Opcode::PackUint2x32: + case IR::Opcode::UnpackUint2x32: + case IR::Opcode::IAdd64: + case IR::Opcode::ISub64: + case IR::Opcode::INeg64: + case IR::Opcode::ShiftLeftLogical64: + case IR::Opcode::ShiftRightLogical64: + case IR::Opcode::ShiftRightArithmetic64: + case IR::Opcode::ConvertS64F16: + case IR::Opcode::ConvertS64F32: + case IR::Opcode::ConvertS64F64: + case IR::Opcode::ConvertU64F16: + case IR::Opcode::ConvertU64F32: + case IR::Opcode::ConvertU64F64: + case IR::Opcode::ConvertU64U32: + case IR::Opcode::ConvertU32U64: + case IR::Opcode::ConvertF16U64: + case IR::Opcode::ConvertF32U64: + case IR::Opcode::ConvertF64U64: + case IR::Opcode::SharedAtomicExchange64: + case IR::Opcode::GlobalAtomicIAdd64: + case IR::Opcode::GlobalAtomicSMin64: + case IR::Opcode::GlobalAtomicUMin64: + case IR::Opcode::GlobalAtomicSMax64: + case IR::Opcode::GlobalAtomicUMax64: + case IR::Opcode::GlobalAtomicAnd64: + case IR::Opcode::GlobalAtomicOr64: + case IR::Opcode::GlobalAtomicXor64: + case IR::Opcode::GlobalAtomicExchange64: + case IR::Opcode::StorageAtomicIAdd64: + case IR::Opcode::StorageAtomicSMin64: + case IR::Opcode::StorageAtomicUMin64: + case IR::Opcode::StorageAtomicSMax64: + case IR::Opcode::StorageAtomicUMax64: + case IR::Opcode::StorageAtomicAnd64: + case IR::Opcode::StorageAtomicOr64: + case IR::Opcode::StorageAtomicXor64: + case IR::Opcode::StorageAtomicExchange64: + info.uses_int64 = true; + break; + default: + break; + } + switch 
(inst.GetOpcode()) { + case IR::Opcode::WriteGlobalU8: + case IR::Opcode::WriteGlobalS8: + case IR::Opcode::WriteGlobalU16: + case IR::Opcode::WriteGlobalS16: + case IR::Opcode::WriteGlobal32: + case IR::Opcode::WriteGlobal64: + case IR::Opcode::WriteGlobal128: + case IR::Opcode::GlobalAtomicIAdd32: + case IR::Opcode::GlobalAtomicSMin32: + case IR::Opcode::GlobalAtomicUMin32: + case IR::Opcode::GlobalAtomicSMax32: + case IR::Opcode::GlobalAtomicUMax32: + case IR::Opcode::GlobalAtomicInc32: + case IR::Opcode::GlobalAtomicDec32: + case IR::Opcode::GlobalAtomicAnd32: + case IR::Opcode::GlobalAtomicOr32: + case IR::Opcode::GlobalAtomicXor32: + case IR::Opcode::GlobalAtomicExchange32: + case IR::Opcode::GlobalAtomicIAdd64: + case IR::Opcode::GlobalAtomicSMin64: + case IR::Opcode::GlobalAtomicUMin64: + case IR::Opcode::GlobalAtomicSMax64: + case IR::Opcode::GlobalAtomicUMax64: + case IR::Opcode::GlobalAtomicAnd64: + case IR::Opcode::GlobalAtomicOr64: + case IR::Opcode::GlobalAtomicXor64: + case IR::Opcode::GlobalAtomicExchange64: + case IR::Opcode::GlobalAtomicAddF32: + case IR::Opcode::GlobalAtomicAddF16x2: + case IR::Opcode::GlobalAtomicAddF32x2: + case IR::Opcode::GlobalAtomicMinF16x2: + case IR::Opcode::GlobalAtomicMinF32x2: + case IR::Opcode::GlobalAtomicMaxF16x2: + case IR::Opcode::GlobalAtomicMaxF32x2: + info.stores_global_memory = true; + [[fallthrough]]; + case IR::Opcode::LoadGlobalU8: + case IR::Opcode::LoadGlobalS8: + case IR::Opcode::LoadGlobalU16: + case IR::Opcode::LoadGlobalS16: + case IR::Opcode::LoadGlobal32: + case IR::Opcode::LoadGlobal64: + case IR::Opcode::LoadGlobal128: + info.uses_int64 = true; + info.uses_global_memory = true; + info.used_constant_buffer_types |= IR::Type::U32 | IR::Type::U32x2; + info.used_storage_buffer_types |= IR::Type::U32 | IR::Type::U32x2 | IR::Type::U32x4; + break; + default: + break; + } + switch (inst.GetOpcode()) { + case IR::Opcode::DemoteToHelperInvocation: + info.uses_demote_to_helper_invocation = true; + break; + case IR::Opcode::GetAttribute: + info.loads.mask[static_cast<size_t>(inst.Arg(0).Attribute())] = true; + break; + case IR::Opcode::SetAttribute: + info.stores.mask[static_cast<size_t>(inst.Arg(0).Attribute())] = true; + break; + case IR::Opcode::GetPatch: + GetPatch(info, inst.Arg(0).Patch()); + break; + case IR::Opcode::SetPatch: + SetPatch(info, inst.Arg(0).Patch()); + break; + case IR::Opcode::GetAttributeIndexed: + info.loads_indexed_attributes = true; + break; + case IR::Opcode::SetAttributeIndexed: + info.stores_indexed_attributes = true; + break; + case IR::Opcode::SetFragColor: + info.stores_frag_color[inst.Arg(0).U32()] = true; + break; + case IR::Opcode::SetSampleMask: + info.stores_sample_mask = true; + break; + case IR::Opcode::SetFragDepth: + info.stores_frag_depth = true; + break; + case IR::Opcode::WorkgroupId: + info.uses_workgroup_id = true; + break; + case IR::Opcode::LocalInvocationId: + info.uses_local_invocation_id = true; + break; + case IR::Opcode::InvocationId: + info.uses_invocation_id = true; + break; + case IR::Opcode::SampleId: + info.uses_sample_id = true; + break; + case IR::Opcode::IsHelperInvocation: + info.uses_is_helper_invocation = true; + break; + case IR::Opcode::LaneId: + info.uses_subgroup_invocation_id = true; + break; + case IR::Opcode::ShuffleIndex: + case IR::Opcode::ShuffleUp: + case IR::Opcode::ShuffleDown: + case IR::Opcode::ShuffleButterfly: + info.uses_subgroup_shuffles = true; + break; + case IR::Opcode::GetCbufU8: + case IR::Opcode::GetCbufS8: + case IR::Opcode::GetCbufU16: + case 
IR::Opcode::GetCbufS16: + case IR::Opcode::GetCbufU32: + case IR::Opcode::GetCbufF32: + case IR::Opcode::GetCbufU32x2: { + const IR::Value index{inst.Arg(0)}; + const IR::Value offset{inst.Arg(1)}; + if (!index.IsImmediate()) { + throw NotImplementedException("Constant buffer with non-immediate index"); + } + AddConstantBufferDescriptor(info, index.U32(), 1); + u32 element_size{}; + switch (inst.GetOpcode()) { + case IR::Opcode::GetCbufU8: + case IR::Opcode::GetCbufS8: + info.used_constant_buffer_types |= IR::Type::U8; + element_size = 1; + break; + case IR::Opcode::GetCbufU16: + case IR::Opcode::GetCbufS16: + info.used_constant_buffer_types |= IR::Type::U16; + element_size = 2; + break; + case IR::Opcode::GetCbufU32: + info.used_constant_buffer_types |= IR::Type::U32; + element_size = 4; + break; + case IR::Opcode::GetCbufF32: + info.used_constant_buffer_types |= IR::Type::F32; + element_size = 4; + break; + case IR::Opcode::GetCbufU32x2: + info.used_constant_buffer_types |= IR::Type::U32x2; + element_size = 8; + break; + default: + break; + } + u32& size{info.constant_buffer_used_sizes[index.U32()]}; + if (offset.IsImmediate()) { + size = Common::AlignUp(std::max(size, offset.U32() + element_size), 16u); + } else { + size = 0x10'000; + } + break; + } + case IR::Opcode::BindlessImageSampleImplicitLod: + case IR::Opcode::BindlessImageSampleExplicitLod: + case IR::Opcode::BindlessImageSampleDrefImplicitLod: + case IR::Opcode::BindlessImageSampleDrefExplicitLod: + case IR::Opcode::BindlessImageGather: + case IR::Opcode::BindlessImageGatherDref: + case IR::Opcode::BindlessImageFetch: + case IR::Opcode::BindlessImageQueryDimensions: + case IR::Opcode::BindlessImageQueryLod: + case IR::Opcode::BindlessImageGradient: + case IR::Opcode::BoundImageSampleImplicitLod: + case IR::Opcode::BoundImageSampleExplicitLod: + case IR::Opcode::BoundImageSampleDrefImplicitLod: + case IR::Opcode::BoundImageSampleDrefExplicitLod: + case IR::Opcode::BoundImageGather: + case IR::Opcode::BoundImageGatherDref: + case IR::Opcode::BoundImageFetch: + case IR::Opcode::BoundImageQueryDimensions: + case IR::Opcode::BoundImageQueryLod: + case IR::Opcode::BoundImageGradient: + case IR::Opcode::ImageGather: + case IR::Opcode::ImageGatherDref: + case IR::Opcode::ImageFetch: + case IR::Opcode::ImageQueryDimensions: + case IR::Opcode::ImageGradient: { + const TextureType type{inst.Flags<IR::TextureInstInfo>().type}; + info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D; + info.uses_sparse_residency |= + inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr; + break; + } + case IR::Opcode::ImageSampleImplicitLod: + case IR::Opcode::ImageSampleExplicitLod: + case IR::Opcode::ImageSampleDrefImplicitLod: + case IR::Opcode::ImageSampleDrefExplicitLod: + case IR::Opcode::ImageQueryLod: { + const auto flags{inst.Flags<IR::TextureInstInfo>()}; + const TextureType type{flags.type}; + info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D; + info.uses_shadow_lod |= flags.is_depth != 0; + info.uses_sparse_residency |= + inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr; + break; + } + case IR::Opcode::ImageRead: { + const auto flags{inst.Flags<IR::TextureInstInfo>()}; + info.uses_typeless_image_reads |= flags.image_format == ImageFormat::Typeless; + info.uses_sparse_residency |= + inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr; + break; + } + case IR::Opcode::ImageWrite: { + const auto 
flags{inst.Flags<IR::TextureInstInfo>()}; + info.uses_typeless_image_writes |= flags.image_format == ImageFormat::Typeless; + info.uses_image_buffers |= flags.type == TextureType::Buffer; + break; + } + case IR::Opcode::SubgroupEqMask: + case IR::Opcode::SubgroupLtMask: + case IR::Opcode::SubgroupLeMask: + case IR::Opcode::SubgroupGtMask: + case IR::Opcode::SubgroupGeMask: + info.uses_subgroup_mask = true; + break; + case IR::Opcode::VoteAll: + case IR::Opcode::VoteAny: + case IR::Opcode::VoteEqual: + case IR::Opcode::SubgroupBallot: + info.uses_subgroup_vote = true; + break; + case IR::Opcode::FSwizzleAdd: + info.uses_fswzadd = true; + break; + case IR::Opcode::DPdxFine: + case IR::Opcode::DPdyFine: + case IR::Opcode::DPdxCoarse: + case IR::Opcode::DPdyCoarse: + info.uses_derivatives = true; + break; + case IR::Opcode::LoadStorageU8: + case IR::Opcode::LoadStorageS8: + case IR::Opcode::WriteStorageU8: + case IR::Opcode::WriteStorageS8: + info.used_storage_buffer_types |= IR::Type::U8; + break; + case IR::Opcode::LoadStorageU16: + case IR::Opcode::LoadStorageS16: + case IR::Opcode::WriteStorageU16: + case IR::Opcode::WriteStorageS16: + info.used_storage_buffer_types |= IR::Type::U16; + break; + case IR::Opcode::LoadStorage32: + case IR::Opcode::WriteStorage32: + case IR::Opcode::StorageAtomicIAdd32: + case IR::Opcode::StorageAtomicUMin32: + case IR::Opcode::StorageAtomicUMax32: + case IR::Opcode::StorageAtomicAnd32: + case IR::Opcode::StorageAtomicOr32: + case IR::Opcode::StorageAtomicXor32: + case IR::Opcode::StorageAtomicExchange32: + info.used_storage_buffer_types |= IR::Type::U32; + break; + case IR::Opcode::LoadStorage64: + case IR::Opcode::WriteStorage64: + info.used_storage_buffer_types |= IR::Type::U32x2; + break; + case IR::Opcode::LoadStorage128: + case IR::Opcode::WriteStorage128: + info.used_storage_buffer_types |= IR::Type::U32x4; + break; + case IR::Opcode::SharedAtomicSMin32: + info.uses_atomic_s32_min = true; + break; + case IR::Opcode::SharedAtomicSMax32: + info.uses_atomic_s32_max = true; + break; + case IR::Opcode::SharedAtomicInc32: + info.uses_shared_increment = true; + break; + case IR::Opcode::SharedAtomicDec32: + info.uses_shared_decrement = true; + break; + case IR::Opcode::SharedAtomicExchange64: + info.uses_int64_bit_atomics = true; + break; + case IR::Opcode::GlobalAtomicInc32: + case IR::Opcode::StorageAtomicInc32: + info.used_storage_buffer_types |= IR::Type::U32; + info.uses_global_increment = true; + break; + case IR::Opcode::GlobalAtomicDec32: + case IR::Opcode::StorageAtomicDec32: + info.used_storage_buffer_types |= IR::Type::U32; + info.uses_global_decrement = true; + break; + case IR::Opcode::GlobalAtomicAddF32: + case IR::Opcode::StorageAtomicAddF32: + info.used_storage_buffer_types |= IR::Type::U32; + info.uses_atomic_f32_add = true; + break; + case IR::Opcode::GlobalAtomicAddF16x2: + case IR::Opcode::StorageAtomicAddF16x2: + info.used_storage_buffer_types |= IR::Type::U32; + info.uses_atomic_f16x2_add = true; + break; + case IR::Opcode::GlobalAtomicAddF32x2: + case IR::Opcode::StorageAtomicAddF32x2: + info.used_storage_buffer_types |= IR::Type::U32; + info.uses_atomic_f32x2_add = true; + break; + case IR::Opcode::GlobalAtomicMinF16x2: + case IR::Opcode::StorageAtomicMinF16x2: + info.used_storage_buffer_types |= IR::Type::U32; + info.uses_atomic_f16x2_min = true; + break; + case IR::Opcode::GlobalAtomicMinF32x2: + case IR::Opcode::StorageAtomicMinF32x2: + info.used_storage_buffer_types |= IR::Type::U32; + info.uses_atomic_f32x2_min = true; + break; + 
case IR::Opcode::GlobalAtomicMaxF16x2: + case IR::Opcode::StorageAtomicMaxF16x2: + info.used_storage_buffer_types |= IR::Type::U32; + info.uses_atomic_f16x2_max = true; + break; + case IR::Opcode::GlobalAtomicMaxF32x2: + case IR::Opcode::StorageAtomicMaxF32x2: + info.used_storage_buffer_types |= IR::Type::U32; + info.uses_atomic_f32x2_max = true; + break; + case IR::Opcode::StorageAtomicSMin32: + info.used_storage_buffer_types |= IR::Type::U32; + info.uses_atomic_s32_min = true; + break; + case IR::Opcode::StorageAtomicSMax32: + info.used_storage_buffer_types |= IR::Type::U32; + info.uses_atomic_s32_max = true; + break; + case IR::Opcode::GlobalAtomicIAdd64: + case IR::Opcode::GlobalAtomicSMin64: + case IR::Opcode::GlobalAtomicUMin64: + case IR::Opcode::GlobalAtomicSMax64: + case IR::Opcode::GlobalAtomicUMax64: + case IR::Opcode::GlobalAtomicAnd64: + case IR::Opcode::GlobalAtomicOr64: + case IR::Opcode::GlobalAtomicXor64: + case IR::Opcode::GlobalAtomicExchange64: + case IR::Opcode::StorageAtomicIAdd64: + case IR::Opcode::StorageAtomicSMin64: + case IR::Opcode::StorageAtomicUMin64: + case IR::Opcode::StorageAtomicSMax64: + case IR::Opcode::StorageAtomicUMax64: + case IR::Opcode::StorageAtomicAnd64: + case IR::Opcode::StorageAtomicOr64: + case IR::Opcode::StorageAtomicXor64: + info.used_storage_buffer_types |= IR::Type::U64; + info.uses_int64_bit_atomics = true; + break; + case IR::Opcode::BindlessImageAtomicIAdd32: + case IR::Opcode::BindlessImageAtomicSMin32: + case IR::Opcode::BindlessImageAtomicUMin32: + case IR::Opcode::BindlessImageAtomicSMax32: + case IR::Opcode::BindlessImageAtomicUMax32: + case IR::Opcode::BindlessImageAtomicInc32: + case IR::Opcode::BindlessImageAtomicDec32: + case IR::Opcode::BindlessImageAtomicAnd32: + case IR::Opcode::BindlessImageAtomicOr32: + case IR::Opcode::BindlessImageAtomicXor32: + case IR::Opcode::BindlessImageAtomicExchange32: + case IR::Opcode::BoundImageAtomicIAdd32: + case IR::Opcode::BoundImageAtomicSMin32: + case IR::Opcode::BoundImageAtomicUMin32: + case IR::Opcode::BoundImageAtomicSMax32: + case IR::Opcode::BoundImageAtomicUMax32: + case IR::Opcode::BoundImageAtomicInc32: + case IR::Opcode::BoundImageAtomicDec32: + case IR::Opcode::BoundImageAtomicAnd32: + case IR::Opcode::BoundImageAtomicOr32: + case IR::Opcode::BoundImageAtomicXor32: + case IR::Opcode::BoundImageAtomicExchange32: + case IR::Opcode::ImageAtomicIAdd32: + case IR::Opcode::ImageAtomicSMin32: + case IR::Opcode::ImageAtomicUMin32: + case IR::Opcode::ImageAtomicSMax32: + case IR::Opcode::ImageAtomicUMax32: + case IR::Opcode::ImageAtomicInc32: + case IR::Opcode::ImageAtomicDec32: + case IR::Opcode::ImageAtomicAnd32: + case IR::Opcode::ImageAtomicOr32: + case IR::Opcode::ImageAtomicXor32: + case IR::Opcode::ImageAtomicExchange32: + info.uses_atomic_image_u32 = true; + break; + default: + break; + } +} + +void VisitFpModifiers(Info& info, IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::FPAdd16: + case IR::Opcode::FPFma16: + case IR::Opcode::FPMul16: + case IR::Opcode::FPRoundEven16: + case IR::Opcode::FPFloor16: + case IR::Opcode::FPCeil16: + case IR::Opcode::FPTrunc16: { + const auto control{inst.Flags<IR::FpControl>()}; + switch (control.fmz_mode) { + case IR::FmzMode::DontCare: + break; + case IR::FmzMode::FTZ: + case IR::FmzMode::FMZ: + info.uses_fp16_denorms_flush = true; + break; + case IR::FmzMode::None: + info.uses_fp16_denorms_preserve = true; + break; + } + break; + } + case IR::Opcode::FPAdd32: + case IR::Opcode::FPFma32: + case IR::Opcode::FPMul32: + case 
IR::Opcode::FPRoundEven32: + case IR::Opcode::FPFloor32: + case IR::Opcode::FPCeil32: + case IR::Opcode::FPTrunc32: + case IR::Opcode::FPOrdEqual32: + case IR::Opcode::FPUnordEqual32: + case IR::Opcode::FPOrdNotEqual32: + case IR::Opcode::FPUnordNotEqual32: + case IR::Opcode::FPOrdLessThan32: + case IR::Opcode::FPUnordLessThan32: + case IR::Opcode::FPOrdGreaterThan32: + case IR::Opcode::FPUnordGreaterThan32: + case IR::Opcode::FPOrdLessThanEqual32: + case IR::Opcode::FPUnordLessThanEqual32: + case IR::Opcode::FPOrdGreaterThanEqual32: + case IR::Opcode::FPUnordGreaterThanEqual32: + case IR::Opcode::ConvertF16F32: + case IR::Opcode::ConvertF64F32: { + const auto control{inst.Flags<IR::FpControl>()}; + switch (control.fmz_mode) { + case IR::FmzMode::DontCare: + break; + case IR::FmzMode::FTZ: + case IR::FmzMode::FMZ: + info.uses_fp32_denorms_flush = true; + break; + case IR::FmzMode::None: + info.uses_fp32_denorms_preserve = true; + break; + } + break; + } + default: + break; + } +} + +void VisitCbufs(Info& info, IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::GetCbufU8: + case IR::Opcode::GetCbufS8: + case IR::Opcode::GetCbufU16: + case IR::Opcode::GetCbufS16: + case IR::Opcode::GetCbufU32: + case IR::Opcode::GetCbufF32: + case IR::Opcode::GetCbufU32x2: { + CheckCBufNVN(info, inst); + break; + } + default: + break; + } +} + +void Visit(Info& info, IR::Inst& inst) { + VisitUsages(info, inst); + VisitFpModifiers(info, inst); + VisitCbufs(info, inst); +} + +void GatherInfoFromHeader(Environment& env, Info& info) { + Stage stage{env.ShaderStage()}; + if (stage == Stage::Compute) { + return; + } + const auto& header{env.SPH()}; + if (stage == Stage::Fragment) { + if (!info.loads_indexed_attributes) { + return; + } + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + const size_t offset{static_cast<size_t>(IR::Attribute::Generic0X) + index * 4}; + const auto vector{header.ps.imap_generic_vector[index]}; + info.loads.mask[offset + 0] = vector.x != PixelImap::Unused; + info.loads.mask[offset + 1] = vector.y != PixelImap::Unused; + info.loads.mask[offset + 2] = vector.z != PixelImap::Unused; + info.loads.mask[offset + 3] = vector.w != PixelImap::Unused; + } + return; + } + if (info.loads_indexed_attributes) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + const IR::Attribute attribute{IR::Attribute::Generic0X + index * 4}; + const auto mask = header.vtg.InputGeneric(index); + for (size_t i = 0; i < 4; ++i) { + info.loads.Set(attribute + i, mask[i]); + } + } + for (size_t index = 0; index < 8; ++index) { + const u16 mask{header.vtg.clip_distances}; + info.loads.Set(IR::Attribute::ClipDistance0 + index, ((mask >> index) & 1) != 0); + } + info.loads.Set(IR::Attribute::PrimitiveId, header.vtg.imap_systemb.primitive_array_id != 0); + info.loads.Set(IR::Attribute::Layer, header.vtg.imap_systemb.rt_array_index != 0); + info.loads.Set(IR::Attribute::ViewportIndex, header.vtg.imap_systemb.viewport_index != 0); + info.loads.Set(IR::Attribute::PointSize, header.vtg.imap_systemb.point_size != 0); + info.loads.Set(IR::Attribute::PositionX, header.vtg.imap_systemb.position_x != 0); + info.loads.Set(IR::Attribute::PositionY, header.vtg.imap_systemb.position_y != 0); + info.loads.Set(IR::Attribute::PositionZ, header.vtg.imap_systemb.position_z != 0); + info.loads.Set(IR::Attribute::PositionW, header.vtg.imap_systemb.position_w != 0); + info.loads.Set(IR::Attribute::PointSpriteS, header.vtg.point_sprite_s != 0); + info.loads.Set(IR::Attribute::PointSpriteT, 
header.vtg.point_sprite_t != 0); + info.loads.Set(IR::Attribute::FogCoordinate, header.vtg.fog_coordinate != 0); + info.loads.Set(IR::Attribute::TessellationEvaluationPointU, + header.vtg.tessellation_eval_point_u != 0); + info.loads.Set(IR::Attribute::TessellationEvaluationPointV, + header.vtg.tessellation_eval_point_v != 0); + info.loads.Set(IR::Attribute::InstanceId, header.vtg.instance_id != 0); + info.loads.Set(IR::Attribute::VertexId, header.vtg.vertex_id != 0); + // TODO: Legacy varyings + } + if (info.stores_indexed_attributes) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + const IR::Attribute attribute{IR::Attribute::Generic0X + index * 4}; + const auto mask{header.vtg.OutputGeneric(index)}; + for (size_t i = 0; i < 4; ++i) { + info.stores.Set(attribute + i, mask[i]); + } + } + for (size_t index = 0; index < 8; ++index) { + const u16 mask{header.vtg.omap_systemc.clip_distances}; + info.stores.Set(IR::Attribute::ClipDistance0 + index, ((mask >> index) & 1) != 0); + } + info.stores.Set(IR::Attribute::PrimitiveId, + header.vtg.omap_systemb.primitive_array_id != 0); + info.stores.Set(IR::Attribute::Layer, header.vtg.omap_systemb.rt_array_index != 0); + info.stores.Set(IR::Attribute::ViewportIndex, header.vtg.omap_systemb.viewport_index != 0); + info.stores.Set(IR::Attribute::PointSize, header.vtg.omap_systemb.point_size != 0); + info.stores.Set(IR::Attribute::PositionX, header.vtg.omap_systemb.position_x != 0); + info.stores.Set(IR::Attribute::PositionY, header.vtg.omap_systemb.position_y != 0); + info.stores.Set(IR::Attribute::PositionZ, header.vtg.omap_systemb.position_z != 0); + info.stores.Set(IR::Attribute::PositionW, header.vtg.omap_systemb.position_w != 0); + info.stores.Set(IR::Attribute::PointSpriteS, header.vtg.omap_systemc.point_sprite_s != 0); + info.stores.Set(IR::Attribute::PointSpriteT, header.vtg.omap_systemc.point_sprite_t != 0); + info.stores.Set(IR::Attribute::FogCoordinate, header.vtg.omap_systemc.fog_coordinate != 0); + info.stores.Set(IR::Attribute::TessellationEvaluationPointU, + header.vtg.omap_systemc.tessellation_eval_point_u != 0); + info.stores.Set(IR::Attribute::TessellationEvaluationPointV, + header.vtg.omap_systemc.tessellation_eval_point_v != 0); + info.stores.Set(IR::Attribute::InstanceId, header.vtg.omap_systemc.instance_id != 0); + info.stores.Set(IR::Attribute::VertexId, header.vtg.omap_systemc.vertex_id != 0); + // TODO: Legacy varyings + } +} +} // Anonymous namespace + +void CollectShaderInfoPass(Environment& env, IR::Program& program) { + Info& info{program.info}; + const u32 base{[&] { + switch (program.stage) { + case Stage::VertexA: + case Stage::VertexB: + return 0x110u; + case Stage::TessellationControl: + return 0x210u; + case Stage::TessellationEval: + return 0x310u; + case Stage::Geometry: + return 0x410u; + case Stage::Fragment: + return 0x510u; + case Stage::Compute: + return 0x310u; + } + throw InvalidArgument("Invalid stage {}", program.stage); + }()}; + info.nvn_buffer_base = base; + + for (IR::Block* const block : program.post_order_blocks) { + for (IR::Inst& inst : block->Instructions()) { + Visit(info, inst); + } + } + GatherInfoFromHeader(env, info); +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp new file mode 100644 index 000000000..8dd6d6c2c --- /dev/null +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -0,0 +1,610 @@ +// Copyright 2021 yuzu Emulator Project +// 
Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <tuple> +#include <type_traits> + +#include "common/bit_cast.h" +#include "common/bit_util.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Optimization { +namespace { +// Metaprogramming stuff to get arguments information out of a lambda +template <typename Func> +struct LambdaTraits : LambdaTraits<decltype(&std::remove_reference_t<Func>::operator())> {}; + +template <typename ReturnType, typename LambdaType, typename... Args> +struct LambdaTraits<ReturnType (LambdaType::*)(Args...) const> { + template <size_t I> + using ArgType = std::tuple_element_t<I, std::tuple<Args...>>; + + static constexpr size_t NUM_ARGS{sizeof...(Args)}; +}; + +template <typename T> +[[nodiscard]] T Arg(const IR::Value& value) { + if constexpr (std::is_same_v<T, bool>) { + return value.U1(); + } else if constexpr (std::is_same_v<T, u32>) { + return value.U32(); + } else if constexpr (std::is_same_v<T, s32>) { + return static_cast<s32>(value.U32()); + } else if constexpr (std::is_same_v<T, f32>) { + return value.F32(); + } else if constexpr (std::is_same_v<T, u64>) { + return value.U64(); + } +} + +template <typename T, typename ImmFn> +bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) { + const IR::Value lhs{inst.Arg(0)}; + const IR::Value rhs{inst.Arg(1)}; + + const bool is_lhs_immediate{lhs.IsImmediate()}; + const bool is_rhs_immediate{rhs.IsImmediate()}; + + if (is_lhs_immediate && is_rhs_immediate) { + const auto result{imm_fn(Arg<T>(lhs), Arg<T>(rhs))}; + inst.ReplaceUsesWith(IR::Value{result}); + return false; + } + if (is_lhs_immediate && !is_rhs_immediate) { + IR::Inst* const rhs_inst{rhs.InstRecursive()}; + if (rhs_inst->GetOpcode() == inst.GetOpcode() && rhs_inst->Arg(1).IsImmediate()) { + const auto combined{imm_fn(Arg<T>(lhs), Arg<T>(rhs_inst->Arg(1)))}; + inst.SetArg(0, rhs_inst->Arg(0)); + inst.SetArg(1, IR::Value{combined}); + } else { + // Normalize + inst.SetArg(0, rhs); + inst.SetArg(1, lhs); + } + } + if (!is_lhs_immediate && is_rhs_immediate) { + const IR::Inst* const lhs_inst{lhs.InstRecursive()}; + if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->Arg(1).IsImmediate()) { + const auto combined{imm_fn(Arg<T>(rhs), Arg<T>(lhs_inst->Arg(1)))}; + inst.SetArg(0, lhs_inst->Arg(0)); + inst.SetArg(1, IR::Value{combined}); + } + } + return true; +} + +template <typename Func> +bool FoldWhenAllImmediates(IR::Inst& inst, Func&& func) { + if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) { + return false; + } + using Indices = std::make_index_sequence<LambdaTraits<decltype(func)>::NUM_ARGS>; + inst.ReplaceUsesWith(EvalImmediates(inst, func, Indices{})); + return true; +} + +void FoldGetRegister(IR::Inst& inst) { + if (inst.Arg(0).Reg() == IR::Reg::RZ) { + inst.ReplaceUsesWith(IR::Value{u32{0}}); + } +} + +void FoldGetPred(IR::Inst& inst) { + if (inst.Arg(0).Pred() == IR::Pred::PT) { + inst.ReplaceUsesWith(IR::Value{true}); + } +} + +/// Replaces the pattern generated by two XMAD multiplications +bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) { + /* + * We are looking for this pattern: + * %rhs_bfe = BitFieldUExtract %factor_a, #0, #16 + * %rhs_mul = IMul32 %rhs_bfe, %factor_b + * %lhs_bfe = BitFieldUExtract %factor_a, #16, #16 + * %rhs_mul = IMul32 %lhs_bfe, %factor_b + * 
%lhs_shl = ShiftLeftLogical32 %rhs_mul, #16 + * %result = IAdd32 %lhs_shl, %rhs_mul + * + * And replacing it with + * %result = IMul32 %factor_a, %factor_b + * + * This optimization has been proven safe by LLVM and MSVC. + */ + const IR::Value lhs_arg{inst.Arg(0)}; + const IR::Value rhs_arg{inst.Arg(1)}; + if (lhs_arg.IsImmediate() || rhs_arg.IsImmediate()) { + return false; + } + IR::Inst* const lhs_shl{lhs_arg.InstRecursive()}; + if (lhs_shl->GetOpcode() != IR::Opcode::ShiftLeftLogical32 || + lhs_shl->Arg(1) != IR::Value{16U}) { + return false; + } + if (lhs_shl->Arg(0).IsImmediate()) { + return false; + } + IR::Inst* const lhs_mul{lhs_shl->Arg(0).InstRecursive()}; + IR::Inst* const rhs_mul{rhs_arg.InstRecursive()}; + if (lhs_mul->GetOpcode() != IR::Opcode::IMul32 || rhs_mul->GetOpcode() != IR::Opcode::IMul32) { + return false; + } + if (lhs_mul->Arg(1).Resolve() != rhs_mul->Arg(1).Resolve()) { + return false; + } + const IR::U32 factor_b{lhs_mul->Arg(1)}; + if (lhs_mul->Arg(0).IsImmediate() || rhs_mul->Arg(0).IsImmediate()) { + return false; + } + IR::Inst* const lhs_bfe{lhs_mul->Arg(0).InstRecursive()}; + IR::Inst* const rhs_bfe{rhs_mul->Arg(0).InstRecursive()}; + if (lhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) { + return false; + } + if (rhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) { + return false; + } + if (lhs_bfe->Arg(1) != IR::Value{16U} || lhs_bfe->Arg(2) != IR::Value{16U}) { + return false; + } + if (rhs_bfe->Arg(1) != IR::Value{0U} || rhs_bfe->Arg(2) != IR::Value{16U}) { + return false; + } + if (lhs_bfe->Arg(0).Resolve() != rhs_bfe->Arg(0).Resolve()) { + return false; + } + const IR::U32 factor_a{lhs_bfe->Arg(0)}; + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + inst.ReplaceUsesWith(ir.IMul(factor_a, factor_b)); + return true; +} + +template <typename T> +void FoldAdd(IR::Block& block, IR::Inst& inst) { + if (inst.HasAssociatedPseudoOperation()) { + return; + } + if (!FoldCommutative<T>(inst, [](T a, T b) { return a + b; })) { + return; + } + const IR::Value rhs{inst.Arg(1)}; + if (rhs.IsImmediate() && Arg<T>(rhs) == 0) { + inst.ReplaceUsesWith(inst.Arg(0)); + return; + } + if constexpr (std::is_same_v<T, u32>) { + if (FoldXmadMultiply(block, inst)) { + return; + } + } +} + +void FoldISub32(IR::Inst& inst) { + if (FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a - b; })) { + return; + } + if (inst.Arg(0).IsImmediate() || inst.Arg(1).IsImmediate()) { + return; + } + // ISub32 is generally used to subtract two constant buffers, compare and replace this with + // zero if they equal. 
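+    // Illustrative example of the folds below (value names are hypothetical):
+    //   %a = GetCbufU32 #0, #0x110
+    //   %b = GetCbufU32 #0, #0x110
+    //   ISub32 %a, %b              -> folded to #0
+    //   ISub32 (IAdd32 %x, %a), %a -> folded to %x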
+ const auto equal_cbuf{[](IR::Inst* a, IR::Inst* b) { + return a->GetOpcode() == IR::Opcode::GetCbufU32 && + b->GetOpcode() == IR::Opcode::GetCbufU32 && a->Arg(0) == b->Arg(0) && + a->Arg(1) == b->Arg(1); + }}; + IR::Inst* op_a{inst.Arg(0).InstRecursive()}; + IR::Inst* op_b{inst.Arg(1).InstRecursive()}; + if (equal_cbuf(op_a, op_b)) { + inst.ReplaceUsesWith(IR::Value{u32{0}}); + return; + } + // It's also possible a value is being added to a cbuf and then subtracted + if (op_b->GetOpcode() == IR::Opcode::IAdd32) { + // Canonicalize local variables to simplify the following logic + std::swap(op_a, op_b); + } + if (op_b->GetOpcode() != IR::Opcode::GetCbufU32) { + return; + } + IR::Inst* const inst_cbuf{op_b}; + if (op_a->GetOpcode() != IR::Opcode::IAdd32) { + return; + } + IR::Value add_op_a{op_a->Arg(0)}; + IR::Value add_op_b{op_a->Arg(1)}; + if (add_op_b.IsImmediate()) { + // Canonicalize + std::swap(add_op_a, add_op_b); + } + if (add_op_b.IsImmediate()) { + return; + } + IR::Inst* const add_cbuf{add_op_b.InstRecursive()}; + if (equal_cbuf(add_cbuf, inst_cbuf)) { + inst.ReplaceUsesWith(add_op_a); + } +} + +void FoldSelect(IR::Inst& inst) { + const IR::Value cond{inst.Arg(0)}; + if (cond.IsImmediate()) { + inst.ReplaceUsesWith(cond.U1() ? inst.Arg(1) : inst.Arg(2)); + } +} + +void FoldFPMul32(IR::Inst& inst) { + const auto control{inst.Flags<IR::FpControl>()}; + if (control.no_contraction) { + return; + } + // Fold interpolation operations + const IR::Value lhs_value{inst.Arg(0)}; + const IR::Value rhs_value{inst.Arg(1)}; + if (lhs_value.IsImmediate() || rhs_value.IsImmediate()) { + return; + } + IR::Inst* const lhs_op{lhs_value.InstRecursive()}; + IR::Inst* const rhs_op{rhs_value.InstRecursive()}; + if (lhs_op->GetOpcode() != IR::Opcode::FPMul32 || + rhs_op->GetOpcode() != IR::Opcode::FPRecip32) { + return; + } + const IR::Value recip_source{rhs_op->Arg(0)}; + const IR::Value lhs_mul_source{lhs_op->Arg(1).Resolve()}; + if (recip_source.IsImmediate() || lhs_mul_source.IsImmediate()) { + return; + } + IR::Inst* const attr_a{recip_source.InstRecursive()}; + IR::Inst* const attr_b{lhs_mul_source.InstRecursive()}; + if (attr_a->GetOpcode() != IR::Opcode::GetAttribute || + attr_b->GetOpcode() != IR::Opcode::GetAttribute) { + return; + } + if (attr_a->Arg(0).Attribute() == attr_b->Arg(0).Attribute()) { + inst.ReplaceUsesWith(lhs_op->Arg(0)); + } +} + +void FoldLogicalAnd(IR::Inst& inst) { + if (!FoldCommutative<bool>(inst, [](bool a, bool b) { return a && b; })) { + return; + } + const IR::Value rhs{inst.Arg(1)}; + if (rhs.IsImmediate()) { + if (rhs.U1()) { + inst.ReplaceUsesWith(inst.Arg(0)); + } else { + inst.ReplaceUsesWith(IR::Value{false}); + } + } +} + +void FoldLogicalOr(IR::Inst& inst) { + if (!FoldCommutative<bool>(inst, [](bool a, bool b) { return a || b; })) { + return; + } + const IR::Value rhs{inst.Arg(1)}; + if (rhs.IsImmediate()) { + if (rhs.U1()) { + inst.ReplaceUsesWith(IR::Value{true}); + } else { + inst.ReplaceUsesWith(inst.Arg(0)); + } + } +} + +void FoldLogicalNot(IR::Inst& inst) { + const IR::U1 value{inst.Arg(0)}; + if (value.IsImmediate()) { + inst.ReplaceUsesWith(IR::Value{!value.U1()}); + return; + } + IR::Inst* const arg{value.InstRecursive()}; + if (arg->GetOpcode() == IR::Opcode::LogicalNot) { + inst.ReplaceUsesWith(arg->Arg(0)); + } +} + +template <IR::Opcode op, typename Dest, typename Source> +void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) { + const IR::Value value{inst.Arg(0)}; + if (value.IsImmediate()) { + 
inst.ReplaceUsesWith(IR::Value{Common::BitCast<Dest>(Arg<Source>(value))}); + return; + } + IR::Inst* const arg_inst{value.InstRecursive()}; + if (arg_inst->GetOpcode() == reverse) { + inst.ReplaceUsesWith(arg_inst->Arg(0)); + return; + } + if constexpr (op == IR::Opcode::BitCastF32U32) { + if (arg_inst->GetOpcode() == IR::Opcode::GetCbufU32) { + // Replace the bitcast with a typed constant buffer read + inst.ReplaceOpcode(IR::Opcode::GetCbufF32); + inst.SetArg(0, arg_inst->Arg(0)); + inst.SetArg(1, arg_inst->Arg(1)); + return; + } + } +} + +void FoldInverseFunc(IR::Inst& inst, IR::Opcode reverse) { + const IR::Value value{inst.Arg(0)}; + if (value.IsImmediate()) { + return; + } + IR::Inst* const arg_inst{value.InstRecursive()}; + if (arg_inst->GetOpcode() == reverse) { + inst.ReplaceUsesWith(arg_inst->Arg(0)); + return; + } +} + +template <typename Func, size_t... I> +IR::Value EvalImmediates(const IR::Inst& inst, Func&& func, std::index_sequence<I...>) { + using Traits = LambdaTraits<decltype(func)>; + return IR::Value{func(Arg<typename Traits::template ArgType<I>>(inst.Arg(I))...)}; +} + +std::optional<IR::Value> FoldCompositeExtractImpl(IR::Value inst_value, IR::Opcode insert, + IR::Opcode construct, u32 first_index) { + IR::Inst* const inst{inst_value.InstRecursive()}; + if (inst->GetOpcode() == construct) { + return inst->Arg(first_index); + } + if (inst->GetOpcode() != insert) { + return std::nullopt; + } + IR::Value value_index{inst->Arg(2)}; + if (!value_index.IsImmediate()) { + return std::nullopt; + } + const u32 second_index{value_index.U32()}; + if (first_index != second_index) { + IR::Value value_composite{inst->Arg(0)}; + if (value_composite.IsImmediate()) { + return std::nullopt; + } + return FoldCompositeExtractImpl(value_composite, insert, construct, first_index); + } + return inst->Arg(1); +} + +void FoldCompositeExtract(IR::Inst& inst, IR::Opcode construct, IR::Opcode insert) { + const IR::Value value_1{inst.Arg(0)}; + const IR::Value value_2{inst.Arg(1)}; + if (value_1.IsImmediate()) { + return; + } + if (!value_2.IsImmediate()) { + return; + } + const u32 first_index{value_2.U32()}; + const std::optional result{FoldCompositeExtractImpl(value_1, insert, construct, first_index)}; + if (!result) { + return; + } + inst.ReplaceUsesWith(*result); +} + +IR::Value GetThroughCast(IR::Value value, IR::Opcode expected_cast) { + if (value.IsImmediate()) { + return value; + } + IR::Inst* const inst{value.InstRecursive()}; + if (inst->GetOpcode() == expected_cast) { + return inst->Arg(0).Resolve(); + } + return value; +} + +void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) { + const IR::Value swizzle{inst.Arg(2)}; + if (!swizzle.IsImmediate()) { + return; + } + const IR::Value value_1{GetThroughCast(inst.Arg(0).Resolve(), IR::Opcode::BitCastF32U32)}; + const IR::Value value_2{GetThroughCast(inst.Arg(1).Resolve(), IR::Opcode::BitCastF32U32)}; + if (value_1.IsImmediate()) { + return; + } + const u32 swizzle_value{swizzle.U32()}; + if (swizzle_value != 0x99 && swizzle_value != 0xA5) { + return; + } + IR::Inst* const inst2{value_1.InstRecursive()}; + if (inst2->GetOpcode() != IR::Opcode::ShuffleButterfly) { + return; + } + const IR::Value value_3{GetThroughCast(inst2->Arg(0).Resolve(), IR::Opcode::BitCastU32F32)}; + if (value_2 != value_3) { + return; + } + const IR::Value index{inst2->Arg(1)}; + const IR::Value clamp{inst2->Arg(2)}; + const IR::Value segmentation_mask{inst2->Arg(3)}; + if (!index.IsImmediate() || !clamp.IsImmediate() || !segmentation_mask.IsImmediate()) { + 
return; + } + if (clamp.U32() != 3 || segmentation_mask.U32() != 28) { + return; + } + if (swizzle_value == 0x99) { + // DPdxFine + if (index.U32() == 1) { + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + inst.ReplaceUsesWith(ir.DPdxFine(IR::F32{inst.Arg(1)})); + } + } else if (swizzle_value == 0xA5) { + // DPdyFine + if (index.U32() == 2) { + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + inst.ReplaceUsesWith(ir.DPdyFine(IR::F32{inst.Arg(1)})); + } + } +} + +void ConstantPropagation(IR::Block& block, IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::GetRegister: + return FoldGetRegister(inst); + case IR::Opcode::GetPred: + return FoldGetPred(inst); + case IR::Opcode::IAdd32: + return FoldAdd<u32>(block, inst); + case IR::Opcode::ISub32: + return FoldISub32(inst); + case IR::Opcode::IMul32: + FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a * b; }); + return; + case IR::Opcode::ShiftRightArithmetic32: + FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return static_cast<u32>(a >> b); }); + return; + case IR::Opcode::BitCastF32U32: + return FoldBitCast<IR::Opcode::BitCastF32U32, f32, u32>(inst, IR::Opcode::BitCastU32F32); + case IR::Opcode::BitCastU32F32: + return FoldBitCast<IR::Opcode::BitCastU32F32, u32, f32>(inst, IR::Opcode::BitCastF32U32); + case IR::Opcode::IAdd64: + return FoldAdd<u64>(block, inst); + case IR::Opcode::PackHalf2x16: + return FoldInverseFunc(inst, IR::Opcode::UnpackHalf2x16); + case IR::Opcode::UnpackHalf2x16: + return FoldInverseFunc(inst, IR::Opcode::PackHalf2x16); + case IR::Opcode::SelectU1: + case IR::Opcode::SelectU8: + case IR::Opcode::SelectU16: + case IR::Opcode::SelectU32: + case IR::Opcode::SelectU64: + case IR::Opcode::SelectF16: + case IR::Opcode::SelectF32: + case IR::Opcode::SelectF64: + return FoldSelect(inst); + case IR::Opcode::FPMul32: + return FoldFPMul32(inst); + case IR::Opcode::LogicalAnd: + return FoldLogicalAnd(inst); + case IR::Opcode::LogicalOr: + return FoldLogicalOr(inst); + case IR::Opcode::LogicalNot: + return FoldLogicalNot(inst); + case IR::Opcode::SLessThan: + FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a < b; }); + return; + case IR::Opcode::ULessThan: + FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a < b; }); + return; + case IR::Opcode::SLessThanEqual: + FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a <= b; }); + return; + case IR::Opcode::ULessThanEqual: + FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a <= b; }); + return; + case IR::Opcode::SGreaterThan: + FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a > b; }); + return; + case IR::Opcode::UGreaterThan: + FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a > b; }); + return; + case IR::Opcode::SGreaterThanEqual: + FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a >= b; }); + return; + case IR::Opcode::UGreaterThanEqual: + FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a >= b; }); + return; + case IR::Opcode::IEqual: + FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a == b; }); + return; + case IR::Opcode::INotEqual: + FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a != b; }); + return; + case IR::Opcode::BitwiseAnd32: + FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a & b; }); + return; + case IR::Opcode::BitwiseOr32: + FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a | b; }); + return; + case IR::Opcode::BitwiseXor32: + FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a ^ b; }); + return; + case 
IR::Opcode::BitFieldUExtract: + FoldWhenAllImmediates(inst, [](u32 base, u32 shift, u32 count) { + if (static_cast<size_t>(shift) + static_cast<size_t>(count) > 32) { + throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldUExtract, + base, shift, count); + } + return (base >> shift) & ((1U << count) - 1); + }); + return; + case IR::Opcode::BitFieldSExtract: + FoldWhenAllImmediates(inst, [](s32 base, u32 shift, u32 count) { + const size_t back_shift{static_cast<size_t>(shift) + static_cast<size_t>(count)}; + const size_t left_shift{32 - back_shift}; + const size_t right_shift{static_cast<size_t>(32 - count)}; + if (back_shift > 32 || left_shift >= 32 || right_shift >= 32) { + throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldSExtract, + base, shift, count); + } + return static_cast<u32>((base << left_shift) >> right_shift); + }); + return; + case IR::Opcode::BitFieldInsert: + FoldWhenAllImmediates(inst, [](u32 base, u32 insert, u32 offset, u32 bits) { + if (bits >= 32 || offset >= 32) { + throw LogicError("Undefined result in {}({}, {}, {}, {})", + IR::Opcode::BitFieldInsert, base, insert, offset, bits); + } + return (base & ~(~(~0u << bits) << offset)) | (insert << offset); + }); + return; + case IR::Opcode::CompositeExtractU32x2: + return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructU32x2, + IR::Opcode::CompositeInsertU32x2); + case IR::Opcode::CompositeExtractU32x3: + return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructU32x3, + IR::Opcode::CompositeInsertU32x3); + case IR::Opcode::CompositeExtractU32x4: + return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructU32x4, + IR::Opcode::CompositeInsertU32x4); + case IR::Opcode::CompositeExtractF32x2: + return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x2, + IR::Opcode::CompositeInsertF32x2); + case IR::Opcode::CompositeExtractF32x3: + return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x3, + IR::Opcode::CompositeInsertF32x3); + case IR::Opcode::CompositeExtractF32x4: + return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x4, + IR::Opcode::CompositeInsertF32x4); + case IR::Opcode::CompositeExtractF16x2: + return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x2, + IR::Opcode::CompositeInsertF16x2); + case IR::Opcode::CompositeExtractF16x3: + return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x3, + IR::Opcode::CompositeInsertF16x3); + case IR::Opcode::CompositeExtractF16x4: + return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x4, + IR::Opcode::CompositeInsertF16x4); + case IR::Opcode::FSwizzleAdd: + return FoldFSwizzleAdd(block, inst); + default: + break; + } +} +} // Anonymous namespace + +void ConstantPropagationPass(IR::Program& program) { + const auto end{program.post_order_blocks.rend()}; + for (auto it = program.post_order_blocks.rbegin(); it != end; ++it) { + IR::Block* const block{*it}; + for (IR::Inst& inst : block->Instructions()) { + ConstantPropagation(*block, inst); + } + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp new file mode 100644 index 000000000..400836301 --- /dev/null +++ b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp @@ -0,0 +1,26 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
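+
+// Removes instructions that have no uses and no observable side effects; each
+// block's instruction list is walked in reverse so that erasing an instruction
+// can expose further dead instructions before it.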
+ +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Optimization { + +void DeadCodeEliminationPass(IR::Program& program) { + // We iterate over the instructions in reverse order. + // This is because removing an instruction reduces the number of uses for earlier instructions. + for (IR::Block* const block : program.post_order_blocks) { + auto it{block->end()}; + while (it != block->begin()) { + --it; + if (!it->HasUses() && !it->MayHaveSideEffects()) { + it->Invalidate(); + it = block->Instructions().erase(it); + } + } + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp new file mode 100644 index 000000000..055ba9c54 --- /dev/null +++ b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp @@ -0,0 +1,30 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Optimization { + +void VertexATransformPass(IR::Program& program) { + for (IR::Block* const block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (inst.GetOpcode() == IR::Opcode::Epilogue) { + return inst.Invalidate(); + } + } + } +} + +void VertexBTransformPass(IR::Program& program) { + for (IR::Block* const block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (inst.GetOpcode() == IR::Opcode::Prologue) { + return inst.Invalidate(); + } + } + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp new file mode 100644 index 000000000..4197b0095 --- /dev/null +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -0,0 +1,526 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <algorithm> +#include <compare> +#include <optional> +#include <queue> + +#include <boost/container/flat_set.hpp> +#include <boost/container/small_vector.hpp> + +#include "common/alignment.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/breadth_first_search.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Optimization { +namespace { +/// Address in constant buffers to the storage buffer descriptor +struct StorageBufferAddr { + auto operator<=>(const StorageBufferAddr&) const noexcept = default; + + u32 index; + u32 offset; +}; + +/// Block iterator to a global memory instruction and the storage buffer it uses +struct StorageInst { + StorageBufferAddr storage_buffer; + IR::Inst* inst; + IR::Block* block; +}; + +/// Bias towards a certain range of constant buffers when looking for storage buffers +struct Bias { + u32 index; + u32 offset_begin; + u32 offset_end; +}; + +using boost::container::flat_set; +using boost::container::small_vector; +using StorageBufferSet = + flat_set<StorageBufferAddr, std::less<StorageBufferAddr>, small_vector<StorageBufferAddr, 16>>; +using StorageInstVector = small_vector<StorageInst, 24>; +using StorageWritesSet = + flat_set<StorageBufferAddr, std::less<StorageBufferAddr>, small_vector<StorageBufferAddr, 16>>; + +struct StorageInfo { + StorageBufferSet set; + StorageInstVector to_replace; + StorageWritesSet writes; +}; + +/// Returns true when the instruction is a global memory instruction +bool IsGlobalMemory(const IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::LoadGlobalS8: + case IR::Opcode::LoadGlobalU8: + case IR::Opcode::LoadGlobalS16: + case IR::Opcode::LoadGlobalU16: + case IR::Opcode::LoadGlobal32: + case IR::Opcode::LoadGlobal64: + case IR::Opcode::LoadGlobal128: + case IR::Opcode::WriteGlobalS8: + case IR::Opcode::WriteGlobalU8: + case IR::Opcode::WriteGlobalS16: + case IR::Opcode::WriteGlobalU16: + case IR::Opcode::WriteGlobal32: + case IR::Opcode::WriteGlobal64: + case IR::Opcode::WriteGlobal128: + case IR::Opcode::GlobalAtomicIAdd32: + case IR::Opcode::GlobalAtomicSMin32: + case IR::Opcode::GlobalAtomicUMin32: + case IR::Opcode::GlobalAtomicSMax32: + case IR::Opcode::GlobalAtomicUMax32: + case IR::Opcode::GlobalAtomicInc32: + case IR::Opcode::GlobalAtomicDec32: + case IR::Opcode::GlobalAtomicAnd32: + case IR::Opcode::GlobalAtomicOr32: + case IR::Opcode::GlobalAtomicXor32: + case IR::Opcode::GlobalAtomicExchange32: + case IR::Opcode::GlobalAtomicIAdd64: + case IR::Opcode::GlobalAtomicSMin64: + case IR::Opcode::GlobalAtomicUMin64: + case IR::Opcode::GlobalAtomicSMax64: + case IR::Opcode::GlobalAtomicUMax64: + case IR::Opcode::GlobalAtomicAnd64: + case IR::Opcode::GlobalAtomicOr64: + case IR::Opcode::GlobalAtomicXor64: + case IR::Opcode::GlobalAtomicExchange64: + case IR::Opcode::GlobalAtomicAddF32: + case IR::Opcode::GlobalAtomicAddF16x2: + case IR::Opcode::GlobalAtomicAddF32x2: + case IR::Opcode::GlobalAtomicMinF16x2: + case IR::Opcode::GlobalAtomicMinF32x2: + case IR::Opcode::GlobalAtomicMaxF16x2: + case IR::Opcode::GlobalAtomicMaxF32x2: + return true; + default: + return false; + } +} + +/// Returns true when the instruction is a global memory instruction +bool IsGlobalMemoryWrite(const IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::WriteGlobalS8: + case IR::Opcode::WriteGlobalU8: + case IR::Opcode::WriteGlobalS16: + case 
IR::Opcode::WriteGlobalU16: + case IR::Opcode::WriteGlobal32: + case IR::Opcode::WriteGlobal64: + case IR::Opcode::WriteGlobal128: + case IR::Opcode::GlobalAtomicIAdd32: + case IR::Opcode::GlobalAtomicSMin32: + case IR::Opcode::GlobalAtomicUMin32: + case IR::Opcode::GlobalAtomicSMax32: + case IR::Opcode::GlobalAtomicUMax32: + case IR::Opcode::GlobalAtomicInc32: + case IR::Opcode::GlobalAtomicDec32: + case IR::Opcode::GlobalAtomicAnd32: + case IR::Opcode::GlobalAtomicOr32: + case IR::Opcode::GlobalAtomicXor32: + case IR::Opcode::GlobalAtomicExchange32: + case IR::Opcode::GlobalAtomicIAdd64: + case IR::Opcode::GlobalAtomicSMin64: + case IR::Opcode::GlobalAtomicUMin64: + case IR::Opcode::GlobalAtomicSMax64: + case IR::Opcode::GlobalAtomicUMax64: + case IR::Opcode::GlobalAtomicAnd64: + case IR::Opcode::GlobalAtomicOr64: + case IR::Opcode::GlobalAtomicXor64: + case IR::Opcode::GlobalAtomicExchange64: + case IR::Opcode::GlobalAtomicAddF32: + case IR::Opcode::GlobalAtomicAddF16x2: + case IR::Opcode::GlobalAtomicAddF32x2: + case IR::Opcode::GlobalAtomicMinF16x2: + case IR::Opcode::GlobalAtomicMinF32x2: + case IR::Opcode::GlobalAtomicMaxF16x2: + case IR::Opcode::GlobalAtomicMaxF32x2: + return true; + default: + return false; + } +} + +/// Converts a global memory opcode to its storage buffer equivalent +IR::Opcode GlobalToStorage(IR::Opcode opcode) { + switch (opcode) { + case IR::Opcode::LoadGlobalS8: + return IR::Opcode::LoadStorageS8; + case IR::Opcode::LoadGlobalU8: + return IR::Opcode::LoadStorageU8; + case IR::Opcode::LoadGlobalS16: + return IR::Opcode::LoadStorageS16; + case IR::Opcode::LoadGlobalU16: + return IR::Opcode::LoadStorageU16; + case IR::Opcode::LoadGlobal32: + return IR::Opcode::LoadStorage32; + case IR::Opcode::LoadGlobal64: + return IR::Opcode::LoadStorage64; + case IR::Opcode::LoadGlobal128: + return IR::Opcode::LoadStorage128; + case IR::Opcode::WriteGlobalS8: + return IR::Opcode::WriteStorageS8; + case IR::Opcode::WriteGlobalU8: + return IR::Opcode::WriteStorageU8; + case IR::Opcode::WriteGlobalS16: + return IR::Opcode::WriteStorageS16; + case IR::Opcode::WriteGlobalU16: + return IR::Opcode::WriteStorageU16; + case IR::Opcode::WriteGlobal32: + return IR::Opcode::WriteStorage32; + case IR::Opcode::WriteGlobal64: + return IR::Opcode::WriteStorage64; + case IR::Opcode::WriteGlobal128: + return IR::Opcode::WriteStorage128; + case IR::Opcode::GlobalAtomicIAdd32: + return IR::Opcode::StorageAtomicIAdd32; + case IR::Opcode::GlobalAtomicSMin32: + return IR::Opcode::StorageAtomicSMin32; + case IR::Opcode::GlobalAtomicUMin32: + return IR::Opcode::StorageAtomicUMin32; + case IR::Opcode::GlobalAtomicSMax32: + return IR::Opcode::StorageAtomicSMax32; + case IR::Opcode::GlobalAtomicUMax32: + return IR::Opcode::StorageAtomicUMax32; + case IR::Opcode::GlobalAtomicInc32: + return IR::Opcode::StorageAtomicInc32; + case IR::Opcode::GlobalAtomicDec32: + return IR::Opcode::StorageAtomicDec32; + case IR::Opcode::GlobalAtomicAnd32: + return IR::Opcode::StorageAtomicAnd32; + case IR::Opcode::GlobalAtomicOr32: + return IR::Opcode::StorageAtomicOr32; + case IR::Opcode::GlobalAtomicXor32: + return IR::Opcode::StorageAtomicXor32; + case IR::Opcode::GlobalAtomicIAdd64: + return IR::Opcode::StorageAtomicIAdd64; + case IR::Opcode::GlobalAtomicSMin64: + return IR::Opcode::StorageAtomicSMin64; + case IR::Opcode::GlobalAtomicUMin64: + return IR::Opcode::StorageAtomicUMin64; + case IR::Opcode::GlobalAtomicSMax64: + return IR::Opcode::StorageAtomicSMax64; + case IR::Opcode::GlobalAtomicUMax64: + return 
IR::Opcode::StorageAtomicUMax64; + case IR::Opcode::GlobalAtomicAnd64: + return IR::Opcode::StorageAtomicAnd64; + case IR::Opcode::GlobalAtomicOr64: + return IR::Opcode::StorageAtomicOr64; + case IR::Opcode::GlobalAtomicXor64: + return IR::Opcode::StorageAtomicXor64; + case IR::Opcode::GlobalAtomicExchange32: + return IR::Opcode::StorageAtomicExchange32; + case IR::Opcode::GlobalAtomicExchange64: + return IR::Opcode::StorageAtomicExchange64; + case IR::Opcode::GlobalAtomicAddF32: + return IR::Opcode::StorageAtomicAddF32; + case IR::Opcode::GlobalAtomicAddF16x2: + return IR::Opcode::StorageAtomicAddF16x2; + case IR::Opcode::GlobalAtomicMinF16x2: + return IR::Opcode::StorageAtomicMinF16x2; + case IR::Opcode::GlobalAtomicMaxF16x2: + return IR::Opcode::StorageAtomicMaxF16x2; + case IR::Opcode::GlobalAtomicAddF32x2: + return IR::Opcode::StorageAtomicAddF32x2; + case IR::Opcode::GlobalAtomicMinF32x2: + return IR::Opcode::StorageAtomicMinF32x2; + case IR::Opcode::GlobalAtomicMaxF32x2: + return IR::Opcode::StorageAtomicMaxF32x2; + default: + throw InvalidArgument("Invalid global memory opcode {}", opcode); + } +} + +/// Returns true when a storage buffer address satisfies a bias +bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexcept { + return storage_buffer.index == bias.index && storage_buffer.offset >= bias.offset_begin && + storage_buffer.offset < bias.offset_end; +} + +struct LowAddrInfo { + IR::U32 value; + s32 imm_offset; +}; + +/// Tries to track the first 32-bits of a global memory instruction +std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) { + // The first argument is the low level GPU pointer to the global memory instruction + const IR::Value addr{inst->Arg(0)}; + if (addr.IsImmediate()) { + // Not much we can do if it's an immediate + return std::nullopt; + } + // This address is expected to either be a PackUint2x32, a IAdd64, or a CompositeConstructU32x2 + IR::Inst* addr_inst{addr.InstRecursive()}; + s32 imm_offset{0}; + if (addr_inst->GetOpcode() == IR::Opcode::IAdd64) { + // If it's an IAdd64, get the immediate offset it is applying and grab the address + // instruction. This expects for the instruction to be canonicalized having the address on + // the first argument and the immediate offset on the second one. + const IR::U64 imm_offset_value{addr_inst->Arg(1)}; + if (!imm_offset_value.IsImmediate()) { + return std::nullopt; + } + imm_offset = static_cast<s32>(static_cast<s64>(imm_offset_value.U64())); + const IR::U64 iadd_addr{addr_inst->Arg(0)}; + if (iadd_addr.IsImmediate()) { + return std::nullopt; + } + addr_inst = iadd_addr.InstRecursive(); + } + // With IAdd64 handled, now PackUint2x32 is expected + if (addr_inst->GetOpcode() == IR::Opcode::PackUint2x32) { + // PackUint2x32 is expected to be generated from a vector + const IR::Value vector{addr_inst->Arg(0)}; + if (vector.IsImmediate()) { + return std::nullopt; + } + addr_inst = vector.InstRecursive(); + } + // The vector is expected to be a CompositeConstructU32x2 + if (addr_inst->GetOpcode() != IR::Opcode::CompositeConstructU32x2) { + return std::nullopt; + } + // Grab the first argument from the CompositeConstructU32x2, this is the low address. 
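+    // Illustrative shape of the address chain walked above (value names are
+    // hypothetical); the IAdd64/PackUint2x32 wrappers are optional:
+    //   %vec  = CompositeConstructU32x2 %lo, %hi
+    //   %addr = IAdd64 (PackUint2x32 %vec), #imm
+    // yielding LowAddrInfo{.value = %lo, .imm_offset = imm}.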
+ return LowAddrInfo{ + .value{IR::U32{addr_inst->Arg(0)}}, + .imm_offset = imm_offset, + }; +} + +/// Tries to track the storage buffer address used by a global memory instruction +std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) { + const auto pred{[bias](const IR::Inst* inst) -> std::optional<StorageBufferAddr> { + if (inst->GetOpcode() != IR::Opcode::GetCbufU32) { + return std::nullopt; + } + const IR::Value index{inst->Arg(0)}; + const IR::Value offset{inst->Arg(1)}; + if (!index.IsImmediate()) { + // Definitely not a storage buffer if it's read from a + // non-immediate index + return std::nullopt; + } + if (!offset.IsImmediate()) { + // TODO: Support SSBO arrays + return std::nullopt; + } + const StorageBufferAddr storage_buffer{ + .index = index.U32(), + .offset = offset.U32(), + }; + if (!Common::IsAligned(storage_buffer.offset, 16)) { + // The SSBO pointer has to be aligned + return std::nullopt; + } + if (bias && !MeetsBias(storage_buffer, *bias)) { + // We have to blacklist some addresses in case we wrongly + // point to them + return std::nullopt; + } + return storage_buffer; + }}; + return BreadthFirstSearch(value, pred); +} + +/// Collects the storage buffer used by a global memory instruction and the instruction itself +void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info) { + // NVN puts storage buffers in a specific range, we have to bias towards these addresses to + // avoid getting false positives + static constexpr Bias nvn_bias{ + .index = 0, + .offset_begin = 0x110, + .offset_end = 0x610, + }; + // Track the low address of the instruction + const std::optional<LowAddrInfo> low_addr_info{TrackLowAddress(&inst)}; + if (!low_addr_info) { + // Failed to track the low address, use NVN fallbacks + return; + } + // First try to find storage buffers in the NVN address + const IR::U32 low_addr{low_addr_info->value}; + std::optional<StorageBufferAddr> storage_buffer{Track(low_addr, &nvn_bias)}; + if (!storage_buffer) { + // If it fails, track without a bias + storage_buffer = Track(low_addr, nullptr); + if (!storage_buffer) { + // If that also fails, use NVN fallbacks + return; + } + } + // Collect storage buffer and the instruction + if (IsGlobalMemoryWrite(inst)) { + info.writes.insert(*storage_buffer); + } + info.set.insert(*storage_buffer); + info.to_replace.push_back(StorageInst{ + .storage_buffer{*storage_buffer}, + .inst = &inst, + .block = &block, + }); +} + +/// Returns the offset in indices (not bytes) for an equivalent storage instruction +IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer) { + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + IR::U32 offset; + if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&inst)}) { + offset = low_addr->value; + if (low_addr->imm_offset != 0) { + offset = ir.IAdd(offset, ir.Imm32(low_addr->imm_offset)); + } + } else { + offset = ir.UConvert(32, IR::U64{inst.Arg(0)}); + } + // Subtract the least significant 32 bits from the guest offset. The result is the storage + // buffer offset in bytes. 
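+    // e.g. (hypothetical values) if the constant buffer word holds a buffer
+    // address with low word 0x00120000 and the tracked low address resolves to
+    // 0x00120040, the resulting storage buffer offset is 0x40 bytes.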
+ const IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))}; + return ir.ISub(offset, low_cbuf); +} + +/// Replace a global memory load instruction with its storage buffer equivalent +void ReplaceLoad(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, + const IR::U32& offset) { + const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())}; + const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; + const IR::Value value{&*block.PrependNewInst(it, new_opcode, {storage_index, offset})}; + inst.ReplaceUsesWith(value); +} + +/// Replace a global memory write instruction with its storage buffer equivalent +void ReplaceWrite(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, + const IR::U32& offset) { + const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())}; + const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; + block.PrependNewInst(it, new_opcode, {storage_index, offset, inst.Arg(1)}); + inst.Invalidate(); +} + +/// Replace an atomic operation on global memory instruction with its storage buffer equivalent +void ReplaceAtomic(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, + const IR::U32& offset) { + const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())}; + const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; + const IR::Value value{ + &*block.PrependNewInst(it, new_opcode, {storage_index, offset, inst.Arg(1)})}; + inst.ReplaceUsesWith(value); +} + +/// Replace a global memory instruction with its storage buffer equivalent +void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, + const IR::U32& offset) { + switch (inst.GetOpcode()) { + case IR::Opcode::LoadGlobalS8: + case IR::Opcode::LoadGlobalU8: + case IR::Opcode::LoadGlobalS16: + case IR::Opcode::LoadGlobalU16: + case IR::Opcode::LoadGlobal32: + case IR::Opcode::LoadGlobal64: + case IR::Opcode::LoadGlobal128: + return ReplaceLoad(block, inst, storage_index, offset); + case IR::Opcode::WriteGlobalS8: + case IR::Opcode::WriteGlobalU8: + case IR::Opcode::WriteGlobalS16: + case IR::Opcode::WriteGlobalU16: + case IR::Opcode::WriteGlobal32: + case IR::Opcode::WriteGlobal64: + case IR::Opcode::WriteGlobal128: + return ReplaceWrite(block, inst, storage_index, offset); + case IR::Opcode::GlobalAtomicIAdd32: + case IR::Opcode::GlobalAtomicSMin32: + case IR::Opcode::GlobalAtomicUMin32: + case IR::Opcode::GlobalAtomicSMax32: + case IR::Opcode::GlobalAtomicUMax32: + case IR::Opcode::GlobalAtomicInc32: + case IR::Opcode::GlobalAtomicDec32: + case IR::Opcode::GlobalAtomicAnd32: + case IR::Opcode::GlobalAtomicOr32: + case IR::Opcode::GlobalAtomicXor32: + case IR::Opcode::GlobalAtomicExchange32: + case IR::Opcode::GlobalAtomicIAdd64: + case IR::Opcode::GlobalAtomicSMin64: + case IR::Opcode::GlobalAtomicUMin64: + case IR::Opcode::GlobalAtomicSMax64: + case IR::Opcode::GlobalAtomicUMax64: + case IR::Opcode::GlobalAtomicAnd64: + case IR::Opcode::GlobalAtomicOr64: + case IR::Opcode::GlobalAtomicXor64: + case IR::Opcode::GlobalAtomicExchange64: + case IR::Opcode::GlobalAtomicAddF32: + case IR::Opcode::GlobalAtomicAddF16x2: + case IR::Opcode::GlobalAtomicAddF32x2: + case IR::Opcode::GlobalAtomicMinF16x2: + case IR::Opcode::GlobalAtomicMinF32x2: + case IR::Opcode::GlobalAtomicMaxF16x2: + case IR::Opcode::GlobalAtomicMaxF32x2: + return ReplaceAtomic(block, inst, storage_index, offset); + default: + throw InvalidArgument("Invalid global memory opcode {}", inst.GetOpcode()); + } +} +} // Anonymous namespace + +void 
GlobalMemoryToStorageBufferPass(IR::Program& program) { + StorageInfo info; + for (IR::Block* const block : program.post_order_blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (!IsGlobalMemory(inst)) { + continue; + } + CollectStorageBuffers(*block, inst, info); + } + } + for (const StorageBufferAddr& storage_buffer : info.set) { + program.info.storage_buffers_descriptors.push_back({ + .cbuf_index = storage_buffer.index, + .cbuf_offset = storage_buffer.offset, + .count = 1, + .is_written = info.writes.contains(storage_buffer), + }); + } + for (const StorageInst& storage_inst : info.to_replace) { + const StorageBufferAddr storage_buffer{storage_inst.storage_buffer}; + const auto it{info.set.find(storage_inst.storage_buffer)}; + const IR::U32 index{IR::Value{static_cast<u32>(info.set.index_of(it))}}; + IR::Block* const block{storage_inst.block}; + IR::Inst* const inst{storage_inst.inst}; + const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)}; + Replace(*block, *inst, index, offset); + } +} + +template <typename Descriptors, typename Descriptor, typename Func> +static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) { + // TODO: Handle arrays + const auto it{std::ranges::find_if(descriptors, pred)}; + if (it != descriptors.end()) { + return static_cast<u32>(std::distance(descriptors.begin(), it)); + } + descriptors.push_back(desc); + return static_cast<u32>(descriptors.size()) - 1; +} + +void JoinStorageInfo(Info& base, Info& source) { + auto& descriptors = base.storage_buffers_descriptors; + for (auto& desc : source.storage_buffers_descriptors) { + auto it{std::ranges::find_if(descriptors, [&desc](const auto& existing) { + return desc.cbuf_index == existing.cbuf_index && + desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count; + })}; + if (it != descriptors.end()) { + it->is_written |= desc.is_written; + continue; + } + descriptors.push_back(desc); + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp new file mode 100644 index 000000000..e9b55f835 --- /dev/null +++ b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp @@ -0,0 +1,38 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
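+
+// Resolves every argument through its chain of Identity instructions and then
+// erases the Identity/Void instructions themselves, invalidating them once the
+// whole program has been rewritten.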
+ +#include <vector> + +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Optimization { + +void IdentityRemovalPass(IR::Program& program) { + std::vector<IR::Inst*> to_invalidate; + for (IR::Block* const block : program.blocks) { + for (auto inst = block->begin(); inst != block->end();) { + const size_t num_args{inst->NumArgs()}; + for (size_t i = 0; i < num_args; ++i) { + IR::Value arg; + while ((arg = inst->Arg(i)).IsIdentity()) { + inst->SetArg(i, arg.Inst()->Arg(0)); + } + } + if (inst->GetOpcode() == IR::Opcode::Identity || + inst->GetOpcode() == IR::Opcode::Void) { + to_invalidate.push_back(&*inst); + inst = block->Instructions().erase(inst); + } else { + ++inst; + } + } + } + for (IR::Inst* const inst : to_invalidate) { + inst->Invalidate(); + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp new file mode 100644 index 000000000..773e1f961 --- /dev/null +++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp @@ -0,0 +1,143 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> + +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Optimization { +namespace { +IR::Opcode Replace(IR::Opcode op) { + switch (op) { + case IR::Opcode::FPAbs16: + return IR::Opcode::FPAbs32; + case IR::Opcode::FPAdd16: + return IR::Opcode::FPAdd32; + case IR::Opcode::FPCeil16: + return IR::Opcode::FPCeil32; + case IR::Opcode::FPFloor16: + return IR::Opcode::FPFloor32; + case IR::Opcode::FPFma16: + return IR::Opcode::FPFma32; + case IR::Opcode::FPMul16: + return IR::Opcode::FPMul32; + case IR::Opcode::FPNeg16: + return IR::Opcode::FPNeg32; + case IR::Opcode::FPRoundEven16: + return IR::Opcode::FPRoundEven32; + case IR::Opcode::FPSaturate16: + return IR::Opcode::FPSaturate32; + case IR::Opcode::FPClamp16: + return IR::Opcode::FPClamp32; + case IR::Opcode::FPTrunc16: + return IR::Opcode::FPTrunc32; + case IR::Opcode::CompositeConstructF16x2: + return IR::Opcode::CompositeConstructF32x2; + case IR::Opcode::CompositeConstructF16x3: + return IR::Opcode::CompositeConstructF32x3; + case IR::Opcode::CompositeConstructF16x4: + return IR::Opcode::CompositeConstructF32x4; + case IR::Opcode::CompositeExtractF16x2: + return IR::Opcode::CompositeExtractF32x2; + case IR::Opcode::CompositeExtractF16x3: + return IR::Opcode::CompositeExtractF32x3; + case IR::Opcode::CompositeExtractF16x4: + return IR::Opcode::CompositeExtractF32x4; + case IR::Opcode::CompositeInsertF16x2: + return IR::Opcode::CompositeInsertF32x2; + case IR::Opcode::CompositeInsertF16x3: + return IR::Opcode::CompositeInsertF32x3; + case IR::Opcode::CompositeInsertF16x4: + return IR::Opcode::CompositeInsertF32x4; + case IR::Opcode::FPOrdEqual16: + return IR::Opcode::FPOrdEqual32; + case IR::Opcode::FPUnordEqual16: + return IR::Opcode::FPUnordEqual32; + case IR::Opcode::FPOrdNotEqual16: + return IR::Opcode::FPOrdNotEqual32; + case IR::Opcode::FPUnordNotEqual16: + return IR::Opcode::FPUnordNotEqual32; + case IR::Opcode::FPOrdLessThan16: + return IR::Opcode::FPOrdLessThan32; + case IR::Opcode::FPUnordLessThan16: + return IR::Opcode::FPUnordLessThan32; + case IR::Opcode::FPOrdGreaterThan16: + return 
IR::Opcode::FPOrdGreaterThan32; + case IR::Opcode::FPUnordGreaterThan16: + return IR::Opcode::FPUnordGreaterThan32; + case IR::Opcode::FPOrdLessThanEqual16: + return IR::Opcode::FPOrdLessThanEqual32; + case IR::Opcode::FPUnordLessThanEqual16: + return IR::Opcode::FPUnordLessThanEqual32; + case IR::Opcode::FPOrdGreaterThanEqual16: + return IR::Opcode::FPOrdGreaterThanEqual32; + case IR::Opcode::FPUnordGreaterThanEqual16: + return IR::Opcode::FPUnordGreaterThanEqual32; + case IR::Opcode::FPIsNan16: + return IR::Opcode::FPIsNan32; + case IR::Opcode::ConvertS16F16: + return IR::Opcode::ConvertS16F32; + case IR::Opcode::ConvertS32F16: + return IR::Opcode::ConvertS32F32; + case IR::Opcode::ConvertS64F16: + return IR::Opcode::ConvertS64F32; + case IR::Opcode::ConvertU16F16: + return IR::Opcode::ConvertU16F32; + case IR::Opcode::ConvertU32F16: + return IR::Opcode::ConvertU32F32; + case IR::Opcode::ConvertU64F16: + return IR::Opcode::ConvertU64F32; + case IR::Opcode::PackFloat2x16: + return IR::Opcode::PackHalf2x16; + case IR::Opcode::UnpackFloat2x16: + return IR::Opcode::UnpackHalf2x16; + case IR::Opcode::ConvertF32F16: + return IR::Opcode::Identity; + case IR::Opcode::ConvertF16F32: + return IR::Opcode::Identity; + case IR::Opcode::ConvertF16S8: + return IR::Opcode::ConvertF32S8; + case IR::Opcode::ConvertF16S16: + return IR::Opcode::ConvertF32S16; + case IR::Opcode::ConvertF16S32: + return IR::Opcode::ConvertF32S32; + case IR::Opcode::ConvertF16S64: + return IR::Opcode::ConvertF32S64; + case IR::Opcode::ConvertF16U8: + return IR::Opcode::ConvertF32U8; + case IR::Opcode::ConvertF16U16: + return IR::Opcode::ConvertF32U16; + case IR::Opcode::ConvertF16U32: + return IR::Opcode::ConvertF32U32; + case IR::Opcode::ConvertF16U64: + return IR::Opcode::ConvertF32U64; + case IR::Opcode::GlobalAtomicAddF16x2: + return IR::Opcode::GlobalAtomicAddF32x2; + case IR::Opcode::StorageAtomicAddF16x2: + return IR::Opcode::StorageAtomicAddF32x2; + case IR::Opcode::GlobalAtomicMinF16x2: + return IR::Opcode::GlobalAtomicMinF32x2; + case IR::Opcode::StorageAtomicMinF16x2: + return IR::Opcode::StorageAtomicMinF32x2; + case IR::Opcode::GlobalAtomicMaxF16x2: + return IR::Opcode::GlobalAtomicMaxF32x2; + case IR::Opcode::StorageAtomicMaxF16x2: + return IR::Opcode::StorageAtomicMaxF32x2; + default: + return op; + } +} +} // Anonymous namespace + +void LowerFp16ToFp32(IR::Program& program) { + for (IR::Block* const block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + inst.ReplaceOpcode(Replace(inst.GetOpcode())); + } + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp new file mode 100644 index 000000000..e80d3d1d9 --- /dev/null +++ b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp @@ -0,0 +1,218 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
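+
+// Lowers 64-bit integer operations into pairs of 32-bit operations, e.g.
+// (sketch) IAdd64 a, b becomes:
+//   lo = IAdd32 a.lo, b.lo
+//   hi = IAdd32 (IAdd32 a.hi, b.hi), carry(lo)
+// PackUint2x32 and UnpackUint2x32 are turned into identities so the 32-bit
+// halves are consumed directly.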
+ +#include <utility> + +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Optimization { +namespace { +std::pair<IR::U32, IR::U32> Unpack(IR::IREmitter& ir, const IR::Value& packed) { + if (packed.IsImmediate()) { + const u64 value{packed.U64()}; + return { + ir.Imm32(static_cast<u32>(value)), + ir.Imm32(static_cast<u32>(value >> 32)), + }; + } else { + return std::pair<IR::U32, IR::U32>{ + ir.CompositeExtract(packed, 0u), + ir.CompositeExtract(packed, 1u), + }; + } +} + +void IAdd64To32(IR::Block& block, IR::Inst& inst) { + if (inst.HasAssociatedPseudoOperation()) { + throw NotImplementedException("IAdd64 emulation with pseudo instructions"); + } + IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst)); + const auto [a_lo, a_hi]{Unpack(ir, inst.Arg(0))}; + const auto [b_lo, b_hi]{Unpack(ir, inst.Arg(1))}; + + const IR::U32 ret_lo{ir.IAdd(a_lo, b_lo)}; + const IR::U32 carry{ir.Select(ir.GetCarryFromOp(ret_lo), ir.Imm32(1u), ir.Imm32(0u))}; + + const IR::U32 ret_hi{ir.IAdd(ir.IAdd(a_hi, b_hi), carry)}; + inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi)); +} + +void ISub64To32(IR::Block& block, IR::Inst& inst) { + if (inst.HasAssociatedPseudoOperation()) { + throw NotImplementedException("ISub64 emulation with pseudo instructions"); + } + IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst)); + const auto [a_lo, a_hi]{Unpack(ir, inst.Arg(0))}; + const auto [b_lo, b_hi]{Unpack(ir, inst.Arg(1))}; + + const IR::U32 ret_lo{ir.ISub(a_lo, b_lo)}; + const IR::U1 underflow{ir.IGreaterThan(ret_lo, a_lo, false)}; + const IR::U32 underflow_bit{ir.Select(underflow, ir.Imm32(1u), ir.Imm32(0u))}; + + const IR::U32 ret_hi{ir.ISub(ir.ISub(a_hi, b_hi), underflow_bit)}; + inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi)); +} + +void INeg64To32(IR::Block& block, IR::Inst& inst) { + if (inst.HasAssociatedPseudoOperation()) { + throw NotImplementedException("INeg64 emulation with pseudo instructions"); + } + IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst)); + auto [lo, hi]{Unpack(ir, inst.Arg(0))}; + lo = ir.BitwiseNot(lo); + hi = ir.BitwiseNot(hi); + + lo = ir.IAdd(lo, ir.Imm32(1)); + + const IR::U32 carry{ir.Select(ir.GetCarryFromOp(lo), ir.Imm32(1u), ir.Imm32(0u))}; + hi = ir.IAdd(hi, carry); + + inst.ReplaceUsesWith(ir.CompositeConstruct(lo, hi)); +} + +void ShiftLeftLogical64To32(IR::Block& block, IR::Inst& inst) { + if (inst.HasAssociatedPseudoOperation()) { + throw NotImplementedException("ShiftLeftLogical64 emulation with pseudo instructions"); + } + IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst)); + const auto [lo, hi]{Unpack(ir, inst.Arg(0))}; + const IR::U32 shift{inst.Arg(1)}; + + const IR::U32 shifted_lo{ir.ShiftLeftLogical(lo, shift)}; + const IR::U32 shifted_hi{ir.ShiftLeftLogical(hi, shift)}; + + const IR::U32 inv_shift{ir.ISub(shift, ir.Imm32(32))}; + const IR::U1 is_long{ir.IGreaterThanEqual(inv_shift, ir.Imm32(0), true)}; + const IR::U1 is_zero{ir.IEqual(shift, ir.Imm32(0))}; + + const IR::U32 long_ret_lo{ir.Imm32(0)}; + const IR::U32 long_ret_hi{ir.ShiftLeftLogical(lo, inv_shift)}; + + const IR::U32 shift_complement{ir.ISub(ir.Imm32(32), shift)}; + const IR::U32 lo_extract{ir.BitFieldExtract(lo, shift_complement, shift, 
false)}; + const IR::U32 short_ret_lo{shifted_lo}; + const IR::U32 short_ret_hi{ir.BitwiseOr(shifted_hi, lo_extract)}; + + const IR::U32 zero_ret_lo{lo}; + const IR::U32 zero_ret_hi{hi}; + + const IR::U32 non_zero_lo{ir.Select(is_long, long_ret_lo, short_ret_lo)}; + const IR::U32 non_zero_hi{ir.Select(is_long, long_ret_hi, short_ret_hi)}; + + const IR::U32 ret_lo{ir.Select(is_zero, zero_ret_lo, non_zero_lo)}; + const IR::U32 ret_hi{ir.Select(is_zero, zero_ret_hi, non_zero_hi)}; + inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi)); +} + +void ShiftRightLogical64To32(IR::Block& block, IR::Inst& inst) { + if (inst.HasAssociatedPseudoOperation()) { + throw NotImplementedException("ShiftRightLogical64 emulation with pseudo instructions"); + } + IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst)); + const auto [lo, hi]{Unpack(ir, inst.Arg(0))}; + const IR::U32 shift{inst.Arg(1)}; + + const IR::U32 shifted_lo{ir.ShiftRightLogical(lo, shift)}; + const IR::U32 shifted_hi{ir.ShiftRightLogical(hi, shift)}; + + const IR::U32 inv_shift{ir.ISub(shift, ir.Imm32(32))}; + const IR::U1 is_long{ir.IGreaterThanEqual(inv_shift, ir.Imm32(0), true)}; + const IR::U1 is_zero{ir.IEqual(shift, ir.Imm32(0))}; + + const IR::U32 long_ret_hi{ir.Imm32(0)}; + const IR::U32 long_ret_lo{ir.ShiftRightLogical(hi, inv_shift)}; + + const IR::U32 shift_complement{ir.ISub(ir.Imm32(32), shift)}; + const IR::U32 short_hi_extract{ir.BitFieldExtract(hi, ir.Imm32(0), shift)}; + const IR::U32 short_ret_hi{shifted_hi}; + const IR::U32 short_ret_lo{ + ir.BitFieldInsert(shifted_lo, short_hi_extract, shift_complement, shift)}; + + const IR::U32 zero_ret_lo{lo}; + const IR::U32 zero_ret_hi{hi}; + + const IR::U32 non_zero_lo{ir.Select(is_long, long_ret_lo, short_ret_lo)}; + const IR::U32 non_zero_hi{ir.Select(is_long, long_ret_hi, short_ret_hi)}; + + const IR::U32 ret_lo{ir.Select(is_zero, zero_ret_lo, non_zero_lo)}; + const IR::U32 ret_hi{ir.Select(is_zero, zero_ret_hi, non_zero_hi)}; + inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi)); +} + +void ShiftRightArithmetic64To32(IR::Block& block, IR::Inst& inst) { + if (inst.HasAssociatedPseudoOperation()) { + throw NotImplementedException("ShiftRightArithmetic64 emulation with pseudo instructions"); + } + IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst)); + const auto [lo, hi]{Unpack(ir, inst.Arg(0))}; + const IR::U32 shift{inst.Arg(1)}; + + const IR::U32 shifted_lo{ir.ShiftRightLogical(lo, shift)}; + const IR::U32 shifted_hi{ir.ShiftRightArithmetic(hi, shift)}; + + const IR::U32 sign_extension{ir.ShiftRightArithmetic(hi, ir.Imm32(31))}; + + const IR::U32 inv_shift{ir.ISub(shift, ir.Imm32(32))}; + const IR::U1 is_long{ir.IGreaterThanEqual(inv_shift, ir.Imm32(0), true)}; + const IR::U1 is_zero{ir.IEqual(shift, ir.Imm32(0))}; + + const IR::U32 long_ret_hi{sign_extension}; + const IR::U32 long_ret_lo{ir.ShiftRightArithmetic(hi, inv_shift)}; + + const IR::U32 shift_complement{ir.ISub(ir.Imm32(32), shift)}; + const IR::U32 short_hi_extract(ir.BitFieldExtract(hi, ir.Imm32(0), shift)); + const IR::U32 short_ret_hi{shifted_hi}; + const IR::U32 short_ret_lo{ + ir.BitFieldInsert(shifted_lo, short_hi_extract, shift_complement, shift)}; + + const IR::U32 zero_ret_lo{lo}; + const IR::U32 zero_ret_hi{hi}; + + const IR::U32 non_zero_lo{ir.Select(is_long, long_ret_lo, short_ret_lo)}; + const IR::U32 non_zero_hi{ir.Select(is_long, long_ret_hi, short_ret_hi)}; + + const IR::U32 ret_lo{ir.Select(is_zero, zero_ret_lo, non_zero_lo)}; + const IR::U32 
ret_hi{ir.Select(is_zero, zero_ret_hi, non_zero_hi)}; + inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi)); +} + +void Lower(IR::Block& block, IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::PackUint2x32: + case IR::Opcode::UnpackUint2x32: + return inst.ReplaceOpcode(IR::Opcode::Identity); + case IR::Opcode::IAdd64: + return IAdd64To32(block, inst); + case IR::Opcode::ISub64: + return ISub64To32(block, inst); + case IR::Opcode::INeg64: + return INeg64To32(block, inst); + case IR::Opcode::ShiftLeftLogical64: + return ShiftLeftLogical64To32(block, inst); + case IR::Opcode::ShiftRightLogical64: + return ShiftRightLogical64To32(block, inst); + case IR::Opcode::ShiftRightArithmetic64: + return ShiftRightArithmetic64To32(block, inst); + default: + break; + } +} +} // Anonymous namespace + +void LowerInt64ToInt32(IR::Program& program) { + const auto end{program.post_order_blocks.rend()}; + for (auto it = program.post_order_blocks.rbegin(); it != end; ++it) { + IR::Block* const block{*it}; + for (IR::Inst& inst : block->Instructions()) { + Lower(*block, inst); + } + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h new file mode 100644 index 000000000..2f89b1ea0 --- /dev/null +++ b/src/shader_recompiler/ir_opt/passes.h @@ -0,0 +1,32 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <span> + +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/program.h" + +namespace Shader::Optimization { + +void CollectShaderInfoPass(Environment& env, IR::Program& program); +void ConstantPropagationPass(IR::Program& program); +void DeadCodeEliminationPass(IR::Program& program); +void GlobalMemoryToStorageBufferPass(IR::Program& program); +void IdentityRemovalPass(IR::Program& program); +void LowerFp16ToFp32(IR::Program& program); +void LowerInt64ToInt32(IR::Program& program); +void SsaRewritePass(IR::Program& program); +void TexturePass(Environment& env, IR::Program& program); +void VerificationPass(const IR::Program& program); + +// Dual Vertex +void VertexATransformPass(IR::Program& program); +void VertexBTransformPass(IR::Program& program); +void JoinTextureInfo(Info& base, Info& source); +void JoinStorageInfo(Info& base, Info& source); + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp new file mode 100644 index 000000000..53145fb5e --- /dev/null +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -0,0 +1,383 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +// This file implements the SSA rewriting algorithm proposed in +// +// Simple and Efficient Construction of Static Single Assignment Form. +// Braun M., Buchwald S., Hack S., Leiba R., Mallon C., Zwinkau A. (2013) +// In: Jhala R., De Bosschere K. (eds) +// Compiler Construction. CC 2013. +// Lecture Notes in Computer Science, vol 7791. 
+// Springer, Berlin, Heidelberg +// +// https://link.springer.com/chapter/10.1007/978-3-642-37051-9_6 +// + +#include <span> +#include <variant> +#include <vector> + +#include <boost/container/flat_map.hpp> +#include <boost/container/flat_set.hpp> + +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/opcodes.h" +#include "shader_recompiler/frontend/ir/pred.h" +#include "shader_recompiler/frontend/ir/reg.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Optimization { +namespace { +struct FlagTag { + auto operator<=>(const FlagTag&) const noexcept = default; +}; +struct ZeroFlagTag : FlagTag {}; +struct SignFlagTag : FlagTag {}; +struct CarryFlagTag : FlagTag {}; +struct OverflowFlagTag : FlagTag {}; + +struct GotoVariable : FlagTag { + GotoVariable() = default; + explicit GotoVariable(u32 index_) : index{index_} {} + + auto operator<=>(const GotoVariable&) const noexcept = default; + + u32 index; +}; + +struct IndirectBranchVariable { + auto operator<=>(const IndirectBranchVariable&) const noexcept = default; +}; + +using Variant = std::variant<IR::Reg, IR::Pred, ZeroFlagTag, SignFlagTag, CarryFlagTag, + OverflowFlagTag, GotoVariable, IndirectBranchVariable>; +using ValueMap = boost::container::flat_map<IR::Block*, IR::Value>; + +struct DefTable { + const IR::Value& Def(IR::Block* block, IR::Reg variable) { + return block->SsaRegValue(variable); + } + void SetDef(IR::Block* block, IR::Reg variable, const IR::Value& value) { + block->SetSsaRegValue(variable, value); + } + + const IR::Value& Def(IR::Block* block, IR::Pred variable) { + return preds[IR::PredIndex(variable)][block]; + } + void SetDef(IR::Block* block, IR::Pred variable, const IR::Value& value) { + preds[IR::PredIndex(variable)].insert_or_assign(block, value); + } + + const IR::Value& Def(IR::Block* block, GotoVariable variable) { + return goto_vars[variable.index][block]; + } + void SetDef(IR::Block* block, GotoVariable variable, const IR::Value& value) { + goto_vars[variable.index].insert_or_assign(block, value); + } + + const IR::Value& Def(IR::Block* block, IndirectBranchVariable) { + return indirect_branch_var[block]; + } + void SetDef(IR::Block* block, IndirectBranchVariable, const IR::Value& value) { + indirect_branch_var.insert_or_assign(block, value); + } + + const IR::Value& Def(IR::Block* block, ZeroFlagTag) { + return zero_flag[block]; + } + void SetDef(IR::Block* block, ZeroFlagTag, const IR::Value& value) { + zero_flag.insert_or_assign(block, value); + } + + const IR::Value& Def(IR::Block* block, SignFlagTag) { + return sign_flag[block]; + } + void SetDef(IR::Block* block, SignFlagTag, const IR::Value& value) { + sign_flag.insert_or_assign(block, value); + } + + const IR::Value& Def(IR::Block* block, CarryFlagTag) { + return carry_flag[block]; + } + void SetDef(IR::Block* block, CarryFlagTag, const IR::Value& value) { + carry_flag.insert_or_assign(block, value); + } + + const IR::Value& Def(IR::Block* block, OverflowFlagTag) { + return overflow_flag[block]; + } + void SetDef(IR::Block* block, OverflowFlagTag, const IR::Value& value) { + overflow_flag.insert_or_assign(block, value); + } + + std::array<ValueMap, IR::NUM_USER_PREDS> preds; + boost::container::flat_map<u32, ValueMap> goto_vars; + ValueMap indirect_branch_var; + ValueMap zero_flag; + ValueMap sign_flag; + ValueMap carry_flag; + ValueMap overflow_flag; +}; + +IR::Opcode UndefOpcode(IR::Reg) noexcept { + return IR::Opcode::UndefU32; +} + 
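The Braun et al. paper cited at the top of this file is what the pass implements: definitions are recorded per variable and per block through DefTable, reads recurse into predecessors, and any phi whose operands all resolve to a single value (or back to the phi itself) is folded away by TryRemoveTrivialPhi further down. A small standalone sketch of just that triviality test, with plain integer value IDs standing in for IR::Value:

#include <cassert>
#include <optional>
#include <vector>

// Toy model: a phi is identified by its own id and carries operand value ids.
struct Phi {
    int id;
    std::vector<int> operands;
};

// Returns the unique operand if the phi is trivial, or nullopt when it
// really merges two or more distinct values and must be kept.
std::optional<int> TryGetTrivialValue(const Phi& phi) {
    std::optional<int> same;
    for (const int op : phi.operands) {
        if (op == phi.id || (same && op == *same)) {
            continue; // self-reference or already-seen value
        }
        if (same) {
            return std::nullopt; // second distinct value: not trivial
        }
        same = op;
    }
    return same; // empty for an operandless or unreachable phi
}

int main() {
    assert(*TryGetTrivialValue({.id = 10, .operands = {7, 10, 7}}) == 7);
    assert(!TryGetTrivialValue({.id = 10, .operands = {7, 8}}).has_value());
}

In the pass itself an empty result leads to an undef instruction being materialised, as TryRemoveTrivialPhi shows below.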
+IR::Opcode UndefOpcode(IR::Pred) noexcept { + return IR::Opcode::UndefU1; +} + +IR::Opcode UndefOpcode(const FlagTag&) noexcept { + return IR::Opcode::UndefU1; +} + +IR::Opcode UndefOpcode(IndirectBranchVariable) noexcept { + return IR::Opcode::UndefU32; +} + +enum class Status { + Start, + SetValue, + PreparePhiArgument, + PushPhiArgument, +}; + +template <typename Type> +struct ReadState { + ReadState(IR::Block* block_) : block{block_} {} + ReadState() = default; + + IR::Block* block{}; + IR::Value result{}; + IR::Inst* phi{}; + IR::Block* const* pred_it{}; + IR::Block* const* pred_end{}; + Status pc{Status::Start}; +}; + +class Pass { +public: + template <typename Type> + void WriteVariable(Type variable, IR::Block* block, const IR::Value& value) { + current_def.SetDef(block, variable, value); + } + + template <typename Type> + IR::Value ReadVariable(Type variable, IR::Block* root_block) { + boost::container::small_vector<ReadState<Type>, 64> stack{ + ReadState<Type>(nullptr), + ReadState<Type>(root_block), + }; + const auto prepare_phi_operand{[&] { + if (stack.back().pred_it == stack.back().pred_end) { + IR::Inst* const phi{stack.back().phi}; + IR::Block* const block{stack.back().block}; + const IR::Value result{TryRemoveTrivialPhi(*phi, block, UndefOpcode(variable))}; + stack.pop_back(); + stack.back().result = result; + WriteVariable(variable, block, result); + } else { + IR::Block* const imm_pred{*stack.back().pred_it}; + stack.back().pc = Status::PushPhiArgument; + stack.emplace_back(imm_pred); + } + }}; + do { + IR::Block* const block{stack.back().block}; + switch (stack.back().pc) { + case Status::Start: { + if (const IR::Value& def = current_def.Def(block, variable); !def.IsEmpty()) { + stack.back().result = def; + } else if (!block->IsSsaSealed()) { + // Incomplete CFG + IR::Inst* phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)}; + phi->SetFlags(IR::TypeOf(UndefOpcode(variable))); + + incomplete_phis[block].insert_or_assign(variable, phi); + stack.back().result = IR::Value{&*phi}; + } else if (const std::span imm_preds = block->ImmPredecessors(); + imm_preds.size() == 1) { + // Optimize the common case of one predecessor: no phi needed + stack.back().pc = Status::SetValue; + stack.emplace_back(imm_preds.front()); + break; + } else { + // Break potential cycles with operandless phi + IR::Inst* const phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)}; + phi->SetFlags(IR::TypeOf(UndefOpcode(variable))); + + WriteVariable(variable, block, IR::Value{phi}); + + stack.back().phi = phi; + stack.back().pred_it = imm_preds.data(); + stack.back().pred_end = imm_preds.data() + imm_preds.size(); + prepare_phi_operand(); + break; + } + } + [[fallthrough]]; + case Status::SetValue: { + const IR::Value result{stack.back().result}; + WriteVariable(variable, block, result); + stack.pop_back(); + stack.back().result = result; + break; + } + case Status::PushPhiArgument: { + IR::Inst* const phi{stack.back().phi}; + phi->AddPhiOperand(*stack.back().pred_it, stack.back().result); + ++stack.back().pred_it; + } + [[fallthrough]]; + case Status::PreparePhiArgument: + prepare_phi_operand(); + break; + } + } while (stack.size() > 1); + return stack.back().result; + } + + void SealBlock(IR::Block* block) { + const auto it{incomplete_phis.find(block)}; + if (it != incomplete_phis.end()) { + for (auto& pair : it->second) { + auto& variant{pair.first}; + auto& phi{pair.second}; + std::visit([&](auto& variable) { AddPhiOperands(variable, *phi, block); }, variant); + } + } + 
block->SsaSeal(); + } + +private: + template <typename Type> + IR::Value AddPhiOperands(Type variable, IR::Inst& phi, IR::Block* block) { + for (IR::Block* const imm_pred : block->ImmPredecessors()) { + phi.AddPhiOperand(imm_pred, ReadVariable(variable, imm_pred)); + } + return TryRemoveTrivialPhi(phi, block, UndefOpcode(variable)); + } + + IR::Value TryRemoveTrivialPhi(IR::Inst& phi, IR::Block* block, IR::Opcode undef_opcode) { + IR::Value same; + const size_t num_args{phi.NumArgs()}; + for (size_t arg_index = 0; arg_index < num_args; ++arg_index) { + const IR::Value& op{phi.Arg(arg_index)}; + if (op.Resolve() == same.Resolve() || op == IR::Value{&phi}) { + // Unique value or self-reference + continue; + } + if (!same.IsEmpty()) { + // The phi merges at least two values: not trivial + return IR::Value{&phi}; + } + same = op; + } + // Remove the phi node from the block, it will be reinserted + IR::Block::InstructionList& list{block->Instructions()}; + list.erase(IR::Block::InstructionList::s_iterator_to(phi)); + + // Find the first non-phi instruction and use it as an insertion point + IR::Block::iterator reinsert_point{std::ranges::find_if_not(list, IR::IsPhi)}; + if (same.IsEmpty()) { + // The phi is unreachable or in the start block + // Insert an undefined instruction and make it the phi node replacement + // The "phi" node reinsertion point is specified after this instruction + reinsert_point = block->PrependNewInst(reinsert_point, undef_opcode); + same = IR::Value{&*reinsert_point}; + ++reinsert_point; + } + // Reinsert the phi node and reroute all its uses to the "same" value + list.insert(reinsert_point, phi); + phi.ReplaceUsesWith(same); + // TODO: Try to recursively remove all phi users, which might have become trivial + return same; + } + + boost::container::flat_map<IR::Block*, boost::container::flat_map<Variant, IR::Inst*>> + incomplete_phis; + DefTable current_def; +}; + +void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::SetRegister: + if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) { + pass.WriteVariable(reg, block, inst.Arg(1)); + } + break; + case IR::Opcode::SetPred: + if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) { + pass.WriteVariable(pred, block, inst.Arg(1)); + } + break; + case IR::Opcode::SetGotoVariable: + pass.WriteVariable(GotoVariable{inst.Arg(0).U32()}, block, inst.Arg(1)); + break; + case IR::Opcode::SetIndirectBranchVariable: + pass.WriteVariable(IndirectBranchVariable{}, block, inst.Arg(0)); + break; + case IR::Opcode::SetZFlag: + pass.WriteVariable(ZeroFlagTag{}, block, inst.Arg(0)); + break; + case IR::Opcode::SetSFlag: + pass.WriteVariable(SignFlagTag{}, block, inst.Arg(0)); + break; + case IR::Opcode::SetCFlag: + pass.WriteVariable(CarryFlagTag{}, block, inst.Arg(0)); + break; + case IR::Opcode::SetOFlag: + pass.WriteVariable(OverflowFlagTag{}, block, inst.Arg(0)); + break; + case IR::Opcode::GetRegister: + if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) { + inst.ReplaceUsesWith(pass.ReadVariable(reg, block)); + } + break; + case IR::Opcode::GetPred: + if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) { + inst.ReplaceUsesWith(pass.ReadVariable(pred, block)); + } + break; + case IR::Opcode::GetGotoVariable: + inst.ReplaceUsesWith(pass.ReadVariable(GotoVariable{inst.Arg(0).U32()}, block)); + break; + case IR::Opcode::GetIndirectBranchVariable: + inst.ReplaceUsesWith(pass.ReadVariable(IndirectBranchVariable{}, block)); + break; + 
case IR::Opcode::GetZFlag: + inst.ReplaceUsesWith(pass.ReadVariable(ZeroFlagTag{}, block)); + break; + case IR::Opcode::GetSFlag: + inst.ReplaceUsesWith(pass.ReadVariable(SignFlagTag{}, block)); + break; + case IR::Opcode::GetCFlag: + inst.ReplaceUsesWith(pass.ReadVariable(CarryFlagTag{}, block)); + break; + case IR::Opcode::GetOFlag: + inst.ReplaceUsesWith(pass.ReadVariable(OverflowFlagTag{}, block)); + break; + default: + break; + } +} + +void VisitBlock(Pass& pass, IR::Block* block) { + for (IR::Inst& inst : block->Instructions()) { + VisitInst(pass, block, inst); + } + pass.SealBlock(block); +} +} // Anonymous namespace + +void SsaRewritePass(IR::Program& program) { + Pass pass; + const auto end{program.post_order_blocks.rend()}; + for (auto block = program.post_order_blocks.rbegin(); block != end; ++block) { + VisitBlock(pass, *block); + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp new file mode 100644 index 000000000..44ad10d43 --- /dev/null +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -0,0 +1,523 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <bit> +#include <optional> + +#include <boost/container/small_vector.hpp> + +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/breadth_first_search.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/ir_opt/passes.h" +#include "shader_recompiler/shader_info.h" + +namespace Shader::Optimization { +namespace { +struct ConstBufferAddr { + u32 index; + u32 offset; + u32 secondary_index; + u32 secondary_offset; + IR::U32 dynamic_offset; + u32 count; + bool has_secondary; +}; + +struct TextureInst { + ConstBufferAddr cbuf; + IR::Inst* inst; + IR::Block* block; +}; + +using TextureInstVector = boost::container::small_vector<TextureInst, 24>; + +constexpr u32 DESCRIPTOR_SIZE = 8; +constexpr u32 DESCRIPTOR_SIZE_SHIFT = static_cast<u32>(std::countr_zero(DESCRIPTOR_SIZE)); + +IR::Opcode IndexedInstruction(const IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::BindlessImageSampleImplicitLod: + case IR::Opcode::BoundImageSampleImplicitLod: + return IR::Opcode::ImageSampleImplicitLod; + case IR::Opcode::BoundImageSampleExplicitLod: + case IR::Opcode::BindlessImageSampleExplicitLod: + return IR::Opcode::ImageSampleExplicitLod; + case IR::Opcode::BoundImageSampleDrefImplicitLod: + case IR::Opcode::BindlessImageSampleDrefImplicitLod: + return IR::Opcode::ImageSampleDrefImplicitLod; + case IR::Opcode::BoundImageSampleDrefExplicitLod: + case IR::Opcode::BindlessImageSampleDrefExplicitLod: + return IR::Opcode::ImageSampleDrefExplicitLod; + case IR::Opcode::BindlessImageGather: + case IR::Opcode::BoundImageGather: + return IR::Opcode::ImageGather; + case IR::Opcode::BindlessImageGatherDref: + case IR::Opcode::BoundImageGatherDref: + return IR::Opcode::ImageGatherDref; + case IR::Opcode::BindlessImageFetch: + case IR::Opcode::BoundImageFetch: + return IR::Opcode::ImageFetch; + case IR::Opcode::BoundImageQueryDimensions: + case IR::Opcode::BindlessImageQueryDimensions: + return IR::Opcode::ImageQueryDimensions; + case IR::Opcode::BoundImageQueryLod: + case IR::Opcode::BindlessImageQueryLod: + return IR::Opcode::ImageQueryLod; + case IR::Opcode::BoundImageGradient: + case 
IR::Opcode::BindlessImageGradient: + return IR::Opcode::ImageGradient; + case IR::Opcode::BoundImageRead: + case IR::Opcode::BindlessImageRead: + return IR::Opcode::ImageRead; + case IR::Opcode::BoundImageWrite: + case IR::Opcode::BindlessImageWrite: + return IR::Opcode::ImageWrite; + case IR::Opcode::BoundImageAtomicIAdd32: + case IR::Opcode::BindlessImageAtomicIAdd32: + return IR::Opcode::ImageAtomicIAdd32; + case IR::Opcode::BoundImageAtomicSMin32: + case IR::Opcode::BindlessImageAtomicSMin32: + return IR::Opcode::ImageAtomicSMin32; + case IR::Opcode::BoundImageAtomicUMin32: + case IR::Opcode::BindlessImageAtomicUMin32: + return IR::Opcode::ImageAtomicUMin32; + case IR::Opcode::BoundImageAtomicSMax32: + case IR::Opcode::BindlessImageAtomicSMax32: + return IR::Opcode::ImageAtomicSMax32; + case IR::Opcode::BoundImageAtomicUMax32: + case IR::Opcode::BindlessImageAtomicUMax32: + return IR::Opcode::ImageAtomicUMax32; + case IR::Opcode::BoundImageAtomicInc32: + case IR::Opcode::BindlessImageAtomicInc32: + return IR::Opcode::ImageAtomicInc32; + case IR::Opcode::BoundImageAtomicDec32: + case IR::Opcode::BindlessImageAtomicDec32: + return IR::Opcode::ImageAtomicDec32; + case IR::Opcode::BoundImageAtomicAnd32: + case IR::Opcode::BindlessImageAtomicAnd32: + return IR::Opcode::ImageAtomicAnd32; + case IR::Opcode::BoundImageAtomicOr32: + case IR::Opcode::BindlessImageAtomicOr32: + return IR::Opcode::ImageAtomicOr32; + case IR::Opcode::BoundImageAtomicXor32: + case IR::Opcode::BindlessImageAtomicXor32: + return IR::Opcode::ImageAtomicXor32; + case IR::Opcode::BoundImageAtomicExchange32: + case IR::Opcode::BindlessImageAtomicExchange32: + return IR::Opcode::ImageAtomicExchange32; + default: + return IR::Opcode::Void; + } +} + +bool IsBindless(const IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::BindlessImageSampleImplicitLod: + case IR::Opcode::BindlessImageSampleExplicitLod: + case IR::Opcode::BindlessImageSampleDrefImplicitLod: + case IR::Opcode::BindlessImageSampleDrefExplicitLod: + case IR::Opcode::BindlessImageGather: + case IR::Opcode::BindlessImageGatherDref: + case IR::Opcode::BindlessImageFetch: + case IR::Opcode::BindlessImageQueryDimensions: + case IR::Opcode::BindlessImageQueryLod: + case IR::Opcode::BindlessImageGradient: + case IR::Opcode::BindlessImageRead: + case IR::Opcode::BindlessImageWrite: + case IR::Opcode::BindlessImageAtomicIAdd32: + case IR::Opcode::BindlessImageAtomicSMin32: + case IR::Opcode::BindlessImageAtomicUMin32: + case IR::Opcode::BindlessImageAtomicSMax32: + case IR::Opcode::BindlessImageAtomicUMax32: + case IR::Opcode::BindlessImageAtomicInc32: + case IR::Opcode::BindlessImageAtomicDec32: + case IR::Opcode::BindlessImageAtomicAnd32: + case IR::Opcode::BindlessImageAtomicOr32: + case IR::Opcode::BindlessImageAtomicXor32: + case IR::Opcode::BindlessImageAtomicExchange32: + return true; + case IR::Opcode::BoundImageSampleImplicitLod: + case IR::Opcode::BoundImageSampleExplicitLod: + case IR::Opcode::BoundImageSampleDrefImplicitLod: + case IR::Opcode::BoundImageSampleDrefExplicitLod: + case IR::Opcode::BoundImageGather: + case IR::Opcode::BoundImageGatherDref: + case IR::Opcode::BoundImageFetch: + case IR::Opcode::BoundImageQueryDimensions: + case IR::Opcode::BoundImageQueryLod: + case IR::Opcode::BoundImageGradient: + case IR::Opcode::BoundImageRead: + case IR::Opcode::BoundImageWrite: + case IR::Opcode::BoundImageAtomicIAdd32: + case IR::Opcode::BoundImageAtomicSMin32: + case IR::Opcode::BoundImageAtomicUMin32: + case 
IR::Opcode::BoundImageAtomicSMax32: + case IR::Opcode::BoundImageAtomicUMax32: + case IR::Opcode::BoundImageAtomicInc32: + case IR::Opcode::BoundImageAtomicDec32: + case IR::Opcode::BoundImageAtomicAnd32: + case IR::Opcode::BoundImageAtomicOr32: + case IR::Opcode::BoundImageAtomicXor32: + case IR::Opcode::BoundImageAtomicExchange32: + return false; + default: + throw InvalidArgument("Invalid opcode {}", inst.GetOpcode()); + } +} + +bool IsTextureInstruction(const IR::Inst& inst) { + return IndexedInstruction(inst) != IR::Opcode::Void; +} + +std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst); + +std::optional<ConstBufferAddr> Track(const IR::Value& value) { + return IR::BreadthFirstSearch(value, TryGetConstBuffer); +} + +std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst) { + switch (inst->GetOpcode()) { + default: + return std::nullopt; + case IR::Opcode::BitwiseOr32: { + std::optional lhs{Track(inst->Arg(0))}; + std::optional rhs{Track(inst->Arg(1))}; + if (!lhs || !rhs) { + return std::nullopt; + } + if (lhs->has_secondary || rhs->has_secondary) { + return std::nullopt; + } + if (lhs->count > 1 || rhs->count > 1) { + return std::nullopt; + } + if (lhs->index > rhs->index || lhs->offset > rhs->offset) { + std::swap(lhs, rhs); + } + return ConstBufferAddr{ + .index = lhs->index, + .offset = lhs->offset, + .secondary_index = rhs->index, + .secondary_offset = rhs->offset, + .dynamic_offset = {}, + .count = 1, + .has_secondary = true, + }; + } + case IR::Opcode::GetCbufU32x2: + case IR::Opcode::GetCbufU32: + break; + } + const IR::Value index{inst->Arg(0)}; + const IR::Value offset{inst->Arg(1)}; + if (!index.IsImmediate()) { + // Reading a bindless texture from variable indices is valid + // but not supported here at the moment + return std::nullopt; + } + if (offset.IsImmediate()) { + return ConstBufferAddr{ + .index = index.U32(), + .offset = offset.U32(), + .secondary_index = 0, + .secondary_offset = 0, + .dynamic_offset = {}, + .count = 1, + .has_secondary = false, + }; + } + IR::Inst* const offset_inst{offset.InstRecursive()}; + if (offset_inst->GetOpcode() != IR::Opcode::IAdd32) { + return std::nullopt; + } + u32 base_offset{}; + IR::U32 dynamic_offset; + if (offset_inst->Arg(0).IsImmediate()) { + base_offset = offset_inst->Arg(0).U32(); + dynamic_offset = IR::U32{offset_inst->Arg(1)}; + } else if (offset_inst->Arg(1).IsImmediate()) { + base_offset = offset_inst->Arg(1).U32(); + dynamic_offset = IR::U32{offset_inst->Arg(0)}; + } else { + return std::nullopt; + } + return ConstBufferAddr{ + .index = index.U32(), + .offset = base_offset, + .secondary_index = 0, + .secondary_offset = 0, + .dynamic_offset = dynamic_offset, + .count = 8, + .has_secondary = false, + }; +} + +TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { + ConstBufferAddr addr; + if (IsBindless(inst)) { + const std::optional<ConstBufferAddr> track_addr{Track(inst.Arg(0))}; + if (!track_addr) { + throw NotImplementedException("Failed to track bindless texture constant buffer"); + } + addr = *track_addr; + } else { + addr = ConstBufferAddr{ + .index = env.TextureBoundBuffer(), + .offset = inst.Arg(0).U32(), + .secondary_index = 0, + .secondary_offset = 0, + .dynamic_offset = {}, + .count = 1, + .has_secondary = false, + }; + } + return TextureInst{ + .cbuf = addr, + .inst = &inst, + .block = block, + }; +} + +TextureType ReadTextureType(Environment& env, const ConstBufferAddr& cbuf) { + const u32 secondary_index{cbuf.has_secondary ? 
cbuf.secondary_index : cbuf.index}; + const u32 secondary_offset{cbuf.has_secondary ? cbuf.secondary_offset : cbuf.offset}; + const u32 lhs_raw{env.ReadCbufValue(cbuf.index, cbuf.offset)}; + const u32 rhs_raw{env.ReadCbufValue(secondary_index, secondary_offset)}; + return env.ReadTextureType(lhs_raw | rhs_raw); +} + +class Descriptors { +public: + explicit Descriptors(TextureBufferDescriptors& texture_buffer_descriptors_, + ImageBufferDescriptors& image_buffer_descriptors_, + TextureDescriptors& texture_descriptors_, + ImageDescriptors& image_descriptors_) + : texture_buffer_descriptors{texture_buffer_descriptors_}, + image_buffer_descriptors{image_buffer_descriptors_}, + texture_descriptors{texture_descriptors_}, image_descriptors{image_descriptors_} {} + + u32 Add(const TextureBufferDescriptor& desc) { + return Add(texture_buffer_descriptors, desc, [&desc](const auto& existing) { + return desc.cbuf_index == existing.cbuf_index && + desc.cbuf_offset == existing.cbuf_offset && + desc.secondary_cbuf_index == existing.secondary_cbuf_index && + desc.secondary_cbuf_offset == existing.secondary_cbuf_offset && + desc.count == existing.count && desc.size_shift == existing.size_shift && + desc.has_secondary == existing.has_secondary; + }); + } + + u32 Add(const ImageBufferDescriptor& desc) { + const u32 index{Add(image_buffer_descriptors, desc, [&desc](const auto& existing) { + return desc.format == existing.format && desc.cbuf_index == existing.cbuf_index && + desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count && + desc.size_shift == existing.size_shift; + })}; + image_buffer_descriptors[index].is_written |= desc.is_written; + image_buffer_descriptors[index].is_read |= desc.is_read; + return index; + } + + u32 Add(const TextureDescriptor& desc) { + return Add(texture_descriptors, desc, [&desc](const auto& existing) { + return desc.type == existing.type && desc.is_depth == existing.is_depth && + desc.has_secondary == existing.has_secondary && + desc.cbuf_index == existing.cbuf_index && + desc.cbuf_offset == existing.cbuf_offset && + desc.secondary_cbuf_index == existing.secondary_cbuf_index && + desc.secondary_cbuf_offset == existing.secondary_cbuf_offset && + desc.count == existing.count && desc.size_shift == existing.size_shift; + }); + } + + u32 Add(const ImageDescriptor& desc) { + const u32 index{Add(image_descriptors, desc, [&desc](const auto& existing) { + return desc.type == existing.type && desc.format == existing.format && + desc.cbuf_index == existing.cbuf_index && + desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count && + desc.size_shift == existing.size_shift; + })}; + image_descriptors[index].is_written |= desc.is_written; + image_descriptors[index].is_read |= desc.is_read; + return index; + } + +private: + template <typename Descriptors, typename Descriptor, typename Func> + static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) { + // TODO: Handle arrays + const auto it{std::ranges::find_if(descriptors, pred)}; + if (it != descriptors.end()) { + return static_cast<u32>(std::distance(descriptors.begin(), it)); + } + descriptors.push_back(desc); + return static_cast<u32>(descriptors.size()) - 1; + } + + TextureBufferDescriptors& texture_buffer_descriptors; + ImageBufferDescriptors& image_buffer_descriptors; + TextureDescriptors& texture_descriptors; + ImageDescriptors& image_descriptors; +}; +} // Anonymous namespace + +void TexturePass(Environment& env, IR::Program& program) { + TextureInstVector to_replace; + for 
(IR::Block* const block : program.post_order_blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (!IsTextureInstruction(inst)) { + continue; + } + to_replace.push_back(MakeInst(env, block, inst)); + } + } + // Sort instructions to visit textures by constant buffer index, then by offset + std::ranges::sort(to_replace, [](const auto& lhs, const auto& rhs) { + return lhs.cbuf.offset < rhs.cbuf.offset; + }); + std::stable_sort(to_replace.begin(), to_replace.end(), [](const auto& lhs, const auto& rhs) { + return lhs.cbuf.index < rhs.cbuf.index; + }); + Descriptors descriptors{ + program.info.texture_buffer_descriptors, + program.info.image_buffer_descriptors, + program.info.texture_descriptors, + program.info.image_descriptors, + }; + for (TextureInst& texture_inst : to_replace) { + // TODO: Handle arrays + IR::Inst* const inst{texture_inst.inst}; + inst->ReplaceOpcode(IndexedInstruction(*inst)); + + const auto& cbuf{texture_inst.cbuf}; + auto flags{inst->Flags<IR::TextureInstInfo>()}; + switch (inst->GetOpcode()) { + case IR::Opcode::ImageQueryDimensions: + flags.type.Assign(ReadTextureType(env, cbuf)); + inst->SetFlags(flags); + break; + case IR::Opcode::ImageFetch: + if (flags.type != TextureType::Color1D) { + break; + } + if (ReadTextureType(env, cbuf) == TextureType::Buffer) { + // Replace with the bound texture type only when it's a texture buffer + // If the instruction is 1D and the bound type is 2D, don't change the code and let + // the rasterizer robustness handle it + // This happens on Fire Emblem: Three Houses + flags.type.Assign(TextureType::Buffer); + } + break; + default: + break; + } + u32 index; + switch (inst->GetOpcode()) { + case IR::Opcode::ImageRead: + case IR::Opcode::ImageAtomicIAdd32: + case IR::Opcode::ImageAtomicSMin32: + case IR::Opcode::ImageAtomicUMin32: + case IR::Opcode::ImageAtomicSMax32: + case IR::Opcode::ImageAtomicUMax32: + case IR::Opcode::ImageAtomicInc32: + case IR::Opcode::ImageAtomicDec32: + case IR::Opcode::ImageAtomicAnd32: + case IR::Opcode::ImageAtomicOr32: + case IR::Opcode::ImageAtomicXor32: + case IR::Opcode::ImageAtomicExchange32: + case IR::Opcode::ImageWrite: { + if (cbuf.has_secondary) { + throw NotImplementedException("Unexpected separate sampler"); + } + const bool is_written{inst->GetOpcode() != IR::Opcode::ImageRead}; + const bool is_read{inst->GetOpcode() != IR::Opcode::ImageWrite}; + if (flags.type == TextureType::Buffer) { + index = descriptors.Add(ImageBufferDescriptor{ + .format = flags.image_format, + .is_written = is_written, + .is_read = is_read, + .cbuf_index = cbuf.index, + .cbuf_offset = cbuf.offset, + .count = cbuf.count, + .size_shift = DESCRIPTOR_SIZE_SHIFT, + }); + } else { + index = descriptors.Add(ImageDescriptor{ + .type = flags.type, + .format = flags.image_format, + .is_written = is_written, + .is_read = is_read, + .cbuf_index = cbuf.index, + .cbuf_offset = cbuf.offset, + .count = cbuf.count, + .size_shift = DESCRIPTOR_SIZE_SHIFT, + }); + } + break; + } + default: + if (flags.type == TextureType::Buffer) { + index = descriptors.Add(TextureBufferDescriptor{ + .has_secondary = cbuf.has_secondary, + .cbuf_index = cbuf.index, + .cbuf_offset = cbuf.offset, + .secondary_cbuf_index = cbuf.secondary_index, + .secondary_cbuf_offset = cbuf.secondary_offset, + .count = cbuf.count, + .size_shift = DESCRIPTOR_SIZE_SHIFT, + }); + } else { + index = descriptors.Add(TextureDescriptor{ + .type = flags.type, + .is_depth = flags.is_depth != 0, + .has_secondary = cbuf.has_secondary, + .cbuf_index = cbuf.index, + .cbuf_offset 
= cbuf.offset, + .secondary_cbuf_index = cbuf.secondary_index, + .secondary_cbuf_offset = cbuf.secondary_offset, + .count = cbuf.count, + .size_shift = DESCRIPTOR_SIZE_SHIFT, + }); + } + break; + } + flags.descriptor_index.Assign(index); + inst->SetFlags(flags); + + if (cbuf.count > 1) { + const auto insert_point{IR::Block::InstructionList::s_iterator_to(*inst)}; + IR::IREmitter ir{*texture_inst.block, insert_point}; + const IR::U32 shift{ir.Imm32(std::countr_zero(DESCRIPTOR_SIZE))}; + inst->SetArg(0, ir.ShiftRightArithmetic(cbuf.dynamic_offset, shift)); + } else { + inst->SetArg(0, IR::Value{}); + } + } +} + +void JoinTextureInfo(Info& base, Info& source) { + Descriptors descriptors{ + base.texture_buffer_descriptors, + base.image_buffer_descriptors, + base.texture_descriptors, + base.image_descriptors, + }; + for (auto& desc : source.texture_buffer_descriptors) { + descriptors.Add(desc); + } + for (auto& desc : source.image_buffer_descriptors) { + descriptors.Add(desc); + } + for (auto& desc : source.texture_descriptors) { + descriptors.Add(desc); + } + for (auto& desc : source.image_descriptors) { + descriptors.Add(desc); + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/verification_pass.cpp b/src/shader_recompiler/ir_opt/verification_pass.cpp new file mode 100644 index 000000000..975d5aadf --- /dev/null +++ b/src/shader_recompiler/ir_opt/verification_pass.cpp @@ -0,0 +1,98 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <map> +#include <set> + +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Optimization { + +static void ValidateTypes(const IR::Program& program) { + for (const auto& block : program.blocks) { + for (const IR::Inst& inst : *block) { + if (inst.GetOpcode() == IR::Opcode::Phi) { + // Skip validation on phi nodes + continue; + } + const size_t num_args{inst.NumArgs()}; + for (size_t i = 0; i < num_args; ++i) { + const IR::Type t1{inst.Arg(i).Type()}; + const IR::Type t2{IR::ArgTypeOf(inst.GetOpcode(), i)}; + if (!IR::AreTypesCompatible(t1, t2)) { + throw LogicError("Invalid types in block:\n{}", IR::DumpBlock(*block)); + } + } + } + } +} + +static void ValidateUses(const IR::Program& program) { + std::map<IR::Inst*, int> actual_uses; + for (const auto& block : program.blocks) { + for (const IR::Inst& inst : *block) { + const size_t num_args{inst.NumArgs()}; + for (size_t i = 0; i < num_args; ++i) { + const IR::Value arg{inst.Arg(i)}; + if (!arg.IsImmediate()) { + ++actual_uses[arg.Inst()]; + } + } + } + } + for (const auto [inst, uses] : actual_uses) { + if (inst->UseCount() != uses) { + throw LogicError("Invalid uses in block: {}", IR::DumpProgram(program)); + } + } +} + +static void ValidateForwardDeclarations(const IR::Program& program) { + std::set<const IR::Inst*> definitions; + for (const IR::Block* const block : program.blocks) { + for (const IR::Inst& inst : *block) { + definitions.emplace(&inst); + if (inst.GetOpcode() == IR::Opcode::Phi) { + // Phi nodes can have forward declarations + continue; + } + const size_t num_args{inst.NumArgs()}; + for (size_t arg = 0; arg < num_args; ++arg) { + if (inst.Arg(arg).IsImmediate()) { + continue; + } + if (!definitions.contains(inst.Arg(arg).Inst())) { + throw LogicError("Forward declaration in block: {}", 
IR::DumpBlock(*block)); + } + } + } +} + +static void ValidatePhiNodes(const IR::Program& program) { + for (const IR::Block* const block : program.blocks) { + bool no_more_phis{false}; + for (const IR::Inst& inst : *block) { + if (inst.GetOpcode() == IR::Opcode::Phi) { + if (no_more_phis) { + throw LogicError("Interleaved phi nodes: {}", IR::DumpBlock(*block)); + } + } else { + no_more_phis = true; + } + } + } +} + +void VerificationPass(const IR::Program& program) { + ValidateTypes(program); + ValidateUses(program); + ValidateForwardDeclarations(program); + ValidatePhiNodes(program); +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/object_pool.h b/src/shader_recompiler/object_pool.h new file mode 100644 index 000000000..f8b255b66 --- /dev/null +++ b/src/shader_recompiler/object_pool.h @@ -0,0 +1,104 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include <type_traits> +#include <utility> + +namespace Shader { + +template <typename T> +requires std::is_destructible_v<T> class ObjectPool { +public: + explicit ObjectPool(size_t chunk_size = 8192) : new_chunk_size{chunk_size} { + node = &chunks.emplace_back(new_chunk_size); + } + + template <typename... Args> + requires std::is_constructible_v<T, Args...>[[nodiscard]] T* Create(Args&&... args) { + return std::construct_at(Memory(), std::forward<Args>(args)...); + } + + void ReleaseContents() { + if (chunks.empty()) { + return; + } + Chunk& root{chunks.front()}; + if (root.used_objects == root.num_objects) { + // Root chunk has been filled, squash allocations into it + const size_t total_objects{root.num_objects + new_chunk_size * (chunks.size() - 1)}; + chunks.clear(); + chunks.emplace_back(total_objects); + } else { + root.Release(); + chunks.resize(1); + } + chunks.shrink_to_fit(); + node = &chunks.front(); + } + +private: + struct NonTrivialDummy { + NonTrivialDummy() noexcept {} + }; + + union Storage { + Storage() noexcept {} + ~Storage() noexcept {} + + NonTrivialDummy dummy{}; + T object; + }; + + struct Chunk { + explicit Chunk() = default; + explicit Chunk(size_t size) + : num_objects{size}, storage{std::make_unique<Storage[]>(size)} {} + + Chunk& operator=(Chunk&& rhs) noexcept { + Release(); + used_objects = std::exchange(rhs.used_objects, 0); + num_objects = std::exchange(rhs.num_objects, 0); + storage = std::move(rhs.storage); + return *this; + } + + Chunk(Chunk&& rhs) noexcept + : used_objects{std::exchange(rhs.used_objects, 0)}, + num_objects{std::exchange(rhs.num_objects, 0)}, storage{std::move(rhs.storage)} {} + + ~Chunk() { + Release(); + } + + void Release() { + std::destroy_n(storage.get(), used_objects); + used_objects = 0; + } + + size_t used_objects{}; + size_t num_objects{}; + std::unique_ptr<Storage[]> storage; + }; + + [[nodiscard]] T* Memory() { + Chunk* const chunk{FreeChunk()}; + return &chunk->storage[chunk->used_objects++].object; + } + + [[nodiscard]] Chunk* FreeChunk() { + if (node->used_objects != node->num_objects) { + return node; + } + node = &chunks.emplace_back(new_chunk_size); + return node; + } + + Chunk* node{}; + std::vector<Chunk> chunks; + size_t new_chunk_size{}; +}; + +} // namespace Shader diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h new file mode 100644 index 000000000..f0c3b3b17 --- /dev/null +++ b/src/shader_recompiler/profile.h @@ -0,0 +1,74 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any
later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" + +namespace Shader { + +struct Profile { + u32 supported_spirv{0x00010000}; + + bool unified_descriptor_binding{}; + bool support_descriptor_aliasing{}; + bool support_int8{}; + bool support_int16{}; + bool support_int64{}; + bool support_vertex_instance_id{}; + bool support_float_controls{}; + bool support_separate_denorm_behavior{}; + bool support_separate_rounding_mode{}; + bool support_fp16_denorm_preserve{}; + bool support_fp32_denorm_preserve{}; + bool support_fp16_denorm_flush{}; + bool support_fp32_denorm_flush{}; + bool support_fp16_signed_zero_nan_preserve{}; + bool support_fp32_signed_zero_nan_preserve{}; + bool support_fp64_signed_zero_nan_preserve{}; + bool support_explicit_workgroup_layout{}; + bool support_vote{}; + bool support_viewport_index_layer_non_geometry{}; + bool support_viewport_mask{}; + bool support_typeless_image_loads{}; + bool support_demote_to_helper_invocation{}; + bool support_int64_atomics{}; + bool support_derivative_control{}; + bool support_geometry_shader_passthrough{}; + bool support_gl_nv_gpu_shader_5{}; + bool support_gl_amd_gpu_shader_half_float{}; + bool support_gl_texture_shadow_lod{}; + bool support_gl_warp_intrinsics{}; + bool support_gl_variable_aoffi{}; + bool support_gl_sparse_textures{}; + bool support_gl_derivative_control{}; + + bool warp_size_potentially_larger_than_guest{}; + + bool lower_left_origin_mode{}; + /// Fragment outputs have to be declared even if they are not written to avoid undefined values. + /// See Ori and the Blind Forest's main menu for reference. + bool need_declared_frag_colors{}; + /// Prevents fast math optimizations that may cause inaccuracies + bool need_fastmath_off{}; + + /// OpFClamp is broken and OpFMax + OpFMin should be used instead + bool has_broken_spirv_clamp{}; + /// Offset image operands with an unsigned type do not work + bool has_broken_unsigned_image_offsets{}; + /// Signed instructions with unsigned data types are misinterpreted + bool has_broken_signed_operations{}; + /// Float controls break when fp16 is enabled + bool has_broken_fp16_float_controls{}; + /// Dynamic vec4 indexing is broken on some OpenGL drivers + bool has_gl_component_indexing_bug{}; + /// The precise type qualifier is broken in the fragment stage of some drivers + bool has_gl_precise_bug{}; + /// Ignores SPIR-V ordered vs unordered using GLSL semantics + bool ignore_nan_fp_comparisons{}; + + u32 gl_max_compute_smem_size{}; +}; + +} // namespace Shader diff --git a/src/shader_recompiler/program_header.h b/src/shader_recompiler/program_header.h new file mode 100644 index 000000000..bd6c2bfb5 --- /dev/null +++ b/src/shader_recompiler/program_header.h @@ -0,0 +1,219 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
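Profile, defined just above, is a plain bag of capability and workaround flags that the backends consult. As a rough illustration of the kind of branch a backend might take on has_broken_spirv_clamp, here is a hypothetical helper using plain floats in place of emitted SPIR-V ops (EmitClamp and EmitMinMax are made-up names, not part of the emitters):

#include <algorithm>
#include <cstdio>

struct Profile {
    // Mirrors the flag documented above: OpFClamp is broken and
    // OpFMax + OpFMin should be used instead.
    bool has_broken_spirv_clamp{};
};

// Stand-ins for emitted operations; a real backend would emit IR instead.
float EmitClamp(float value, float lo, float hi) {
    return std::clamp(value, lo, hi);
}
float EmitMinMax(float value, float lo, float hi) {
    return std::min(std::max(value, lo), hi);
}

float Clamp(const Profile& profile, float value, float lo, float hi) {
    // Take the workaround path only when the driver's clamp is unreliable.
    return profile.has_broken_spirv_clamp ? EmitMinMax(value, lo, hi)
                                          : EmitClamp(value, lo, hi);
}

int main() {
    const Profile broken{.has_broken_spirv_clamp = true};
    std::printf("%f\n", Clamp(broken, 2.0f, 0.0f, 1.0f)); // prints 1.0
}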
+ +#pragma once + +#include <array> +#include <optional> + +#include "common/bit_field.h" +#include "common/common_funcs.h" +#include "common/common_types.h" + +namespace Shader { + +enum class OutputTopology : u32 { + PointList = 1, + LineStrip = 6, + TriangleStrip = 7, +}; + +enum class PixelImap : u8 { + Unused = 0, + Constant = 1, + Perspective = 2, + ScreenLinear = 3, +}; + +// Documentation in: +// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html +struct ProgramHeader { + union { + BitField<0, 5, u32> sph_type; + BitField<5, 5, u32> version; + BitField<10, 4, u32> shader_type; + BitField<14, 1, u32> mrt_enable; + BitField<15, 1, u32> kills_pixels; + BitField<16, 1, u32> does_global_store; + BitField<17, 4, u32> sass_version; + BitField<21, 2, u32> reserved1; + BitField<24, 1, u32> geometry_passthrough; + BitField<25, 1, u32> reserved2; + BitField<26, 1, u32> does_load_or_store; + BitField<27, 1, u32> does_fp64; + BitField<28, 4, u32> stream_out_mask; + } common0; + + union { + BitField<0, 24, u32> shader_local_memory_low_size; + BitField<24, 8, u32> per_patch_attribute_count; + } common1; + + union { + BitField<0, 24, u32> shader_local_memory_high_size; + BitField<24, 8, u32> threads_per_input_primitive; + } common2; + + union { + BitField<0, 24, u32> shader_local_memory_crs_size; + BitField<24, 4, OutputTopology> output_topology; + BitField<28, 4, u32> reserved; + } common3; + + union { + BitField<0, 12, u32> max_output_vertices; + BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders. + BitField<20, 4, u32> reserved; + BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders. + } common4; + + union { + struct { + INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA + + union { + BitField<0, 1, u8> primitive_array_id; + BitField<1, 1, u8> rt_array_index; + BitField<2, 1, u8> viewport_index; + BitField<3, 1, u8> point_size; + BitField<4, 1, u8> position_x; + BitField<5, 1, u8> position_y; + BitField<6, 1, u8> position_z; + BitField<7, 1, u8> position_w; + u8 raw; + } imap_systemb; + + std::array<u8, 16> imap_generic_vector; + + INSERT_PADDING_BYTES_NOINIT(2); // ImapColor + union { + BitField<0, 8, u16> clip_distances; + BitField<8, 1, u16> point_sprite_s; + BitField<9, 1, u16> point_sprite_t; + BitField<10, 1, u16> fog_coordinate; + BitField<12, 1, u16> tessellation_eval_point_u; + BitField<13, 1, u16> tessellation_eval_point_v; + BitField<14, 1, u16> instance_id; + BitField<15, 1, u16> vertex_id; + }; + INSERT_PADDING_BYTES_NOINIT(5); // ImapFixedFncTexture[10] + INSERT_PADDING_BYTES_NOINIT(1); // ImapReserved + INSERT_PADDING_BYTES_NOINIT(3); // OmapSystemValuesA + + union { + BitField<0, 1, u8> primitive_array_id; + BitField<1, 1, u8> rt_array_index; + BitField<2, 1, u8> viewport_index; + BitField<3, 1, u8> point_size; + BitField<4, 1, u8> position_x; + BitField<5, 1, u8> position_y; + BitField<6, 1, u8> position_z; + BitField<7, 1, u8> position_w; + u8 raw; + } omap_systemb; + + std::array<u8, 16> omap_generic_vector; + + INSERT_PADDING_BYTES_NOINIT(2); // OmapColor + + union { + BitField<0, 8, u16> clip_distances; + BitField<8, 1, u16> point_sprite_s; + BitField<9, 1, u16> point_sprite_t; + BitField<10, 1, u16> fog_coordinate; + BitField<12, 1, u16> tessellation_eval_point_u; + BitField<13, 1, u16> tessellation_eval_point_v; + BitField<14, 1, u16> instance_id; + BitField<15, 1, u16> vertex_id; + } omap_systemc; + + INSERT_PADDING_BYTES_NOINIT(5); // OmapFixedFncTexture[10] + 
INSERT_PADDING_BYTES_NOINIT(1); // OmapReserved + + [[nodiscard]] std::array<bool, 4> InputGeneric(size_t index) const noexcept { + const int data{imap_generic_vector[index >> 1] >> ((index % 2) * 4)}; + return { + (data & 1) != 0, + (data & 2) != 0, + (data & 4) != 0, + (data & 8) != 0, + }; + } + + [[nodiscard]] std::array<bool, 4> OutputGeneric(size_t index) const noexcept { + const int data{omap_generic_vector[index >> 1] >> ((index % 2) * 4)}; + return { + (data & 1) != 0, + (data & 2) != 0, + (data & 4) != 0, + (data & 8) != 0, + }; + } + } vtg; + + struct { + INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA + + union { + BitField<0, 1, u8> primitive_array_id; + BitField<1, 1, u8> rt_array_index; + BitField<2, 1, u8> viewport_index; + BitField<3, 1, u8> point_size; + BitField<4, 1, u8> position_x; + BitField<5, 1, u8> position_y; + BitField<6, 1, u8> position_z; + BitField<7, 1, u8> position_w; + BitField<0, 4, u8> first; + BitField<4, 4, u8> position; + u8 raw; + } imap_systemb; + + union { + BitField<0, 2, PixelImap> x; + BitField<2, 2, PixelImap> y; + BitField<4, 2, PixelImap> z; + BitField<6, 2, PixelImap> w; + u8 raw; + } imap_generic_vector[32]; + + INSERT_PADDING_BYTES_NOINIT(2); // ImapColor + INSERT_PADDING_BYTES_NOINIT(2); // ImapSystemValuesC + INSERT_PADDING_BYTES_NOINIT(10); // ImapFixedFncTexture[10] + INSERT_PADDING_BYTES_NOINIT(2); // ImapReserved + + struct { + u32 target; + union { + BitField<0, 1, u32> sample_mask; + BitField<1, 1, u32> depth; + BitField<2, 30, u32> reserved; + }; + } omap; + + [[nodiscard]] std::array<bool, 4> EnabledOutputComponents(u32 rt) const noexcept { + const u32 bits{omap.target >> (rt * 4)}; + return {(bits & 1) != 0, (bits & 2) != 0, (bits & 4) != 0, (bits & 8) != 0}; + } + + [[nodiscard]] std::array<PixelImap, 4> GenericInputMap(u32 attribute) const { + const auto& vector{imap_generic_vector[attribute]}; + return {vector.x, vector.y, vector.z, vector.w}; + } + + [[nodiscard]] bool IsGenericVectorActive(size_t index) const { + return imap_generic_vector[index].raw != 0; + } + } ps; + + std::array<u32, 0xf> raw; + }; + + [[nodiscard]] u64 LocalMemorySize() const noexcept { + return static_cast<u64>(common1.shader_local_memory_low_size) | + (static_cast<u64>(common2.shader_local_memory_high_size) << 24); + } +}; +static_assert(sizeof(ProgramHeader) == 0x50, "Incorrect structure size"); + +} // namespace Shader diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h new file mode 100644 index 000000000..f3f83a258 --- /dev/null +++ b/src/shader_recompiler/runtime_info.h @@ -0,0 +1,88 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
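ProgramHeader above packs one nibble per generic attribute (see InputGeneric and OutputGeneric) and splits the local memory size across two fields joined at bit 24 (LocalMemorySize). A standalone restatement of both decodings with plain integers, following the same bit layout as the struct:

#include <array>
#include <cassert>
#include <cstdint>

// One nibble per generic attribute: bit n of the nibble enables component n (x, y, z, w).
std::array<bool, 4> InputGeneric(const std::array<uint8_t, 16>& imap, size_t index) {
    const int data = imap[index >> 1] >> ((index % 2) * 4);
    return {(data & 1) != 0, (data & 2) != 0, (data & 4) != 0, (data & 8) != 0};
}

// The low field holds bits 0..23, the high field is shifted up by 24.
uint64_t LocalMemorySize(uint32_t low_24_bits, uint32_t high_bits) {
    return static_cast<uint64_t>(low_24_bits) | (static_cast<uint64_t>(high_bits) << 24);
}

int main() {
    std::array<uint8_t, 16> imap{};
    imap[0] = 0x3A; // attribute 0 -> nibble 0xA (y, w), attribute 1 -> nibble 0x3 (x, y)
    assert((InputGeneric(imap, 0) == std::array<bool, 4>{false, true, false, true}));
    assert((InputGeneric(imap, 1) == std::array<bool, 4>{true, true, false, false}));
    assert(LocalMemorySize(0x123456, 0x1) == 0x1123456);
}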
+ +#pragma once + +#include <array> +#include <bitset> +#include <optional> +#include <vector> + +#include "common/common_types.h" +#include "shader_recompiler/varying_state.h" + +namespace Shader { + +enum class AttributeType : u8 { + Float, + SignedInt, + UnsignedInt, + Disabled, +}; + +enum class InputTopology { + Points, + Lines, + LinesAdjacency, + Triangles, + TrianglesAdjacency, +}; + +enum class CompareFunction { + Never, + Less, + Equal, + LessThanEqual, + Greater, + NotEqual, + GreaterThanEqual, + Always, +}; + +enum class TessPrimitive { + Isolines, + Triangles, + Quads, +}; + +enum class TessSpacing { + Equal, + FractionalOdd, + FractionalEven, +}; + +struct TransformFeedbackVarying { + u32 buffer{}; + u32 stride{}; + u32 offset{}; + u32 components{}; +}; + +struct RuntimeInfo { + std::array<AttributeType, 32> generic_input_types{}; + VaryingState previous_stage_stores; + + bool convert_depth_mode{}; + bool force_early_z{}; + + TessPrimitive tess_primitive{}; + TessSpacing tess_spacing{}; + bool tess_clockwise{}; + + InputTopology input_topology{}; + + std::optional<float> fixed_state_point_size; + std::optional<CompareFunction> alpha_test_func; + float alpha_test_reference{}; + + /// Static Y negate value + bool y_negate{}; + /// Use storage buffers instead of global pointers on GLASM + bool glasm_use_storage_buffers{}; + + /// Transform feedback state for each varying + std::vector<TransformFeedbackVarying> xfb_varyings; +}; + +} // namespace Shader diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h new file mode 100644 index 000000000..4ef4dbd40 --- /dev/null +++ b/src/shader_recompiler/shader_info.h @@ -0,0 +1,193 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
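RuntimeInfo, shown above, carries fixed-function state the compiled shader has to honour, including the optional alpha test described by alpha_test_func and alpha_test_reference. Assuming CompareFunction follows the usual graphics comparison semantics, a small illustrative helper showing what each enumerator means for a fragment's alpha (this is not the emitter's code path, just the intended semantics):

#include <cassert>

enum class CompareFunction {
    Never, Less, Equal, LessThanEqual, Greater, NotEqual, GreaterThanEqual, Always,
};

// Returns true when the fragment survives the alpha test.
bool AlphaTestPasses(CompareFunction func, float alpha, float reference) {
    switch (func) {
    case CompareFunction::Never:            return false;
    case CompareFunction::Less:             return alpha < reference;
    case CompareFunction::Equal:            return alpha == reference;
    case CompareFunction::LessThanEqual:    return alpha <= reference;
    case CompareFunction::Greater:          return alpha > reference;
    case CompareFunction::NotEqual:         return alpha != reference;
    case CompareFunction::GreaterThanEqual: return alpha >= reference;
    case CompareFunction::Always:           return true;
    }
    return true;
}

int main() {
    assert(AlphaTestPasses(CompareFunction::Greater, 0.75f, 0.5f));
    assert(!AlphaTestPasses(CompareFunction::Less, 0.75f, 0.5f));
}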
+ +#pragma once + +#include <array> +#include <bitset> + +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/type.h" +#include "shader_recompiler/varying_state.h" + +#include <boost/container/small_vector.hpp> +#include <boost/container/static_vector.hpp> + +namespace Shader { + +enum class TextureType : u32 { + Color1D, + ColorArray1D, + Color2D, + ColorArray2D, + Color3D, + ColorCube, + ColorArrayCube, + Buffer, +}; +constexpr u32 NUM_TEXTURE_TYPES = 8; + +enum class ImageFormat : u32 { + Typeless, + R8_UINT, + R8_SINT, + R16_UINT, + R16_SINT, + R32_UINT, + R32G32_UINT, + R32G32B32A32_UINT, +}; + +enum class Interpolation { + Smooth, + Flat, + NoPerspective, +}; + +struct ConstantBufferDescriptor { + u32 index; + u32 count; +}; + +struct StorageBufferDescriptor { + u32 cbuf_index; + u32 cbuf_offset; + u32 count; + bool is_written; +}; + +struct TextureBufferDescriptor { + bool has_secondary; + u32 cbuf_index; + u32 cbuf_offset; + u32 secondary_cbuf_index; + u32 secondary_cbuf_offset; + u32 count; + u32 size_shift; +}; +using TextureBufferDescriptors = boost::container::small_vector<TextureBufferDescriptor, 6>; + +struct ImageBufferDescriptor { + ImageFormat format; + bool is_written; + bool is_read; + u32 cbuf_index; + u32 cbuf_offset; + u32 count; + u32 size_shift; +}; +using ImageBufferDescriptors = boost::container::small_vector<ImageBufferDescriptor, 2>; + +struct TextureDescriptor { + TextureType type; + bool is_depth; + bool has_secondary; + u32 cbuf_index; + u32 cbuf_offset; + u32 secondary_cbuf_index; + u32 secondary_cbuf_offset; + u32 count; + u32 size_shift; +}; +using TextureDescriptors = boost::container::small_vector<TextureDescriptor, 12>; + +struct ImageDescriptor { + TextureType type; + ImageFormat format; + bool is_written; + bool is_read; + u32 cbuf_index; + u32 cbuf_offset; + u32 count; + u32 size_shift; +}; +using ImageDescriptors = boost::container::small_vector<ImageDescriptor, 4>; + +struct Info { + static constexpr size_t MAX_CBUFS{18}; + static constexpr size_t MAX_SSBOS{32}; + + bool uses_workgroup_id{}; + bool uses_local_invocation_id{}; + bool uses_invocation_id{}; + bool uses_sample_id{}; + bool uses_is_helper_invocation{}; + bool uses_subgroup_invocation_id{}; + bool uses_subgroup_shuffles{}; + std::array<bool, 30> uses_patches{}; + + std::array<Interpolation, 32> interpolation{}; + VaryingState loads; + VaryingState stores; + VaryingState passthrough; + + bool loads_indexed_attributes{}; + + std::array<bool, 8> stores_frag_color{}; + bool stores_sample_mask{}; + bool stores_frag_depth{}; + + bool stores_tess_level_outer{}; + bool stores_tess_level_inner{}; + + bool stores_indexed_attributes{}; + + bool stores_global_memory{}; + + bool uses_fp16{}; + bool uses_fp64{}; + bool uses_fp16_denorms_flush{}; + bool uses_fp16_denorms_preserve{}; + bool uses_fp32_denorms_flush{}; + bool uses_fp32_denorms_preserve{}; + bool uses_int8{}; + bool uses_int16{}; + bool uses_int64{}; + bool uses_image_1d{}; + bool uses_sampled_1d{}; + bool uses_sparse_residency{}; + bool uses_demote_to_helper_invocation{}; + bool uses_subgroup_vote{}; + bool uses_subgroup_mask{}; + bool uses_fswzadd{}; + bool uses_derivatives{}; + bool uses_typeless_image_reads{}; + bool uses_typeless_image_writes{}; + bool uses_image_buffers{}; + bool uses_shared_increment{}; + bool uses_shared_decrement{}; + bool uses_global_increment{}; + bool uses_global_decrement{}; + bool uses_atomic_f32_add{}; + bool uses_atomic_f16x2_add{}; + bool uses_atomic_f16x2_min{}; + bool 
uses_atomic_f16x2_max{}; + bool uses_atomic_f32x2_add{}; + bool uses_atomic_f32x2_min{}; + bool uses_atomic_f32x2_max{}; + bool uses_atomic_s32_min{}; + bool uses_atomic_s32_max{}; + bool uses_int64_bit_atomics{}; + bool uses_global_memory{}; + bool uses_atomic_image_u32{}; + bool uses_shadow_lod{}; + + IR::Type used_constant_buffer_types{}; + IR::Type used_storage_buffer_types{}; + + u32 constant_buffer_mask{}; + std::array<u32, MAX_CBUFS> constant_buffer_used_sizes{}; + u32 nvn_buffer_base{}; + std::bitset<16> nvn_buffer_used{}; + + boost::container::static_vector<ConstantBufferDescriptor, MAX_CBUFS> + constant_buffer_descriptors; + boost::container::static_vector<StorageBufferDescriptor, MAX_SSBOS> storage_buffers_descriptors; + TextureBufferDescriptors texture_buffer_descriptors; + ImageBufferDescriptors image_buffer_descriptors; + TextureDescriptors texture_descriptors; + ImageDescriptors image_descriptors; +}; + +} // namespace Shader diff --git a/src/shader_recompiler/stage.h b/src/shader_recompiler/stage.h new file mode 100644 index 000000000..5c1c8d8fc --- /dev/null +++ b/src/shader_recompiler/stage.h @@ -0,0 +1,28 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" + +namespace Shader { + +enum class Stage : u32 { + VertexB, + TessellationControl, + TessellationEval, + Geometry, + Fragment, + + Compute, + + VertexA, +}; +constexpr u32 MaxStageTypes = 6; + +[[nodiscard]] constexpr Stage StageFromIndex(size_t index) noexcept { + return static_cast<Stage>(static_cast<size_t>(Stage::VertexB) + index); +} + +} // namespace Shader diff --git a/src/shader_recompiler/varying_state.h b/src/shader_recompiler/varying_state.h new file mode 100644 index 000000000..9d7b24a76 --- /dev/null +++ b/src/shader_recompiler/varying_state.h @@ -0,0 +1,69 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
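stage.h above orders the Stage enumerators so that a zero-based pipeline index maps directly onto them through StageFromIndex, with Compute following the graphics stages and the dual-vertex VertexA stage placed last. A compile-time check of that mapping, restating the enum locally for the purpose of the check:

#include <cstddef>
#include <cstdint>

// Restated locally; mirrors the declaration order in stage.h above.
enum class Stage : uint32_t {
    VertexB, TessellationControl, TessellationEval, Geometry, Fragment, Compute, VertexA,
};

constexpr Stage StageFromIndex(size_t index) noexcept {
    return static_cast<Stage>(static_cast<size_t>(Stage::VertexB) + index);
}

static_assert(StageFromIndex(0) == Stage::VertexB);
static_assert(StageFromIndex(3) == Stage::Geometry);
static_assert(StageFromIndex(4) == Stage::Fragment);
static_assert(StageFromIndex(5) == Stage::Compute);

int main() {}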
+ +#pragma once + +#include <bitset> +#include <cstddef> + +#include "shader_recompiler/frontend/ir/attribute.h" + +namespace Shader { + +struct VaryingState { + std::bitset<256> mask{}; + + void Set(IR::Attribute attribute, bool state = true) { + mask[static_cast<size_t>(attribute)] = state; + } + + [[nodiscard]] bool operator[](IR::Attribute attribute) const noexcept { + return mask[static_cast<size_t>(attribute)]; + } + + [[nodiscard]] bool AnyComponent(IR::Attribute base) const noexcept { + return mask[static_cast<size_t>(base) + 0] || mask[static_cast<size_t>(base) + 1] || + mask[static_cast<size_t>(base) + 2] || mask[static_cast<size_t>(base) + 3]; + } + + [[nodiscard]] bool AllComponents(IR::Attribute base) const noexcept { + return mask[static_cast<size_t>(base) + 0] && mask[static_cast<size_t>(base) + 1] && + mask[static_cast<size_t>(base) + 2] && mask[static_cast<size_t>(base) + 3]; + } + + [[nodiscard]] bool IsUniform(IR::Attribute base) const noexcept { + return AnyComponent(base) == AllComponents(base); + } + + [[nodiscard]] bool Generic(size_t index, size_t component) const noexcept { + return mask[static_cast<size_t>(IR::Attribute::Generic0X) + index * 4 + component]; + } + + [[nodiscard]] bool Generic(size_t index) const noexcept { + return Generic(index, 0) || Generic(index, 1) || Generic(index, 2) || Generic(index, 3); + } + + [[nodiscard]] bool ClipDistances() const noexcept { + return AnyComponent(IR::Attribute::ClipDistance0) || + AnyComponent(IR::Attribute::ClipDistance4); + } + + [[nodiscard]] bool Legacy() const noexcept { + return AnyComponent(IR::Attribute::ColorFrontDiffuseR) || + AnyComponent(IR::Attribute::ColorFrontSpecularR) || + AnyComponent(IR::Attribute::ColorBackDiffuseR) || + AnyComponent(IR::Attribute::ColorBackSpecularR) || FixedFunctionTexture(); + } + + [[nodiscard]] bool FixedFunctionTexture() const noexcept { + for (size_t index = 0; index < 10; ++index) { + if (AnyComponent(IR::Attribute::FixedFncTexture0S + index * 4)) { + return true; + } + } + return false; + } +}; + +} // namespace Shader diff --git a/src/tests/common/unique_function.cpp b/src/tests/common/unique_function.cpp index ac9912738..aa6e86593 100644 --- a/src/tests/common/unique_function.cpp +++ b/src/tests/common/unique_function.cpp @@ -17,10 +17,12 @@ struct Noisy { Noisy& operator=(Noisy&& rhs) noexcept { state = "Move assigned"; rhs.state = "Moved away"; + return *this; } Noisy(const Noisy&) : state{"Copied constructed"} {} Noisy& operator=(const Noisy&) { state = "Copied assigned"; + return *this; } std::string state; diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index e4de55f4d..007ecc13e 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -29,7 +29,6 @@ add_library(video_core STATIC dirty_flags.h dma_pusher.cpp dma_pusher.h - engines/const_buffer_engine_interface.h engines/const_buffer_info.h engines/engine_interface.h engines/engine_upload.cpp @@ -44,9 +43,6 @@ add_library(video_core STATIC engines/maxwell_3d.h engines/maxwell_dma.cpp engines/maxwell_dma.h - engines/shader_bytecode.h - engines/shader_header.h - engines/shader_type.h framebuffer_config.h macro/macro.cpp macro/macro.h @@ -61,8 +57,6 @@ add_library(video_core STATIC gpu.h gpu_thread.cpp gpu_thread.h - guest_driver.cpp - guest_driver.h memory_manager.cpp memory_manager.h query_cache.h @@ -71,26 +65,25 @@ add_library(video_core STATIC rasterizer_interface.h renderer_base.cpp renderer_base.h - renderer_opengl/gl_arb_decompiler.cpp - 
renderer_opengl/gl_arb_decompiler.h renderer_opengl/gl_buffer_cache.cpp renderer_opengl/gl_buffer_cache.h + renderer_opengl/gl_compute_pipeline.cpp + renderer_opengl/gl_compute_pipeline.h renderer_opengl/gl_device.cpp renderer_opengl/gl_device.h renderer_opengl/gl_fence_manager.cpp renderer_opengl/gl_fence_manager.h + renderer_opengl/gl_graphics_pipeline.cpp + renderer_opengl/gl_graphics_pipeline.h renderer_opengl/gl_rasterizer.cpp renderer_opengl/gl_rasterizer.h renderer_opengl/gl_resource_manager.cpp renderer_opengl/gl_resource_manager.h renderer_opengl/gl_shader_cache.cpp renderer_opengl/gl_shader_cache.h - renderer_opengl/gl_shader_decompiler.cpp - renderer_opengl/gl_shader_decompiler.h - renderer_opengl/gl_shader_disk_cache.cpp - renderer_opengl/gl_shader_disk_cache.h renderer_opengl/gl_shader_manager.cpp renderer_opengl/gl_shader_manager.h + renderer_opengl/gl_shader_context.h renderer_opengl/gl_shader_util.cpp renderer_opengl/gl_shader_util.h renderer_opengl/gl_state_tracker.cpp @@ -112,6 +105,7 @@ add_library(video_core STATIC renderer_vulkan/fixed_pipeline_state.h renderer_vulkan/maxwell_to_vk.cpp renderer_vulkan/maxwell_to_vk.h + renderer_vulkan/pipeline_helper.h renderer_vulkan/renderer_vulkan.h renderer_vulkan/renderer_vulkan.cpp renderer_vulkan/vk_blit_screen.cpp @@ -138,12 +132,12 @@ add_library(video_core STATIC renderer_vulkan/vk_query_cache.h renderer_vulkan/vk_rasterizer.cpp renderer_vulkan/vk_rasterizer.h + renderer_vulkan/vk_render_pass_cache.cpp + renderer_vulkan/vk_render_pass_cache.h renderer_vulkan/vk_resource_pool.cpp renderer_vulkan/vk_resource_pool.h renderer_vulkan/vk_scheduler.cpp renderer_vulkan/vk_scheduler.h - renderer_vulkan/vk_shader_decompiler.cpp - renderer_vulkan/vk_shader_decompiler.h renderer_vulkan/vk_shader_util.cpp renderer_vulkan/vk_shader_util.h renderer_vulkan/vk_staging_buffer_pool.cpp @@ -156,60 +150,12 @@ add_library(video_core STATIC renderer_vulkan/vk_texture_cache.h renderer_vulkan/vk_update_descriptor.cpp renderer_vulkan/vk_update_descriptor.h + shader_cache.cpp shader_cache.h + shader_environment.cpp + shader_environment.h shader_notify.cpp shader_notify.h - shader/decode/arithmetic.cpp - shader/decode/arithmetic_immediate.cpp - shader/decode/bfe.cpp - shader/decode/bfi.cpp - shader/decode/shift.cpp - shader/decode/arithmetic_integer.cpp - shader/decode/arithmetic_integer_immediate.cpp - shader/decode/arithmetic_half.cpp - shader/decode/arithmetic_half_immediate.cpp - shader/decode/ffma.cpp - shader/decode/hfma2.cpp - shader/decode/conversion.cpp - shader/decode/memory.cpp - shader/decode/texture.cpp - shader/decode/image.cpp - shader/decode/float_set_predicate.cpp - shader/decode/integer_set_predicate.cpp - shader/decode/half_set_predicate.cpp - shader/decode/predicate_set_register.cpp - shader/decode/predicate_set_predicate.cpp - shader/decode/register_set_predicate.cpp - shader/decode/float_set.cpp - shader/decode/integer_set.cpp - shader/decode/half_set.cpp - shader/decode/video.cpp - shader/decode/warp.cpp - shader/decode/xmad.cpp - shader/decode/other.cpp - shader/ast.cpp - shader/ast.h - shader/async_shaders.cpp - shader/async_shaders.h - shader/compiler_settings.cpp - shader/compiler_settings.h - shader/control_flow.cpp - shader/control_flow.h - shader/decode.cpp - shader/expr.cpp - shader/expr.h - shader/memory_util.cpp - shader/memory_util.h - shader/node_helper.cpp - shader/node_helper.h - shader/node.h - shader/registry.cpp - shader/registry.h - shader/shader_ir.cpp - shader/shader_ir.h - shader/track.cpp - 
shader/transform_feedback.cpp - shader/transform_feedback.h surface.cpp surface.h texture_cache/accelerated_swizzle.cpp @@ -242,6 +188,8 @@ add_library(video_core STATIC textures/decoders.h textures/texture.cpp textures/texture.h + transform_feedback.cpp + transform_feedback.h video_core.cpp video_core.h vulkan_common/vulkan_debug_callback.cpp @@ -265,7 +213,7 @@ add_library(video_core STATIC create_target_directory_groups(video_core) target_link_libraries(video_core PUBLIC common core) -target_link_libraries(video_core PRIVATE glad xbyak) +target_link_libraries(video_core PUBLIC glad shader_recompiler xbyak) if (YUZU_USE_BUNDLED_FFMPEG AND NOT WIN32) add_dependencies(video_core ffmpeg-build) diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 5a0b6f0c0..24c858104 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -31,6 +31,7 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" +#include "video_core/surface.h" #include "video_core/texture_cache/slot_vector.h" #include "video_core/texture_cache/types.h" @@ -42,14 +43,19 @@ MICROPROFILE_DECLARE(GPU_DownloadMemory); using BufferId = SlotId; +using VideoCore::Surface::PixelFormat; +using namespace Common::Literals; + constexpr u32 NUM_VERTEX_BUFFERS = 32; constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4; constexpr u32 NUM_GRAPHICS_UNIFORM_BUFFERS = 18; constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8; constexpr u32 NUM_STORAGE_BUFFERS = 16; +constexpr u32 NUM_TEXTURE_BUFFERS = 16; constexpr u32 NUM_STAGES = 5; -using namespace Common::Literals; +using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>; +using ComputeUniformBufferSizes = std::array<u32, NUM_COMPUTE_UNIFORM_BUFFERS>; template <typename P> class BufferCache { @@ -67,6 +73,7 @@ class BufferCache { static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX; static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX; static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS; + static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS; static constexpr BufferId NULL_BUFFER_ID{0}; @@ -96,6 +103,10 @@ class BufferCache { BufferId buffer_id; }; + struct TextureBufferBinding : Binding { + PixelFormat format; + }; + static constexpr Binding NULL_BINDING{ .cpu_addr = 0, .size = 0, @@ -133,20 +144,31 @@ public: void BindHostComputeBuffers(); - void SetEnabledUniformBuffers(size_t stage, u32 enabled); + void SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask, + const UniformBufferSizes* sizes); - void SetEnabledComputeUniformBuffers(u32 enabled); + void SetComputeUniformBufferState(u32 mask, const ComputeUniformBufferSizes* sizes); void UnbindGraphicsStorageBuffers(size_t stage); void BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, bool is_written); + void UnbindGraphicsTextureBuffers(size_t stage); + + void BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, u32 size, + PixelFormat format, bool is_written, bool is_image); + void UnbindComputeStorageBuffers(); void BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, bool is_written); + void UnbindComputeTextureBuffers(); + + void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format, + bool is_written, bool 
is_image); + void FlushCachedWrites(); /// Return true when there are uncommitted buffers to be downloaded @@ -178,6 +200,7 @@ public: [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); std::mutex mutex; + Runtime& runtime; private: template <typename Func> @@ -254,12 +277,16 @@ private: void BindHostGraphicsStorageBuffers(size_t stage); + void BindHostGraphicsTextureBuffers(size_t stage); + void BindHostTransformFeedbackBuffers(); void BindHostComputeUniformBuffers(); void BindHostComputeStorageBuffers(); + void BindHostComputeTextureBuffers(); + void DoUpdateGraphicsBuffers(bool is_indexed); void DoUpdateComputeBuffers(); @@ -274,6 +301,8 @@ private: void UpdateStorageBuffers(size_t stage); + void UpdateTextureBuffers(size_t stage); + void UpdateTransformFeedbackBuffers(); void UpdateTransformFeedbackBuffer(u32 index); @@ -282,6 +311,8 @@ private: void UpdateComputeStorageBuffers(); + void UpdateComputeTextureBuffers(); + void MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size); [[nodiscard]] BufferId FindBuffer(VAddr cpu_addr, u32 size); @@ -323,6 +354,9 @@ private: [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr) const; + [[nodiscard]] TextureBufferBinding GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size, + PixelFormat format); + [[nodiscard]] std::span<const u8> ImmediateBufferWithData(VAddr cpu_addr, size_t size); [[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity); @@ -336,7 +370,6 @@ private: Tegra::Engines::KeplerCompute& kepler_compute; Tegra::MemoryManager& gpu_memory; Core::Memory::Memory& cpu_memory; - Runtime& runtime; SlotVector<Buffer> slot_buffers; DelayedDestructionRing<Buffer, 8> delayed_destruction_ring; @@ -347,20 +380,30 @@ private: std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers; std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers; std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers; + std::array<std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS>, NUM_STAGES> texture_buffers; std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers; std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers; std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers; + std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS> compute_texture_buffers; + + std::array<u32, NUM_STAGES> enabled_uniform_buffer_masks{}; + u32 enabled_compute_uniform_buffer_mask = 0; - std::array<u32, NUM_STAGES> enabled_uniform_buffers{}; - u32 enabled_compute_uniform_buffers = 0; + const UniformBufferSizes* uniform_buffer_sizes{}; + const ComputeUniformBufferSizes* compute_uniform_buffer_sizes{}; std::array<u32, NUM_STAGES> enabled_storage_buffers{}; std::array<u32, NUM_STAGES> written_storage_buffers{}; u32 enabled_compute_storage_buffers = 0; u32 written_compute_storage_buffers = 0; - std::array<u32, NUM_STAGES> fast_bound_uniform_buffers{}; + std::array<u32, NUM_STAGES> enabled_texture_buffers{}; + std::array<u32, NUM_STAGES> written_texture_buffers{}; + std::array<u32, NUM_STAGES> image_texture_buffers{}; + u32 enabled_compute_texture_buffers = 0; + u32 written_compute_texture_buffers = 0; + u32 image_compute_texture_buffers = 0; std::array<u32, 16> uniform_cache_hits{}; std::array<u32, 16> uniform_cache_shots{}; @@ -371,6 +414,10 @@ private: std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty> dirty_uniform_buffers{}; + std::conditional_t<IS_OPENGL, std::array<u32, NUM_STAGES>, Empty> 
fast_bound_uniform_buffers{}; + std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, + std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>, Empty> + uniform_buffer_binding_sizes{}; std::vector<BufferId> cached_write_buffer_ids; @@ -394,8 +441,8 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_, Tegra::Engines::KeplerCompute& kepler_compute_, Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, Runtime& runtime_) - : rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, - gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, runtime{runtime_} { + : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, + kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} { // Ensure the first slot is used for the null buffer void(slot_buffers.insert(runtime, NullBufferParams{})); deletion_iterator = slot_buffers.end(); @@ -615,6 +662,7 @@ void BufferCache<P>::BindHostStageBuffers(size_t stage) { MICROPROFILE_SCOPE(GPU_BindUploadBuffers); BindHostGraphicsUniformBuffers(stage); BindHostGraphicsStorageBuffers(stage); + BindHostGraphicsTextureBuffers(stage); } template <class P> @@ -622,21 +670,30 @@ void BufferCache<P>::BindHostComputeBuffers() { MICROPROFILE_SCOPE(GPU_BindUploadBuffers); BindHostComputeUniformBuffers(); BindHostComputeStorageBuffers(); + BindHostComputeTextureBuffers(); } template <class P> -void BufferCache<P>::SetEnabledUniformBuffers(size_t stage, u32 enabled) { +void BufferCache<P>::SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask, + const UniformBufferSizes* sizes) { if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { - if (enabled_uniform_buffers[stage] != enabled) { - dirty_uniform_buffers[stage] = ~u32{0}; + if (enabled_uniform_buffer_masks != mask) { + if constexpr (IS_OPENGL) { + fast_bound_uniform_buffers.fill(0); + } + dirty_uniform_buffers.fill(~u32{0}); + uniform_buffer_binding_sizes.fill({}); } } - enabled_uniform_buffers[stage] = enabled; + enabled_uniform_buffer_masks = mask; + uniform_buffer_sizes = sizes; } template <class P> -void BufferCache<P>::SetEnabledComputeUniformBuffers(u32 enabled) { - enabled_compute_uniform_buffers = enabled; +void BufferCache<P>::SetComputeUniformBufferState(u32 mask, + const ComputeUniformBufferSizes* sizes) { + enabled_compute_uniform_buffer_mask = mask; + compute_uniform_buffer_sizes = sizes; } template <class P> @@ -657,9 +714,29 @@ void BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, } template <class P> +void BufferCache<P>::UnbindGraphicsTextureBuffers(size_t stage) { + enabled_texture_buffers[stage] = 0; + written_texture_buffers[stage] = 0; + image_texture_buffers[stage] = 0; +} + +template <class P> +void BufferCache<P>::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, + u32 size, PixelFormat format, bool is_written, + bool is_image) { + enabled_texture_buffers[stage] |= 1U << tbo_index; + written_texture_buffers[stage] |= (is_written ? 1U : 0U) << tbo_index; + if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { + image_texture_buffers[stage] |= (is_image ? 
1U : 0U) << tbo_index; + } + texture_buffers[stage][tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); +} + +template <class P> void BufferCache<P>::UnbindComputeStorageBuffers() { enabled_compute_storage_buffers = 0; written_compute_storage_buffers = 0; + image_compute_texture_buffers = 0; } template <class P> @@ -677,6 +754,24 @@ void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, } template <class P> +void BufferCache<P>::UnbindComputeTextureBuffers() { + enabled_compute_texture_buffers = 0; + written_compute_texture_buffers = 0; + image_compute_texture_buffers = 0; +} + +template <class P> +void BufferCache<P>::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, + PixelFormat format, bool is_written, bool is_image) { + enabled_compute_texture_buffers |= 1U << tbo_index; + written_compute_texture_buffers |= (is_written ? 1U : 0U) << tbo_index; + if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { + image_compute_texture_buffers |= (is_image ? 1U : 0U) << tbo_index; + } + compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); +} + +template <class P> void BufferCache<P>::FlushCachedWrites() { for (const BufferId buffer_id : cached_write_buffer_ids) { slot_buffers[buffer_id].FlushCachedWrites(); @@ -901,7 +996,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) { dirty = std::exchange(dirty_uniform_buffers[stage], 0); } u32 binding_index = 0; - ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) { + ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) { const bool needs_bind = ((dirty >> index) & 1) != 0; BindHostGraphicsUniformBuffer(stage, index, binding_index, needs_bind); if constexpr (NEEDS_BIND_UNIFORM_INDEX) { @@ -915,7 +1010,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 bool needs_bind) { const Binding& binding = uniform_buffers[stage][index]; const VAddr cpu_addr = binding.cpu_addr; - const u32 size = binding.size; + const u32 size = std::min(binding.size, (*uniform_buffer_sizes)[stage][index]); Buffer& buffer = slot_buffers[binding.buffer_id]; TouchBuffer(buffer); const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && @@ -925,8 +1020,13 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 if constexpr (IS_OPENGL) { if (runtime.HasFastBufferSubData()) { // Fast path for Nvidia - if (!HasFastUniformBufferBound(stage, binding_index)) { + const bool should_fast_bind = + !HasFastUniformBufferBound(stage, binding_index) || + uniform_buffer_binding_sizes[stage][binding_index] != size; + if (should_fast_bind) { // We only have to bind when the currently bound buffer is not the fast version + fast_bound_uniform_buffers[stage] |= 1U << binding_index; + uniform_buffer_binding_sizes[stage][binding_index] = size; runtime.BindFastUniformBuffer(stage, binding_index, size); } const auto span = ImmediateBufferWithData(cpu_addr, size); @@ -934,8 +1034,10 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 return; } } - fast_bound_uniform_buffers[stage] |= 1U << binding_index; - + if constexpr (IS_OPENGL) { + fast_bound_uniform_buffers[stage] |= 1U << binding_index; + uniform_buffer_binding_sizes[stage][binding_index] = size; + } // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size); cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size); @@ 
-948,14 +1050,27 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 } ++uniform_cache_shots[0]; - if (!needs_bind && !HasFastUniformBufferBound(stage, binding_index)) { - // Skip binding if it's not needed and if the bound buffer is not the fast version - // This exists to avoid instances where the fast buffer is bound and a GPU write happens + // Skip binding if it's not needed and if the bound buffer is not the fast version + // This exists to avoid instances where the fast buffer is bound and a GPU write happens + needs_bind |= HasFastUniformBufferBound(stage, binding_index); + if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { + needs_bind |= uniform_buffer_binding_sizes[stage][binding_index] != size; + } + if (!needs_bind) { return; } - fast_bound_uniform_buffers[stage] &= ~(1U << binding_index); - const u32 offset = buffer.Offset(cpu_addr); + if constexpr (IS_OPENGL) { + // Fast buffer will be unbound + fast_bound_uniform_buffers[stage] &= ~(1U << binding_index); + + // Mark the index as dirty if offset doesn't match + const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset(); + dirty_uniform_buffers[stage] |= (is_copy_bind ? 1U : 0U) << index; + } + if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { + uniform_buffer_binding_sizes[stage][binding_index] = size; + } if constexpr (NEEDS_BIND_UNIFORM_INDEX) { runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size); } else { @@ -985,6 +1100,28 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) { } template <class P> +void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) { + ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) { + const TextureBufferBinding& binding = texture_buffers[stage][index]; + Buffer& buffer = slot_buffers[binding.buffer_id]; + const u32 size = binding.size; + SynchronizeBuffer(buffer, binding.cpu_addr, size); + + const u32 offset = buffer.Offset(binding.cpu_addr); + const PixelFormat format = binding.format; + if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { + if (((image_texture_buffers[stage] >> index) & 1) != 0) { + runtime.BindImageBuffer(buffer, offset, size, format); + } else { + runtime.BindTextureBuffer(buffer, offset, size, format); + } + } else { + runtime.BindTextureBuffer(buffer, offset, size, format); + } + }); +} + +template <class P> void BufferCache<P>::BindHostTransformFeedbackBuffers() { if (maxwell3d.regs.tfb_enabled == 0) { return; @@ -1006,13 +1143,14 @@ void BufferCache<P>::BindHostComputeUniformBuffers() { if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { // Mark all uniform buffers as dirty dirty_uniform_buffers.fill(~u32{0}); + fast_bound_uniform_buffers.fill(0); } u32 binding_index = 0; - ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) { + ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) { const Binding& binding = compute_uniform_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; TouchBuffer(buffer); - const u32 size = binding.size; + const u32 size = std::min(binding.size, (*compute_uniform_buffer_sizes)[index]); SynchronizeBuffer(buffer, binding.cpu_addr, size); const u32 offset = buffer.Offset(binding.cpu_addr); @@ -1047,6 +1185,28 @@ void BufferCache<P>::BindHostComputeStorageBuffers() { } template <class P> +void BufferCache<P>::BindHostComputeTextureBuffers() { + ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) { + const TextureBufferBinding& binding = compute_texture_buffers[index]; + 
Buffer& buffer = slot_buffers[binding.buffer_id]; + const u32 size = binding.size; + SynchronizeBuffer(buffer, binding.cpu_addr, size); + + const u32 offset = buffer.Offset(binding.cpu_addr); + const PixelFormat format = binding.format; + if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { + if (((image_compute_texture_buffers >> index) & 1) != 0) { + runtime.BindImageBuffer(buffer, offset, size, format); + } else { + runtime.BindTextureBuffer(buffer, offset, size, format); + } + } else { + runtime.BindTextureBuffer(buffer, offset, size, format); + } + }); +} + +template <class P> void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) { if (is_indexed) { UpdateIndexBuffer(); @@ -1056,6 +1216,7 @@ void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) { for (size_t stage = 0; stage < NUM_STAGES; ++stage) { UpdateUniformBuffers(stage); UpdateStorageBuffers(stage); + UpdateTextureBuffers(stage); } } @@ -1063,6 +1224,7 @@ template <class P> void BufferCache<P>::DoUpdateComputeBuffers() { UpdateComputeUniformBuffers(); UpdateComputeStorageBuffers(); + UpdateComputeTextureBuffers(); } template <class P> @@ -1132,7 +1294,7 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) { template <class P> void BufferCache<P>::UpdateUniformBuffers(size_t stage) { - ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) { + ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) { Binding& binding = uniform_buffers[stage][index]; if (binding.buffer_id) { // Already updated @@ -1163,6 +1325,18 @@ void BufferCache<P>::UpdateStorageBuffers(size_t stage) { } template <class P> +void BufferCache<P>::UpdateTextureBuffers(size_t stage) { + ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) { + Binding& binding = texture_buffers[stage][index]; + binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); + // Mark buffer as written if needed + if (((written_texture_buffers[stage] >> index) & 1) != 0) { + MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size); + } + }); +} + +template <class P> void BufferCache<P>::UpdateTransformFeedbackBuffers() { if (maxwell3d.regs.tfb_enabled == 0) { return; @@ -1193,7 +1367,7 @@ void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) { template <class P> void BufferCache<P>::UpdateComputeUniformBuffers() { - ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) { + ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) { Binding& binding = compute_uniform_buffers[index]; binding = NULL_BINDING; const auto& launch_desc = kepler_compute.launch_description; @@ -1214,11 +1388,22 @@ void BufferCache<P>::UpdateComputeStorageBuffers() { ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { // Resolve buffer Binding& binding = compute_storage_buffers[index]; - const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size); - binding.buffer_id = buffer_id; + binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); // Mark as written if needed if (((written_compute_storage_buffers >> index) & 1) != 0) { - MarkWrittenBuffer(buffer_id, binding.cpu_addr, binding.size); + MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size); + } + }); +} + +template <class P> +void BufferCache<P>::UpdateComputeTextureBuffers() { + ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) { + Binding& binding = compute_texture_buffers[index]; + binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); + // Mark as written if needed + if 
(((written_compute_texture_buffers >> index) & 1) != 0) { + MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size); } }); } @@ -1551,6 +1736,7 @@ template <class P> void BufferCache<P>::NotifyBufferDeletion() { if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { dirty_uniform_buffers.fill(~u32{0}); + uniform_buffer_binding_sizes.fill({}); } auto& flags = maxwell3d.dirty.flags; flags[Dirty::IndexBuffer] = true; @@ -1578,6 +1764,25 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s } template <class P> +typename BufferCache<P>::TextureBufferBinding BufferCache<P>::GetTextureBufferBinding( + GPUVAddr gpu_addr, u32 size, PixelFormat format) { + const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + TextureBufferBinding binding; + if (!cpu_addr || size == 0) { + binding.cpu_addr = 0; + binding.size = 0; + binding.buffer_id = NULL_BUFFER_ID; + binding.format = PixelFormat::Invalid; + } else { + binding.cpu_addr = *cpu_addr; + binding.size = size; + binding.buffer_id = BufferId{}; + binding.format = format; + } + return binding; +} + +template <class P> std::span<const u8> BufferCache<P>::ImmediateBufferWithData(VAddr cpu_addr, size_t size) { u8* const base_pointer = cpu_memory.GetPointer(cpu_addr); if (IsRangeGranular(cpu_addr, size) || diff --git a/src/video_core/dirty_flags.cpp b/src/video_core/dirty_flags.cpp index 7149af290..b1be065c3 100644 --- a/src/video_core/dirty_flags.cpp +++ b/src/video_core/dirty_flags.cpp @@ -58,6 +58,11 @@ void SetupDirtyRenderTargets(Maxwell3D::DirtyState::Tables& tables) { FillBlock(table, OFF(zeta), NUM(zeta), flag); } } + +void SetupDirtyShaders(Maxwell3D::DirtyState::Tables& tables) { + FillBlock(tables[0], OFF(shader_config[0]), + NUM(shader_config[0]) * Maxwell3D::Regs::MaxShaderProgram, Shaders); +} } // Anonymous namespace void SetupDirtyFlags(Maxwell3D::DirtyState::Tables& tables) { @@ -65,6 +70,7 @@ void SetupDirtyFlags(Maxwell3D::DirtyState::Tables& tables) { SetupIndexBuffer(tables); SetupDirtyDescriptors(tables); SetupDirtyRenderTargets(tables); + SetupDirtyShaders(tables); } } // namespace VideoCommon::Dirty diff --git a/src/video_core/dirty_flags.h b/src/video_core/dirty_flags.h index 702688ace..504465d3f 100644 --- a/src/video_core/dirty_flags.h +++ b/src/video_core/dirty_flags.h @@ -36,6 +36,8 @@ enum : u8 { IndexBuffer, + Shaders, + LastCommonEntry, }; diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h deleted file mode 100644 index f46e81bb7..000000000 --- a/src/video_core/engines/const_buffer_engine_interface.h +++ /dev/null @@ -1,103 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include <type_traits> -#include "common/bit_field.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/engines/shader_type.h" -#include "video_core/guest_driver.h" -#include "video_core/textures/texture.h" - -namespace Tegra::Engines { - -struct SamplerDescriptor { - union { - u32 raw = 0; - BitField<0, 2, Tegra::Shader::TextureType> texture_type; - BitField<2, 3, Tegra::Texture::ComponentType> r_type; - BitField<5, 1, u32> is_array; - BitField<6, 1, u32> is_buffer; - BitField<7, 1, u32> is_shadow; - BitField<8, 3, Tegra::Texture::ComponentType> g_type; - BitField<11, 3, Tegra::Texture::ComponentType> b_type; - BitField<14, 3, Tegra::Texture::ComponentType> a_type; - BitField<17, 7, Tegra::Texture::TextureFormat> format; - }; - - bool operator==(const SamplerDescriptor& rhs) const noexcept { - return raw == rhs.raw; - } - - bool operator!=(const SamplerDescriptor& rhs) const noexcept { - return !operator==(rhs); - } - - static SamplerDescriptor FromTIC(const Tegra::Texture::TICEntry& tic) { - using Tegra::Shader::TextureType; - SamplerDescriptor result; - - result.format.Assign(tic.format.Value()); - result.r_type.Assign(tic.r_type.Value()); - result.g_type.Assign(tic.g_type.Value()); - result.b_type.Assign(tic.b_type.Value()); - result.a_type.Assign(tic.a_type.Value()); - - switch (tic.texture_type.Value()) { - case Tegra::Texture::TextureType::Texture1D: - result.texture_type.Assign(TextureType::Texture1D); - return result; - case Tegra::Texture::TextureType::Texture2D: - result.texture_type.Assign(TextureType::Texture2D); - return result; - case Tegra::Texture::TextureType::Texture3D: - result.texture_type.Assign(TextureType::Texture3D); - return result; - case Tegra::Texture::TextureType::TextureCubemap: - result.texture_type.Assign(TextureType::TextureCube); - return result; - case Tegra::Texture::TextureType::Texture1DArray: - result.texture_type.Assign(TextureType::Texture1D); - result.is_array.Assign(1); - return result; - case Tegra::Texture::TextureType::Texture2DArray: - result.texture_type.Assign(TextureType::Texture2D); - result.is_array.Assign(1); - return result; - case Tegra::Texture::TextureType::Texture1DBuffer: - result.texture_type.Assign(TextureType::Texture1D); - result.is_buffer.Assign(1); - return result; - case Tegra::Texture::TextureType::Texture2DNoMipmap: - result.texture_type.Assign(TextureType::Texture2D); - return result; - case Tegra::Texture::TextureType::TextureCubeArray: - result.texture_type.Assign(TextureType::TextureCube); - result.is_array.Assign(1); - return result; - default: - result.texture_type.Assign(TextureType::Texture2D); - return result; - } - } -}; -static_assert(std::is_trivially_copyable_v<SamplerDescriptor>); - -class ConstBufferEngineInterface { -public: - virtual ~ConstBufferEngineInterface() = default; - virtual u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const = 0; - virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0; - virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, - u64 offset) const = 0; - virtual SamplerDescriptor AccessSampler(u32 handle) const = 0; - virtual u32 GetBoundBuffer() const = 0; - - virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0; - virtual const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const = 0; -}; - -} // namespace Tegra::Engines diff --git a/src/video_core/engines/kepler_compute.cpp 
b/src/video_core/engines/kepler_compute.cpp index a9b75091e..492b4c5a3 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -8,7 +8,6 @@ #include "core/core.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_base.h" @@ -57,53 +56,11 @@ void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amoun } } -u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const { - ASSERT(stage == ShaderType::Compute); - const auto& buffer = launch_description.const_buffer_config[const_buffer]; - u32 result; - std::memcpy(&result, memory_manager.GetPointer(buffer.Address() + offset), sizeof(u32)); - return result; -} - -SamplerDescriptor KeplerCompute::AccessBoundSampler(ShaderType stage, u64 offset) const { - return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle)); -} - -SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 const_buffer, - u64 offset) const { - ASSERT(stage == ShaderType::Compute); - const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer]; - const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset; - return AccessSampler(memory_manager.Read<u32>(tex_info_address)); -} - -SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const { - const Texture::TextureHandle tex_handle{handle}; - const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id); - const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id); - - SamplerDescriptor result = SamplerDescriptor::FromTIC(tic); - result.is_shadow.Assign(tsc.depth_compare_enabled.Value()); - return result; -} - -VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() { - return rasterizer->AccessGuestDriverProfile(); -} - -const VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() const { - return rasterizer->AccessGuestDriverProfile(); -} - void KeplerCompute::ProcessLaunch() { const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32)); - - const GPUVAddr code_addr = regs.code_loc.Address() + launch_description.program_start; - LOG_TRACE(HW_GPU, "Compute invocation launched at address 0x{:016x}", code_addr); - - rasterizer->DispatchCompute(code_addr); + rasterizer->DispatchCompute(); } Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const { diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 7c40cba38..f8b8d06ac 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -10,10 +10,8 @@ #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" -#include "video_core/engines/const_buffer_engine_interface.h" #include "video_core/engines/engine_interface.h" #include "video_core/engines/engine_upload.h" -#include "video_core/engines/shader_type.h" #include "video_core/gpu.h" #include "video_core/textures/texture.h" @@ -40,7 +38,7 @@ namespace Tegra::Engines { #define KEPLER_COMPUTE_REG_INDEX(field_name) \ (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) -class KeplerCompute final : public ConstBufferEngineInterface, public 
EngineInterface { +class KeplerCompute final : public EngineInterface { public: explicit KeplerCompute(Core::System& system, MemoryManager& memory_manager); ~KeplerCompute(); @@ -209,23 +207,6 @@ public: void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) override; - u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; - - SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; - - SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, - u64 offset) const override; - - SamplerDescriptor AccessSampler(u32 handle) const override; - - u32 GetBoundBuffer() const override { - return regs.tex_cb_index; - } - - VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override; - - const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override; - private: void ProcessLaunch(); diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index aab6b8f7a..b18b8a02a 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -8,7 +8,6 @@ #include "core/core.h" #include "core/core_timing.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" #include "video_core/gpu.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" @@ -670,42 +669,4 @@ void Maxwell3D::ProcessClearBuffers() { rasterizer->Clear(); } -u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const { - ASSERT(stage != ShaderType::Compute); - const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)]; - const auto& buffer = shader_stage.const_buffers[const_buffer]; - return memory_manager.Read<u32>(buffer.address + offset); -} - -SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const { - return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle)); -} - -SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_buffer, - u64 offset) const { - ASSERT(stage != ShaderType::Compute); - const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)]; - const auto& tex_info_buffer = shader.const_buffers[const_buffer]; - const GPUVAddr tex_info_address = tex_info_buffer.address + offset; - return AccessSampler(memory_manager.Read<u32>(tex_info_address)); -} - -SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const { - const Texture::TextureHandle tex_handle{handle}; - const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id); - const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id); - - SamplerDescriptor result = SamplerDescriptor::FromTIC(tic); - result.is_shadow.Assign(tsc.depth_compare_enabled.Value()); - return result; -} - -VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() { - return rasterizer->AccessGuestDriverProfile(); -} - -const VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() const { - return rasterizer->AccessGuestDriverProfile(); -} - } // namespace Tegra::Engines diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 335383955..1aa43523a 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -17,11 +17,9 @@ #include "common/common_funcs.h" #include "common/common_types.h" #include "common/math_util.h" -#include "video_core/engines/const_buffer_engine_interface.h" #include 
"video_core/engines/const_buffer_info.h" #include "video_core/engines/engine_interface.h" #include "video_core/engines/engine_upload.h" -#include "video_core/engines/shader_type.h" #include "video_core/gpu.h" #include "video_core/macro/macro.h" #include "video_core/textures/texture.h" @@ -49,7 +47,7 @@ namespace Tegra::Engines { #define MAXWELL3D_REG_INDEX(field_name) \ (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32)) -class Maxwell3D final : public ConstBufferEngineInterface, public EngineInterface { +class Maxwell3D final : public EngineInterface { public: explicit Maxwell3D(Core::System& system, MemoryManager& memory_manager); ~Maxwell3D(); @@ -307,10 +305,6 @@ public: return (type == Type::SignedNorm) || (type == Type::UnsignedNorm); } - bool IsConstant() const { - return constant; - } - bool IsValid() const { return size != Size::Invalid; } @@ -912,7 +906,11 @@ public: u32 fill_rectangle; - INSERT_PADDING_WORDS_NOINIT(0x8); + INSERT_PADDING_WORDS_NOINIT(0x2); + + u32 conservative_raster_enable; + + INSERT_PADDING_WORDS_NOINIT(0x5); std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format; @@ -959,7 +957,11 @@ public: SamplerIndex sampler_index; - INSERT_PADDING_WORDS_NOINIT(0x25); + INSERT_PADDING_WORDS_NOINIT(0x2); + + std::array<u32, 8> gp_passthrough_mask; + + INSERT_PADDING_WORDS_NOINIT(0x1B); u32 depth_test_enable; @@ -1152,7 +1154,11 @@ public: u32 index; } primitive_restart; - INSERT_PADDING_WORDS_NOINIT(0x5F); + INSERT_PADDING_WORDS_NOINIT(0xE); + + u32 provoking_vertex_last; + + INSERT_PADDING_WORDS_NOINIT(0x50); struct { u32 start_addr_high; @@ -1424,23 +1430,6 @@ public: void FlushMMEInlineDraw(); - u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; - - SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; - - SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, - u64 offset) const override; - - SamplerDescriptor AccessSampler(u32 handle) const override; - - u32 GetBoundBuffer() const override { - return regs.tex_cb_index; - } - - VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override; - - const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override; - bool ShouldExecute() const { return execute_on; } @@ -1630,6 +1619,7 @@ ASSERT_REG_POSITION(zeta, 0x3F8); ASSERT_REG_POSITION(render_area, 0x3FD); ASSERT_REG_POSITION(clear_flags, 0x43E); ASSERT_REG_POSITION(fill_rectangle, 0x44F); +ASSERT_REG_POSITION(conservative_raster_enable, 0x452); ASSERT_REG_POSITION(vertex_attrib_format, 0x458); ASSERT_REG_POSITION(multisample_sample_locations, 0x478); ASSERT_REG_POSITION(multisample_coverage_to_color, 0x47E); @@ -1638,6 +1628,7 @@ ASSERT_REG_POSITION(zeta_width, 0x48a); ASSERT_REG_POSITION(zeta_height, 0x48b); ASSERT_REG_POSITION(zeta_depth, 0x48c); ASSERT_REG_POSITION(sampler_index, 0x48D); +ASSERT_REG_POSITION(gp_passthrough_mask, 0x490); ASSERT_REG_POSITION(depth_test_enable, 0x4B3); ASSERT_REG_POSITION(independent_blend_enable, 0x4B9); ASSERT_REG_POSITION(depth_write_enabled, 0x4BA); @@ -1690,6 +1681,7 @@ ASSERT_REG_POSITION(point_coord_replace, 0x581); ASSERT_REG_POSITION(code_address, 0x582); ASSERT_REG_POSITION(draw, 0x585); ASSERT_REG_POSITION(primitive_restart, 0x591); +ASSERT_REG_POSITION(provoking_vertex_last, 0x5A1); ASSERT_REG_POSITION(index_array, 0x5F2); ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F); ASSERT_REG_POSITION(instanced_arrays, 0x620); diff --git a/src/video_core/engines/maxwell_dma.cpp 
b/src/video_core/engines/maxwell_dma.cpp index c51776466..c7ec1eac9 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -127,7 +127,8 @@ void MaxwellDMA::CopyBlockLinearToPitch() { // Optimized path for micro copies. const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; - if (dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X) { + if (dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X && + regs.src_params.height > GOB_SIZE_Y) { FastCopyBlockLinearToPitch(); return; } diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h deleted file mode 100644 index 8b45f1b62..000000000 --- a/src/video_core/engines/shader_bytecode.h +++ /dev/null @@ -1,2298 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <array> -#include <bitset> -#include <optional> -#include <tuple> -#include <vector> - -#include "common/assert.h" -#include "common/bit_field.h" -#include "common/common_types.h" - -namespace Tegra::Shader { - -struct Register { - /// Number of registers - static constexpr std::size_t NumRegisters = 256; - - /// Register 255 is special cased to always be 0 - static constexpr std::size_t ZeroIndex = 255; - - enum class Size : u64 { - Byte = 0, - Short = 1, - Word = 2, - Long = 3, - }; - - constexpr Register() = default; - - constexpr Register(u64 value_) : value(value_) {} - - [[nodiscard]] constexpr operator u64() const { - return value; - } - - template <typename T> - [[nodiscard]] constexpr u64 operator-(const T& oth) const { - return value - oth; - } - - template <typename T> - [[nodiscard]] constexpr u64 operator&(const T& oth) const { - return value & oth; - } - - [[nodiscard]] constexpr u64 operator&(const Register& oth) const { - return value & oth.value; - } - - [[nodiscard]] constexpr u64 operator~() const { - return ~value; - } - - [[nodiscard]] u64 GetSwizzledIndex(u64 elem) const { - elem = (value + elem) & 3; - return (value & ~3) + elem; - } - -private: - u64 value{}; -}; - -enum class AttributeSize : u64 { - Word = 0, - DoubleWord = 1, - TripleWord = 2, - QuadWord = 3, -}; - -union Attribute { - Attribute() = default; - - constexpr explicit Attribute(u64 value_) : value(value_) {} - - enum class Index : u64 { - LayerViewportPointSize = 6, - Position = 7, - Attribute_0 = 8, - Attribute_31 = 39, - FrontColor = 40, - FrontSecondaryColor = 41, - BackColor = 42, - BackSecondaryColor = 43, - ClipDistances0123 = 44, - ClipDistances4567 = 45, - PointCoord = 46, - // This attribute contains a tuple of (~, ~, InstanceId, VertexId) when inside a vertex - // shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval - // shader. - TessCoordInstanceIDVertexID = 47, - TexCoord_0 = 48, - TexCoord_7 = 55, - // This attribute contains a tuple of (Unk, Unk, Unk, gl_FrontFacing) when inside a fragment - // shader. It is unknown what the other values contain. 
- FrontFacing = 63, - }; - - union { - BitField<20, 10, u64> immediate; - BitField<22, 2, u64> element; - BitField<24, 6, Index> index; - BitField<31, 1, u64> patch; - BitField<47, 3, AttributeSize> size; - - [[nodiscard]] bool IsPhysical() const { - return patch == 0 && element == 0 && static_cast<u64>(index.Value()) == 0; - } - } fmt20; - - union { - BitField<30, 2, u64> element; - BitField<32, 6, Index> index; - } fmt28; - - BitField<39, 8, u64> reg; - u64 value{}; -}; - -union Sampler { - Sampler() = default; - - constexpr explicit Sampler(u64 value_) : value(value_) {} - - enum class Index : u64 { - Sampler_0 = 8, - }; - - BitField<36, 13, Index> index; - u64 value{}; -}; - -union Image { - Image() = default; - - constexpr explicit Image(u64 value_) : value{value_} {} - - BitField<36, 13, u64> index; - u64 value; -}; - -} // namespace Tegra::Shader - -namespace std { - -// TODO(bunnei): The below is forbidden by the C++ standard, but works fine. See #330. -template <> -struct make_unsigned<Tegra::Shader::Attribute> { - using type = Tegra::Shader::Attribute; -}; - -template <> -struct make_unsigned<Tegra::Shader::Register> { - using type = Tegra::Shader::Register; -}; - -} // namespace std - -namespace Tegra::Shader { - -enum class Pred : u64 { - UnusedIndex = 0x7, - NeverExecute = 0xF, -}; - -enum class PredCondition : u64 { - F = 0, // Always false - LT = 1, // Ordered less than - EQ = 2, // Ordered equal - LE = 3, // Ordered less than or equal - GT = 4, // Ordered greater than - NE = 5, // Ordered not equal - GE = 6, // Ordered greater than or equal - NUM = 7, // Ordered - NAN_ = 8, // Unordered - LTU = 9, // Unordered less than - EQU = 10, // Unordered equal - LEU = 11, // Unordered less than or equal - GTU = 12, // Unordered greater than - NEU = 13, // Unordered not equal - GEU = 14, // Unordered greater than or equal - T = 15, // Always true -}; - -enum class PredOperation : u64 { - And = 0, - Or = 1, - Xor = 2, -}; - -enum class LogicOperation : u64 { - And = 0, - Or = 1, - Xor = 2, - PassB = 3, -}; - -enum class SubOp : u64 { - Cos = 0x0, - Sin = 0x1, - Ex2 = 0x2, - Lg2 = 0x3, - Rcp = 0x4, - Rsq = 0x5, - Sqrt = 0x8, -}; - -enum class F2iRoundingOp : u64 { - RoundEven = 0, - Floor = 1, - Ceil = 2, - Trunc = 3, -}; - -enum class F2fRoundingOp : u64 { - None = 0, - Pass = 3, - Round = 8, - Floor = 9, - Ceil = 10, - Trunc = 11, -}; - -enum class AtomicOp : u64 { - Add = 0, - Min = 1, - Max = 2, - Inc = 3, - Dec = 4, - And = 5, - Or = 6, - Xor = 7, - Exch = 8, - SafeAdd = 10, -}; - -enum class GlobalAtomicType : u64 { - U32 = 0, - S32 = 1, - U64 = 2, - F32_FTZ_RN = 3, - F16x2_FTZ_RN = 4, - S64 = 5, -}; - -enum class UniformType : u64 { - UnsignedByte = 0, - SignedByte = 1, - UnsignedShort = 2, - SignedShort = 3, - Single = 4, - Double = 5, - Quad = 6, - UnsignedQuad = 7, -}; - -enum class StoreType : u64 { - Unsigned8 = 0, - Signed8 = 1, - Unsigned16 = 2, - Signed16 = 3, - Bits32 = 4, - Bits64 = 5, - Bits128 = 6, -}; - -enum class AtomicType : u64 { - U32 = 0, - S32 = 1, - U64 = 2, - S64 = 3, -}; - -enum class IMinMaxExchange : u64 { - None = 0, - XLo = 1, - XMed = 2, - XHi = 3, -}; - -enum class VideoType : u64 { - Size16_Low = 0, - Size16_High = 1, - Size32 = 2, - Invalid = 3, -}; - -enum class VmadShr : u64 { - Shr7 = 1, - Shr15 = 2, -}; - -enum class VmnmxType : u64 { - Bits8, - Bits16, - Bits32, -}; - -enum class VmnmxOperation : u64 { - Mrg_16H = 0, - Mrg_16L = 1, - Mrg_8B0 = 2, - Mrg_8B2 = 3, - Acc = 4, - Min = 5, - Max = 6, - Nop = 7, -}; - -enum class XmadMode : u64 { - 
None = 0, - CLo = 1, - CHi = 2, - CSfu = 3, - CBcc = 4, -}; - -enum class IAdd3Mode : u64 { - None = 0, - RightShift = 1, - LeftShift = 2, -}; - -enum class IAdd3Height : u64 { - None = 0, - LowerHalfWord = 1, - UpperHalfWord = 2, -}; - -enum class FlowCondition : u64 { - Always = 0xF, - Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for? -}; - -enum class ConditionCode : u64 { - F = 0, - LT = 1, - EQ = 2, - LE = 3, - GT = 4, - NE = 5, - GE = 6, - Num = 7, - Nan = 8, - LTU = 9, - EQU = 10, - LEU = 11, - GTU = 12, - NEU = 13, - GEU = 14, - T = 15, - OFF = 16, - LO = 17, - SFF = 18, - LS = 19, - HI = 20, - SFT = 21, - HS = 22, - OFT = 23, - CSM_TA = 24, - CSM_TR = 25, - CSM_MX = 26, - FCSM_TA = 27, - FCSM_TR = 28, - FCSM_MX = 29, - RLE = 30, - RGT = 31, -}; - -enum class PredicateResultMode : u64 { - None = 0x0, - NotZero = 0x3, -}; - -enum class TextureType : u64 { - Texture1D = 0, - Texture2D = 1, - Texture3D = 2, - TextureCube = 3, -}; - -enum class TextureQueryType : u64 { - Dimension = 1, - TextureType = 2, - SamplePosition = 5, - Filter = 16, - LevelOfDetail = 18, - Wrap = 20, - BorderColor = 22, -}; - -enum class TextureProcessMode : u64 { - None = 0, - LZ = 1, // Load LOD of zero. - LB = 2, // Load Bias. - LL = 3, // Load LOD. - LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB. - LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL. -}; - -enum class TextureMiscMode : u64 { - DC, - AOFFI, // Uses Offset - NDV, - NODEP, - MZ, - PTP, -}; - -enum class SurfaceDataMode : u64 { - P = 0, - D_BA = 1, -}; - -enum class OutOfBoundsStore : u64 { - Ignore = 0, - Clamp = 1, - Trap = 2, -}; - -enum class ImageType : u64 { - Texture1D = 0, - TextureBuffer = 1, - Texture1DArray = 2, - Texture2D = 3, - Texture2DArray = 4, - Texture3D = 5, -}; - -enum class IsberdMode : u64 { - None = 0, - Patch = 1, - Prim = 2, - Attr = 3, -}; - -enum class IsberdShift : u64 { None = 0, U16 = 1, B32 = 2 }; - -enum class MembarType : u64 { - CTA = 0, - GL = 1, - SYS = 2, - VC = 3, -}; - -enum class MembarUnknown : u64 { Default = 0, IVALLD = 1, IVALLT = 2, IVALLTD = 3 }; - -enum class HalfType : u64 { - H0_H1 = 0, - F32 = 1, - H0_H0 = 2, - H1_H1 = 3, -}; - -enum class HalfMerge : u64 { - H0_H1 = 0, - F32 = 1, - Mrg_H0 = 2, - Mrg_H1 = 3, -}; - -enum class HalfPrecision : u64 { - None = 0, - FTZ = 1, - FMZ = 2, -}; - -enum class R2pMode : u64 { - Pr = 0, - Cc = 1, -}; - -enum class IpaInterpMode : u64 { - Pass = 0, - Multiply = 1, - Constant = 2, - Sc = 3, -}; - -enum class IpaSampleMode : u64 { - Default = 0, - Centroid = 1, - Offset = 2, -}; - -enum class LmemLoadCacheManagement : u64 { - Default = 0, - LU = 1, - CI = 2, - CV = 3, -}; - -enum class StoreCacheManagement : u64 { - Default = 0, - CG = 1, - CS = 2, - WT = 3, -}; - -struct IpaMode { - IpaInterpMode interpolation_mode; - IpaSampleMode sampling_mode; - - [[nodiscard]] bool operator==(const IpaMode& a) const { - return std::tie(interpolation_mode, sampling_mode) == - std::tie(a.interpolation_mode, a.sampling_mode); - } - [[nodiscard]] bool operator!=(const IpaMode& a) const { - return !operator==(a); - } - [[nodiscard]] bool operator<(const IpaMode& a) const { - return std::tie(interpolation_mode, sampling_mode) < - std::tie(a.interpolation_mode, a.sampling_mode); - } -}; - -enum class SystemVariable : u64 { - LaneId = 0x00, - VirtCfg = 0x02, - VirtId = 0x03, - Pm0 = 0x04, - Pm1 = 0x05, - Pm2 = 0x06, - Pm3 = 0x07, - Pm4 = 0x08, - Pm5 = 0x09, - Pm6 = 0x0a, - Pm7 = 0x0b, - OrderingTicket = 0x0f, - PrimType = 
0x10, - InvocationId = 0x11, - Ydirection = 0x12, - ThreadKill = 0x13, - ShaderType = 0x14, - DirectBeWriteAddressLow = 0x15, - DirectBeWriteAddressHigh = 0x16, - DirectBeWriteEnabled = 0x17, - MachineId0 = 0x18, - MachineId1 = 0x19, - MachineId2 = 0x1a, - MachineId3 = 0x1b, - Affinity = 0x1c, - InvocationInfo = 0x1d, - WscaleFactorXY = 0x1e, - WscaleFactorZ = 0x1f, - Tid = 0x20, - TidX = 0x21, - TidY = 0x22, - TidZ = 0x23, - CtaParam = 0x24, - CtaIdX = 0x25, - CtaIdY = 0x26, - CtaIdZ = 0x27, - NtId = 0x28, - CirQueueIncrMinusOne = 0x29, - Nlatc = 0x2a, - SmSpaVersion = 0x2c, - MultiPassShaderInfo = 0x2d, - LwinHi = 0x2e, - SwinHi = 0x2f, - SwinLo = 0x30, - SwinSz = 0x31, - SmemSz = 0x32, - SmemBanks = 0x33, - LwinLo = 0x34, - LwinSz = 0x35, - LmemLosz = 0x36, - LmemHioff = 0x37, - EqMask = 0x38, - LtMask = 0x39, - LeMask = 0x3a, - GtMask = 0x3b, - GeMask = 0x3c, - RegAlloc = 0x3d, - CtxAddr = 0x3e, // .fmask = F_SM50 - BarrierAlloc = 0x3e, // .fmask = F_SM60 - GlobalErrorStatus = 0x40, - WarpErrorStatus = 0x42, - WarpErrorStatusClear = 0x43, - PmHi0 = 0x48, - PmHi1 = 0x49, - PmHi2 = 0x4a, - PmHi3 = 0x4b, - PmHi4 = 0x4c, - PmHi5 = 0x4d, - PmHi6 = 0x4e, - PmHi7 = 0x4f, - ClockLo = 0x50, - ClockHi = 0x51, - GlobalTimerLo = 0x52, - GlobalTimerHi = 0x53, - HwTaskId = 0x60, - CircularQueueEntryIndex = 0x61, - CircularQueueEntryAddressLow = 0x62, - CircularQueueEntryAddressHigh = 0x63, -}; - -enum class PhysicalAttributeDirection : u64 { - Input = 0, - Output = 1, -}; - -enum class VoteOperation : u64 { - All = 0, // allThreadsNV - Any = 1, // anyThreadNV - Eq = 2, // allThreadsEqualNV -}; - -enum class ImageAtomicOperationType : u64 { - U32 = 0, - S32 = 1, - U64 = 2, - F32 = 3, - S64 = 5, - SD32 = 6, - SD64 = 7, -}; - -enum class ImageAtomicOperation : u64 { - Add = 0, - Min = 1, - Max = 2, - Inc = 3, - Dec = 4, - And = 5, - Or = 6, - Xor = 7, - Exch = 8, -}; - -enum class ShuffleOperation : u64 { - Idx = 0, // shuffleNV - Up = 1, // shuffleUpNV - Down = 2, // shuffleDownNV - Bfly = 3, // shuffleXorNV -}; - -enum class ShfType : u64 { - Bits32 = 0, - U64 = 2, - S64 = 3, -}; - -enum class ShfXmode : u64 { - None = 0, - HI = 1, - X = 2, - XHI = 3, -}; - -union Instruction { - constexpr Instruction& operator=(const Instruction& instr) { - value = instr.value; - return *this; - } - - constexpr Instruction(u64 value_) : value{value_} {} - constexpr Instruction(const Instruction& instr) : value(instr.value) {} - - [[nodiscard]] constexpr bool Bit(u64 offset) const { - return ((value >> offset) & 1) != 0; - } - - BitField<0, 8, Register> gpr0; - BitField<8, 8, Register> gpr8; - union { - BitField<16, 4, Pred> full_pred; - BitField<16, 3, u64> pred_index; - } pred; - BitField<19, 1, u64> negate_pred; - BitField<20, 8, Register> gpr20; - BitField<20, 4, SubOp> sub_op; - BitField<28, 8, Register> gpr28; - BitField<39, 8, Register> gpr39; - BitField<48, 16, u64> opcode; - - union { - BitField<8, 5, ConditionCode> cc; - BitField<13, 1, u64> trigger; - } nop; - - union { - BitField<48, 2, VoteOperation> operation; - BitField<45, 3, u64> dest_pred; - BitField<39, 3, u64> value; - BitField<42, 1, u64> negate_value; - } vote; - - union { - BitField<30, 2, ShuffleOperation> operation; - BitField<48, 3, u64> pred48; - BitField<28, 1, u64> is_index_imm; - BitField<29, 1, u64> is_mask_imm; - BitField<20, 5, u64> index_imm; - BitField<34, 13, u64> mask_imm; - } shfl; - - union { - BitField<44, 1, u64> ftz; - BitField<39, 2, u64> tab5cb8_2; - BitField<38, 1, u64> ndv; - BitField<47, 1, u64> cc; - BitField<28, 8, 
u64> swizzle; - } fswzadd; - - union { - BitField<8, 8, Register> gpr; - BitField<20, 24, s64> offset; - } gmem; - - union { - BitField<20, 16, u64> imm20_16; - BitField<20, 19, u64> imm20_19; - BitField<20, 32, s64> imm20_32; - BitField<45, 1, u64> negate_b; - BitField<46, 1, u64> abs_a; - BitField<48, 1, u64> negate_a; - BitField<49, 1, u64> abs_b; - BitField<50, 1, u64> saturate_d; - BitField<56, 1, u64> negate_imm; - - union { - BitField<39, 3, u64> pred; - BitField<42, 1, u64> negate_pred; - } fmnmx; - - union { - BitField<39, 1, u64> invert_a; - BitField<40, 1, u64> invert_b; - BitField<41, 2, LogicOperation> operation; - BitField<44, 2, PredicateResultMode> pred_result_mode; - BitField<48, 3, Pred> pred48; - } lop; - - union { - BitField<53, 2, LogicOperation> operation; - BitField<55, 1, u64> invert_a; - BitField<56, 1, u64> invert_b; - } lop32i; - - union { - BitField<28, 8, u64> imm_lut28; - BitField<48, 8, u64> imm_lut48; - - [[nodiscard]] u32 GetImmLut28() const { - return static_cast<u32>(imm_lut28); - } - - [[nodiscard]] u32 GetImmLut48() const { - return static_cast<u32>(imm_lut48); - } - } lop3; - - [[nodiscard]] u16 GetImm20_16() const { - return static_cast<u16>(imm20_16); - } - - [[nodiscard]] u32 GetImm20_19() const { - u32 imm{static_cast<u32>(imm20_19)}; - imm <<= 12; - imm |= negate_imm ? 0x80000000 : 0; - return imm; - } - - [[nodiscard]] u32 GetImm20_32() const { - return static_cast<u32>(imm20_32); - } - - [[nodiscard]] s32 GetSignedImm20_20() const { - const auto immediate = static_cast<u32>(imm20_19 | (negate_imm << 19)); - // Sign extend the 20-bit value. - const auto mask = 1U << (20 - 1); - return static_cast<s32>((immediate ^ mask) - mask); - } - } alu; - - union { - BitField<38, 1, u64> idx; - BitField<51, 1, u64> saturate; - BitField<52, 2, IpaSampleMode> sample_mode; - BitField<54, 2, IpaInterpMode> interp_mode; - } ipa; - - union { - BitField<39, 2, u64> tab5cb8_2; - BitField<41, 3, u64> postfactor; - BitField<44, 2, u64> tab5c68_0; - BitField<48, 1, u64> negate_b; - } fmul; - - union { - BitField<55, 1, u64> saturate; - } fmul32; - - union { - BitField<52, 1, u64> generates_cc; - } op_32; - - union { - BitField<48, 1, u64> is_signed; - } shift; - - union { - BitField<39, 1, u64> wrap; - } shr; - - union { - BitField<37, 2, ShfType> type; - BitField<48, 2, ShfXmode> xmode; - BitField<50, 1, u64> wrap; - BitField<20, 6, u64> immediate; - } shf; - - union { - BitField<39, 5, u64> shift_amount; - BitField<48, 1, u64> negate_b; - BitField<49, 1, u64> negate_a; - } alu_integer; - - union { - BitField<43, 1, u64> x; - } iadd; - - union { - BitField<39, 1, u64> ftz; - BitField<32, 1, u64> saturate; - BitField<49, 2, HalfMerge> merge; - - BitField<44, 1, u64> abs_a; - BitField<47, 2, HalfType> type_a; - - BitField<30, 1, u64> abs_b; - BitField<28, 2, HalfType> type_b; - - BitField<35, 2, HalfType> type_c; - } alu_half; - - union { - BitField<39, 2, HalfPrecision> precision; - BitField<39, 1, u64> ftz; - BitField<52, 1, u64> saturate; - BitField<49, 2, HalfMerge> merge; - - BitField<43, 1, u64> negate_a; - BitField<44, 1, u64> abs_a; - BitField<47, 2, HalfType> type_a; - } alu_half_imm; - - union { - BitField<29, 1, u64> first_negate; - BitField<20, 9, u64> first; - - BitField<56, 1, u64> second_negate; - BitField<30, 9, u64> second; - - [[nodiscard]] u32 PackImmediates() const { - // Immediates are half floats shifted. 
- constexpr u32 imm_shift = 6; - return static_cast<u32>((first << imm_shift) | (second << (16 + imm_shift))); - } - } half_imm; - - union { - union { - BitField<37, 2, HalfPrecision> precision; - BitField<32, 1, u64> saturate; - - BitField<31, 1, u64> negate_b; - BitField<30, 1, u64> negate_c; - BitField<35, 2, HalfType> type_c; - } rr; - - BitField<57, 2, HalfPrecision> precision; - BitField<52, 1, u64> saturate; - - BitField<49, 2, HalfMerge> merge; - - BitField<47, 2, HalfType> type_a; - - BitField<56, 1, u64> negate_b; - BitField<28, 2, HalfType> type_b; - - BitField<51, 1, u64> negate_c; - BitField<53, 2, HalfType> type_reg39; - } hfma2; - - union { - BitField<40, 1, u64> invert; - } popc; - - union { - BitField<41, 1, u64> sh; - BitField<40, 1, u64> invert; - BitField<48, 1, u64> is_signed; - } flo; - - union { - BitField<39, 3, u64> pred; - BitField<42, 1, u64> neg_pred; - } sel; - - union { - BitField<39, 3, u64> pred; - BitField<42, 1, u64> negate_pred; - BitField<43, 2, IMinMaxExchange> exchange; - BitField<48, 1, u64> is_signed; - } imnmx; - - union { - BitField<31, 2, IAdd3Height> height_c; - BitField<33, 2, IAdd3Height> height_b; - BitField<35, 2, IAdd3Height> height_a; - BitField<37, 2, IAdd3Mode> mode; - BitField<49, 1, u64> neg_c; - BitField<50, 1, u64> neg_b; - BitField<51, 1, u64> neg_a; - } iadd3; - - union { - BitField<54, 1, u64> saturate; - BitField<56, 1, u64> negate_a; - } iadd32i; - - union { - BitField<53, 1, u64> negate_b; - BitField<54, 1, u64> abs_a; - BitField<56, 1, u64> negate_a; - BitField<57, 1, u64> abs_b; - } fadd32i; - - union { - BitField<40, 1, u64> brev; - BitField<47, 1, u64> rd_cc; - BitField<48, 1, u64> is_signed; - } bfe; - - union { - BitField<48, 3, u64> pred48; - - union { - BitField<20, 20, u64> entry_a; - BitField<39, 5, u64> entry_b; - BitField<45, 1, u64> neg; - BitField<46, 1, u64> uses_cc; - } imm; - - union { - BitField<20, 14, u64> cb_index; - BitField<34, 5, u64> cb_offset; - BitField<56, 1, u64> neg; - BitField<57, 1, u64> uses_cc; - } hi; - - union { - BitField<20, 14, u64> cb_index; - BitField<34, 5, u64> cb_offset; - BitField<39, 5, u64> entry_a; - BitField<45, 1, u64> neg; - BitField<46, 1, u64> uses_cc; - } rz; - - union { - BitField<39, 5, u64> entry_a; - BitField<45, 1, u64> neg; - BitField<46, 1, u64> uses_cc; - } r1; - - union { - BitField<28, 8, u64> entry_a; - BitField<37, 1, u64> neg; - BitField<38, 1, u64> uses_cc; - } r2; - - } lea; - - union { - BitField<0, 5, FlowCondition> cond; - } flow; - - union { - BitField<47, 1, u64> cc; - BitField<48, 1, u64> negate_b; - BitField<49, 1, u64> negate_c; - BitField<51, 2, u64> tab5980_1; - BitField<53, 2, u64> tab5980_0; - } ffma; - - union { - BitField<48, 3, UniformType> type; - BitField<44, 2, u64> unknown; - } ld_c; - - union { - BitField<48, 3, StoreType> type; - } ldst_sl; - - union { - BitField<44, 2, u64> unknown; - } ld_l; - - union { - BitField<44, 2, StoreCacheManagement> cache_management; - } st_l; - - union { - BitField<48, 3, UniformType> type; - BitField<46, 2, u64> cache_mode; - } ldg; - - union { - BitField<48, 3, UniformType> type; - BitField<46, 2, u64> cache_mode; - } stg; - - union { - BitField<23, 3, AtomicOp> operation; - BitField<48, 1, u64> extended; - BitField<20, 3, GlobalAtomicType> type; - } red; - - union { - BitField<52, 4, AtomicOp> operation; - BitField<49, 3, GlobalAtomicType> type; - BitField<28, 20, s64> offset; - } atom; - - union { - BitField<52, 4, AtomicOp> operation; - BitField<28, 2, AtomicType> type; - BitField<30, 22, s64> offset; - - 
[[nodiscard]] s32 GetImmediateOffset() const { - return static_cast<s32>(offset << 2); - } - } atoms; - - union { - BitField<32, 1, PhysicalAttributeDirection> direction; - BitField<47, 3, AttributeSize> size; - BitField<20, 11, u64> address; - } al2p; - - union { - BitField<53, 3, UniformType> type; - BitField<52, 1, u64> extended; - } generic; - - union { - BitField<0, 3, u64> pred0; - BitField<3, 3, u64> pred3; - BitField<6, 1, u64> neg_b; - BitField<7, 1, u64> abs_a; - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred; - BitField<43, 1, u64> neg_a; - BitField<44, 1, u64> abs_b; - BitField<45, 2, PredOperation> op; - BitField<47, 1, u64> ftz; - BitField<48, 4, PredCondition> cond; - } fsetp; - - union { - BitField<0, 3, u64> pred0; - BitField<3, 3, u64> pred3; - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred; - BitField<45, 2, PredOperation> op; - BitField<48, 1, u64> is_signed; - BitField<49, 3, PredCondition> cond; - } isetp; - - union { - BitField<48, 1, u64> is_signed; - BitField<49, 3, PredCondition> cond; - } icmp; - - union { - BitField<0, 3, u64> pred0; - BitField<3, 3, u64> pred3; - BitField<12, 3, u64> pred12; - BitField<15, 1, u64> neg_pred12; - BitField<24, 2, PredOperation> cond; - BitField<29, 3, u64> pred29; - BitField<32, 1, u64> neg_pred29; - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred39; - BitField<45, 2, PredOperation> op; - } psetp; - - union { - BitField<43, 4, PredCondition> cond; - BitField<45, 2, PredOperation> op; - BitField<3, 3, u64> pred3; - BitField<0, 3, u64> pred0; - BitField<39, 3, u64> pred39; - } vsetp; - - union { - BitField<12, 3, u64> pred12; - BitField<15, 1, u64> neg_pred12; - BitField<24, 2, PredOperation> cond; - BitField<29, 3, u64> pred29; - BitField<32, 1, u64> neg_pred29; - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred39; - BitField<44, 1, u64> bf; - BitField<45, 2, PredOperation> op; - } pset; - - union { - BitField<0, 3, u64> pred0; - BitField<3, 3, u64> pred3; - BitField<8, 5, ConditionCode> cc; // flag in cc - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred39; - BitField<45, 4, PredOperation> op; // op with pred39 - } csetp; - - union { - BitField<6, 1, u64> ftz; - BitField<45, 2, PredOperation> op; - BitField<3, 3, u64> pred3; - BitField<0, 3, u64> pred0; - BitField<43, 1, u64> negate_a; - BitField<44, 1, u64> abs_a; - BitField<47, 2, HalfType> type_a; - union { - BitField<35, 4, PredCondition> cond; - BitField<49, 1, u64> h_and; - BitField<31, 1, u64> negate_b; - BitField<30, 1, u64> abs_b; - BitField<28, 2, HalfType> type_b; - } reg; - union { - BitField<56, 1, u64> negate_b; - BitField<54, 1, u64> abs_b; - } cbuf; - union { - BitField<49, 4, PredCondition> cond; - BitField<53, 1, u64> h_and; - } cbuf_and_imm; - BitField<42, 1, u64> neg_pred; - BitField<39, 3, u64> pred39; - } hsetp2; - - union { - BitField<40, 1, R2pMode> mode; - BitField<41, 2, u64> byte; - BitField<20, 7, u64> immediate_mask; - } p2r_r2p; - - union { - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred; - BitField<43, 1, u64> neg_a; - BitField<44, 1, u64> abs_b; - BitField<45, 2, PredOperation> op; - BitField<48, 4, PredCondition> cond; - BitField<52, 1, u64> bf; - BitField<53, 1, u64> neg_b; - BitField<54, 1, u64> abs_a; - BitField<55, 1, u64> ftz; - } fset; - - union { - BitField<47, 1, u64> ftz; - BitField<48, 4, PredCondition> cond; - } fcmp; - - union { - BitField<49, 1, u64> bf; - BitField<35, 3, PredCondition> cond; - BitField<50, 1, u64> ftz; - BitField<45, 2, PredOperation> op; 
- BitField<43, 1, u64> negate_a; - BitField<44, 1, u64> abs_a; - BitField<47, 2, HalfType> type_a; - BitField<31, 1, u64> negate_b; - BitField<30, 1, u64> abs_b; - BitField<28, 2, HalfType> type_b; - BitField<42, 1, u64> neg_pred; - BitField<39, 3, u64> pred39; - } hset2; - - union { - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred; - BitField<44, 1, u64> bf; - BitField<45, 2, PredOperation> op; - BitField<48, 1, u64> is_signed; - BitField<49, 3, PredCondition> cond; - } iset; - - union { - BitField<45, 1, u64> negate_a; - BitField<49, 1, u64> abs_a; - BitField<10, 2, Register::Size> src_size; - BitField<13, 1, u64> is_input_signed; - BitField<8, 2, Register::Size> dst_size; - BitField<12, 1, u64> is_output_signed; - - union { - BitField<39, 2, u64> tab5cb8_2; - } i2f; - - union { - BitField<39, 2, F2iRoundingOp> rounding; - } f2i; - - union { - BitField<39, 4, u64> rounding; - // H0, H1 extract for F16 missing - BitField<41, 1, u64> selector; // Guessed as some games set it, TODO: reverse this value - [[nodiscard]] F2fRoundingOp GetRoundingMode() const { - constexpr u64 rounding_mask = 0x0B; - return static_cast<F2fRoundingOp>(rounding.Value() & rounding_mask); - } - } f2f; - - union { - BitField<41, 2, u64> selector; - } int_src; - - union { - BitField<41, 1, u64> selector; - } float_src; - } conversion; - - union { - BitField<28, 1, u64> array; - BitField<29, 2, TextureType> texture_type; - BitField<31, 4, u64> component_mask; - BitField<49, 1, u64> nodep_flag; - BitField<50, 1, u64> dc_flag; - BitField<54, 1, u64> aoffi_flag; - BitField<55, 3, TextureProcessMode> process_mode; - - [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { - return ((1ULL << component) & component_mask) != 0; - } - - [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { - return process_mode; - } - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::DC: - return dc_flag != 0; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - case TextureMiscMode::AOFFI: - return aoffi_flag != 0; - default: - break; - } - return false; - } - } tex; - - union { - BitField<28, 1, u64> array; - BitField<29, 2, TextureType> texture_type; - BitField<31, 4, u64> component_mask; - BitField<49, 1, u64> nodep_flag; - BitField<50, 1, u64> dc_flag; - BitField<36, 1, u64> aoffi_flag; - BitField<37, 3, TextureProcessMode> process_mode; - - [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { - return ((1ULL << component) & component_mask) != 0; - } - - [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { - return process_mode; - } - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::DC: - return dc_flag != 0; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - case TextureMiscMode::AOFFI: - return aoffi_flag != 0; - default: - break; - } - return false; - } - } tex_b; - - union { - BitField<22, 6, TextureQueryType> query_type; - BitField<31, 4, u64> component_mask; - BitField<49, 1, u64> nodep_flag; - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::NODEP: - return nodep_flag != 0; - default: - break; - } - return false; - } - - [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { - return ((1ULL << component) & component_mask) != 0; - } - } txq; - - union { - BitField<28, 1, u64> array; - BitField<29, 2, TextureType> texture_type; - BitField<31, 4, u64> component_mask; - 
BitField<35, 1, u64> ndv_flag; - BitField<49, 1, u64> nodep_flag; - - [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { - return ((1ULL << component) & component_mask) != 0; - } - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::NDV: - return (ndv_flag != 0); - case TextureMiscMode::NODEP: - return (nodep_flag != 0); - default: - break; - } - return false; - } - } tmml; - - union { - BitField<28, 1, u64> array; - BitField<29, 2, TextureType> texture_type; - BitField<35, 1, u64> ndv_flag; - BitField<49, 1, u64> nodep_flag; - BitField<50, 1, u64> dc_flag; - BitField<54, 2, u64> offset_mode; - BitField<56, 2, u64> component; - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::NDV: - return ndv_flag != 0; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - case TextureMiscMode::DC: - return dc_flag != 0; - case TextureMiscMode::AOFFI: - return offset_mode == 1; - case TextureMiscMode::PTP: - return offset_mode == 2; - default: - break; - } - return false; - } - } tld4; - - union { - BitField<35, 1, u64> ndv_flag; - BitField<49, 1, u64> nodep_flag; - BitField<50, 1, u64> dc_flag; - BitField<33, 2, u64> offset_mode; - BitField<37, 2, u64> component; - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::NDV: - return ndv_flag != 0; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - case TextureMiscMode::DC: - return dc_flag != 0; - case TextureMiscMode::AOFFI: - return offset_mode == 1; - case TextureMiscMode::PTP: - return offset_mode == 2; - default: - break; - } - return false; - } - } tld4_b; - - union { - BitField<49, 1, u64> nodep_flag; - BitField<50, 1, u64> dc_flag; - BitField<51, 1, u64> aoffi_flag; - BitField<52, 2, u64> component; - BitField<55, 1, u64> fp16_flag; - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::DC: - return dc_flag != 0; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - case TextureMiscMode::AOFFI: - return aoffi_flag != 0; - default: - break; - } - return false; - } - } tld4s; - - union { - BitField<0, 8, Register> gpr0; - BitField<28, 8, Register> gpr28; - BitField<49, 1, u64> nodep_flag; - BitField<50, 3, u64> component_mask_selector; - BitField<53, 4, u64> texture_info; - BitField<59, 1, u64> fp32_flag; - - [[nodiscard]] TextureType GetTextureType() const { - // The TEXS instruction has a weird encoding for the texture type. 
- if (texture_info == 0) { - return TextureType::Texture1D; - } - if (texture_info >= 1 && texture_info <= 9) { - return TextureType::Texture2D; - } - if (texture_info >= 10 && texture_info <= 11) { - return TextureType::Texture3D; - } - if (texture_info >= 12 && texture_info <= 13) { - return TextureType::TextureCube; - } - - LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", texture_info.Value()); - UNREACHABLE(); - return TextureType::Texture1D; - } - - [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { - switch (texture_info) { - case 0: - case 2: - case 6: - case 8: - case 9: - case 11: - return TextureProcessMode::LZ; - case 3: - case 5: - case 13: - return TextureProcessMode::LL; - default: - break; - } - return TextureProcessMode::None; - } - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::DC: - return (texture_info >= 4 && texture_info <= 6) || texture_info == 9; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - default: - break; - } - return false; - } - - [[nodiscard]] bool IsArrayTexture() const { - // TEXS only supports Texture2D arrays. - return texture_info >= 7 && texture_info <= 9; - } - - [[nodiscard]] bool HasTwoDestinations() const { - return gpr28.Value() != Register::ZeroIndex; - } - - [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { - static constexpr std::array<std::array<u32, 8>, 4> mask_lut{{ - {}, - {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc}, - {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc}, - {0x7, 0xb, 0xd, 0xe, 0xf}, - }}; - - std::size_t index{gpr0.Value() != Register::ZeroIndex ? 1U : 0U}; - index |= gpr28.Value() != Register::ZeroIndex ? 2 : 0; - - u32 mask = mask_lut[index][component_mask_selector]; - // A mask of 0 means this instruction uses an unimplemented mask. - ASSERT(mask != 0); - return ((1ull << component) & mask) != 0; - } - } texs; - - union { - BitField<28, 1, u64> is_array; - BitField<29, 2, TextureType> texture_type; - BitField<35, 1, u64> aoffi; - BitField<49, 1, u64> nodep_flag; - BitField<50, 1, u64> ms; // Multisample? - BitField<54, 1, u64> cl; - BitField<55, 1, u64> process_mode; - - [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { - return process_mode == 0 ? TextureProcessMode::LZ : TextureProcessMode::LL; - } - } tld; - - union { - BitField<49, 1, u64> nodep_flag; - BitField<53, 4, u64> texture_info; - BitField<59, 1, u64> fp32_flag; - - [[nodiscard]] TextureType GetTextureType() const { - // The TLDS instruction has a weird encoding for the texture type. 
- if (texture_info <= 1) { - return TextureType::Texture1D; - } - if (texture_info == 2 || texture_info == 8 || texture_info == 12 || - (texture_info >= 4 && texture_info <= 6)) { - return TextureType::Texture2D; - } - if (texture_info == 7) { - return TextureType::Texture3D; - } - - LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", texture_info.Value()); - UNREACHABLE(); - return TextureType::Texture1D; - } - - [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { - if (texture_info == 1 || texture_info == 5 || texture_info == 12) { - return TextureProcessMode::LL; - } - return TextureProcessMode::LZ; - } - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::AOFFI: - return texture_info == 12 || texture_info == 4; - case TextureMiscMode::MZ: - return texture_info == 5; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - default: - break; - } - return false; - } - - [[nodiscard]] bool IsArrayTexture() const { - // TEXS only supports Texture2D arrays. - return texture_info == 8; - } - } tlds; - - union { - BitField<28, 1, u64> is_array; - BitField<29, 2, TextureType> texture_type; - BitField<35, 1, u64> aoffi_flag; - BitField<49, 1, u64> nodep_flag; - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::AOFFI: - return aoffi_flag != 0; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - default: - break; - } - return false; - } - - } txd; - - union { - BitField<24, 2, StoreCacheManagement> cache_management; - BitField<33, 3, ImageType> image_type; - BitField<49, 2, OutOfBoundsStore> out_of_bounds_store; - BitField<51, 1, u64> is_immediate; - BitField<52, 1, SurfaceDataMode> mode; - - BitField<20, 3, StoreType> store_data_layout; - BitField<20, 4, u64> component_mask_selector; - - [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { - ASSERT(mode == SurfaceDataMode::P); - constexpr u8 R = 0b0001; - constexpr u8 G = 0b0010; - constexpr u8 B = 0b0100; - constexpr u8 A = 0b1000; - constexpr std::array<u8, 16> mask = { - 0, (R), (G), (R | G), (B), (R | B), - (G | B), (R | G | B), (A), (R | A), (G | A), (R | G | A), - (B | A), (R | B | A), (G | B | A), (R | G | B | A)}; - return std::bitset<4>{mask.at(component_mask_selector)}.test(component); - } - - [[nodiscard]] StoreType GetStoreDataLayout() const { - ASSERT(mode == SurfaceDataMode::D_BA); - return store_data_layout; - } - } suldst; - - union { - BitField<28, 1, u64> is_ba; - BitField<51, 3, ImageAtomicOperationType> operation_type; - BitField<33, 3, ImageType> image_type; - BitField<29, 4, ImageAtomicOperation> operation; - BitField<49, 2, OutOfBoundsStore> out_of_bounds_store; - } suatom_d; - - union { - BitField<20, 24, u64> target; - BitField<5, 1, u64> constant_buffer; - - [[nodiscard]] s32 GetBranchTarget() const { - // Sign extend the branch target offset - const auto mask = 1U << (24 - 1); - const auto target_value = static_cast<u32>(target); - constexpr auto instruction_size = static_cast<s32>(sizeof(Instruction)); - - // The branch offset is relative to the next instruction and is stored in bytes, so - // divide it by the size of an instruction and add 1 to it. 
- return static_cast<s32>((target_value ^ mask) - mask) / instruction_size + 1; - } - } bra; - - union { - BitField<20, 24, u64> target; - BitField<5, 1, u64> constant_buffer; - - [[nodiscard]] s32 GetBranchExtend() const { - // Sign extend the branch target offset - const auto mask = 1U << (24 - 1); - const auto target_value = static_cast<u32>(target); - constexpr auto instruction_size = static_cast<s32>(sizeof(Instruction)); - - // The branch offset is relative to the next instruction and is stored in bytes, so - // divide it by the size of an instruction and add 1 to it. - return static_cast<s32>((target_value ^ mask) - mask) / instruction_size + 1; - } - } brx; - - union { - BitField<39, 1, u64> emit; // EmitVertex - BitField<40, 1, u64> cut; // EndPrimitive - } out; - - union { - BitField<31, 1, u64> skew; - BitField<32, 1, u64> o; - BitField<33, 2, IsberdMode> mode; - BitField<47, 2, IsberdShift> shift; - } isberd; - - union { - BitField<8, 2, MembarType> type; - BitField<0, 2, MembarUnknown> unknown; - } membar; - - union { - BitField<48, 1, u64> signed_a; - BitField<38, 1, u64> is_byte_chunk_a; - BitField<36, 2, VideoType> type_a; - BitField<36, 2, u64> byte_height_a; - - BitField<49, 1, u64> signed_b; - BitField<50, 1, u64> use_register_b; - BitField<30, 1, u64> is_byte_chunk_b; - BitField<28, 2, VideoType> type_b; - BitField<28, 2, u64> byte_height_b; - } video; - - union { - BitField<51, 2, VmadShr> shr; - BitField<55, 1, u64> saturate; // Saturates the result (a * b + c) - BitField<47, 1, u64> cc; - } vmad; - - union { - BitField<54, 1, u64> is_dest_signed; - BitField<48, 1, u64> is_src_a_signed; - BitField<49, 1, u64> is_src_b_signed; - BitField<37, 2, u64> src_format_a; - BitField<29, 2, u64> src_format_b; - BitField<56, 1, u64> mx; - BitField<55, 1, u64> sat; - BitField<36, 2, u64> selector_a; - BitField<28, 2, u64> selector_b; - BitField<50, 1, u64> is_op_b_register; - BitField<51, 3, VmnmxOperation> operation; - - [[nodiscard]] VmnmxType SourceFormatA() const { - switch (src_format_a) { - case 0b11: - return VmnmxType::Bits32; - case 0b10: - return VmnmxType::Bits16; - default: - return VmnmxType::Bits8; - } - } - - [[nodiscard]] VmnmxType SourceFormatB() const { - switch (src_format_b) { - case 0b11: - return VmnmxType::Bits32; - case 0b10: - return VmnmxType::Bits16; - default: - return VmnmxType::Bits8; - } - } - } vmnmx; - - union { - BitField<20, 16, u64> imm20_16; - BitField<35, 1, u64> high_b_rr; // used on RR - BitField<36, 1, u64> product_shift_left; - BitField<37, 1, u64> merge_37; - BitField<48, 1, u64> sign_a; - BitField<49, 1, u64> sign_b; - BitField<50, 2, XmadMode> mode_cbf; // used by CR, RC - BitField<50, 3, XmadMode> mode; - BitField<52, 1, u64> high_b; - BitField<53, 1, u64> high_a; - BitField<55, 1, u64> product_shift_left_second; // used on CR - BitField<56, 1, u64> merge_56; - } xmad; - - union { - BitField<20, 14, u64> shifted_offset; - BitField<34, 5, u64> index; - - [[nodiscard]] u64 GetOffset() const { - return shifted_offset * 4; - } - } cbuf34; - - union { - BitField<20, 16, s64> offset; - BitField<36, 5, u64> index; - - [[nodiscard]] s64 GetOffset() const { - return offset; - } - } cbuf36; - - // Unsure about the size of this one. - // It's always used with a gpr0, so any size should be fine. 
- BitField<20, 8, SystemVariable> sys20; - - BitField<47, 1, u64> generates_cc; - BitField<61, 1, u64> is_b_imm; - BitField<60, 1, u64> is_b_gpr; - BitField<59, 1, u64> is_c_gpr; - BitField<20, 24, s64> smem_imm; - BitField<0, 5, ConditionCode> flow_condition_code; - - Attribute attribute; - Sampler sampler; - Image image; - - u64 value; -}; -static_assert(sizeof(Instruction) == 0x8, "Incorrect structure size"); -static_assert(std::is_standard_layout_v<Instruction>, "Instruction is not standard layout"); - -class OpCode { -public: - enum class Id { - KIL, - SSY, - SYNC, - BRK, - DEPBAR, - VOTE, - VOTE_VTG, - SHFL, - FSWZADD, - BFE_C, - BFE_R, - BFE_IMM, - BFI_RC, - BFI_IMM_R, - BRA, - BRX, - PBK, - LD_A, - LD_L, - LD_S, - LD_C, - LD, // Load from generic memory - LDG, // Load from global memory - ST_A, - ST_L, - ST_S, - ST, // Store in generic memory - STG, // Store in global memory - RED, // Reduction operation - ATOM, // Atomic operation on global memory - ATOMS, // Atomic operation on shared memory - AL2P, // Transforms attribute memory into physical memory - TEX, - TEX_B, // Texture Load Bindless - TXQ, // Texture Query - TXQ_B, // Texture Query Bindless - TEXS, // Texture Fetch with scalar/non-vec4 source/destinations - TLD, // Texture Load - TLDS, // Texture Load with scalar/non-vec4 source/destinations - TLD4, // Texture Gather 4 - TLD4_B, // Texture Gather 4 Bindless - TLD4S, // Texture Load 4 with scalar / non - vec4 source / destinations - TMML_B, // Texture Mip Map Level - TMML, // Texture Mip Map Level - TXD, // Texture Gradient/Load with Derivates - TXD_B, // Texture Gradient/Load with Derivates Bindless - SUST, // Surface Store - SULD, // Surface Load - SUATOM, // Surface Atomic Operation - EXIT, - NOP, - IPA, - OUT_R, // Emit vertex/primitive - ISBERD, - BAR, - MEMBAR, - VMAD, - VSETP, - VMNMX, - FFMA_IMM, // Fused Multiply and Add - FFMA_CR, - FFMA_RC, - FFMA_RR, - FADD_C, - FADD_R, - FADD_IMM, - FADD32I, - FMUL_C, - FMUL_R, - FMUL_IMM, - FMUL32_IMM, - IADD_C, - IADD_R, - IADD_IMM, - IADD3_C, // Add 3 Integers - IADD3_R, - IADD3_IMM, - IADD32I, - ISCADD_C, // Scale and Add - ISCADD_R, - ISCADD_IMM, - FLO_R, - FLO_C, - FLO_IMM, - LEA_R1, - LEA_R2, - LEA_RZ, - LEA_IMM, - LEA_HI, - HADD2_C, - HADD2_R, - HADD2_IMM, - HMUL2_C, - HMUL2_R, - HMUL2_IMM, - HFMA2_CR, - HFMA2_RC, - HFMA2_RR, - HFMA2_IMM_R, - HSETP2_C, - HSETP2_R, - HSETP2_IMM, - HSET2_C, - HSET2_R, - HSET2_IMM, - POPC_C, - POPC_R, - POPC_IMM, - SEL_C, - SEL_R, - SEL_IMM, - ICMP_RC, - ICMP_R, - ICMP_CR, - ICMP_IMM, - FCMP_RR, - FCMP_RC, - FCMP_IMMR, - MUFU, // Multi-Function Operator - RRO_C, // Range Reduction Operator - RRO_R, - RRO_IMM, - F2F_C, - F2F_R, - F2F_IMM, - F2I_C, - F2I_R, - F2I_IMM, - I2F_C, - I2F_R, - I2F_IMM, - I2I_C, - I2I_R, - I2I_IMM, - LOP_C, - LOP_R, - LOP_IMM, - LOP32I, - LOP3_C, - LOP3_R, - LOP3_IMM, - MOV_C, - MOV_R, - MOV_IMM, - S2R, - MOV32_IMM, - SHL_C, - SHL_R, - SHL_IMM, - SHR_C, - SHR_R, - SHR_IMM, - SHF_RIGHT_R, - SHF_RIGHT_IMM, - SHF_LEFT_R, - SHF_LEFT_IMM, - FMNMX_C, - FMNMX_R, - FMNMX_IMM, - IMNMX_C, - IMNMX_R, - IMNMX_IMM, - FSETP_C, // Set Predicate - FSETP_R, - FSETP_IMM, - FSET_C, - FSET_R, - FSET_IMM, - ISETP_C, - ISETP_IMM, - ISETP_R, - ISET_R, - ISET_C, - ISET_IMM, - PSETP, - PSET, - CSETP, - R2P_IMM, - P2R_IMM, - XMAD_IMM, - XMAD_CR, - XMAD_RC, - XMAD_RR, - }; - - enum class Type { - Trivial, - Arithmetic, - ArithmeticImmediate, - ArithmeticInteger, - ArithmeticIntegerImmediate, - ArithmeticHalf, - ArithmeticHalfImmediate, - Bfe, - Bfi, - Shift, - Ffma, - Hfma2, - Flow, - 
Synch, - Warp, - Memory, - Texture, - Image, - FloatSet, - FloatSetPredicate, - IntegerSet, - IntegerSetPredicate, - HalfSet, - HalfSetPredicate, - PredicateSetPredicate, - PredicateSetRegister, - RegisterSetPredicate, - Conversion, - Video, - Xmad, - Unknown, - }; - - /// Returns whether an opcode has an execution predicate field or not (ie, whether it can be - /// conditionally executed). - [[nodiscard]] static bool IsPredicatedInstruction(Id opcode) { - // TODO(Subv): Add the rest of unpredicated instructions. - return opcode != Id::SSY && opcode != Id::PBK; - } - - class Matcher { - public: - constexpr Matcher(const char* const name_, u16 mask_, u16 expected_, Id id_, Type type_) - : name{name_}, mask{mask_}, expected{expected_}, id{id_}, type{type_} {} - - [[nodiscard]] constexpr const char* GetName() const { - return name; - } - - [[nodiscard]] constexpr u16 GetMask() const { - return mask; - } - - [[nodiscard]] constexpr Id GetId() const { - return id; - } - - [[nodiscard]] constexpr Type GetType() const { - return type; - } - - /** - * Tests to see if the given instruction is the instruction this matcher represents. - * @param instruction The instruction to test - * @returns true if the given instruction matches. - */ - [[nodiscard]] constexpr bool Matches(u16 instruction) const { - return (instruction & mask) == expected; - } - - private: - const char* name; - u16 mask; - u16 expected; - Id id; - Type type; - }; - - using DecodeResult = std::optional<std::reference_wrapper<const Matcher>>; - [[nodiscard]] static DecodeResult Decode(Instruction instr) { - static const auto table{GetDecodeTable()}; - - const auto matches_instruction = [instr](const auto& matcher) { - return matcher.Matches(static_cast<u16>(instr.opcode)); - }; - - auto iter = std::find_if(table.begin(), table.end(), matches_instruction); - return iter != table.end() ? std::optional<std::reference_wrapper<const Matcher>>(*iter) - : std::nullopt; - } - -private: - struct Detail { - private: - static constexpr std::size_t opcode_bitsize = 16; - - /** - * Generates the mask and the expected value after masking from a given bitstring. - * A '0' in a bitstring indicates that a zero must be present at that bit position. - * A '1' in a bitstring indicates that a one must be present at that bit position. - */ - [[nodiscard]] static constexpr auto GetMaskAndExpect(const char* const bitstring) { - u16 mask = 0, expect = 0; - for (std::size_t i = 0; i < opcode_bitsize; i++) { - const std::size_t bit_position = opcode_bitsize - i - 1; - switch (bitstring[i]) { - case '0': - mask |= static_cast<u16>(1U << bit_position); - break; - case '1': - expect |= static_cast<u16>(1U << bit_position); - mask |= static_cast<u16>(1U << bit_position); - break; - default: - // Ignore - break; - } - } - return std::make_pair(mask, expect); - } - - public: - /// Creates a matcher that can match and parse instructions based on bitstring. 
- [[nodiscard]] static constexpr auto GetMatcher(const char* const bitstring, Id op, - Type type, const char* const name) { - const auto [mask, expected] = GetMaskAndExpect(bitstring); - return Matcher(name, mask, expected, op, type); - } - }; - - [[nodiscard]] static std::vector<Matcher> GetDecodeTable() { - std::vector<Matcher> table = { -#define INST(bitstring, op, type, name) Detail::GetMatcher(bitstring, op, type, name) - INST("111000110011----", Id::KIL, Type::Flow, "KIL"), - INST("111000101001----", Id::SSY, Type::Flow, "SSY"), - INST("111000101010----", Id::PBK, Type::Flow, "PBK"), - INST("111000100100----", Id::BRA, Type::Flow, "BRA"), - INST("111000100101----", Id::BRX, Type::Flow, "BRX"), - INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"), - INST("111000110100----", Id::BRK, Type::Flow, "BRK"), - INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"), - INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), - INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"), - INST("0101000011100---", Id::VOTE_VTG, Type::Warp, "VOTE_VTG"), - INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"), - INST("0101000011111---", Id::FSWZADD, Type::Warp, "FSWZADD"), - INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), - INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), - INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"), - INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"), - INST("100-------------", Id::LD, Type::Memory, "LD"), - INST("1110111011010---", Id::LDG, Type::Memory, "LDG"), - INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"), - INST("1110111101011---", Id::ST_S, Type::Memory, "ST_S"), - INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), - INST("101-------------", Id::ST, Type::Memory, "ST"), - INST("1110111011011---", Id::STG, Type::Memory, "STG"), - INST("1110101111111---", Id::RED, Type::Memory, "RED"), - INST("11101101--------", Id::ATOM, Type::Memory, "ATOM"), - INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"), - INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"), - INST("110000----111---", Id::TEX, Type::Texture, "TEX"), - INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"), - INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"), - INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"), - INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"), - INST("11011100--11----", Id::TLD, Type::Texture, "TLD"), - INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"), - INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"), - INST("1101111011111---", Id::TLD4_B, Type::Texture, "TLD4_B"), - INST("11011111-0------", Id::TLD4S, Type::Texture, "TLD4S"), - INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), - INST("1101111101011---", Id::TMML, Type::Texture, "TMML"), - INST("11011110011110--", Id::TXD_B, Type::Texture, "TXD_B"), - INST("11011110001110--", Id::TXD, Type::Texture, "TXD"), - INST("11101011001-----", Id::SUST, Type::Image, "SUST"), - INST("11101011000-----", Id::SULD, Type::Image, "SULD"), - INST("1110101000------", Id::SUATOM, Type::Image, "SUATOM_D"), - INST("0101000010110---", Id::NOP, Type::Trivial, "NOP"), - INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), - INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), - INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"), - INST("1111000010101---", Id::BAR, Type::Trivial, "BAR"), - INST("1110111110011---", Id::MEMBAR, Type::Trivial, "MEMBAR"), - INST("01011111--------", Id::VMAD, Type::Video, "VMAD"), - 
INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"), - INST("0011101---------", Id::VMNMX, Type::Video, "VMNMX"), - INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"), - INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"), - INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"), - INST("010110011-------", Id::FFMA_RR, Type::Ffma, "FFMA_RR"), - INST("0100110001011---", Id::FADD_C, Type::Arithmetic, "FADD_C"), - INST("0101110001011---", Id::FADD_R, Type::Arithmetic, "FADD_R"), - INST("0011100-01011---", Id::FADD_IMM, Type::Arithmetic, "FADD_IMM"), - INST("000010----------", Id::FADD32I, Type::ArithmeticImmediate, "FADD32I"), - INST("0100110001101---", Id::FMUL_C, Type::Arithmetic, "FMUL_C"), - INST("0101110001101---", Id::FMUL_R, Type::Arithmetic, "FMUL_R"), - INST("0011100-01101---", Id::FMUL_IMM, Type::Arithmetic, "FMUL_IMM"), - INST("00011110--------", Id::FMUL32_IMM, Type::ArithmeticImmediate, "FMUL32_IMM"), - INST("0100110000010---", Id::IADD_C, Type::ArithmeticInteger, "IADD_C"), - INST("0101110000010---", Id::IADD_R, Type::ArithmeticInteger, "IADD_R"), - INST("0011100-00010---", Id::IADD_IMM, Type::ArithmeticInteger, "IADD_IMM"), - INST("010011001100----", Id::IADD3_C, Type::ArithmeticInteger, "IADD3_C"), - INST("010111001100----", Id::IADD3_R, Type::ArithmeticInteger, "IADD3_R"), - INST("0011100-1100----", Id::IADD3_IMM, Type::ArithmeticInteger, "IADD3_IMM"), - INST("0001110---------", Id::IADD32I, Type::ArithmeticIntegerImmediate, "IADD32I"), - INST("0100110000011---", Id::ISCADD_C, Type::ArithmeticInteger, "ISCADD_C"), - INST("0101110000011---", Id::ISCADD_R, Type::ArithmeticInteger, "ISCADD_R"), - INST("0011100-00011---", Id::ISCADD_IMM, Type::ArithmeticInteger, "ISCADD_IMM"), - INST("0100110000001---", Id::POPC_C, Type::ArithmeticInteger, "POPC_C"), - INST("0101110000001---", Id::POPC_R, Type::ArithmeticInteger, "POPC_R"), - INST("0011100-00001---", Id::POPC_IMM, Type::ArithmeticInteger, "POPC_IMM"), - INST("0100110010100---", Id::SEL_C, Type::ArithmeticInteger, "SEL_C"), - INST("0101110010100---", Id::SEL_R, Type::ArithmeticInteger, "SEL_R"), - INST("0011100-10100---", Id::SEL_IMM, Type::ArithmeticInteger, "SEL_IMM"), - INST("010100110100----", Id::ICMP_RC, Type::ArithmeticInteger, "ICMP_RC"), - INST("010110110100----", Id::ICMP_R, Type::ArithmeticInteger, "ICMP_R"), - INST("010010110100----", Id::ICMP_CR, Type::ArithmeticInteger, "ICMP_CR"), - INST("0011011-0100----", Id::ICMP_IMM, Type::ArithmeticInteger, "ICMP_IMM"), - INST("0101110000110---", Id::FLO_R, Type::ArithmeticInteger, "FLO_R"), - INST("0100110000110---", Id::FLO_C, Type::ArithmeticInteger, "FLO_C"), - INST("0011100-00110---", Id::FLO_IMM, Type::ArithmeticInteger, "FLO_IMM"), - INST("0101101111011---", Id::LEA_R2, Type::ArithmeticInteger, "LEA_R2"), - INST("0101101111010---", Id::LEA_R1, Type::ArithmeticInteger, "LEA_R1"), - INST("001101101101----", Id::LEA_IMM, Type::ArithmeticInteger, "LEA_IMM"), - INST("010010111101----", Id::LEA_RZ, Type::ArithmeticInteger, "LEA_RZ"), - INST("00011000--------", Id::LEA_HI, Type::ArithmeticInteger, "LEA_HI"), - INST("0111101-1-------", Id::HADD2_C, Type::ArithmeticHalf, "HADD2_C"), - INST("0101110100010---", Id::HADD2_R, Type::ArithmeticHalf, "HADD2_R"), - INST("0111101-0-------", Id::HADD2_IMM, Type::ArithmeticHalfImmediate, "HADD2_IMM"), - INST("0111100-1-------", Id::HMUL2_C, Type::ArithmeticHalf, "HMUL2_C"), - INST("0101110100001---", Id::HMUL2_R, Type::ArithmeticHalf, "HMUL2_R"), - INST("0111100-0-------", Id::HMUL2_IMM, 
Type::ArithmeticHalfImmediate, "HMUL2_IMM"), - INST("01110---1-------", Id::HFMA2_CR, Type::Hfma2, "HFMA2_CR"), - INST("01100---1-------", Id::HFMA2_RC, Type::Hfma2, "HFMA2_RC"), - INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"), - INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"), - INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"), - INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"), - INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"), - INST("0111110-1-------", Id::HSET2_C, Type::HalfSet, "HSET2_C"), - INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"), - INST("0111110-0-------", Id::HSET2_IMM, Type::HalfSet, "HSET2_IMM"), - INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"), - INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"), - INST("0011011-1010----", Id::FCMP_IMMR, Type::Arithmetic, "FCMP_IMMR"), - INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), - INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"), - INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"), - INST("0011100-10010---", Id::RRO_IMM, Type::Arithmetic, "RRO_IMM"), - INST("0100110010101---", Id::F2F_C, Type::Conversion, "F2F_C"), - INST("0101110010101---", Id::F2F_R, Type::Conversion, "F2F_R"), - INST("0011100-10101---", Id::F2F_IMM, Type::Conversion, "F2F_IMM"), - INST("0100110010110---", Id::F2I_C, Type::Conversion, "F2I_C"), - INST("0101110010110---", Id::F2I_R, Type::Conversion, "F2I_R"), - INST("0011100-10110---", Id::F2I_IMM, Type::Conversion, "F2I_IMM"), - INST("0100110010011---", Id::MOV_C, Type::Arithmetic, "MOV_C"), - INST("0101110010011---", Id::MOV_R, Type::Arithmetic, "MOV_R"), - INST("0011100-10011---", Id::MOV_IMM, Type::Arithmetic, "MOV_IMM"), - INST("1111000011001---", Id::S2R, Type::Trivial, "S2R"), - INST("000000010000----", Id::MOV32_IMM, Type::ArithmeticImmediate, "MOV32_IMM"), - INST("0100110001100---", Id::FMNMX_C, Type::Arithmetic, "FMNMX_C"), - INST("0101110001100---", Id::FMNMX_R, Type::Arithmetic, "FMNMX_R"), - INST("0011100-01100---", Id::FMNMX_IMM, Type::Arithmetic, "FMNMX_IMM"), - INST("0100110000100---", Id::IMNMX_C, Type::ArithmeticInteger, "IMNMX_C"), - INST("0101110000100---", Id::IMNMX_R, Type::ArithmeticInteger, "IMNMX_R"), - INST("0011100-00100---", Id::IMNMX_IMM, Type::ArithmeticInteger, "IMNMX_IMM"), - INST("0100110000000---", Id::BFE_C, Type::Bfe, "BFE_C"), - INST("0101110000000---", Id::BFE_R, Type::Bfe, "BFE_R"), - INST("0011100-00000---", Id::BFE_IMM, Type::Bfe, "BFE_IMM"), - INST("0101001111110---", Id::BFI_RC, Type::Bfi, "BFI_RC"), - INST("0011011-11110---", Id::BFI_IMM_R, Type::Bfi, "BFI_IMM_R"), - INST("0100110001000---", Id::LOP_C, Type::ArithmeticInteger, "LOP_C"), - INST("0101110001000---", Id::LOP_R, Type::ArithmeticInteger, "LOP_R"), - INST("0011100-01000---", Id::LOP_IMM, Type::ArithmeticInteger, "LOP_IMM"), - INST("000001----------", Id::LOP32I, Type::ArithmeticIntegerImmediate, "LOP32I"), - INST("0000001---------", Id::LOP3_C, Type::ArithmeticInteger, "LOP3_C"), - INST("0101101111100---", Id::LOP3_R, Type::ArithmeticInteger, "LOP3_R"), - INST("0011110---------", Id::LOP3_IMM, Type::ArithmeticInteger, "LOP3_IMM"), - INST("0100110001001---", Id::SHL_C, Type::Shift, "SHL_C"), - INST("0101110001001---", Id::SHL_R, Type::Shift, "SHL_R"), - INST("0011100-01001---", Id::SHL_IMM, Type::Shift, "SHL_IMM"), - INST("0100110000101---", Id::SHR_C, Type::Shift, "SHR_C"), - 
INST("0101110000101---", Id::SHR_R, Type::Shift, "SHR_R"), - INST("0011100-00101---", Id::SHR_IMM, Type::Shift, "SHR_IMM"), - INST("0101110011111---", Id::SHF_RIGHT_R, Type::Shift, "SHF_RIGHT_R"), - INST("0011100-11111---", Id::SHF_RIGHT_IMM, Type::Shift, "SHF_RIGHT_IMM"), - INST("0101101111111---", Id::SHF_LEFT_R, Type::Shift, "SHF_LEFT_R"), - INST("0011011-11111---", Id::SHF_LEFT_IMM, Type::Shift, "SHF_LEFT_IMM"), - INST("0100110011100---", Id::I2I_C, Type::Conversion, "I2I_C"), - INST("0101110011100---", Id::I2I_R, Type::Conversion, "I2I_R"), - INST("0011100-11100---", Id::I2I_IMM, Type::Conversion, "I2I_IMM"), - INST("0100110010111---", Id::I2F_C, Type::Conversion, "I2F_C"), - INST("0101110010111---", Id::I2F_R, Type::Conversion, "I2F_R"), - INST("0011100-10111---", Id::I2F_IMM, Type::Conversion, "I2F_IMM"), - INST("01011000--------", Id::FSET_R, Type::FloatSet, "FSET_R"), - INST("0100100---------", Id::FSET_C, Type::FloatSet, "FSET_C"), - INST("0011000---------", Id::FSET_IMM, Type::FloatSet, "FSET_IMM"), - INST("010010111011----", Id::FSETP_C, Type::FloatSetPredicate, "FSETP_C"), - INST("010110111011----", Id::FSETP_R, Type::FloatSetPredicate, "FSETP_R"), - INST("0011011-1011----", Id::FSETP_IMM, Type::FloatSetPredicate, "FSETP_IMM"), - INST("010010110110----", Id::ISETP_C, Type::IntegerSetPredicate, "ISETP_C"), - INST("010110110110----", Id::ISETP_R, Type::IntegerSetPredicate, "ISETP_R"), - INST("0011011-0110----", Id::ISETP_IMM, Type::IntegerSetPredicate, "ISETP_IMM"), - INST("010110110101----", Id::ISET_R, Type::IntegerSet, "ISET_R"), - INST("010010110101----", Id::ISET_C, Type::IntegerSet, "ISET_C"), - INST("0011011-0101----", Id::ISET_IMM, Type::IntegerSet, "ISET_IMM"), - INST("0101000010001---", Id::PSET, Type::PredicateSetRegister, "PSET"), - INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"), - INST("010100001010----", Id::CSETP, Type::PredicateSetPredicate, "CSETP"), - INST("0011100-11110---", Id::R2P_IMM, Type::RegisterSetPredicate, "R2P_IMM"), - INST("0011100-11101---", Id::P2R_IMM, Type::RegisterSetPredicate, "P2R_IMM"), - INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"), - INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"), - INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"), - INST("0101101100------", Id::XMAD_RR, Type::Xmad, "XMAD_RR"), - }; -#undef INST - std::stable_sort(table.begin(), table.end(), [](const auto& a, const auto& b) { - // If a matcher has more bits in its mask it is more specific, so it - // should come first. - return std::bitset<16>(a.GetMask()).count() > std::bitset<16>(b.GetMask()).count(); - }); - - return table; - } -}; - -} // namespace Tegra::Shader diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h deleted file mode 100644 index e0d7b89c5..000000000 --- a/src/video_core/engines/shader_header.h +++ /dev/null @@ -1,158 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include <array> -#include <optional> - -#include "common/bit_field.h" -#include "common/common_funcs.h" -#include "common/common_types.h" - -namespace Tegra::Shader { - -enum class OutputTopology : u32 { - PointList = 1, - LineStrip = 6, - TriangleStrip = 7, -}; - -enum class PixelImap : u8 { - Unused = 0, - Constant = 1, - Perspective = 2, - ScreenLinear = 3, -}; - -// Documentation in: -// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html -struct Header { - union { - BitField<0, 5, u32> sph_type; - BitField<5, 5, u32> version; - BitField<10, 4, u32> shader_type; - BitField<14, 1, u32> mrt_enable; - BitField<15, 1, u32> kills_pixels; - BitField<16, 1, u32> does_global_store; - BitField<17, 4, u32> sass_version; - BitField<21, 5, u32> reserved; - BitField<26, 1, u32> does_load_or_store; - BitField<27, 1, u32> does_fp64; - BitField<28, 4, u32> stream_out_mask; - } common0; - - union { - BitField<0, 24, u32> shader_local_memory_low_size; - BitField<24, 8, u32> per_patch_attribute_count; - } common1; - - union { - BitField<0, 24, u32> shader_local_memory_high_size; - BitField<24, 8, u32> threads_per_input_primitive; - } common2; - - union { - BitField<0, 24, u32> shader_local_memory_crs_size; - BitField<24, 4, OutputTopology> output_topology; - BitField<28, 4, u32> reserved; - } common3; - - union { - BitField<0, 12, u32> max_output_vertices; - BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders. - BitField<20, 4, u32> reserved; - BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders. - } common4; - - union { - struct { - INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA - INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB - INSERT_PADDING_BYTES_NOINIT(16); // ImapGenericVector[32] - INSERT_PADDING_BYTES_NOINIT(2); // ImapColor - union { - BitField<0, 8, u16> clip_distances; - BitField<8, 1, u16> point_sprite_s; - BitField<9, 1, u16> point_sprite_t; - BitField<10, 1, u16> fog_coordinate; - BitField<12, 1, u16> tessellation_eval_point_u; - BitField<13, 1, u16> tessellation_eval_point_v; - BitField<14, 1, u16> instance_id; - BitField<15, 1, u16> vertex_id; - }; - INSERT_PADDING_BYTES_NOINIT(5); // ImapFixedFncTexture[10] - INSERT_PADDING_BYTES_NOINIT(1); // ImapReserved - INSERT_PADDING_BYTES_NOINIT(3); // OmapSystemValuesA - INSERT_PADDING_BYTES_NOINIT(1); // OmapSystemValuesB - INSERT_PADDING_BYTES_NOINIT(16); // OmapGenericVector[32] - INSERT_PADDING_BYTES_NOINIT(2); // OmapColor - INSERT_PADDING_BYTES_NOINIT(2); // OmapSystemValuesC - INSERT_PADDING_BYTES_NOINIT(5); // OmapFixedFncTexture[10] - INSERT_PADDING_BYTES_NOINIT(1); // OmapReserved - } vtg; - - struct { - INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA - INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB - - union { - BitField<0, 2, PixelImap> x; - BitField<2, 2, PixelImap> y; - BitField<4, 2, PixelImap> z; - BitField<6, 2, PixelImap> w; - u8 raw; - } imap_generic_vector[32]; - - INSERT_PADDING_BYTES_NOINIT(2); // ImapColor - INSERT_PADDING_BYTES_NOINIT(2); // ImapSystemValuesC - INSERT_PADDING_BYTES_NOINIT(10); // ImapFixedFncTexture[10] - INSERT_PADDING_BYTES_NOINIT(2); // ImapReserved - - struct { - u32 target; - union { - BitField<0, 1, u32> sample_mask; - BitField<1, 1, u32> depth; - BitField<2, 30, u32> reserved; - }; - } omap; - - bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const { - const u32 bit = render_target * 4 + component; - return omap.target & (1 << bit); - } - - PixelImap 
GetPixelImap(u32 attribute) const { - const auto get_index = [this, attribute](u32 index) { - return static_cast<PixelImap>( - (imap_generic_vector[attribute].raw >> (index * 2)) & 3); - }; - - std::optional<PixelImap> result; - for (u32 component = 0; component < 4; ++component) { - const PixelImap index = get_index(component); - if (index == PixelImap::Unused) { - continue; - } - if (result && result != index) { - LOG_CRITICAL(HW_GPU, "Generic attribute conflict in interpolation mode"); - } - result = index; - } - return result.value_or(PixelImap::Unused); - } - } ps; - - std::array<u32, 0xF> raw; - }; - - u64 GetLocalMemorySize() const { - return (common1.shader_local_memory_low_size | - (common2.shader_local_memory_high_size << 24)); - } -}; -static_assert(sizeof(Header) == 0x50, "Incorrect structure size"); - -} // namespace Tegra::Shader diff --git a/src/video_core/engines/shader_type.h b/src/video_core/engines/shader_type.h deleted file mode 100644 index 49ce5cde5..000000000 --- a/src/video_core/engines/shader_type.h +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include "common/common_types.h" - -namespace Tegra::Engines { - -enum class ShaderType : u32 { - Vertex = 0, - TesselationControl = 1, - TesselationEval = 2, - Geometry = 3, - Fragment = 4, - Compute = 5, -}; -static constexpr std::size_t MaxShaderTypes = 6; - -} // namespace Tegra::Engines diff --git a/src/video_core/guest_driver.cpp b/src/video_core/guest_driver.cpp deleted file mode 100644 index f058f2744..000000000 --- a/src/video_core/guest_driver.cpp +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <algorithm> -#include <limits> -#include <vector> - -#include "common/common_types.h" -#include "video_core/guest_driver.h" - -namespace VideoCore { - -void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32> bound_offsets) { - if (texture_handler_size) { - return; - } - const std::size_t size = bound_offsets.size(); - if (size < 2) { - return; - } - std::sort(bound_offsets.begin(), bound_offsets.end(), std::less{}); - u32 min_val = std::numeric_limits<u32>::max(); - for (std::size_t i = 1; i < size; ++i) { - if (bound_offsets[i] == bound_offsets[i - 1]) { - continue; - } - const u32 new_min = bound_offsets[i] - bound_offsets[i - 1]; - min_val = std::min(min_val, new_min); - } - if (min_val > 2) { - return; - } - texture_handler_size = min_texture_handler_size * min_val; -} - -} // namespace VideoCore diff --git a/src/video_core/guest_driver.h b/src/video_core/guest_driver.h deleted file mode 100644 index 21e569ba1..000000000 --- a/src/video_core/guest_driver.h +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <optional> -#include <vector> - -#include "common/common_types.h" - -namespace VideoCore { - -/** - * The GuestDriverProfile class is used to learn about the GPU drivers behavior and collect - * information necessary for impossible to avoid HLE methods like shader tracks as they are - * Entscheidungsproblems. 
- */ -class GuestDriverProfile { -public: - explicit GuestDriverProfile() = default; - explicit GuestDriverProfile(std::optional<u32> texture_handler_size_) - : texture_handler_size{texture_handler_size_} {} - - void DeduceTextureHandlerSize(std::vector<u32> bound_offsets); - - u32 GetTextureHandlerSize() const { - return texture_handler_size.value_or(default_texture_handler_size); - } - - bool IsTextureHandlerSizeKnown() const { - return texture_handler_size.has_value(); - } - -private: - // Minimum size of texture handler any driver can use. - static constexpr u32 min_texture_handler_size = 4; - - // This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily use 4 bytes instead. - // Thus, certain drivers may squish the size. - static constexpr u32 default_texture_handler_size = 8; - - std::optional<u32> texture_handler_size = default_texture_handler_size; -}; - -} // namespace VideoCore diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index d2b9d5f2b..882eff880 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -69,7 +69,6 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { } else { UNREACHABLE_MSG("Unmapping non-existent GPU address=0x{:x}", gpu_addr); } - const auto submapped_ranges = GetSubmappedRange(gpu_addr, size); for (const auto& map : submapped_ranges) { diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 58014c1c3..b094fc064 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -11,7 +11,6 @@ #include "common/common_types.h" #include "video_core/engines/fermi_2d.h" #include "video_core/gpu.h" -#include "video_core/guest_driver.h" namespace Tegra { class MemoryManager; @@ -45,7 +44,7 @@ public: virtual void Clear() = 0; /// Dispatches a compute shader invocation - virtual void DispatchCompute(GPUVAddr code_addr) = 0; + virtual void DispatchCompute() = 0; /// Resets the counter of a query virtual void ResetCounter(QueryType type) = 0; @@ -136,18 +135,5 @@ public: /// Initialize disk cached resources for the game being emulated virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading, const DiskResourceLoadCallback& callback) {} - - /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver. - [[nodiscard]] GuestDriverProfile& AccessGuestDriverProfile() { - return guest_driver_profile; - } - - /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver. - [[nodiscard]] const GuestDriverProfile& AccessGuestDriverProfile() const { - return guest_driver_profile; - } - -private: - GuestDriverProfile guest_driver_profile{}; }; } // namespace VideoCore diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp deleted file mode 100644 index e8d8d2aa5..000000000 --- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp +++ /dev/null @@ -1,2124 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
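For context on the guest_driver removal above, a minimal sketch of the stride heuristic that GuestDriverProfile::DeduceTextureHandlerSize implemented (the free function below and its return type are illustrative; the deleted code stored the result in an optional member instead of returning it): sort the constant-buffer offsets at which texture handlers were bound, take the smallest nonzero gap between neighbours, and accept it only when that gap is one or two 32-bit words, i.e. a 4- or 8-byte handler.

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <optional>
#include <vector>

std::optional<std::uint32_t> DeduceHandlerSizeBytes(std::vector<std::uint32_t> bound_offsets) {
    if (bound_offsets.size() < 2) {
        return std::nullopt; // a single sample gives no stride to measure
    }
    std::sort(bound_offsets.begin(), bound_offsets.end());
    std::uint32_t min_stride = std::numeric_limits<std::uint32_t>::max();
    for (std::size_t i = 1; i < bound_offsets.size(); ++i) {
        if (bound_offsets[i] == bound_offsets[i - 1]) {
            continue; // duplicate binding, not a stride
        }
        min_stride = std::min(min_stride, bound_offsets[i] - bound_offsets[i - 1]);
    }
    if (min_stride > 2) {
        return std::nullopt; // neighbours too far apart to be adjacent handlers
    }
    return 4 * min_stride; // 4 bytes (Nvidia-style packed) or 8 bytes (the OpenGL/Vulkan default)
}

With offsets {0x30, 0x31, 0x34}, for example, the smallest gap is one word, so the profile would settle on 4-byte handlers; any query made before a successful deduction fell back to the 8-byte default.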
- -#include <algorithm> -#include <array> -#include <cstddef> -#include <string> -#include <string_view> -#include <utility> -#include <variant> - -#include <fmt/format.h> - -#include "common/alignment.h" -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/renderer_opengl/gl_arb_decompiler.h" -#include "video_core/renderer_opengl/gl_device.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -// Predicates in the decompiled code follow the convention that -1 means true and 0 means false. -// GLASM lacks booleans, so they have to be implemented as integers. -// Using -1 for true is useful because both CMP.S and NOT.U can negate it, and CMP.S can be used to -// select between two values, because -1 will be evaluated as true and 0 as false. - -namespace OpenGL { - -namespace { - -using Tegra::Engines::ShaderType; -using Tegra::Shader::Attribute; -using Tegra::Shader::PixelImap; -using Tegra::Shader::Register; -using namespace VideoCommon::Shader; -using Operation = const OperationNode&; - -constexpr std::array INTERNAL_FLAG_NAMES = {"ZERO", "SIGN", "CARRY", "OVERFLOW"}; - -char Swizzle(std::size_t component) { - static constexpr std::string_view SWIZZLE{"xyzw"}; - return SWIZZLE.at(component); -} - -constexpr bool IsGenericAttribute(Attribute::Index index) { - return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31; -} - -u32 GetGenericAttributeIndex(Attribute::Index index) { - ASSERT(IsGenericAttribute(index)); - return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0); -} - -std::string_view Modifiers(Operation operation) { - const auto meta = std::get_if<MetaArithmetic>(&operation.GetMeta()); - if (meta && meta->precise) { - return ".PREC"; - } - return ""; -} - -std::string_view GetInputFlags(PixelImap attribute) { - switch (attribute) { - case PixelImap::Perspective: - return ""; - case PixelImap::Constant: - return "FLAT "; - case PixelImap::ScreenLinear: - return "NOPERSPECTIVE "; - case PixelImap::Unused: - break; - } - UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute); - return {}; -} - -std::string_view ImageType(Tegra::Shader::ImageType image_type) { - switch (image_type) { - case Tegra::Shader::ImageType::Texture1D: - return "1D"; - case Tegra::Shader::ImageType::TextureBuffer: - return "BUFFER"; - case Tegra::Shader::ImageType::Texture1DArray: - return "ARRAY1D"; - case Tegra::Shader::ImageType::Texture2D: - return "2D"; - case Tegra::Shader::ImageType::Texture2DArray: - return "ARRAY2D"; - case Tegra::Shader::ImageType::Texture3D: - return "3D"; - } - UNREACHABLE(); - return {}; -} - -std::string_view StackName(MetaStackClass stack) { - switch (stack) { - case MetaStackClass::Ssy: - return "SSY"; - case MetaStackClass::Pbk: - return "PBK"; - } - UNREACHABLE(); - return ""; -}; - -std::string_view PrimitiveDescription(Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology topology) { - switch (topology) { - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Points: - return "POINTS"; - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Lines: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStrip: - return "LINES"; - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency: - return "LINES_ADJACENCY"; - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Triangles: - case 
Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStrip: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleFan: - return "TRIANGLES"; - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency: - return "TRIANGLES_ADJACENCY"; - default: - UNIMPLEMENTED_MSG("topology={}", topology); - return "POINTS"; - } -} - -std::string_view TopologyName(Tegra::Shader::OutputTopology topology) { - switch (topology) { - case Tegra::Shader::OutputTopology::PointList: - return "POINTS"; - case Tegra::Shader::OutputTopology::LineStrip: - return "LINE_STRIP"; - case Tegra::Shader::OutputTopology::TriangleStrip: - return "TRIANGLE_STRIP"; - default: - UNIMPLEMENTED_MSG("Unknown output topology: {}", topology); - return "points"; - } -} - -std::string_view StageInputName(ShaderType stage) { - switch (stage) { - case ShaderType::Vertex: - case ShaderType::Geometry: - return "vertex"; - case ShaderType::Fragment: - return "fragment"; - case ShaderType::Compute: - return "invocation"; - default: - UNREACHABLE(); - return ""; - } -} - -std::string TextureType(const MetaTexture& meta) { - if (meta.sampler.is_buffer) { - return "BUFFER"; - } - std::string type; - if (meta.sampler.is_shadow) { - type += "SHADOW"; - } - if (meta.sampler.is_array) { - type += "ARRAY"; - } - type += [&meta] { - switch (meta.sampler.type) { - case Tegra::Shader::TextureType::Texture1D: - return "1D"; - case Tegra::Shader::TextureType::Texture2D: - return "2D"; - case Tegra::Shader::TextureType::Texture3D: - return "3D"; - case Tegra::Shader::TextureType::TextureCube: - return "CUBE"; - } - UNREACHABLE(); - return "2D"; - }(); - return type; -} - -class ARBDecompiler final { -public: - explicit ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_, - ShaderType stage_, std::string_view identifier); - - std::string Code() const { - return shader_source; - } - -private: - void DefineGlobalMemory(); - - void DeclareHeader(); - void DeclareVertex(); - void DeclareGeometry(); - void DeclareFragment(); - void DeclareCompute(); - void DeclareInputAttributes(); - void DeclareOutputAttributes(); - void DeclareLocalMemory(); - void DeclareGlobalMemory(); - void DeclareConstantBuffers(); - void DeclareRegisters(); - void DeclareTemporaries(); - void DeclarePredicates(); - void DeclareInternalFlags(); - - void InitializeVariables(); - - void DecompileAST(); - void DecompileBranchMode(); - - void VisitAST(const ASTNode& node); - std::string VisitExpression(const Expr& node); - - void VisitBlock(const NodeBlock& bb); - - std::string Visit(const Node& node); - - std::tuple<std::string, std::string, std::size_t> BuildCoords(Operation); - std::string BuildAoffi(Operation); - std::string GlobalMemoryPointer(const GmemNode& gmem); - void Exit(); - - std::string Assign(Operation); - std::string Select(Operation); - std::string FClamp(Operation); - std::string FCastHalf0(Operation); - std::string FCastHalf1(Operation); - std::string FSqrt(Operation); - std::string FSwizzleAdd(Operation); - std::string HAdd2(Operation); - std::string HMul2(Operation); - std::string HFma2(Operation); - std::string HAbsolute(Operation); - std::string HNegate(Operation); - std::string HClamp(Operation); - std::string HCastFloat(Operation); - std::string HUnpack(Operation); - std::string HMergeF32(Operation); - std::string HMergeH0(Operation); - std::string HMergeH1(Operation); - std::string HPack2(Operation); - std::string 
LogicalAssign(Operation); - std::string LogicalPick2(Operation); - std::string LogicalAnd2(Operation); - std::string FloatOrdered(Operation); - std::string FloatUnordered(Operation); - std::string LogicalAddCarry(Operation); - std::string Texture(Operation); - std::string TextureGather(Operation); - std::string TextureQueryDimensions(Operation); - std::string TextureQueryLod(Operation); - std::string TexelFetch(Operation); - std::string TextureGradient(Operation); - std::string ImageLoad(Operation); - std::string ImageStore(Operation); - std::string Branch(Operation); - std::string BranchIndirect(Operation); - std::string PushFlowStack(Operation); - std::string PopFlowStack(Operation); - std::string Exit(Operation); - std::string Discard(Operation); - std::string EmitVertex(Operation); - std::string EndPrimitive(Operation); - std::string InvocationId(Operation); - std::string YNegate(Operation); - std::string ThreadId(Operation); - std::string ShuffleIndexed(Operation); - std::string Barrier(Operation); - std::string MemoryBarrierGroup(Operation); - std::string MemoryBarrierGlobal(Operation); - - template <const std::string_view& op> - std::string Unary(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("{}{} {}, {};", op, Modifiers(operation), temporary, Visit(operation[0])); - return temporary; - } - - template <const std::string_view& op> - std::string Binary(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("{}{} {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]), - Visit(operation[1])); - return temporary; - } - - template <const std::string_view& op> - std::string Trinary(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("{}{} {}, {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]), - Visit(operation[1]), Visit(operation[2])); - return temporary; - } - - template <const std::string_view& op, bool unordered> - std::string FloatComparison(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("TRUNC.U.CC RC.x, {};", Binary<op>(operation)); - AddLine("MOV.S {}, 0;", temporary); - AddLine("MOV.S {} (NE.x), -1;", temporary); - - const std::string op_a = Visit(operation[0]); - const std::string op_b = Visit(operation[1]); - if constexpr (unordered) { - AddLine("SNE.F RC.x, {}, {};", op_a, op_a); - AddLine("TRUNC.U.CC RC.x, RC.x;"); - AddLine("MOV.S {} (NE.x), -1;", temporary); - AddLine("SNE.F RC.x, {}, {};", op_b, op_b); - AddLine("TRUNC.U.CC RC.x, RC.x;"); - AddLine("MOV.S {} (NE.x), -1;", temporary); - } else if (op == SNE_F) { - AddLine("SNE.F RC.x, {}, {};", op_a, op_a); - AddLine("TRUNC.U.CC RC.x, RC.x;"); - AddLine("MOV.S {} (NE.x), 0;", temporary); - AddLine("SNE.F RC.x, {}, {};", op_b, op_b); - AddLine("TRUNC.U.CC RC.x, RC.x;"); - AddLine("MOV.S {} (NE.x), 0;", temporary); - } - return temporary; - } - - template <const std::string_view& op, bool is_nan> - std::string HalfComparison(Operation operation) { - std::string tmp1 = AllocVectorTemporary(); - const std::string tmp2 = AllocVectorTemporary(); - const std::string op_a = Visit(operation[0]); - const std::string op_b = Visit(operation[1]); - AddLine("UP2H.F {}, {};", tmp1, op_a); - AddLine("UP2H.F {}, {};", tmp2, op_b); - AddLine("{} {}, {}, {};", op, tmp1, tmp1, tmp2); - AddLine("TRUNC.U.CC RC.xy, {};", tmp1); - AddLine("MOV.S {}.xy, {{0, 0, 0, 0}};", tmp1); - AddLine("MOV.S {}.x (NE.x), -1;", tmp1); - AddLine("MOV.S {}.y (NE.y), -1;", tmp1); - if constexpr (is_nan) { - AddLine("MOVC.F 
RC.x, {};", op_a); - AddLine("MOV.S {}.x (NAN.x), -1;", tmp1); - AddLine("MOVC.F RC.x, {};", op_b); - AddLine("MOV.S {}.y (NAN.x), -1;", tmp1); - } - return tmp1; - } - - template <const std::string_view& op, const std::string_view& type> - std::string AtomicImage(Operation operation) { - const auto& meta = std::get<MetaImage>(operation.GetMeta()); - const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; - const std::size_t num_coords = operation.GetOperandsCount(); - const std::size_t num_values = meta.values.size(); - - const std::string coord = AllocVectorTemporary(); - const std::string value = AllocVectorTemporary(); - for (std::size_t i = 0; i < num_coords; ++i) { - AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i])); - } - for (std::size_t i = 0; i < num_values; ++i) { - AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i])); - } - - AddLine("ATOMIM.{}.{} {}.x, {}, {}, image[{}], {};", op, type, coord, value, coord, - image_id, ImageType(meta.image.type)); - return fmt::format("{}.x", coord); - } - - template <const std::string_view& op, const std::string_view& type> - std::string Atomic(Operation operation) { - std::string temporary = AllocTemporary(); - std::string address; - std::string_view opname; - bool robust = false; - if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) { - address = GlobalMemoryPointer(*gmem); - opname = "ATOM"; - robust = true; - } else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) { - address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress())); - opname = "ATOMS"; - } else { - UNREACHABLE(); - return "{0, 0, 0, 0}"; - } - if (robust) { - AddLine("IF NE.x;"); - } - AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address); - if (robust) { - AddLine("ELSE;"); - AddLine("MOV.S {}, 0;", temporary); - AddLine("ENDIF;"); - } - return temporary; - } - - template <char type> - std::string Negate(Operation operation) { - std::string temporary = AllocTemporary(); - if constexpr (type == 'F') { - AddLine("MOV.F32 {}, -{};", temporary, Visit(operation[0])); - } else { - AddLine("MOV.{} {}, -{};", type, temporary, Visit(operation[0])); - } - return temporary; - } - - template <char type> - std::string Absolute(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("MOV.{} {}, |{}|;", type, temporary, Visit(operation[0])); - return temporary; - } - - template <char type> - std::string BitfieldInsert(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[3])); - AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[2])); - AddLine("BFI.{} {}.x, {}, {}, {};", type, temporary, temporary, Visit(operation[1]), - Visit(operation[0])); - return fmt::format("{}.x", temporary); - } - - template <char type> - std::string BitfieldExtract(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[2])); - AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[1])); - AddLine("BFE.{} {}.x, {}, {};", type, temporary, temporary, Visit(operation[0])); - return fmt::format("{}.x", temporary); - } - - template <char swizzle> - std::string LocalInvocationId(Operation) { - return fmt::format("invocation.localid.{}", swizzle); - } - - template <char swizzle> - std::string WorkGroupId(Operation) { - return fmt::format("invocation.groupid.{}", swizzle); - } - - template <char 
c1, char c2> - std::string ThreadMask(Operation) { - return fmt::format("{}.thread{}{}mask", StageInputName(stage), c1, c2); - } - - template <typename... Args> - void AddExpression(std::string_view text, Args&&... args) { - shader_source += fmt::format(fmt::runtime(text), std::forward<Args>(args)...); - } - - template <typename... Args> - void AddLine(std::string_view text, Args&&... args) { - AddExpression(text, std::forward<Args>(args)...); - shader_source += '\n'; - } - - std::string AllocLongVectorTemporary() { - max_long_temporaries = std::max(max_long_temporaries, num_long_temporaries + 1); - return fmt::format("L{}", num_long_temporaries++); - } - - std::string AllocLongTemporary() { - return fmt::format("{}.x", AllocLongVectorTemporary()); - } - - std::string AllocVectorTemporary() { - max_temporaries = std::max(max_temporaries, num_temporaries + 1); - return fmt::format("T{}", num_temporaries++); - } - - std::string AllocTemporary() { - return fmt::format("{}.x", AllocVectorTemporary()); - } - - void ResetTemporaries() noexcept { - num_temporaries = 0; - num_long_temporaries = 0; - } - - const Device& device; - const ShaderIR& ir; - const Registry& registry; - const ShaderType stage; - - std::size_t num_temporaries = 0; - std::size_t max_temporaries = 0; - - std::size_t num_long_temporaries = 0; - std::size_t max_long_temporaries = 0; - - std::map<GlobalMemoryBase, u32> global_memory_names; - - std::string shader_source; - - static constexpr std::string_view ADD_F32 = "ADD.F32"; - static constexpr std::string_view ADD_S = "ADD.S"; - static constexpr std::string_view ADD_U = "ADD.U"; - static constexpr std::string_view MUL_F32 = "MUL.F32"; - static constexpr std::string_view MUL_S = "MUL.S"; - static constexpr std::string_view MUL_U = "MUL.U"; - static constexpr std::string_view DIV_F32 = "DIV.F32"; - static constexpr std::string_view DIV_S = "DIV.S"; - static constexpr std::string_view DIV_U = "DIV.U"; - static constexpr std::string_view MAD_F32 = "MAD.F32"; - static constexpr std::string_view RSQ_F32 = "RSQ.F32"; - static constexpr std::string_view COS_F32 = "COS.F32"; - static constexpr std::string_view SIN_F32 = "SIN.F32"; - static constexpr std::string_view EX2_F32 = "EX2.F32"; - static constexpr std::string_view LG2_F32 = "LG2.F32"; - static constexpr std::string_view SLT_F = "SLT.F32"; - static constexpr std::string_view SLT_S = "SLT.S"; - static constexpr std::string_view SLT_U = "SLT.U"; - static constexpr std::string_view SEQ_F = "SEQ.F32"; - static constexpr std::string_view SEQ_S = "SEQ.S"; - static constexpr std::string_view SEQ_U = "SEQ.U"; - static constexpr std::string_view SLE_F = "SLE.F32"; - static constexpr std::string_view SLE_S = "SLE.S"; - static constexpr std::string_view SLE_U = "SLE.U"; - static constexpr std::string_view SGT_F = "SGT.F32"; - static constexpr std::string_view SGT_S = "SGT.S"; - static constexpr std::string_view SGT_U = "SGT.U"; - static constexpr std::string_view SNE_F = "SNE.F32"; - static constexpr std::string_view SNE_S = "SNE.S"; - static constexpr std::string_view SNE_U = "SNE.U"; - static constexpr std::string_view SGE_F = "SGE.F32"; - static constexpr std::string_view SGE_S = "SGE.S"; - static constexpr std::string_view SGE_U = "SGE.U"; - static constexpr std::string_view AND_S = "AND.S"; - static constexpr std::string_view AND_U = "AND.U"; - static constexpr std::string_view TRUNC_F = "TRUNC.F"; - static constexpr std::string_view TRUNC_S = "TRUNC.S"; - static constexpr std::string_view TRUNC_U = "TRUNC.U"; - static constexpr 
std::string_view SHL_S = "SHL.S"; - static constexpr std::string_view SHL_U = "SHL.U"; - static constexpr std::string_view SHR_S = "SHR.S"; - static constexpr std::string_view SHR_U = "SHR.U"; - static constexpr std::string_view OR_S = "OR.S"; - static constexpr std::string_view OR_U = "OR.U"; - static constexpr std::string_view XOR_S = "XOR.S"; - static constexpr std::string_view XOR_U = "XOR.U"; - static constexpr std::string_view NOT_S = "NOT.S"; - static constexpr std::string_view NOT_U = "NOT.U"; - static constexpr std::string_view BTC_S = "BTC.S"; - static constexpr std::string_view BTC_U = "BTC.U"; - static constexpr std::string_view BTFM_S = "BTFM.S"; - static constexpr std::string_view BTFM_U = "BTFM.U"; - static constexpr std::string_view ROUND_F = "ROUND.F"; - static constexpr std::string_view CEIL_F = "CEIL.F"; - static constexpr std::string_view FLR_F = "FLR.F"; - static constexpr std::string_view I2F_S = "I2F.S"; - static constexpr std::string_view I2F_U = "I2F.U"; - static constexpr std::string_view MIN_F = "MIN.F"; - static constexpr std::string_view MIN_S = "MIN.S"; - static constexpr std::string_view MIN_U = "MIN.U"; - static constexpr std::string_view MAX_F = "MAX.F"; - static constexpr std::string_view MAX_S = "MAX.S"; - static constexpr std::string_view MAX_U = "MAX.U"; - static constexpr std::string_view MOV_U = "MOV.U"; - static constexpr std::string_view TGBALLOT_U = "TGBALLOT.U"; - static constexpr std::string_view TGALL_U = "TGALL.U"; - static constexpr std::string_view TGANY_U = "TGANY.U"; - static constexpr std::string_view TGEQ_U = "TGEQ.U"; - static constexpr std::string_view EXCH = "EXCH"; - static constexpr std::string_view ADD = "ADD"; - static constexpr std::string_view MIN = "MIN"; - static constexpr std::string_view MAX = "MAX"; - static constexpr std::string_view AND = "AND"; - static constexpr std::string_view OR = "OR"; - static constexpr std::string_view XOR = "XOR"; - static constexpr std::string_view U32 = "U32"; - static constexpr std::string_view S32 = "S32"; - - static constexpr std::size_t NUM_ENTRIES = static_cast<std::size_t>(OperationCode::Amount); - using DecompilerType = std::string (ARBDecompiler::*)(Operation); - static constexpr std::array<DecompilerType, NUM_ENTRIES> OPERATION_DECOMPILERS = { - &ARBDecompiler::Assign, - - &ARBDecompiler::Select, - - &ARBDecompiler::Binary<ADD_F32>, - &ARBDecompiler::Binary<MUL_F32>, - &ARBDecompiler::Binary<DIV_F32>, - &ARBDecompiler::Trinary<MAD_F32>, - &ARBDecompiler::Negate<'F'>, - &ARBDecompiler::Absolute<'F'>, - &ARBDecompiler::FClamp, - &ARBDecompiler::FCastHalf0, - &ARBDecompiler::FCastHalf1, - &ARBDecompiler::Binary<MIN_F>, - &ARBDecompiler::Binary<MAX_F>, - &ARBDecompiler::Unary<COS_F32>, - &ARBDecompiler::Unary<SIN_F32>, - &ARBDecompiler::Unary<EX2_F32>, - &ARBDecompiler::Unary<LG2_F32>, - &ARBDecompiler::Unary<RSQ_F32>, - &ARBDecompiler::FSqrt, - &ARBDecompiler::Unary<ROUND_F>, - &ARBDecompiler::Unary<FLR_F>, - &ARBDecompiler::Unary<CEIL_F>, - &ARBDecompiler::Unary<TRUNC_F>, - &ARBDecompiler::Unary<I2F_S>, - &ARBDecompiler::Unary<I2F_U>, - &ARBDecompiler::FSwizzleAdd, - - &ARBDecompiler::Binary<ADD_S>, - &ARBDecompiler::Binary<MUL_S>, - &ARBDecompiler::Binary<DIV_S>, - &ARBDecompiler::Negate<'S'>, - &ARBDecompiler::Absolute<'S'>, - &ARBDecompiler::Binary<MIN_S>, - &ARBDecompiler::Binary<MAX_S>, - - &ARBDecompiler::Unary<TRUNC_S>, - &ARBDecompiler::Unary<MOV_U>, - &ARBDecompiler::Binary<SHL_S>, - &ARBDecompiler::Binary<SHR_U>, - &ARBDecompiler::Binary<SHR_S>, - &ARBDecompiler::Binary<AND_S>, 
- &ARBDecompiler::Binary<OR_S>, - &ARBDecompiler::Binary<XOR_S>, - &ARBDecompiler::Unary<NOT_S>, - &ARBDecompiler::BitfieldInsert<'S'>, - &ARBDecompiler::BitfieldExtract<'S'>, - &ARBDecompiler::Unary<BTC_S>, - &ARBDecompiler::Unary<BTFM_S>, - - &ARBDecompiler::Binary<ADD_U>, - &ARBDecompiler::Binary<MUL_U>, - &ARBDecompiler::Binary<DIV_U>, - &ARBDecompiler::Binary<MIN_U>, - &ARBDecompiler::Binary<MAX_U>, - &ARBDecompiler::Unary<TRUNC_U>, - &ARBDecompiler::Unary<MOV_U>, - &ARBDecompiler::Binary<SHL_U>, - &ARBDecompiler::Binary<SHR_U>, - &ARBDecompiler::Binary<SHR_U>, - &ARBDecompiler::Binary<AND_U>, - &ARBDecompiler::Binary<OR_U>, - &ARBDecompiler::Binary<XOR_U>, - &ARBDecompiler::Unary<NOT_U>, - &ARBDecompiler::BitfieldInsert<'U'>, - &ARBDecompiler::BitfieldExtract<'U'>, - &ARBDecompiler::Unary<BTC_U>, - &ARBDecompiler::Unary<BTFM_U>, - - &ARBDecompiler::HAdd2, - &ARBDecompiler::HMul2, - &ARBDecompiler::HFma2, - &ARBDecompiler::HAbsolute, - &ARBDecompiler::HNegate, - &ARBDecompiler::HClamp, - &ARBDecompiler::HCastFloat, - &ARBDecompiler::HUnpack, - &ARBDecompiler::HMergeF32, - &ARBDecompiler::HMergeH0, - &ARBDecompiler::HMergeH1, - &ARBDecompiler::HPack2, - - &ARBDecompiler::LogicalAssign, - &ARBDecompiler::Binary<AND_U>, - &ARBDecompiler::Binary<OR_U>, - &ARBDecompiler::Binary<XOR_U>, - &ARBDecompiler::Unary<NOT_U>, - &ARBDecompiler::LogicalPick2, - &ARBDecompiler::LogicalAnd2, - - &ARBDecompiler::FloatComparison<SLT_F, false>, - &ARBDecompiler::FloatComparison<SEQ_F, false>, - &ARBDecompiler::FloatComparison<SLE_F, false>, - &ARBDecompiler::FloatComparison<SGT_F, false>, - &ARBDecompiler::FloatComparison<SNE_F, false>, - &ARBDecompiler::FloatComparison<SGE_F, false>, - &ARBDecompiler::FloatOrdered, - &ARBDecompiler::FloatUnordered, - &ARBDecompiler::FloatComparison<SLT_F, true>, - &ARBDecompiler::FloatComparison<SEQ_F, true>, - &ARBDecompiler::FloatComparison<SLE_F, true>, - &ARBDecompiler::FloatComparison<SGT_F, true>, - &ARBDecompiler::FloatComparison<SNE_F, true>, - &ARBDecompiler::FloatComparison<SGE_F, true>, - - &ARBDecompiler::Binary<SLT_S>, - &ARBDecompiler::Binary<SEQ_S>, - &ARBDecompiler::Binary<SLE_S>, - &ARBDecompiler::Binary<SGT_S>, - &ARBDecompiler::Binary<SNE_S>, - &ARBDecompiler::Binary<SGE_S>, - - &ARBDecompiler::Binary<SLT_U>, - &ARBDecompiler::Binary<SEQ_U>, - &ARBDecompiler::Binary<SLE_U>, - &ARBDecompiler::Binary<SGT_U>, - &ARBDecompiler::Binary<SNE_U>, - &ARBDecompiler::Binary<SGE_U>, - - &ARBDecompiler::LogicalAddCarry, - - &ARBDecompiler::HalfComparison<SLT_F, false>, - &ARBDecompiler::HalfComparison<SEQ_F, false>, - &ARBDecompiler::HalfComparison<SLE_F, false>, - &ARBDecompiler::HalfComparison<SGT_F, false>, - &ARBDecompiler::HalfComparison<SNE_F, false>, - &ARBDecompiler::HalfComparison<SGE_F, false>, - &ARBDecompiler::HalfComparison<SLT_F, true>, - &ARBDecompiler::HalfComparison<SEQ_F, true>, - &ARBDecompiler::HalfComparison<SLE_F, true>, - &ARBDecompiler::HalfComparison<SGT_F, true>, - &ARBDecompiler::HalfComparison<SNE_F, true>, - &ARBDecompiler::HalfComparison<SGE_F, true>, - - &ARBDecompiler::Texture, - &ARBDecompiler::Texture, - &ARBDecompiler::TextureGather, - &ARBDecompiler::TextureQueryDimensions, - &ARBDecompiler::TextureQueryLod, - &ARBDecompiler::TexelFetch, - &ARBDecompiler::TextureGradient, - - &ARBDecompiler::ImageLoad, - &ARBDecompiler::ImageStore, - - &ARBDecompiler::AtomicImage<ADD, U32>, - &ARBDecompiler::AtomicImage<AND, U32>, - &ARBDecompiler::AtomicImage<OR, U32>, - &ARBDecompiler::AtomicImage<XOR, U32>, - &ARBDecompiler::AtomicImage<EXCH, 
U32>, - - &ARBDecompiler::Atomic<EXCH, U32>, - &ARBDecompiler::Atomic<ADD, U32>, - &ARBDecompiler::Atomic<MIN, U32>, - &ARBDecompiler::Atomic<MAX, U32>, - &ARBDecompiler::Atomic<AND, U32>, - &ARBDecompiler::Atomic<OR, U32>, - &ARBDecompiler::Atomic<XOR, U32>, - - &ARBDecompiler::Atomic<EXCH, S32>, - &ARBDecompiler::Atomic<ADD, S32>, - &ARBDecompiler::Atomic<MIN, S32>, - &ARBDecompiler::Atomic<MAX, S32>, - &ARBDecompiler::Atomic<AND, S32>, - &ARBDecompiler::Atomic<OR, S32>, - &ARBDecompiler::Atomic<XOR, S32>, - - &ARBDecompiler::Atomic<ADD, U32>, - &ARBDecompiler::Atomic<MIN, U32>, - &ARBDecompiler::Atomic<MAX, U32>, - &ARBDecompiler::Atomic<AND, U32>, - &ARBDecompiler::Atomic<OR, U32>, - &ARBDecompiler::Atomic<XOR, U32>, - - &ARBDecompiler::Atomic<ADD, S32>, - &ARBDecompiler::Atomic<MIN, S32>, - &ARBDecompiler::Atomic<MAX, S32>, - &ARBDecompiler::Atomic<AND, S32>, - &ARBDecompiler::Atomic<OR, S32>, - &ARBDecompiler::Atomic<XOR, S32>, - - &ARBDecompiler::Branch, - &ARBDecompiler::BranchIndirect, - &ARBDecompiler::PushFlowStack, - &ARBDecompiler::PopFlowStack, - &ARBDecompiler::Exit, - &ARBDecompiler::Discard, - - &ARBDecompiler::EmitVertex, - &ARBDecompiler::EndPrimitive, - - &ARBDecompiler::InvocationId, - &ARBDecompiler::YNegate, - &ARBDecompiler::LocalInvocationId<'x'>, - &ARBDecompiler::LocalInvocationId<'y'>, - &ARBDecompiler::LocalInvocationId<'z'>, - &ARBDecompiler::WorkGroupId<'x'>, - &ARBDecompiler::WorkGroupId<'y'>, - &ARBDecompiler::WorkGroupId<'z'>, - - &ARBDecompiler::Unary<TGBALLOT_U>, - &ARBDecompiler::Unary<TGALL_U>, - &ARBDecompiler::Unary<TGANY_U>, - &ARBDecompiler::Unary<TGEQ_U>, - - &ARBDecompiler::ThreadId, - &ARBDecompiler::ThreadMask<'e', 'q'>, - &ARBDecompiler::ThreadMask<'g', 'e'>, - &ARBDecompiler::ThreadMask<'g', 't'>, - &ARBDecompiler::ThreadMask<'l', 'e'>, - &ARBDecompiler::ThreadMask<'l', 't'>, - &ARBDecompiler::ShuffleIndexed, - - &ARBDecompiler::Barrier, - &ARBDecompiler::MemoryBarrierGroup, - &ARBDecompiler::MemoryBarrierGlobal, - }; -}; - -ARBDecompiler::ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_, - ShaderType stage_, std::string_view identifier) - : device{device_}, ir{ir_}, registry{registry_}, stage{stage_} { - DefineGlobalMemory(); - - AddLine("TEMP RC;"); - AddLine("TEMP FSWZA[4];"); - AddLine("TEMP FSWZB[4];"); - if (ir.IsDecompiled()) { - DecompileAST(); - } else { - DecompileBranchMode(); - } - AddLine("END"); - - const std::string code = std::move(shader_source); - DeclareHeader(); - DeclareVertex(); - DeclareGeometry(); - DeclareFragment(); - DeclareCompute(); - DeclareInputAttributes(); - DeclareOutputAttributes(); - DeclareLocalMemory(); - DeclareGlobalMemory(); - DeclareConstantBuffers(); - DeclareRegisters(); - DeclareTemporaries(); - DeclarePredicates(); - DeclareInternalFlags(); - - shader_source += code; -} - -std::string_view HeaderStageName(ShaderType stage) { - switch (stage) { - case ShaderType::Vertex: - return "vp"; - case ShaderType::Geometry: - return "gp"; - case ShaderType::Fragment: - return "fp"; - case ShaderType::Compute: - return "cp"; - default: - UNREACHABLE(); - return ""; - } -} - -void ARBDecompiler::DefineGlobalMemory() { - u32 binding = 0; - for (const auto& pair : ir.GetGlobalMemory()) { - const GlobalMemoryBase base = pair.first; - global_memory_names.emplace(base, binding); - ++binding; - } -} - -void ARBDecompiler::DeclareHeader() { - AddLine("!!NV{}5.0", HeaderStageName(stage)); - // Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D - 
AddLine("OPTION NV_internal;"); - AddLine("OPTION NV_gpu_program_fp64;"); - AddLine("OPTION NV_shader_thread_group;"); - if (ir.UsesWarps() && device.HasWarpIntrinsics()) { - AddLine("OPTION NV_shader_thread_shuffle;"); - } - if (stage == ShaderType::Vertex) { - if (device.HasNvViewportArray2()) { - AddLine("OPTION NV_viewport_array2;"); - } - } - if (stage == ShaderType::Fragment) { - AddLine("OPTION ARB_draw_buffers;"); - } - if (device.HasImageLoadFormatted()) { - AddLine("OPTION EXT_shader_image_load_formatted;"); - } -} - -void ARBDecompiler::DeclareVertex() { - if (stage != ShaderType::Vertex) { - return; - } - AddLine("OUTPUT result_clip[] = {{ result.clip[0..7] }};"); -} - -void ARBDecompiler::DeclareGeometry() { - if (stage != ShaderType::Geometry) { - return; - } - const auto& info = registry.GetGraphicsInfo(); - const auto& header = ir.GetHeader(); - AddLine("PRIMITIVE_IN {};", PrimitiveDescription(info.primitive_topology)); - AddLine("PRIMITIVE_OUT {};", TopologyName(header.common3.output_topology)); - AddLine("VERTICES_OUT {};", header.common4.max_output_vertices.Value()); - AddLine("ATTRIB vertex_position = vertex.position;"); -} - -void ARBDecompiler::DeclareFragment() { - if (stage != ShaderType::Fragment) { - return; - } - AddLine("OUTPUT result_color7 = result.color[7];"); - AddLine("OUTPUT result_color6 = result.color[6];"); - AddLine("OUTPUT result_color5 = result.color[5];"); - AddLine("OUTPUT result_color4 = result.color[4];"); - AddLine("OUTPUT result_color3 = result.color[3];"); - AddLine("OUTPUT result_color2 = result.color[2];"); - AddLine("OUTPUT result_color1 = result.color[1];"); - AddLine("OUTPUT result_color0 = result.color;"); -} - -void ARBDecompiler::DeclareCompute() { - if (stage != ShaderType::Compute) { - return; - } - const ComputeInfo& info = registry.GetComputeInfo(); - AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1], - info.workgroup_size[2]); - if (info.shared_memory_size_in_words == 0) { - return; - } - const u32 limit = device.GetMaxComputeSharedMemorySize(); - u32 size_in_bytes = info.shared_memory_size_in_words * 4; - if (size_in_bytes > limit) { - LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}", - size_in_bytes, limit); - size_in_bytes = limit; - } - - AddLine("SHARED_MEMORY {};", size_in_bytes); - AddLine("SHARED shared_mem[] = {{program.sharedmem}};"); -} - -void ARBDecompiler::DeclareInputAttributes() { - if (stage == ShaderType::Compute) { - return; - } - const std::string_view stage_name = StageInputName(stage); - for (const auto attribute : ir.GetInputAttributes()) { - if (!IsGenericAttribute(attribute)) { - continue; - } - const u32 index = GetGenericAttributeIndex(attribute); - - std::string_view suffix; - if (stage == ShaderType::Fragment) { - const auto input_mode{ir.GetHeader().ps.GetPixelImap(index)}; - if (input_mode == PixelImap::Unused) { - return; - } - suffix = GetInputFlags(input_mode); - } - AddLine("{}ATTRIB in_attr{}[] = {{ {}.attrib[{}..{}] }};", suffix, index, stage_name, index, - index); - } -} - -void ARBDecompiler::DeclareOutputAttributes() { - if (stage == ShaderType::Compute) { - return; - } - for (const auto attribute : ir.GetOutputAttributes()) { - if (!IsGenericAttribute(attribute)) { - continue; - } - const u32 index = GetGenericAttributeIndex(attribute); - AddLine("OUTPUT out_attr{}[] = {{ result.attrib[{}..{}] }};", index, index, index); - } -} - -void ARBDecompiler::DeclareLocalMemory() { - u64 size = 0; - if (stage == ShaderType::Compute) { - size 
= registry.GetComputeInfo().local_memory_size_in_words * 4ULL; - } else { - size = ir.GetHeader().GetLocalMemorySize(); - } - if (size == 0) { - return; - } - const u64 element_count = Common::AlignUp(size, 4) / 4; - AddLine("TEMP lmem[{}];", element_count); -} - -void ARBDecompiler::DeclareGlobalMemory() { - const size_t num_entries = ir.GetGlobalMemory().size(); - if (num_entries > 0) { - AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_entries, num_entries - 1); - } -} - -void ARBDecompiler::DeclareConstantBuffers() { - u32 binding = 0; - for (const auto& cbuf : ir.GetConstantBuffers()) { - AddLine("CBUFFER cbuf{}[] = {{ program.buffer[{}] }};", cbuf.first, binding); - ++binding; - } -} - -void ARBDecompiler::DeclareRegisters() { - for (const u32 gpr : ir.GetRegisters()) { - AddLine("TEMP R{};", gpr); - } -} - -void ARBDecompiler::DeclareTemporaries() { - for (std::size_t i = 0; i < max_temporaries; ++i) { - AddLine("TEMP T{};", i); - } - for (std::size_t i = 0; i < max_long_temporaries; ++i) { - AddLine("LONG TEMP L{};", i); - } -} - -void ARBDecompiler::DeclarePredicates() { - for (const Tegra::Shader::Pred pred : ir.GetPredicates()) { - AddLine("TEMP P{};", static_cast<u64>(pred)); - } -} - -void ARBDecompiler::DeclareInternalFlags() { - for (const char* name : INTERNAL_FLAG_NAMES) { - AddLine("TEMP {};", name); - } -} - -void ARBDecompiler::InitializeVariables() { - AddLine("MOV.F32 FSWZA[0], -1;"); - AddLine("MOV.F32 FSWZA[1], 1;"); - AddLine("MOV.F32 FSWZA[2], -1;"); - AddLine("MOV.F32 FSWZA[3], 0;"); - AddLine("MOV.F32 FSWZB[0], -1;"); - AddLine("MOV.F32 FSWZB[1], -1;"); - AddLine("MOV.F32 FSWZB[2], 1;"); - AddLine("MOV.F32 FSWZB[3], -1;"); - - if (stage == ShaderType::Vertex || stage == ShaderType::Geometry) { - AddLine("MOV.F result.position, {{0, 0, 0, 1}};"); - } - for (const auto attribute : ir.GetOutputAttributes()) { - if (!IsGenericAttribute(attribute)) { - continue; - } - const u32 index = GetGenericAttributeIndex(attribute); - AddLine("MOV.F result.attrib[{}], {{0, 0, 0, 1}};", index); - } - for (const u32 gpr : ir.GetRegisters()) { - AddLine("MOV.F R{}, {{0, 0, 0, 0}};", gpr); - } - for (const Tegra::Shader::Pred pred : ir.GetPredicates()) { - AddLine("MOV.U P{}, {{0, 0, 0, 0}};", static_cast<u64>(pred)); - } -} - -void ARBDecompiler::DecompileAST() { - const u32 num_flow_variables = ir.GetASTNumVariables(); - for (u32 i = 0; i < num_flow_variables; ++i) { - AddLine("TEMP F{};", i); - } - for (u32 i = 0; i < num_flow_variables; ++i) { - AddLine("MOV.U F{}, {{0, 0, 0, 0}};", i); - } - - InitializeVariables(); - - VisitAST(ir.GetASTProgram()); -} - -void ARBDecompiler::DecompileBranchMode() { - static constexpr u32 FLOW_STACK_SIZE = 20; - if (!ir.IsFlowStackDisabled()) { - AddLine("TEMP SSY[{}];", FLOW_STACK_SIZE); - AddLine("TEMP PBK[{}];", FLOW_STACK_SIZE); - AddLine("TEMP SSY_TOP;"); - AddLine("TEMP PBK_TOP;"); - } - - AddLine("TEMP PC;"); - - if (!ir.IsFlowStackDisabled()) { - AddLine("MOV.U SSY_TOP.x, 0;"); - AddLine("MOV.U PBK_TOP.x, 0;"); - } - - InitializeVariables(); - - const auto basic_block_end = ir.GetBasicBlocks().end(); - auto basic_block_it = ir.GetBasicBlocks().begin(); - const u32 first_address = basic_block_it->first; - AddLine("MOV.U PC.x, {};", first_address); - - AddLine("REP;"); - - std::size_t num_blocks = 0; - while (basic_block_it != basic_block_end) { - const auto& [address, bb] = *basic_block_it; - ++num_blocks; - - AddLine("SEQ.S.CC RC.x, PC.x, {};", address); - AddLine("IF NE.x;"); - - VisitBlock(bb); - - ++basic_block_it; - - if 
(basic_block_it != basic_block_end) { - const auto op = std::get_if<OperationNode>(&*bb[bb.size() - 1]); - if (!op || op->GetCode() != OperationCode::Branch) { - const u32 next_address = basic_block_it->first; - AddLine("MOV.U PC.x, {};", next_address); - AddLine("CONT;"); - } - } - - AddLine("ELSE;"); - } - AddLine("RET;"); - while (num_blocks--) { - AddLine("ENDIF;"); - } - - AddLine("ENDREP;"); -} - -void ARBDecompiler::VisitAST(const ASTNode& node) { - if (const auto ast = std::get_if<ASTProgram>(&*node->GetInnerData())) { - for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) { - VisitAST(current); - } - } else if (const auto if_then = std::get_if<ASTIfThen>(&*node->GetInnerData())) { - const std::string condition = VisitExpression(if_then->condition); - ResetTemporaries(); - - AddLine("MOVC.U RC.x, {};", condition); - AddLine("IF NE.x;"); - for (ASTNode current = if_then->nodes.GetFirst(); current; current = current->GetNext()) { - VisitAST(current); - } - AddLine("ENDIF;"); - } else if (const auto if_else = std::get_if<ASTIfElse>(&*node->GetInnerData())) { - AddLine("ELSE;"); - for (ASTNode current = if_else->nodes.GetFirst(); current; current = current->GetNext()) { - VisitAST(current); - } - } else if (const auto decoded = std::get_if<ASTBlockDecoded>(&*node->GetInnerData())) { - VisitBlock(decoded->nodes); - } else if (const auto var_set = std::get_if<ASTVarSet>(&*node->GetInnerData())) { - AddLine("MOV.U F{}, {};", var_set->index, VisitExpression(var_set->condition)); - ResetTemporaries(); - } else if (const auto do_while = std::get_if<ASTDoWhile>(&*node->GetInnerData())) { - const std::string condition = VisitExpression(do_while->condition); - ResetTemporaries(); - AddLine("REP;"); - for (ASTNode current = do_while->nodes.GetFirst(); current; current = current->GetNext()) { - VisitAST(current); - } - AddLine("MOVC.U RC.x, {};", condition); - AddLine("BRK (NE.x);"); - AddLine("ENDREP;"); - } else if (const auto ast_return = std::get_if<ASTReturn>(&*node->GetInnerData())) { - const bool is_true = ExprIsTrue(ast_return->condition); - if (!is_true) { - AddLine("MOVC.U RC.x, {};", VisitExpression(ast_return->condition)); - AddLine("IF NE.x;"); - ResetTemporaries(); - } - if (ast_return->kills) { - AddLine("KIL TR;"); - } else { - Exit(); - } - if (!is_true) { - AddLine("ENDIF;"); - } - } else if (const auto ast_break = std::get_if<ASTBreak>(&*node->GetInnerData())) { - if (ExprIsTrue(ast_break->condition)) { - AddLine("BRK;"); - } else { - AddLine("MOVC.U RC.x, {};", VisitExpression(ast_break->condition)); - AddLine("BRK (NE.x);"); - ResetTemporaries(); - } - } else if (std::holds_alternative<ASTLabel>(*node->GetInnerData())) { - // Nothing to do - } else { - UNREACHABLE(); - } -} - -std::string ARBDecompiler::VisitExpression(const Expr& node) { - if (const auto expr = std::get_if<ExprAnd>(&*node)) { - std::string result = AllocTemporary(); - AddLine("AND.U {}, {}, {};", result, VisitExpression(expr->operand1), - VisitExpression(expr->operand2)); - return result; - } - if (const auto expr = std::get_if<ExprOr>(&*node)) { - std::string result = AllocTemporary(); - AddLine("OR.U {}, {}, {};", result, VisitExpression(expr->operand1), - VisitExpression(expr->operand2)); - return result; - } - if (const auto expr = std::get_if<ExprNot>(&*node)) { - std::string result = AllocTemporary(); - AddLine("CMP.S {}, {}, 0, -1;", result, VisitExpression(expr->operand1)); - return result; - } - if (const auto expr = std::get_if<ExprPredicate>(&*node)) { - return 
fmt::format("P{}.x", static_cast<u64>(expr->predicate)); - } - if (const auto expr = std::get_if<ExprCondCode>(&*node)) { - return Visit(ir.GetConditionCode(expr->cc)); - } - if (const auto expr = std::get_if<ExprVar>(&*node)) { - return fmt::format("F{}.x", expr->var_index); - } - if (const auto expr = std::get_if<ExprBoolean>(&*node)) { - return expr->value ? "0xffffffff" : "0"; - } - if (const auto expr = std::get_if<ExprGprEqual>(&*node)) { - std::string result = AllocTemporary(); - AddLine("SEQ.U {}, R{}.x, {};", result, expr->gpr, expr->value); - return result; - } - UNREACHABLE(); - return "0"; -} - -void ARBDecompiler::VisitBlock(const NodeBlock& bb) { - for (const auto& node : bb) { - Visit(node); - } -} - -std::string ARBDecompiler::Visit(const Node& node) { - if (const auto operation = std::get_if<OperationNode>(&*node)) { - if (const auto amend_index = operation->GetAmendIndex()) { - Visit(ir.GetAmendNode(*amend_index)); - } - const std::size_t index = static_cast<std::size_t>(operation->GetCode()); - if (index >= OPERATION_DECOMPILERS.size()) { - UNREACHABLE_MSG("Out of bounds operation: {}", index); - return {}; - } - const auto decompiler = OPERATION_DECOMPILERS[index]; - if (decompiler == nullptr) { - UNREACHABLE_MSG("Undefined operation: {}", index); - return {}; - } - return (this->*decompiler)(*operation); - } - - if (const auto gpr = std::get_if<GprNode>(&*node)) { - const u32 index = gpr->GetIndex(); - if (index == Register::ZeroIndex) { - return "{0, 0, 0, 0}.x"; - } - return fmt::format("R{}.x", index); - } - - if (const auto cv = std::get_if<CustomVarNode>(&*node)) { - return fmt::format("CV{}.x", cv->GetIndex()); - } - - if (const auto immediate = std::get_if<ImmediateNode>(&*node)) { - std::string temporary = AllocTemporary(); - AddLine("MOV.U {}, {};", temporary, immediate->GetValue()); - return temporary; - } - - if (const auto predicate = std::get_if<PredicateNode>(&*node)) { - std::string temporary = AllocTemporary(); - switch (const auto index = predicate->GetIndex(); index) { - case Tegra::Shader::Pred::UnusedIndex: - AddLine("MOV.S {}, -1;", temporary); - break; - case Tegra::Shader::Pred::NeverExecute: - AddLine("MOV.S {}, 0;", temporary); - break; - default: - AddLine("MOV.S {}, P{}.x;", temporary, static_cast<u64>(index)); - break; - } - if (predicate->IsNegated()) { - AddLine("CMP.S {}, {}, 0, -1;", temporary, temporary); - } - return temporary; - } - - if (const auto abuf = std::get_if<AbufNode>(&*node)) { - if (abuf->IsPhysicalBuffer()) { - UNIMPLEMENTED_MSG("Physical buffers are not implemented"); - return "{0, 0, 0, 0}.x"; - } - - const Attribute::Index index = abuf->GetIndex(); - const u32 element = abuf->GetElement(); - const char swizzle = Swizzle(element); - switch (index) { - case Attribute::Index::Position: { - if (stage == ShaderType::Geometry) { - return fmt::format("{}_position[{}].{}", StageInputName(stage), - Visit(abuf->GetBuffer()), swizzle); - } else { - return fmt::format("{}.position.{}", StageInputName(stage), swizzle); - } - } - case Attribute::Index::TessCoordInstanceIDVertexID: - ASSERT(stage == ShaderType::Vertex); - switch (element) { - case 2: - return "vertex.instance"; - case 3: - return "vertex.id"; - } - UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); - break; - case Attribute::Index::PointCoord: - switch (element) { - case 0: - return "fragment.pointcoord.x"; - case 1: - return "fragment.pointcoord.y"; - } - UNIMPLEMENTED(); - break; - case Attribute::Index::FrontFacing: { - ASSERT(stage == 
ShaderType::Fragment); - ASSERT(element == 3); - const std::string temporary = AllocVectorTemporary(); - AddLine("SGT.S RC.x, fragment.facing, {{0, 0, 0, 0}};"); - AddLine("MOV.U.CC RC.x, -RC;"); - AddLine("MOV.S {}.x, 0;", temporary); - AddLine("MOV.S {}.x (NE.x), -1;", temporary); - return fmt::format("{}.x", temporary); - } - default: - if (IsGenericAttribute(index)) { - if (stage == ShaderType::Geometry) { - return fmt::format("in_attr{}[{}][0].{}", GetGenericAttributeIndex(index), - Visit(abuf->GetBuffer()), swizzle); - } else { - return fmt::format("{}.attrib[{}].{}", StageInputName(stage), - GetGenericAttributeIndex(index), swizzle); - } - } - UNIMPLEMENTED_MSG("Unimplemented input attribute={}", index); - break; - } - return "{0, 0, 0, 0}.x"; - } - - if (const auto cbuf = std::get_if<CbufNode>(&*node)) { - std::string offset_string; - const auto& offset = cbuf->GetOffset(); - if (const auto imm = std::get_if<ImmediateNode>(&*offset)) { - offset_string = std::to_string(imm->GetValue()); - } else { - offset_string = Visit(offset); - } - std::string temporary = AllocTemporary(); - AddLine("LDC.F32 {}, cbuf{}[{}];", temporary, cbuf->GetIndex(), offset_string); - return temporary; - } - - if (const auto gmem = std::get_if<GmemNode>(&*node)) { - std::string temporary = AllocTemporary(); - AddLine("MOV {}, 0;", temporary); - AddLine("LOAD.U32 {} (NE.x), {};", temporary, GlobalMemoryPointer(*gmem)); - return temporary; - } - - if (const auto lmem = std::get_if<LmemNode>(&*node)) { - std::string temporary = Visit(lmem->GetAddress()); - AddLine("SHR.U {}, {}, 2;", temporary, temporary); - AddLine("MOV.U {}, lmem[{}].x;", temporary, temporary); - return temporary; - } - - if (const auto smem = std::get_if<SmemNode>(&*node)) { - std::string temporary = Visit(smem->GetAddress()); - AddLine("LDS.U32 {}, shared_mem[{}];", temporary, temporary); - return temporary; - } - - if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { - const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag()); - return fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]); - } - - if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { - if (const auto amend_index = conditional->GetAmendIndex()) { - Visit(ir.GetAmendNode(*amend_index)); - } - AddLine("MOVC.U RC.x, {};", Visit(conditional->GetCondition())); - AddLine("IF NE.x;"); - VisitBlock(conditional->GetCode()); - AddLine("ENDIF;"); - return {}; - } - - if ([[maybe_unused]] const auto cmt = std::get_if<CommentNode>(&*node)) { - // Uncommenting this will generate invalid code. GLASM lacks comments. 
- // AddLine("// {}", cmt->GetText()); - return {}; - } - - UNIMPLEMENTED(); - return {}; -} - -std::tuple<std::string, std::string, std::size_t> ARBDecompiler::BuildCoords(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - UNIMPLEMENTED_IF(meta.sampler.is_indexed); - - const bool is_extended = meta.sampler.is_shadow && meta.sampler.is_array && - meta.sampler.type == Tegra::Shader::TextureType::TextureCube; - const std::size_t count = operation.GetOperandsCount(); - std::string temporary = AllocVectorTemporary(); - std::size_t i = 0; - for (; i < count; ++i) { - AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); - } - if (meta.sampler.is_array) { - AddLine("I2F.S {}.{}, {};", temporary, Swizzle(i), Visit(meta.array)); - ++i; - } - if (meta.sampler.is_shadow) { - std::string compare = Visit(meta.depth_compare); - if (is_extended) { - ASSERT(i == 4); - std::string extra_coord = AllocVectorTemporary(); - AddLine("MOV.F {}.x, {};", extra_coord, compare); - return {fmt::format("{}, {}", temporary, extra_coord), extra_coord, 0}; - } - AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), compare); - ++i; - } - return {temporary, temporary, i}; -} - -std::string ARBDecompiler::BuildAoffi(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - if (meta.aoffi.empty()) { - return {}; - } - const std::string temporary = AllocVectorTemporary(); - std::size_t i = 0; - for (auto& node : meta.aoffi) { - AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i++), Visit(node)); - } - return fmt::format(", offset({})", temporary); -} - -std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) { - // Read a bindless SSBO, return its address and set CC accordingly - // address = c[binding].xy - // length = c[binding].z - const u32 binding = global_memory_names.at(gmem.GetDescriptor()); - - const std::string pointer = AllocLongVectorTemporary(); - std::string temporary = AllocTemporary(); - - AddLine("PK64.U {}, c[{}];", pointer, binding); - AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()), - Visit(gmem.GetBaseAddress())); - AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary); - AddLine("ADD.U64 {}.x, {}.x, {}.z;", pointer, pointer, pointer); - // Compare offset to length and set CC - AddLine("SLT.U.CC RC.x, {}, c[{}].z;", temporary, binding); - return fmt::format("{}.x", pointer); -} - -void ARBDecompiler::Exit() { - if (stage != ShaderType::Fragment) { - AddLine("RET;"); - return; - } - - const auto safe_get_register = [this](u32 reg) -> std::string { - if (ir.GetRegisters().contains(reg)) { - return fmt::format("R{}.x", reg); - } - return "{0, 0, 0, 0}.x"; - }; - - const auto& header = ir.GetHeader(); - u32 current_reg = 0; - for (u32 rt = 0; rt < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; ++rt) { - for (u32 component = 0; component < 4; ++component) { - if (!header.ps.IsColorComponentOutputEnabled(rt, component)) { - continue; - } - AddLine("MOV.F result_color{}.{}, {};", rt, Swizzle(component), - safe_get_register(current_reg)); - ++current_reg; - } - } - if (header.ps.omap.depth) { - AddLine("MOV.F result.depth.z, {};", safe_get_register(current_reg + 1)); - } - - AddLine("RET;"); -} - -std::string ARBDecompiler::Assign(Operation operation) { - const Node& dest = operation[0]; - const Node& src = operation[1]; - - std::string dest_name; - if (const auto gpr = std::get_if<GprNode>(&*dest)) { - if (gpr->GetIndex() == Register::ZeroIndex) { - // Writing to Register::ZeroIndex is a no 
op - return {}; - } - dest_name = fmt::format("R{}.x", gpr->GetIndex()); - } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) { - const u32 element = abuf->GetElement(); - const char swizzle = Swizzle(element); - switch (const Attribute::Index index = abuf->GetIndex()) { - case Attribute::Index::Position: - dest_name = fmt::format("result.position.{}", swizzle); - break; - case Attribute::Index::LayerViewportPointSize: - switch (element) { - case 0: - UNIMPLEMENTED(); - return {}; - case 1: - case 2: - if (!device.HasNvViewportArray2()) { - LOG_ERROR( - Render_OpenGL, - "NV_viewport_array2 is missing. Maxwell gen 2 or better is required."); - return {}; - } - dest_name = element == 1 ? "result.layer.x" : "result.viewport.x"; - break; - case 3: - dest_name = "result.pointsize.x"; - break; - } - break; - case Attribute::Index::ClipDistances0123: - dest_name = fmt::format("result.clip[{}].x", element); - break; - case Attribute::Index::ClipDistances4567: - dest_name = fmt::format("result.clip[{}].x", element + 4); - break; - default: - if (!IsGenericAttribute(index)) { - UNREACHABLE(); - return {}; - } - dest_name = - fmt::format("result.attrib[{}].{}", GetGenericAttributeIndex(index), swizzle); - break; - } - } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { - const std::string address = Visit(lmem->GetAddress()); - AddLine("SHR.U {}, {}, 2;", address, address); - dest_name = fmt::format("lmem[{}].x", address); - } else if (const auto smem = std::get_if<SmemNode>(&*dest)) { - AddLine("STS.U32 {}, shared_mem[{}];", Visit(src), Visit(smem->GetAddress())); - ResetTemporaries(); - return {}; - } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { - AddLine("IF NE.x;"); - AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem)); - AddLine("ENDIF;"); - ResetTemporaries(); - return {}; - } else { - UNREACHABLE(); - ResetTemporaries(); - return {}; - } - - AddLine("MOV.U {}, {};", dest_name, Visit(src)); - ResetTemporaries(); - return {}; -} - -std::string ARBDecompiler::Select(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("CMP.S {}, {}, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]), - Visit(operation[2])); - return temporary; -} - -std::string ARBDecompiler::FClamp(Operation operation) { - // 1.0f in hex, replace with std::bit_cast on C++20 - static constexpr u32 POSITIVE_ONE = 0x3f800000; - - std::string temporary = AllocTemporary(); - const Node& value = operation[0]; - const Node& low = operation[1]; - const Node& high = operation[2]; - const auto* const imm_low = std::get_if<ImmediateNode>(&*low); - const auto* const imm_high = std::get_if<ImmediateNode>(&*high); - if (imm_low && imm_high && imm_low->GetValue() == 0 && imm_high->GetValue() == POSITIVE_ONE) { - AddLine("MOV.F32.SAT {}, {};", temporary, Visit(value)); - } else { - AddLine("MIN.F {}, {}, {};", temporary, Visit(value), Visit(high)); - AddLine("MAX.F {}, {}, {};", temporary, temporary, Visit(low)); - } - return temporary; -} - -std::string ARBDecompiler::FCastHalf0(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.x, {};", temporary, Visit(operation[0])); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::FCastHalf1(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.y, {};", temporary, Visit(operation[0])); - AddLine("MOV {}.x, {}.y;", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string 
ARBDecompiler::FSqrt(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("RSQ.F32 {}, {};", temporary, Visit(operation[0])); - AddLine("RCP.F32 {}, {};", temporary, temporary); - return temporary; -} - -std::string ARBDecompiler::FSwizzleAdd(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, - "NV_shader_thread_shuffle is missing. Kepler or better is required."); - AddLine("ADD.F {}.x, {}, {};", temporary, Visit(operation[0]), Visit(operation[1])); - return fmt::format("{}.x", temporary); - } - - AddLine("AND.U {}.z, {}.threadid, 3;", temporary, StageInputName(stage)); - AddLine("SHL.U {}.z, {}.z, 1;", temporary, temporary); - AddLine("SHR.U {}.z, {}, {}.z;", temporary, Visit(operation[2]), temporary); - AddLine("AND.U {}.z, {}.z, 3;", temporary, temporary); - AddLine("MUL.F32 {}.x, {}, FSWZA[{}.z];", temporary, Visit(operation[0]), temporary); - AddLine("MUL.F32 {}.y, {}, FSWZB[{}.z];", temporary, Visit(operation[1]), temporary); - AddLine("ADD.F32 {}.x, {}.x, {}.y;", temporary, temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HAdd2(Operation operation) { - const std::string tmp1 = AllocVectorTemporary(); - const std::string tmp2 = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); - AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); - AddLine("ADD.F16 {}, {}, {};", tmp1, tmp1, tmp2); - AddLine("PK2H.F {}.x, {};", tmp1, tmp1); - return fmt::format("{}.x", tmp1); -} - -std::string ARBDecompiler::HMul2(Operation operation) { - const std::string tmp1 = AllocVectorTemporary(); - const std::string tmp2 = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); - AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); - AddLine("MUL.F16 {}, {}, {};", tmp1, tmp1, tmp2); - AddLine("PK2H.F {}.x, {};", tmp1, tmp1); - return fmt::format("{}.x", tmp1); -} - -std::string ARBDecompiler::HFma2(Operation operation) { - const std::string tmp1 = AllocVectorTemporary(); - const std::string tmp2 = AllocVectorTemporary(); - const std::string tmp3 = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); - AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); - AddLine("UP2H.F {}.xy, {};", tmp3, Visit(operation[2])); - AddLine("MAD.F16 {}, {}, {}, {};", tmp1, tmp1, tmp2, tmp3); - AddLine("PK2H.F {}.x, {};", tmp1, tmp1); - return fmt::format("{}.x", tmp1); -} - -std::string ARBDecompiler::HAbsolute(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); - AddLine("PK2H.F {}.x, |{}|;", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HNegate(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); - AddLine("MOVC.S RC.x, {};", Visit(operation[1])); - AddLine("MOV.F {}.x (NE.x), -{}.x;", temporary, temporary); - AddLine("MOVC.S RC.x, {};", Visit(operation[2])); - AddLine("MOV.F {}.y (NE.x), -{}.y;", temporary, temporary); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HClamp(Operation operation) { - const std::string tmp1 = AllocVectorTemporary(); - const std::string tmp2 = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); - 
AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[1])); - AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2); - AddLine("MAX.F {}, {}, {};", tmp1, tmp1, tmp2); - AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[2])); - AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2); - AddLine("MIN.F {}, {}, {};", tmp1, tmp1, tmp2); - AddLine("PK2H.F {}.x, {};", tmp1, tmp1); - return fmt::format("{}.x", tmp1); -} - -std::string ARBDecompiler::HCastFloat(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("MOV.F {}.y, {{0, 0, 0, 0}};", temporary); - AddLine("MOV.F {}.x, {};", temporary, Visit(operation[0])); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HUnpack(Operation operation) { - std::string operand = Visit(operation[0]); - switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) { - case Tegra::Shader::HalfType::H0_H1: - return operand; - case Tegra::Shader::HalfType::F32: { - const std::string temporary = AllocVectorTemporary(); - AddLine("MOV.U {}.x, {};", temporary, operand); - AddLine("MOV.U {}.y, {}.x;", temporary, temporary); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); - } - case Tegra::Shader::HalfType::H0_H0: { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, operand); - AddLine("MOV.U {}.y, {}.x;", temporary, temporary); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); - } - case Tegra::Shader::HalfType::H1_H1: { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, operand); - AddLine("MOV.U {}.x, {}.y;", temporary, temporary); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); - } - } - UNREACHABLE(); - return "{0, 0, 0, 0}.x"; -} - -std::string ARBDecompiler::HMergeF32(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HMergeH0(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); - AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1])); - AddLine("MOV.U {}.x, {}.z;", temporary, temporary); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HMergeH1(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); - AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1])); - AddLine("MOV.U {}.y, {}.w;", temporary, temporary); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HPack2(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("MOV.U {}.x, {};", temporary, Visit(operation[0])); - AddLine("MOV.U {}.y, {};", temporary, Visit(operation[1])); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::LogicalAssign(Operation operation) { - const Node& dest = operation[0]; - const Node& src = operation[1]; - - std::string target; - - if (const auto pred = std::get_if<PredicateNode>(&*dest)) { - ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment"); - - const 
Tegra::Shader::Pred index = pred->GetIndex(); - switch (index) { - case Tegra::Shader::Pred::NeverExecute: - case Tegra::Shader::Pred::UnusedIndex: - // Writing to these predicates is a no-op - return {}; - } - target = fmt::format("P{}.x", static_cast<u64>(index)); - } else if (const auto internal_flag = std::get_if<InternalFlagNode>(&*dest)) { - const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag()); - target = fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]); - } else { - UNREACHABLE(); - ResetTemporaries(); - return {}; - } - - AddLine("MOV.U {}, {};", target, Visit(src)); - ResetTemporaries(); - return {}; -} - -std::string ARBDecompiler::LogicalPick2(Operation operation) { - std::string temporary = AllocTemporary(); - const u32 index = std::get<ImmediateNode>(*operation[1]).GetValue(); - AddLine("MOV.U {}, {}.{};", temporary, Visit(operation[0]), Swizzle(index)); - return temporary; -} - -std::string ARBDecompiler::LogicalAnd2(Operation operation) { - std::string temporary = AllocTemporary(); - const std::string op = Visit(operation[0]); - AddLine("AND.U {}, {}.x, {}.y;", temporary, op, op); - return temporary; -} - -std::string ARBDecompiler::FloatOrdered(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("MOVC.F32 RC.x, {};", Visit(operation[0])); - AddLine("MOVC.F32 RC.y, {};", Visit(operation[1])); - AddLine("MOV.S {}, -1;", temporary); - AddLine("MOV.S {} (NAN.x), 0;", temporary); - AddLine("MOV.S {} (NAN.y), 0;", temporary); - return temporary; -} - -std::string ARBDecompiler::FloatUnordered(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("MOVC.F32 RC.x, {};", Visit(operation[0])); - AddLine("MOVC.F32 RC.y, {};", Visit(operation[1])); - AddLine("MOV.S {}, 0;", temporary); - AddLine("MOV.S {} (NAN.x), -1;", temporary); - AddLine("MOV.S {} (NAN.y), -1;", temporary); - return temporary; -} - -std::string ARBDecompiler::LogicalAddCarry(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("ADDC.U RC, {}, {};", Visit(operation[0]), Visit(operation[1])); - AddLine("MOV.S {}, 0;", temporary); - AddLine("IF CF.x;"); - AddLine("MOV.S {}, -1;", temporary); - AddLine("ENDIF;"); - return temporary; -} - -std::string ARBDecompiler::Texture(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - const auto [coords, temporary, swizzle] = BuildCoords(operation); - - std::string_view opcode = "TEX"; - std::string extra; - if (meta.bias) { - ASSERT(!meta.lod); - opcode = "TXB"; - - if (swizzle < 4) { - AddLine("MOV.F {}.w, {};", temporary, Visit(meta.bias)); - } else { - const std::string bias = AllocTemporary(); - AddLine("MOV.F {}, {};", bias, Visit(meta.bias)); - extra = fmt::format(" {},", bias); - } - } - if (meta.lod) { - ASSERT(!meta.bias); - opcode = "TXL"; - - if (swizzle < 4) { - AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod)); - } else { - const std::string lod = AllocTemporary(); - AddLine("MOV.F {}, {};", lod, Visit(meta.lod)); - extra = fmt::format(" {},", lod); - } - } - - AddLine("{}.F {}, {},{} texture[{}], {}{};", opcode, temporary, coords, extra, sampler_id, - TextureType(meta), BuildAoffi(operation)); - AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::TextureGather(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - 
const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - const auto [coords, temporary, swizzle] = BuildCoords(operation); - - std::string comp; - if (!meta.sampler.is_shadow) { - const auto& immediate = std::get<ImmediateNode>(*meta.component); - comp = fmt::format(".{}", Swizzle(immediate.GetValue())); - } - - AddLine("TXG.F {}, {}, texture[{}]{}, {}{};", temporary, temporary, sampler_id, comp, - TextureType(meta), BuildAoffi(operation)); - AddLine("MOV.U {}.x, {}.{};", temporary, coords, Swizzle(meta.element)); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::TextureQueryDimensions(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - const std::string temporary = AllocVectorTemporary(); - const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - - ASSERT(!meta.sampler.is_array); - - const std::string lod = operation.GetOperandsCount() > 0 ? Visit(operation[0]) : "0"; - AddLine("TXQ {}, {}, texture[{}], {};", temporary, lod, sampler_id, TextureType(meta)); - AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::TextureQueryLod(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - const std::string temporary = AllocVectorTemporary(); - const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - - ASSERT(!meta.sampler.is_array); - - const std::size_t count = operation.GetOperandsCount(); - for (std::size_t i = 0; i < count; ++i) { - AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); - } - AddLine("LOD.F {}, {}, texture[{}], {};", temporary, temporary, sampler_id, TextureType(meta)); - AddLine("MUL.F32 {}, {}, {{256, 256, 0, 0}};", temporary, temporary); - AddLine("TRUNC.S {}, {};", temporary, temporary); - AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::TexelFetch(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - const auto [coords, temporary, swizzle] = BuildCoords(operation); - - if (!meta.sampler.is_buffer) { - ASSERT(swizzle < 4); - AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod)); - } - AddLine("TXF.F {}, {}, texture[{}], {}{};", temporary, coords, sampler_id, TextureType(meta), - BuildAoffi(operation)); - AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::TextureGradient(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - const std::string ddx = AllocVectorTemporary(); - const std::string ddy = AllocVectorTemporary(); - const std::string coord = std::get<1>(BuildCoords(operation)); - - const std::size_t num_components = meta.derivates.size() / 2; - for (std::size_t index = 0; index < num_components; ++index) { - const char swizzle = Swizzle(index); - AddLine("MOV.F {}.{}, {};", ddx, swizzle, Visit(meta.derivates[index * 2])); - AddLine("MOV.F {}.{}, {};", ddy, swizzle, Visit(meta.derivates[index * 2 + 1])); - } - - const std::string_view result = coord; - AddLine("TXD.F {}, {}, {}, {}, texture[{}], {}{};", result, coord, ddx, ddy, sampler_id, - 
TextureType(meta), BuildAoffi(operation)); - AddLine("MOV.F {}.x, {}.{};", result, result, Swizzle(meta.element)); - return fmt::format("{}.x", result); -} - -std::string ARBDecompiler::ImageLoad(Operation operation) { - const auto& meta = std::get<MetaImage>(operation.GetMeta()); - const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; - const std::size_t count = operation.GetOperandsCount(); - const std::string_view type = ImageType(meta.image.type); - - const std::string temporary = AllocVectorTemporary(); - for (std::size_t i = 0; i < count; ++i) { - AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); - } - AddLine("LOADIM.F {}, {}, image[{}], {};", temporary, temporary, image_id, type); - AddLine("MOV.F {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::ImageStore(Operation operation) { - const auto& meta = std::get<MetaImage>(operation.GetMeta()); - const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; - const std::size_t num_coords = operation.GetOperandsCount(); - const std::size_t num_values = meta.values.size(); - const std::string_view type = ImageType(meta.image.type); - - const std::string coord = AllocVectorTemporary(); - const std::string value = AllocVectorTemporary(); - for (std::size_t i = 0; i < num_coords; ++i) { - AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i])); - } - for (std::size_t i = 0; i < num_values; ++i) { - AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i])); - } - AddLine("STOREIM.F image[{}], {}, {}, {};", image_id, value, coord, type); - return {}; -} - -std::string ARBDecompiler::Branch(Operation operation) { - const auto target = std::get<ImmediateNode>(*operation[0]); - AddLine("MOV.U PC.x, {};", target.GetValue()); - AddLine("CONT;"); - return {}; -} - -std::string ARBDecompiler::BranchIndirect(Operation operation) { - AddLine("MOV.U PC.x, {};", Visit(operation[0])); - AddLine("CONT;"); - return {}; -} - -std::string ARBDecompiler::PushFlowStack(Operation operation) { - const auto stack = std::get<MetaStackClass>(operation.GetMeta()); - const u32 target = std::get<ImmediateNode>(*operation[0]).GetValue(); - const std::string_view stack_name = StackName(stack); - AddLine("MOV.U {}[{}_TOP.x].x, {};", stack_name, stack_name, target); - AddLine("ADD.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name); - return {}; -} - -std::string ARBDecompiler::PopFlowStack(Operation operation) { - const auto stack = std::get<MetaStackClass>(operation.GetMeta()); - const std::string_view stack_name = StackName(stack); - AddLine("SUB.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name); - AddLine("MOV.U PC.x, {}[{}_TOP.x].x;", stack_name, stack_name); - AddLine("CONT;"); - return {}; -} - -std::string ARBDecompiler::Exit(Operation) { - Exit(); - return {}; -} - -std::string ARBDecompiler::Discard(Operation) { - AddLine("KIL TR;"); - return {}; -} - -std::string ARBDecompiler::EmitVertex(Operation) { - AddLine("EMIT;"); - return {}; -} - -std::string ARBDecompiler::EndPrimitive(Operation) { - AddLine("ENDPRIM;"); - return {}; -} - -std::string ARBDecompiler::InvocationId(Operation) { - return "primitive.invocation"; -} - -std::string ARBDecompiler::YNegate(Operation) { - LOG_WARNING(Render_OpenGL, "(STUBBED)"); - std::string temporary = AllocTemporary(); - AddLine("MOV.F {}, 1;", temporary); - return temporary; -} - -std::string ARBDecompiler::ThreadId(Operation) { - return 
fmt::format("{}.threadid", StageInputName(stage)); -} - -std::string ARBDecompiler::ShuffleIndexed(Operation operation) { - if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, - "NV_shader_thread_shuffle is missing. Kepler or better is required."); - return Visit(operation[0]); - } - const std::string temporary = AllocVectorTemporary(); - AddLine("SHFIDX.U {}, {}, {}, {{31, 0, 0, 0}};", temporary, Visit(operation[0]), - Visit(operation[1])); - AddLine("MOV.U {}.x, {}.y;", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::Barrier(Operation) { - AddLine("BAR;"); - return {}; -} - -std::string ARBDecompiler::MemoryBarrierGroup(Operation) { - AddLine("MEMBAR.CTA;"); - return {}; -} - -std::string ARBDecompiler::MemoryBarrierGlobal(Operation) { - AddLine("MEMBAR;"); - return {}; -} - -} // Anonymous namespace - -std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, - const VideoCommon::Shader::Registry& registry, - Tegra::Engines::ShaderType stage, std::string_view identifier) { - return ARBDecompiler(device, ir, registry, stage, identifier).Code(); -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.h b/src/video_core/renderer_opengl/gl_arb_decompiler.h deleted file mode 100644 index 6afc87220..000000000 --- a/src/video_core/renderer_opengl/gl_arb_decompiler.h +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <string> -#include <string_view> - -#include "common/common_types.h" - -namespace Tegra::Engines { -enum class ShaderType : u32; -} - -namespace VideoCommon::Shader { -class ShaderIR; -class Registry; -} // namespace VideoCommon::Shader - -namespace OpenGL { - -class Device; - -std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, - const VideoCommon::Shader::Registry& registry, - Tegra::Engines::ShaderType stage, std::string_view identifier); - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index a02a45e04..07a995f7d 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -2,14 +2,18 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
+#include <algorithm> #include <span> #include "video_core/buffer_cache/buffer_cache.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_device.h" +#include "video_core/renderer_opengl/maxwell_to_gl.h" namespace OpenGL { namespace { +using VideoCore::Surface::PixelFormat; + struct BindlessSSBO { GLuint64EXT address; GLsizei length; @@ -21,6 +25,25 @@ constexpr std::array PROGRAM_LUT{ GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, }; + +[[nodiscard]] GLenum GetTextureBufferFormat(GLenum gl_format) { + switch (gl_format) { + case GL_RGBA8_SNORM: + return GL_RGBA8; + case GL_R8_SNORM: + return GL_R8; + case GL_RGBA16_SNORM: + return GL_RGBA16; + case GL_R16_SNORM: + return GL_R16; + case GL_RG16_SNORM: + return GL_RG16; + case GL_RG8_SNORM: + return GL_RG8; + default: + return gl_format; + } +} } // Anonymous namespace Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) @@ -62,6 +85,30 @@ void Buffer::MakeResident(GLenum access) noexcept { glMakeNamedBufferResidentNV(buffer.handle, access); } +GLuint Buffer::View(u32 offset, u32 size, PixelFormat format) { + const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) { + return offset == view.offset && size == view.size && format == view.format; + })}; + if (it != views.end()) { + return it->texture.handle; + } + OGLTexture texture; + texture.Create(GL_TEXTURE_BUFFER); + const GLenum gl_format{MaxwellToGL::GetFormatTuple(format).internal_format}; + const GLenum texture_format{GetTextureBufferFormat(gl_format)}; + if (texture_format != gl_format) { + LOG_WARNING(Render_OpenGL, "Emulating SNORM texture buffer with UNORM."); + } + glTextureBufferRange(texture.handle, texture_format, buffer.handle, offset, size); + views.push_back({ + .offset = offset, + .size = size, + .format = format, + .texture = std::move(texture), + }); + return views.back().texture.handle; +} + BufferCacheRuntime::BufferCacheRuntime(const Device& device_) : device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()}, use_assembly_shaders{device.UseAssemblyShaders()}, @@ -144,7 +191,7 @@ void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buff glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, static_cast<GLsizeiptr>(size)); } else { - const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; + const GLuint base_binding = graphics_base_uniform_bindings[stage]; const GLuint binding = base_binding + binding_index; glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer.Handle(), static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); @@ -171,7 +218,12 @@ void BufferCacheRuntime::BindComputeUniformBuffer(u32 binding_index, Buffer& buf void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size, bool is_written) { - if (use_assembly_shaders) { + if (use_storage_buffers) { + const GLuint base_binding = graphics_base_storage_bindings[stage]; + const GLuint binding = base_binding + binding_index; + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(), + static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); + } else { const BindlessSSBO ssbo{ .address = buffer.HostGpuAddr() + offset, .length = static_cast<GLsizei>(size), @@ -180,17 +232,19 @@ void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buff buffer.MakeResident(is_written ? 
GL_READ_WRITE : GL_READ_ONLY); glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1, reinterpret_cast<const GLuint*>(&ssbo)); - } else { - const GLuint base_binding = device.GetBaseBindings(stage).shader_storage_buffer; - const GLuint binding = base_binding + binding_index; - glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(), - static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); } } void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size, bool is_written) { - if (use_assembly_shaders) { + if (use_storage_buffers) { + if (size != 0) { + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(), + static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); + } else { + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0); + } + } else { const BindlessSSBO ssbo{ .address = buffer.HostGpuAddr() + offset, .length = static_cast<GLsizei>(size), @@ -199,11 +253,6 @@ void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buf buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY); glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1, reinterpret_cast<const GLuint*>(&ssbo)); - } else if (size == 0) { - glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0); - } else { - glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(), - static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); } } @@ -213,4 +262,13 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer, static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); } +void BufferCacheRuntime::BindTextureBuffer(Buffer& buffer, u32 offset, u32 size, + PixelFormat format) { + *texture_handles++ = buffer.View(offset, size, format); +} + +void BufferCacheRuntime::BindImageBuffer(Buffer& buffer, u32 offset, u32 size, PixelFormat format) { + *image_handles++ = buffer.View(offset, size, format); +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index fe91aa452..060d36427 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -32,6 +32,8 @@ public: void MakeResident(GLenum access) noexcept; + [[nodiscard]] GLuint View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format); + [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept { return address; } @@ -41,9 +43,17 @@ public: } private: + struct BufferView { + u32 offset; + u32 size; + VideoCore::Surface::PixelFormat format; + OGLTexture texture; + }; + GLuint64EXT address = 0; OGLBuffer buffer; GLenum current_residency_access = GL_NONE; + std::vector<BufferView> views; }; class BufferCacheRuntime { @@ -75,17 +85,21 @@ public: void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size); + void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size, + VideoCore::Surface::PixelFormat format); + + void BindImageBuffer(Buffer& buffer, u32 offset, u32 size, + VideoCore::Surface::PixelFormat format); + void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) { + const GLuint handle = fast_uniforms[stage][binding_index].handle; + const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size); if (use_assembly_shaders) { - const GLuint handle = fast_uniforms[stage][binding_index].handle; - const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size); glBindBufferRangeNV(PABO_LUT[stage], 
binding_index, handle, 0, gl_size); } else { - const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; + const GLuint base_binding = graphics_base_uniform_bindings[stage]; const GLuint binding = base_binding + binding_index; - glBindBufferRange(GL_UNIFORM_BUFFER, binding, - fast_uniforms[stage][binding_index].handle, 0, - static_cast<GLsizeiptr>(size)); + glBindBufferRange(GL_UNIFORM_BUFFER, binding, handle, 0, gl_size); } } @@ -103,7 +117,7 @@ public: std::span<u8> BindMappedUniformBuffer(size_t stage, u32 binding_index, u32 size) noexcept { const auto [mapped_span, offset] = stream_buffer->Request(static_cast<size_t>(size)); - const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; + const GLuint base_binding = graphics_base_uniform_bindings[stage]; const GLuint binding = base_binding + binding_index; glBindBufferRange(GL_UNIFORM_BUFFER, binding, stream_buffer->Handle(), static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); @@ -118,6 +132,27 @@ public: return has_fast_buffer_sub_data; } + [[nodiscard]] bool SupportsNonZeroUniformOffset() const noexcept { + return !use_assembly_shaders; + } + + void SetBaseUniformBindings(const std::array<GLuint, 5>& bindings) { + graphics_base_uniform_bindings = bindings; + } + + void SetBaseStorageBindings(const std::array<GLuint, 5>& bindings) { + graphics_base_storage_bindings = bindings; + } + + void SetImagePointers(GLuint* texture_handles_, GLuint* image_handles_) { + texture_handles = texture_handles_; + image_handles = image_handles_; + } + + void SetEnableStorageBuffers(bool use_storage_buffers_) { + use_storage_buffers = use_storage_buffers_; + } + private: static constexpr std::array PABO_LUT{ GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, @@ -131,8 +166,15 @@ private: bool use_assembly_shaders = false; bool has_unified_vertex_buffers = false; + bool use_storage_buffers = false; + u32 max_attributes = 0; + std::array<GLuint, 5> graphics_base_uniform_bindings{}; + std::array<GLuint, 5> graphics_base_storage_bindings{}; + GLuint* texture_handles = nullptr; + GLuint* image_handles = nullptr; + std::optional<StreamBuffer> stream_buffer; std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>, @@ -156,6 +198,7 @@ struct BufferCacheParams { static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true; static constexpr bool NEEDS_BIND_STORAGE_INDEX = true; static constexpr bool USE_MEMORY_MAPS = false; + static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true; }; using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp new file mode 100644 index 000000000..aa1cc592f --- /dev/null +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -0,0 +1,209 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
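The Buffer::View method added to gl_buffer_cache above is a small lookup-or-create cache: a linear search over the views already built for this buffer, creating a GL_TEXTURE_BUFFER view only on a miss. Reduced to a self-contained sketch (the types and names here are illustrative stand-ins, not the actual classes):

// Sketch only: the lookup-or-create pattern behind Buffer::View.
#include <algorithm>
#include <cstdint>
#include <vector>

struct ViewKey {
    std::uint32_t offset;
    std::uint32_t size;
    int format;
    bool operator==(const ViewKey&) const = default;
};

struct CachedView {
    ViewKey key;
    unsigned handle; // stands in for the OGLTexture buffer view
};

class ViewCache {
public:
    unsigned GetOrCreate(ViewKey key) {
        const auto it = std::find_if(views.begin(), views.end(),
                                     [&](const CachedView& v) { return v.key == key; });
        if (it != views.end()) {
            return it->handle; // reuse the existing view, no new object created
        }
        views.push_back({key, next_handle++}); // create and remember on a miss
        return views.back().handle;
    }

private:
    std::vector<CachedView> views; // expected to stay small, so a linear scan is cheap
    unsigned next_handle = 1;
};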
+ +#include <cstring> + +#include "common/cityhash.h" +#include "common/settings.h" // for enum class Settings::ShaderBackend +#include "video_core/renderer_opengl/gl_compute_pipeline.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_shader_util.h" + +namespace OpenGL { + +using Shader::ImageBufferDescriptor; +using Tegra::Texture::TexturePair; +using VideoCommon::ImageId; + +constexpr u32 MAX_TEXTURES = 64; +constexpr u32 MAX_IMAGES = 16; + +template <typename Range> +u32 AccumulateCount(const Range& range) { + u32 num{}; + for (const auto& desc : range) { + num += desc.count; + } + return num; +} + +size_t ComputePipelineKey::Hash() const noexcept { + return static_cast<size_t>( + Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this)); +} + +bool ComputePipelineKey::operator==(const ComputePipelineKey& rhs) const noexcept { + return std::memcmp(this, &rhs, sizeof *this) == 0; +} + +ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cache_, + BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::KeplerCompute& kepler_compute_, + ProgramManager& program_manager_, const Shader::Info& info_, + std::string code, std::vector<u32> code_v) + : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, + kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_} { + switch (device.GetShaderBackend()) { + case Settings::ShaderBackend::GLSL: + source_program = CreateProgram(code, GL_COMPUTE_SHADER); + break; + case Settings::ShaderBackend::GLASM: + assembly_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV); + break; + case Settings::ShaderBackend::SPIRV: + source_program = CreateProgram(code_v, GL_COMPUTE_SHADER); + break; + } + std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(), + uniform_buffer_sizes.begin()); + + num_texture_buffers = AccumulateCount(info.texture_buffer_descriptors); + num_image_buffers = AccumulateCount(info.image_buffer_descriptors); + + const u32 num_textures{num_texture_buffers + AccumulateCount(info.texture_descriptors)}; + ASSERT(num_textures <= MAX_TEXTURES); + + const u32 num_images{num_image_buffers + AccumulateCount(info.image_descriptors)}; + ASSERT(num_images <= MAX_IMAGES); + + const bool is_glasm{assembly_program.handle != 0}; + const u32 num_storage_buffers{AccumulateCount(info.storage_buffers_descriptors)}; + use_storage_buffers = + !is_glasm || num_storage_buffers < device.GetMaxGLASMStorageBufferBlocks(); + writes_global_memory = !use_storage_buffers && + std::ranges::any_of(info.storage_buffers_descriptors, + [](const auto& desc) { return desc.is_written; }); +} + +void ComputePipeline::Configure() { + buffer_cache.SetComputeUniformBufferState(info.constant_buffer_mask, &uniform_buffer_sizes); + buffer_cache.UnbindComputeStorageBuffers(); + size_t ssbo_index{}; + for (const auto& desc : info.storage_buffers_descriptors) { + ASSERT(desc.count == 1); + buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset, + desc.is_written); + ++ssbo_index; + } + texture_cache.SynchronizeComputeDescriptors(); + + std::array<ImageViewId, MAX_TEXTURES + MAX_IMAGES> image_view_ids; + boost::container::static_vector<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices; + std::array<GLuint, MAX_TEXTURES> samplers; + std::array<GLuint, MAX_TEXTURES> textures; + std::array<GLuint, MAX_IMAGES> images; + GLsizei sampler_binding{}; + GLsizei texture_binding{}; 
+ GLsizei image_binding{}; + + const auto& qmd{kepler_compute.launch_description}; + const auto& cbufs{qmd.const_buffer_config}; + const bool via_header_index{qmd.linked_tsc != 0}; + const auto read_handle{[&](const auto& desc, u32 index) { + ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0); + const u32 index_offset{index << desc.size_shift}; + const u32 offset{desc.cbuf_offset + index_offset}; + const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + offset}; + if constexpr (std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> || + std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) { + if (desc.has_secondary) { + ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0); + const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset}; + const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() + + secondary_offset}; + const u32 lhs_raw{gpu_memory.Read<u32>(addr)}; + const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)}; + return TexturePair(lhs_raw | rhs_raw, via_header_index); + } + } + return TexturePair(gpu_memory.Read<u32>(addr), via_header_index); + }}; + const auto add_image{[&](const auto& desc) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); + } + }}; + for (const auto& desc : info.texture_buffer_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); + samplers[sampler_binding++] = 0; + } + } + std::ranges::for_each(info.image_buffer_descriptors, add_image); + for (const auto& desc : info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); + + Sampler* const sampler = texture_cache.GetComputeSampler(handle.second); + samplers[sampler_binding++] = sampler->Handle(); + } + } + std::ranges::for_each(info.image_descriptors, add_image); + + const std::span indices_span(image_view_indices.data(), image_view_indices.size()); + texture_cache.FillComputeImageViews(indices_span, image_view_ids); + + if (assembly_program.handle != 0) { + program_manager.BindComputeAssemblyProgram(assembly_program.handle); + } else { + program_manager.BindComputeProgram(source_program.handle); + } + buffer_cache.UnbindComputeTextureBuffers(); + size_t texbuf_index{}; + const auto add_buffer{[&](const auto& desc) { + constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>; + for (u32 i = 0; i < desc.count; ++i) { + bool is_written{false}; + if constexpr (is_image) { + is_written = desc.is_written; + } + ImageView& image_view{texture_cache.GetImageView(image_view_ids[texbuf_index])}; + buffer_cache.BindComputeTextureBuffer(texbuf_index, image_view.GpuAddr(), + image_view.BufferSize(), image_view.format, + is_written, is_image); + ++texbuf_index; + } + }}; + std::ranges::for_each(info.texture_buffer_descriptors, add_buffer); + std::ranges::for_each(info.image_buffer_descriptors, add_buffer); + + buffer_cache.UpdateComputeBuffers(); + + buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers); + buffer_cache.runtime.SetImagePointers(textures.data(), images.data()); + buffer_cache.BindHostComputeBuffers(); + + const ImageId* views_it{image_view_ids.data() + num_texture_buffers + num_image_buffers}; + texture_binding += num_texture_buffers; + image_binding += 
num_image_buffers; + + for (const auto& desc : info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + textures[texture_binding++] = image_view.Handle(desc.type); + } + } + for (const auto& desc : info.image_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + if (desc.is_written) { + texture_cache.MarkModification(image_view.image_id); + } + images[image_binding++] = image_view.StorageView(desc.type, desc.format); + } + } + if (texture_binding != 0) { + ASSERT(texture_binding == sampler_binding); + glBindTextures(0, texture_binding, textures.data()); + glBindSamplers(0, sampler_binding, samplers.data()); + } + if (image_binding != 0) { + glBindImageTextures(0, image_binding, images.data()); + } +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h new file mode 100644 index 000000000..50c676365 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h @@ -0,0 +1,93 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> +#include <type_traits> +#include <utility> + +#include "common/common_types.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/renderer_opengl/gl_buffer_cache.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_texture_cache.h" + +namespace Tegra { +class MemoryManager; +} + +namespace Tegra::Engines { +class KeplerCompute; +} + +namespace Shader { +struct Info; +} + +namespace OpenGL { + +class Device; +class ProgramManager; + +struct ComputePipelineKey { + u64 unique_hash; + u32 shared_memory_size; + std::array<u32, 3> workgroup_size; + + size_t Hash() const noexcept; + + bool operator==(const ComputePipelineKey&) const noexcept; + + bool operator!=(const ComputePipelineKey& rhs) const noexcept { + return !operator==(rhs); + } +}; +static_assert(std::has_unique_object_representations_v<ComputePipelineKey>); +static_assert(std::is_trivially_copyable_v<ComputePipelineKey>); +static_assert(std::is_trivially_constructible_v<ComputePipelineKey>); + +class ComputePipeline { +public: + explicit ComputePipeline(const Device& device, TextureCache& texture_cache_, + BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::KeplerCompute& kepler_compute_, + ProgramManager& program_manager_, const Shader::Info& info_, + std::string code, std::vector<u32> code_v); + + void Configure(); + + [[nodiscard]] bool WritesGlobalMemory() const noexcept { + return writes_global_memory; + } + +private: + TextureCache& texture_cache; + BufferCache& buffer_cache; + Tegra::MemoryManager& gpu_memory; + Tegra::Engines::KeplerCompute& kepler_compute; + ProgramManager& program_manager; + + Shader::Info info; + OGLProgram source_program; + OGLAssemblyProgram assembly_program; + VideoCommon::ComputeUniformBufferSizes uniform_buffer_sizes{}; + + u32 num_texture_buffers{}; + u32 num_image_buffers{}; + + bool use_storage_buffers{}; + bool writes_global_memory{}; +}; + +} // namespace OpenGL + +namespace std { +template <> +struct hash<OpenGL::ComputePipelineKey> { + size_t operator()(const OpenGL::ComputePipelineKey& k) const noexcept { + return k.Hash(); + } +}; +} // namespace std diff --git 
a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 3b00614e7..9692b8e94 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -17,39 +17,17 @@ #include "common/logging/log.h" #include "common/scope_exit.h" #include "common/settings.h" +#include "shader_recompiler/stage.h" #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_resource_manager.h" namespace OpenGL { namespace { -// One uniform block is reserved for emulation purposes -constexpr u32 ReservedUniformBlocks = 1; - -constexpr u32 NumStages = 5; - constexpr std::array LIMIT_UBOS = { GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS, GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS, GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS, }; -constexpr std::array LIMIT_SSBOS = { - GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS, - GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS, - GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS, -}; -constexpr std::array LIMIT_SAMPLERS = { - GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, - GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS, - GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, - GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS, - GL_MAX_TEXTURE_IMAGE_UNITS, - GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS, -}; -constexpr std::array LIMIT_IMAGES = { - GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS, - GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS, - GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS, -}; template <typename T> T GetInteger(GLenum pname) { @@ -82,81 +60,18 @@ bool HasExtension(std::span<const std::string_view> extensions, std::string_view return std::ranges::find(extensions, extension) != extensions.end(); } -u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) { - ASSERT(num >= amount); - if (limit) { - amount = std::min(amount, GetInteger<u32>(*limit)); - } - num -= amount; - return std::exchange(base, base + amount); -} - -std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept { - std::array<u32, Tegra::Engines::MaxShaderTypes> max; - std::ranges::transform(LIMIT_UBOS, max.begin(), - [](GLenum pname) { return GetInteger<u32>(pname); }); +std::array<u32, Shader::MaxStageTypes> BuildMaxUniformBuffers() noexcept { + std::array<u32, Shader::MaxStageTypes> max; + std::ranges::transform(LIMIT_UBOS, max.begin(), &GetInteger<u32>); return max; } -std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindings() noexcept { - std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> bindings; - - static constexpr std::array<std::size_t, 5> stage_swizzle{0, 1, 2, 3, 4}; - const u32 total_ubos = GetInteger<u32>(GL_MAX_UNIFORM_BUFFER_BINDINGS); - const u32 total_ssbos = GetInteger<u32>(GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS); - const u32 total_samplers = GetInteger<u32>(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS); - - u32 num_ubos = total_ubos - ReservedUniformBlocks; - u32 num_ssbos = total_ssbos; - u32 num_samplers = total_samplers; - - u32 base_ubo = ReservedUniformBlocks; - u32 base_ssbo = 0; - u32 base_samplers = 0; - - for (std::size_t i = 0; i < NumStages; ++i) { - const std::size_t stage = stage_swizzle[i]; - bindings[stage] = { - Extract(base_ubo, num_ubos, total_ubos / NumStages, LIMIT_UBOS[stage]), - Extract(base_ssbo, 
num_ssbos, total_ssbos / NumStages, LIMIT_SSBOS[stage]), - Extract(base_samplers, num_samplers, total_samplers / NumStages, - LIMIT_SAMPLERS[stage])}; - } - - u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS); - u32 base_images = 0; - - // GL_MAX_IMAGE_UNITS is guaranteed by the spec to have a minimum value of 8. - // Due to the limitation of GL_MAX_IMAGE_UNITS, reserve at least 4 image bindings on the - // fragment stage, and at least 1 for the rest of the stages. - // So far games are observed to use 1 image binding on vertex and 4 on fragment stages. - - // Reserve at least 4 image bindings on the fragment stage. - bindings[4].image = - Extract(base_images, num_images, std::max(4U, num_images / NumStages), LIMIT_IMAGES[4]); - - // This is guaranteed to be at least 1. - const u32 total_extracted_images = num_images / (NumStages - 1); - - // Reserve the other image bindings. - for (std::size_t i = 0; i < NumStages; ++i) { - const std::size_t stage = stage_swizzle[i]; - if (stage == 4) { - continue; - } - bindings[stage].image = - Extract(base_images, num_images, total_extracted_images, LIMIT_IMAGES[stage]); - } - - // Compute doesn't care about any of this. - bindings[5] = {0, 0, 0, 0}; - - return bindings; -} - bool IsASTCSupported() { - static constexpr std::array targets = {GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY}; - static constexpr std::array formats = { + static constexpr std::array targets{ + GL_TEXTURE_2D, + GL_TEXTURE_2D_ARRAY, + }; + static constexpr std::array formats{ GL_COMPRESSED_RGBA_ASTC_4x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x6_KHR, GL_COMPRESSED_RGBA_ASTC_8x5_KHR, @@ -172,11 +87,10 @@ bool IsASTCSupported() { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR, }; - static constexpr std::array required_support = { + static constexpr std::array required_support{ GL_VERTEX_TEXTURE, GL_TESS_CONTROL_TEXTURE, GL_TESS_EVALUATION_TEXTURE, GL_GEOMETRY_TEXTURE, GL_FRAGMENT_TEXTURE, GL_COMPUTE_TEXTURE, }; - for (const GLenum target : targets) { for (const GLenum format : formats) { for (const GLenum support : required_support) { @@ -223,14 +137,13 @@ Device::Device() { "Beta driver 443.24 is known to have issues. 
There might be performance issues."); disable_fast_buffer_sub_data = true; } - max_uniform_buffers = BuildMaxUniformBuffers(); - base_bindings = BuildBaseBindings(); uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE); + max_glasm_storage_buffer_blocks = GetInteger<u32>(GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS); has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group && GLAD_GL_NV_shader_thread_shuffle; has_shader_ballot = GLAD_GL_ARB_shader_ballot; @@ -243,18 +156,30 @@ Device::Device() { has_precise_bug = TestPreciseBug(); has_broken_texture_view_formats = is_amd || (!is_linux && is_intel); has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; + has_derivative_control = GLAD_GL_ARB_derivative_control; has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory; has_debugging_tool_attached = IsDebugToolAttached(extensions); has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float"); + has_geometry_shader_passthrough = GLAD_GL_NV_geometry_shader_passthrough; + has_nv_gpu_shader_5 = GLAD_GL_NV_gpu_shader5; + has_shader_int64 = HasExtension(extensions, "GL_ARB_gpu_shader_int64"); + has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float; + has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2; + warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel; + need_fastmath_off = is_nvidia; // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive // uniform buffers as "push constants" has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data; - use_assembly_shaders = Settings::values.use_assembly_shaders.GetValue() && + shader_backend = Settings::values.shader_backend.GetValue(); + use_assembly_shaders = shader_backend == Settings::ShaderBackend::GLASM && GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2; - + if (shader_backend == Settings::ShaderBackend::GLASM && !use_assembly_shaders) { + LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported"); + shader_backend = Settings::ShaderBackend::GLSL; + } // Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation. 
use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() && !(is_amd || (is_intel && !is_linux)); @@ -265,11 +190,6 @@ Device::Device() { LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug); LOG_INFO(Render_OpenGL, "Renderer_BrokenTextureViewFormats: {}", has_broken_texture_view_formats); - - if (Settings::values.use_assembly_shaders.GetValue() && !use_assembly_shaders) { - LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported"); - } - if (Settings::values.use_asynchronous_shaders.GetValue() && !use_asynchronous_shaders) { LOG_WARNING(Render_OpenGL, "Asynchronous shader compilation enabled but not supported"); } @@ -325,22 +245,6 @@ std::string Device::GetVendorName() const { return vendor_name; } -Device::Device(std::nullptr_t) { - max_uniform_buffers.fill(std::numeric_limits<u32>::max()); - uniform_buffer_alignment = 4; - shader_storage_alignment = 4; - max_vertex_attributes = 16; - max_varyings = 15; - max_compute_shared_memory_size = 0x10000; - has_warp_intrinsics = true; - has_shader_ballot = true; - has_vertex_viewport_layer = true; - has_image_load_formatted = true; - has_texture_shadow_lod = true; - has_variable_aoffi = true; - has_depth_buffer_float = true; -} - bool Device::TestVariableAoffi() { return TestProgram(R"(#version 430 core // This is a unit test, please ignore me on apitrace bug reports. diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 2c2b13767..ee992aed4 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -6,34 +6,22 @@ #include <cstddef> #include "common/common_types.h" -#include "video_core/engines/shader_type.h" +#include "shader_recompiler/stage.h" + +namespace Settings { +enum class ShaderBackend : u32; +}; namespace OpenGL { class Device { public: - struct BaseBindings { - u32 uniform_buffer{}; - u32 shader_storage_buffer{}; - u32 sampler{}; - u32 image{}; - }; - explicit Device(); - explicit Device(std::nullptr_t); [[nodiscard]] std::string GetVendorName() const; - u32 GetMaxUniformBuffers(Tegra::Engines::ShaderType shader_type) const noexcept { - return max_uniform_buffers[static_cast<std::size_t>(shader_type)]; - } - - const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept { - return base_bindings[stage_index]; - } - - const BaseBindings& GetBaseBindings(Tegra::Engines::ShaderType shader_type) const noexcept { - return GetBaseBindings(static_cast<std::size_t>(shader_type)); + u32 GetMaxUniformBuffers(Shader::Stage stage) const noexcept { + return max_uniform_buffers[static_cast<size_t>(stage)]; } size_t GetUniformBufferAlignment() const { @@ -56,6 +44,10 @@ public: return max_compute_shared_memory_size; } + u32 GetMaxGLASMStorageBufferBlocks() const { + return max_glasm_storage_buffer_blocks; + } + bool HasWarpIntrinsics() const { return has_warp_intrinsics; } @@ -108,6 +100,10 @@ public: return has_nv_viewport_array2; } + bool HasDerivativeControl() const { + return has_derivative_control; + } + bool HasDebuggingToolAttached() const { return has_debugging_tool_attached; } @@ -128,18 +124,52 @@ public: return has_depth_buffer_float; } + bool HasGeometryShaderPassthrough() const { + return has_geometry_shader_passthrough; + } + + bool HasNvGpuShader5() const { + return has_nv_gpu_shader_5; + } + + bool HasShaderInt64() const { + return has_shader_int64; + } + + bool HasAmdShaderHalfFloat() const { + return has_amd_shader_half_float; + } + + bool HasSparseTexture2() const { 
+ return has_sparse_texture_2; + } + + bool IsWarpSizePotentiallyLargerThanGuest() const { + return warp_size_potentially_larger_than_guest; + } + + bool NeedsFastmathOff() const { + return need_fastmath_off; + } + + Settings::ShaderBackend GetShaderBackend() const { + return shader_backend; + } + private: static bool TestVariableAoffi(); static bool TestPreciseBug(); - std::string vendor_name; - std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{}; - std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{}; + std::array<u32, Shader::MaxStageTypes> max_uniform_buffers{}; size_t uniform_buffer_alignment{}; size_t shader_storage_alignment{}; u32 max_vertex_attributes{}; u32 max_varyings{}; u32 max_compute_shared_memory_size{}; + u32 max_glasm_storage_buffer_blocks{}; + + Settings::ShaderBackend shader_backend{}; + bool has_warp_intrinsics{}; bool has_shader_ballot{}; bool has_vertex_viewport_layer{}; @@ -153,11 +183,21 @@ private: bool has_broken_texture_view_formats{}; bool has_fast_buffer_sub_data{}; bool has_nv_viewport_array2{}; + bool has_derivative_control{}; bool has_debugging_tool_attached{}; bool use_assembly_shaders{}; bool use_asynchronous_shaders{}; bool use_driver_cache{}; bool has_depth_buffer_float{}; + bool has_geometry_shader_passthrough{}; + bool has_nv_gpu_shader_5{}; + bool has_shader_int64{}; + bool has_amd_shader_half_float{}; + bool has_sparse_texture_2{}; + bool warp_size_potentially_larger_than_guest{}; + bool need_fastmath_off{}; + + std::string vendor_name; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp new file mode 100644 index 000000000..fac0034fb --- /dev/null +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -0,0 +1,572 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <algorithm> +#include <array> +#include <string> +#include <vector> + +#include "common/settings.h" // for enum class Settings::ShaderBackend +#include "common/thread_worker.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/renderer_opengl/gl_graphics_pipeline.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_shader_util.h" +#include "video_core/renderer_opengl/gl_state_tracker.h" +#include "video_core/shader_notify.h" +#include "video_core/texture_cache/texture_cache.h" + +#if defined(_MSC_VER) && defined(NDEBUG) +#define LAMBDA_FORCEINLINE [[msvc::forceinline]] +#else +#define LAMBDA_FORCEINLINE +#endif + +namespace OpenGL { +namespace { +using Shader::ImageBufferDescriptor; +using Shader::ImageDescriptor; +using Shader::TextureBufferDescriptor; +using Shader::TextureDescriptor; +using Tegra::Texture::TexturePair; +using VideoCommon::ImageId; + +constexpr u32 MAX_TEXTURES = 64; +constexpr u32 MAX_IMAGES = 8; + +template <typename Range> +u32 AccumulateCount(const Range& range) { + u32 num{}; + for (const auto& desc : range) { + num += desc.count; + } + return num; +} + +GLenum Stage(size_t stage_index) { + switch (stage_index) { + case 0: + return GL_VERTEX_SHADER; + case 1: + return GL_TESS_CONTROL_SHADER; + case 2: + return GL_TESS_EVALUATION_SHADER; + case 3: + return GL_GEOMETRY_SHADER; + case 4: + return GL_FRAGMENT_SHADER; + } + UNREACHABLE_MSG("{}", stage_index); + return GL_NONE; +} + +GLenum AssemblyStage(size_t stage_index) { + switch (stage_index) { + case 0: + return GL_VERTEX_PROGRAM_NV; + case 1: + return GL_TESS_CONTROL_PROGRAM_NV; + case 2: + return GL_TESS_EVALUATION_PROGRAM_NV; + case 3: + return GL_GEOMETRY_PROGRAM_NV; + case 4: + return GL_FRAGMENT_PROGRAM_NV; + } + UNREACHABLE_MSG("{}", stage_index); + return GL_NONE; +} + +/// Translates hardware transform feedback indices +/// @param location Hardware location +/// @return Pair of ARB_transform_feedback3 token stream first and third arguments +/// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt +std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) { + const u8 index = location / 4; + if (index >= 8 && index <= 39) { + return {GL_GENERIC_ATTRIB_NV, index - 8}; + } + if (index >= 48 && index <= 55) { + return {GL_TEXTURE_COORD_NV, index - 48}; + } + switch (index) { + case 7: + return {GL_POSITION, 0}; + case 40: + return {GL_PRIMARY_COLOR_NV, 0}; + case 41: + return {GL_SECONDARY_COLOR_NV, 0}; + case 42: + return {GL_BACK_PRIMARY_COLOR_NV, 0}; + case 43: + return {GL_BACK_SECONDARY_COLOR_NV, 0}; + } + UNIMPLEMENTED_MSG("index={}", index); + return {GL_POSITION, 0}; +} + +template <typename Spec> +bool Passes(const std::array<Shader::Info, 5>& stage_infos, u32 enabled_mask) { + for (size_t stage = 0; stage < stage_infos.size(); ++stage) { + if (!Spec::enabled_stages[stage] && ((enabled_mask >> stage) & 1) != 0) { + return false; + } + const auto& info{stage_infos[stage]}; + if constexpr (!Spec::has_storage_buffers) { + if (!info.storage_buffers_descriptors.empty()) { + return false; + } + } + if constexpr (!Spec::has_texture_buffers) { + if (!info.texture_buffer_descriptors.empty()) { + return false; + } + } + if constexpr (!Spec::has_image_buffers) { + if (!info.image_buffer_descriptors.empty()) { + return false; + } + } + if constexpr (!Spec::has_images) { + if (!info.image_descriptors.empty()) { + return false; + } + } + } + return true; +} + +using ConfigureFuncPtr = void 
(*)(GraphicsPipeline*, bool); + +template <typename Spec, typename... Specs> +ConfigureFuncPtr FindSpec(const std::array<Shader::Info, 5>& stage_infos, u32 enabled_mask) { + if constexpr (sizeof...(Specs) > 0) { + if (!Passes<Spec>(stage_infos, enabled_mask)) { + return FindSpec<Specs...>(stage_infos, enabled_mask); + } + } + return GraphicsPipeline::MakeConfigureSpecFunc<Spec>(); +} + +struct SimpleVertexFragmentSpec { + static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, true}; + static constexpr bool has_storage_buffers = false; + static constexpr bool has_texture_buffers = false; + static constexpr bool has_image_buffers = false; + static constexpr bool has_images = false; +}; + +struct SimpleVertexSpec { + static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, false}; + static constexpr bool has_storage_buffers = false; + static constexpr bool has_texture_buffers = false; + static constexpr bool has_image_buffers = false; + static constexpr bool has_images = false; +}; + +struct DefaultSpec { + static constexpr std::array<bool, 5> enabled_stages{true, true, true, true, true}; + static constexpr bool has_storage_buffers = true; + static constexpr bool has_texture_buffers = true; + static constexpr bool has_image_buffers = true; + static constexpr bool has_images = true; +}; + +ConfigureFuncPtr ConfigureFunc(const std::array<Shader::Info, 5>& infos, u32 enabled_mask) { + return FindSpec<SimpleVertexSpec, SimpleVertexFragmentSpec, DefaultSpec>(infos, enabled_mask); +} +} // Anonymous namespace + +GraphicsPipeline::GraphicsPipeline( + const Device& device, TextureCache& texture_cache_, BufferCache& buffer_cache_, + Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, + ProgramManager& program_manager_, StateTracker& state_tracker_, ShaderWorker* thread_worker, + VideoCore::ShaderNotify* shader_notify, std::array<std::string, 5> sources, + std::array<std::vector<u32>, 5> sources_spirv, const std::array<const Shader::Info*, 5>& infos, + const GraphicsPipelineKey& key_) + : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, + gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_}, + state_tracker{state_tracker_}, key{key_} { + if (shader_notify) { + shader_notify->MarkShaderBuilding(); + } + u32 num_textures{}; + u32 num_images{}; + u32 num_storage_buffers{}; + for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) { + auto& info{stage_infos[stage]}; + if (infos[stage]) { + info = *infos[stage]; + enabled_stages_mask |= 1u << stage; + } + if (stage < 4) { + base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; + base_storage_bindings[stage + 1] = base_storage_bindings[stage]; + + base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors); + base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors); + } + enabled_uniform_buffer_masks[stage] = info.constant_buffer_mask; + std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); + + const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; + num_texture_buffers[stage] += num_tex_buffer_bindings; + num_textures += num_tex_buffer_bindings; + + const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)}; + num_image_buffers[stage] += num_img_buffers_bindings; + num_images += num_img_buffers_bindings; + + num_textures += AccumulateCount(info.texture_descriptors); + num_images += 
AccumulateCount(info.image_descriptors); + num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors); + + writes_global_memory |= std::ranges::any_of( + info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; }); + } + ASSERT(num_textures <= MAX_TEXTURES); + ASSERT(num_images <= MAX_IMAGES); + + const bool assembly_shaders{assembly_programs[0].handle != 0}; + use_storage_buffers = + !assembly_shaders || num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); + writes_global_memory &= !use_storage_buffers; + configure_func = ConfigureFunc(stage_infos, enabled_stages_mask); + + if (key.xfb_enabled && device.UseAssemblyShaders()) { + GenerateTransformFeedbackState(); + } + const bool in_parallel = thread_worker != nullptr; + const auto backend = device.GetShaderBackend(); + auto func{[this, sources = std::move(sources), sources_spirv = std::move(sources_spirv), + shader_notify, backend, in_parallel](ShaderContext::Context*) mutable { + for (size_t stage = 0; stage < 5; ++stage) { + switch (backend) { + case Settings::ShaderBackend::GLSL: + if (!sources[stage].empty()) { + source_programs[stage] = CreateProgram(sources[stage], Stage(stage)); + } + break; + case Settings::ShaderBackend::GLASM: + if (!sources[stage].empty()) { + assembly_programs[stage] = CompileProgram(sources[stage], AssemblyStage(stage)); + if (in_parallel) { + // Make sure program is built before continuing when building in parallel + glGetString(GL_PROGRAM_ERROR_STRING_NV); + } + } + break; + case Settings::ShaderBackend::SPIRV: + if (!sources_spirv[stage].empty()) { + source_programs[stage] = CreateProgram(sources_spirv[stage], Stage(stage)); + } + break; + } + } + if (in_parallel && backend != Settings::ShaderBackend::GLASM) { + // Make sure programs have built if we are building shaders in parallel + for (OGLProgram& program : source_programs) { + if (program.handle != 0) { + GLint status{}; + glGetProgramiv(program.handle, GL_LINK_STATUS, &status); + } + } + } + if (shader_notify) { + shader_notify->MarkShaderComplete(); + } + is_built = true; + built_condvar.notify_one(); + }}; + if (thread_worker) { + thread_worker->QueueWork(std::move(func)); + } else { + func(nullptr); + } +} + +template <typename Spec> +void GraphicsPipeline::ConfigureImpl(bool is_indexed) { + std::array<ImageId, MAX_TEXTURES + MAX_IMAGES> image_view_ids; + std::array<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices; + std::array<GLuint, MAX_TEXTURES> samplers; + size_t image_view_index{}; + GLsizei sampler_binding{}; + + texture_cache.SynchronizeGraphicsDescriptors(); + + buffer_cache.SetUniformBuffersState(enabled_uniform_buffer_masks, &uniform_buffer_sizes); + buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings); + buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings); + buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers); + + const auto& regs{maxwell3d.regs}; + const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; + const auto config_stage{[&](size_t stage) LAMBDA_FORCEINLINE { + const Shader::Info& info{stage_infos[stage]}; + buffer_cache.UnbindGraphicsStorageBuffers(stage); + if constexpr (Spec::has_storage_buffers) { + size_t ssbo_index{}; + for (const auto& desc : info.storage_buffers_descriptors) { + ASSERT(desc.count == 1); + buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index, + desc.cbuf_offset, desc.is_written); + ++ssbo_index; + } + } + const auto& 
cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; + const auto read_handle{[&](const auto& desc, u32 index) { + ASSERT(cbufs[desc.cbuf_index].enabled); + const u32 index_offset{index << desc.size_shift}; + const u32 offset{desc.cbuf_offset + index_offset}; + const GPUVAddr addr{cbufs[desc.cbuf_index].address + offset}; + if constexpr (std::is_same_v<decltype(desc), const TextureDescriptor&> || + std::is_same_v<decltype(desc), const TextureBufferDescriptor&>) { + if (desc.has_secondary) { + ASSERT(cbufs[desc.secondary_cbuf_index].enabled); + const u32 second_offset{desc.secondary_cbuf_offset + index_offset}; + const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].address + + second_offset}; + const u32 lhs_raw{gpu_memory.Read<u32>(addr)}; + const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)}; + const u32 raw{lhs_raw | rhs_raw}; + return TexturePair(raw, via_header_index); + } + } + return TexturePair(gpu_memory.Read<u32>(addr), via_header_index); + }}; + const auto add_image{[&](const auto& desc) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices[image_view_index++] = handle.first; + } + }}; + if constexpr (Spec::has_texture_buffers) { + for (const auto& desc : info.texture_buffer_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices[image_view_index++] = handle.first; + samplers[sampler_binding++] = 0; + } + } + } + if constexpr (Spec::has_image_buffers) { + for (const auto& desc : info.image_buffer_descriptors) { + add_image(desc); + } + } + for (const auto& desc : info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices[image_view_index++] = handle.first; + + Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)}; + samplers[sampler_binding++] = sampler->Handle(); + } + } + if constexpr (Spec::has_images) { + for (const auto& desc : info.image_descriptors) { + add_image(desc); + } + } + }}; + if constexpr (Spec::enabled_stages[0]) { + config_stage(0); + } + if constexpr (Spec::enabled_stages[1]) { + config_stage(1); + } + if constexpr (Spec::enabled_stages[2]) { + config_stage(2); + } + if constexpr (Spec::enabled_stages[3]) { + config_stage(3); + } + if constexpr (Spec::enabled_stages[4]) { + config_stage(4); + } + const std::span indices_span(image_view_indices.data(), image_view_index); + texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); + + texture_cache.UpdateRenderTargets(false); + state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); + + ImageId* texture_buffer_index{image_view_ids.data()}; + const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE { + size_t index{}; + const auto add_buffer{[&](const auto& desc) { + constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>; + for (u32 i = 0; i < desc.count; ++i) { + bool is_written{false}; + if constexpr (is_image) { + is_written = desc.is_written; + } + ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; + buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), + image_view.BufferSize(), image_view.format, + is_written, is_image); + ++index; + ++texture_buffer_index; + } + }}; + const Shader::Info& info{stage_infos[stage]}; + buffer_cache.UnbindGraphicsTextureBuffers(stage); + + if constexpr (Spec::has_texture_buffers) { + for (const auto& 
desc : info.texture_buffer_descriptors) { + add_buffer(desc); + } + } + if constexpr (Spec::has_image_buffers) { + for (const auto& desc : info.image_buffer_descriptors) { + add_buffer(desc); + } + } + for (const auto& desc : info.texture_descriptors) { + texture_buffer_index += desc.count; + } + if constexpr (Spec::has_images) { + for (const auto& desc : info.image_descriptors) { + texture_buffer_index += desc.count; + } + } + }}; + if constexpr (Spec::enabled_stages[0]) { + bind_stage_info(0); + } + if constexpr (Spec::enabled_stages[1]) { + bind_stage_info(1); + } + if constexpr (Spec::enabled_stages[2]) { + bind_stage_info(2); + } + if constexpr (Spec::enabled_stages[3]) { + bind_stage_info(3); + } + if constexpr (Spec::enabled_stages[4]) { + bind_stage_info(4); + } + buffer_cache.UpdateGraphicsBuffers(is_indexed); + buffer_cache.BindHostGeometryBuffers(is_indexed); + + if (!is_built.load(std::memory_order::relaxed)) { + WaitForBuild(); + } + if (assembly_programs[0].handle != 0) { + program_manager.BindAssemblyPrograms(assembly_programs, enabled_stages_mask); + } else { + program_manager.BindSourcePrograms(source_programs); + } + const ImageId* views_it{image_view_ids.data()}; + GLsizei texture_binding = 0; + GLsizei image_binding = 0; + std::array<GLuint, MAX_TEXTURES> textures; + std::array<GLuint, MAX_IMAGES> images; + const auto prepare_stage{[&](size_t stage) { + buffer_cache.runtime.SetImagePointers(&textures[texture_binding], &images[image_binding]); + buffer_cache.BindHostStageBuffers(stage); + + texture_binding += num_texture_buffers[stage]; + image_binding += num_image_buffers[stage]; + + views_it += num_texture_buffers[stage]; + views_it += num_image_buffers[stage]; + + const auto& info{stage_infos[stage]}; + for (const auto& desc : info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + textures[texture_binding++] = image_view.Handle(desc.type); + } + } + for (const auto& desc : info.image_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + if (desc.is_written) { + texture_cache.MarkModification(image_view.image_id); + } + images[image_binding++] = image_view.StorageView(desc.type, desc.format); + } + } + }}; + if constexpr (Spec::enabled_stages[0]) { + prepare_stage(0); + } + if constexpr (Spec::enabled_stages[1]) { + prepare_stage(1); + } + if constexpr (Spec::enabled_stages[2]) { + prepare_stage(2); + } + if constexpr (Spec::enabled_stages[3]) { + prepare_stage(3); + } + if constexpr (Spec::enabled_stages[4]) { + prepare_stage(4); + } + if (texture_binding != 0) { + ASSERT(texture_binding == sampler_binding); + glBindTextures(0, texture_binding, textures.data()); + glBindSamplers(0, sampler_binding, samplers.data()); + } + if (image_binding != 0) { + glBindImageTextures(0, image_binding, images.data()); + } +} + +void GraphicsPipeline::ConfigureTransformFeedbackImpl() const { + glTransformFeedbackStreamAttribsNV(num_xfb_attribs, xfb_attribs.data(), num_xfb_strides, + xfb_streams.data(), GL_INTERLEAVED_ATTRIBS); +} + +void GraphicsPipeline::GenerateTransformFeedbackState() { + // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal + // when this is required. 
+ GLint* cursor{xfb_attribs.data()}; + GLint* current_stream{xfb_streams.data()}; + + for (size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) { + const auto& layout = key.xfb_state.layouts[feedback]; + UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding"); + if (layout.varying_count == 0) { + continue; + } + *current_stream = static_cast<GLint>(feedback); + if (current_stream != xfb_streams.data()) { + // When stepping one stream, push the expected token + cursor[0] = GL_NEXT_BUFFER_NV; + cursor[1] = 0; + cursor[2] = 0; + cursor += XFB_ENTRY_STRIDE; + } + ++current_stream; + + const auto& locations = key.xfb_state.varyings[feedback]; + std::optional<u8> current_index; + for (u32 offset = 0; offset < layout.varying_count; ++offset) { + const u8 location = locations[offset]; + const u8 index = location / 4; + + if (current_index == index) { + // Increase number of components of the previous attachment + ++cursor[-2]; + continue; + } + current_index = index; + + std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location); + cursor[1] = 1; + cursor += XFB_ENTRY_STRIDE; + } + } + num_xfb_attribs = static_cast<GLsizei>((cursor - xfb_attribs.data()) / XFB_ENTRY_STRIDE); + num_xfb_strides = static_cast<GLsizei>(current_stream - xfb_streams.data()); +} + +void GraphicsPipeline::WaitForBuild() { + std::unique_lock lock{built_mutex}; + built_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); }); +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h new file mode 100644 index 000000000..4e28d9a42 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -0,0 +1,169 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
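GenerateTransformFeedbackState above packs NV_transform_feedback3 attributes as {token, component count, index} triplets, inserting a GL_NEXT_BUFFER_NV triplet between buffers and merging consecutive components that fall on the same varying location by bumping the count of the previous triplet (++cursor[-2]). The following is a minimal standalone sketch of just that merging step, not part of the patch: the location list is made up, and plain integers stand in for the real GL tokens and for the third argument normally produced by TransformFeedbackEnum, so it compiles without GL headers.

#include <array>
#include <cstddef>
#include <cstdio>
#include <optional>
#include <vector>

int main() {
    // Eight consecutive component locations; each group of four shares one varying index.
    const std::array<unsigned char, 8> locations{28, 29, 30, 31, 32, 33, 34, 35};
    constexpr std::size_t entry_stride = 3; // {token, component count, index}
    std::vector<int> attribs;
    std::optional<int> current_index;
    for (const unsigned char location : locations) {
        const int index = location / 4; // four components per varying location
        if (current_index == index) {
            attribs[attribs.size() - 2] += 1; // widen the previous entry instead of adding one
            continue;
        }
        current_index = index;
        attribs.push_back(index);        // stand-in for the GL attribute token
        attribs.push_back(1);            // component count starts at one
        attribs.push_back(location % 4); // stand-in for the third token argument
    }
    for (std::size_t i = 0; i < attribs.size(); i += entry_stride) {
        std::printf("token=%d components=%d index=%d\n", attribs[i], attribs[i + 1],
                    attribs[i + 2]);
    }
    return 0;
}

With the made-up input above, the eight locations collapse into two triplets with a component count of four each, which is the compaction the real loop performs before handing the arrays to glTransformFeedbackStreamAttribsNV.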
+ +#pragma once + +#include <array> +#include <cstring> +#include <type_traits> +#include <utility> + +#include "common/bit_field.h" +#include "common/cityhash.h" +#include "common/common_types.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/memory_manager.h" +#include "video_core/renderer_opengl/gl_buffer_cache.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_texture_cache.h" +#include "video_core/transform_feedback.h" + +namespace OpenGL { + +namespace ShaderContext { +struct Context; +} + +class Device; +class ProgramManager; + +using Maxwell = Tegra::Engines::Maxwell3D::Regs; +using ShaderWorker = Common::StatefulThreadWorker<ShaderContext::Context>; + +struct GraphicsPipelineKey { + std::array<u64, 6> unique_hashes; + union { + u32 raw; + BitField<0, 1, u32> xfb_enabled; + BitField<1, 1, u32> early_z; + BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology; + BitField<6, 2, Maxwell::TessellationPrimitive> tessellation_primitive; + BitField<8, 2, Maxwell::TessellationSpacing> tessellation_spacing; + BitField<10, 1, u32> tessellation_clockwise; + }; + std::array<u32, 3> padding; + VideoCommon::TransformFeedbackState xfb_state; + + size_t Hash() const noexcept { + return static_cast<size_t>(Common::CityHash64(reinterpret_cast<const char*>(this), Size())); + } + + bool operator==(const GraphicsPipelineKey& rhs) const noexcept { + return std::memcmp(this, &rhs, Size()) == 0; + } + + bool operator!=(const GraphicsPipelineKey& rhs) const noexcept { + return !operator==(rhs); + } + + [[nodiscard]] size_t Size() const noexcept { + if (xfb_enabled != 0) { + return sizeof(GraphicsPipelineKey); + } else { + return offsetof(GraphicsPipelineKey, padding); + } + } +}; +static_assert(std::has_unique_object_representations_v<GraphicsPipelineKey>); +static_assert(std::is_trivially_copyable_v<GraphicsPipelineKey>); +static_assert(std::is_trivially_constructible_v<GraphicsPipelineKey>); + +class GraphicsPipeline { +public: + explicit GraphicsPipeline(const Device& device, TextureCache& texture_cache_, + BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::Maxwell3D& maxwell3d_, + ProgramManager& program_manager_, StateTracker& state_tracker_, + ShaderWorker* thread_worker, VideoCore::ShaderNotify* shader_notify, + std::array<std::string, 5> sources, + std::array<std::vector<u32>, 5> sources_spirv, + const std::array<const Shader::Info*, 5>& infos, + const GraphicsPipelineKey& key_); + + void Configure(bool is_indexed) { + configure_func(this, is_indexed); + } + + void ConfigureTransformFeedback() const { + if (num_xfb_attribs != 0) { + ConfigureTransformFeedbackImpl(); + } + } + + [[nodiscard]] const GraphicsPipelineKey& Key() const noexcept { + return key; + } + + [[nodiscard]] bool WritesGlobalMemory() const noexcept { + return writes_global_memory; + } + + [[nodiscard]] bool IsBuilt() const noexcept { + return is_built.load(std::memory_order::relaxed); + } + + template <typename Spec> + static auto MakeConfigureSpecFunc() { + return [](GraphicsPipeline* pipeline, bool is_indexed) { + pipeline->ConfigureImpl<Spec>(is_indexed); + }; + } + +private: + template <typename Spec> + void ConfigureImpl(bool is_indexed); + + void ConfigureTransformFeedbackImpl() const; + + void GenerateTransformFeedbackState(); + + void WaitForBuild(); + + TextureCache& texture_cache; + BufferCache& buffer_cache; + Tegra::MemoryManager& gpu_memory; + 
Tegra::Engines::Maxwell3D& maxwell3d; + ProgramManager& program_manager; + StateTracker& state_tracker; + const GraphicsPipelineKey key; + + void (*configure_func)(GraphicsPipeline*, bool){}; + + std::array<OGLProgram, 5> source_programs; + std::array<OGLAssemblyProgram, 5> assembly_programs; + u32 enabled_stages_mask{}; + + std::array<Shader::Info, 5> stage_infos{}; + std::array<u32, 5> enabled_uniform_buffer_masks{}; + VideoCommon::UniformBufferSizes uniform_buffer_sizes{}; + std::array<u32, 5> base_uniform_bindings{}; + std::array<u32, 5> base_storage_bindings{}; + std::array<u32, 5> num_texture_buffers{}; + std::array<u32, 5> num_image_buffers{}; + + bool use_storage_buffers{}; + bool writes_global_memory{}; + + static constexpr std::size_t XFB_ENTRY_STRIDE = 3; + GLsizei num_xfb_attribs{}; + GLsizei num_xfb_strides{}; + std::array<GLint, 128 * XFB_ENTRY_STRIDE * Maxwell::NumTransformFeedbackBuffers> xfb_attribs{}; + std::array<GLint, Maxwell::NumTransformFeedbackBuffers> xfb_streams{}; + + std::mutex built_mutex; + std::condition_variable built_condvar; + std::atomic_bool is_built{false}; +}; + +} // namespace OpenGL + +namespace std { +template <> +struct hash<OpenGL::GraphicsPipelineKey> { + size_t operator()(const OpenGL::GraphicsPipelineKey& k) const noexcept { + return k.Hash(); + } +}; +} // namespace std diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index ceb3abcb2..41d2b73f4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -23,7 +23,6 @@ #include "core/memory.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" #include "video_core/memory_manager.h" #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_query_cache.h" @@ -40,7 +39,6 @@ namespace OpenGL { using Maxwell = Tegra::Engines::Maxwell3D::Regs; using GLvec4 = std::array<GLfloat, 4>; -using Tegra::Engines::ShaderType; using VideoCore::Surface::PixelFormat; using VideoCore::Surface::SurfaceTarget; using VideoCore::Surface::SurfaceType; @@ -51,112 +49,11 @@ MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192)); MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Management", MP_RGB(100, 255, 100)); namespace { - constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; -struct TextureHandle { - constexpr TextureHandle(u32 data, bool via_header_index) { - const Tegra::Texture::TextureHandle handle{data}; - image = handle.tic_id; - sampler = via_header_index ? 
image : handle.tsc_id.Value(); - } - - u32 image; - u32 sampler; -}; - -template <typename Engine, typename Entry> -TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry, - ShaderType shader_type, size_t index = 0) { - if constexpr (std::is_same_v<Entry, SamplerEntry>) { - if (entry.is_separated) { - const u32 buffer_1 = entry.buffer; - const u32 buffer_2 = entry.secondary_buffer; - const u32 offset_1 = entry.offset; - const u32 offset_2 = entry.secondary_offset; - const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1); - const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2); - return TextureHandle(handle_1 | handle_2, via_header_index); - } - } - if (entry.is_bindless) { - const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); - return TextureHandle(raw, via_header_index); - } - const u32 buffer = engine.GetBoundBuffer(); - const u64 offset = (entry.offset + index) * sizeof(u32); - return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); -} - -/// Translates hardware transform feedback indices -/// @param location Hardware location -/// @return Pair of ARB_transform_feedback3 token stream first and third arguments -/// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt -std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) { - const u8 index = location / 4; - if (index >= 8 && index <= 39) { - return {GL_GENERIC_ATTRIB_NV, index - 8}; - } - if (index >= 48 && index <= 55) { - return {GL_TEXTURE_COORD_NV, index - 48}; - } - switch (index) { - case 7: - return {GL_POSITION, 0}; - case 40: - return {GL_PRIMARY_COLOR_NV, 0}; - case 41: - return {GL_SECONDARY_COLOR_NV, 0}; - case 42: - return {GL_BACK_PRIMARY_COLOR_NV, 0}; - case 43: - return {GL_BACK_SECONDARY_COLOR_NV, 0}; - } - UNIMPLEMENTED_MSG("index={}", index); - return {GL_POSITION, 0}; -} - void oglEnable(GLenum cap, bool state) { (state ? glEnable : glDisable)(cap); } - -ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { - if (entry.is_buffer) { - return ImageViewType::Buffer; - } - switch (entry.type) { - case Tegra::Shader::TextureType::Texture1D: - return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D; - case Tegra::Shader::TextureType::Texture2D: - return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D; - case Tegra::Shader::TextureType::Texture3D: - return ImageViewType::e3D; - case Tegra::Shader::TextureType::TextureCube: - return entry.is_array ? 
ImageViewType::CubeArray : ImageViewType::Cube; - } - UNREACHABLE(); - return ImageViewType::e2D; -} - -ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) { - switch (entry.type) { - case Tegra::Shader::ImageType::Texture1D: - return ImageViewType::e1D; - case Tegra::Shader::ImageType::Texture1DArray: - return ImageViewType::e1DArray; - case Tegra::Shader::ImageType::Texture2D: - return ImageViewType::e2D; - case Tegra::Shader::ImageType::Texture2DArray: - return ImageViewType::e2DArray; - case Tegra::Shader::ImageType::Texture3D: - return ImageViewType::e3D; - case Tegra::Shader::ImageType::TextureBuffer: - return ImageViewType::Buffer; - } - UNREACHABLE(); - return ImageViewType::e2D; -} - } // Anonymous namespace RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, @@ -170,14 +67,10 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), buffer_cache_runtime(device), buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), - shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), + shader_cache(*this, emu_window_, maxwell3d, kepler_compute, gpu_memory, device, texture_cache, + buffer_cache, program_manager, state_tracker, gpu.ShaderNotify()), query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache), - fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), - async_shaders(emu_window_) { - if (device.UseAsynchronousShaders()) { - async_shaders.AllocateWorkers(); - } -} + fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {} RasterizerOpenGL::~RasterizerOpenGL() = default; @@ -204,7 +97,7 @@ void RasterizerOpenGL::SyncVertexFormats() { const auto gl_index = static_cast<GLuint>(index); // Disable constant attributes. - if (attrib.IsConstant()) { + if (attrib.constant) { glDisableVertexAttribArray(gl_index); continue; } @@ -244,116 +137,9 @@ void RasterizerOpenGL::SyncVertexInstances() { } } -void RasterizerOpenGL::SetupShaders(bool is_indexed) { - u32 clip_distances = 0; - - std::array<Shader*, Maxwell::MaxShaderStage> shaders{}; - image_view_indices.clear(); - sampler_handles.clear(); - - texture_cache.SynchronizeGraphicsDescriptors(); - - for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - const auto& shader_config = maxwell3d.regs.shader_config[index]; - const auto program{static_cast<Maxwell::ShaderProgram>(index)}; - - // Skip stages that are not enabled - if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { - switch (program) { - case Maxwell::ShaderProgram::Geometry: - program_manager.UseGeometryShader(0); - break; - case Maxwell::ShaderProgram::Fragment: - program_manager.UseFragmentShader(0); - break; - default: - break; - } - continue; - } - // Currently this stages are not supported in the OpenGL backend. - // TODO(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL - if (program == Maxwell::ShaderProgram::TesselationControl || - program == Maxwell::ShaderProgram::TesselationEval) { - continue; - } - - Shader* const shader = shader_cache.GetStageProgram(program, async_shaders); - const GLuint program_handle = shader->IsBuilt() ? 
shader->GetHandle() : 0; - switch (program) { - case Maxwell::ShaderProgram::VertexA: - case Maxwell::ShaderProgram::VertexB: - program_manager.UseVertexShader(program_handle); - break; - case Maxwell::ShaderProgram::Geometry: - program_manager.UseGeometryShader(program_handle); - break; - case Maxwell::ShaderProgram::Fragment: - program_manager.UseFragmentShader(program_handle); - break; - default: - UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, - shader_config.enable.Value(), shader_config.offset); - break; - } - - // Stage indices are 0 - 5 - const size_t stage = index == 0 ? 0 : index - 1; - shaders[stage] = shader; - - SetupDrawTextures(shader, stage); - SetupDrawImages(shader, stage); - - buffer_cache.SetEnabledUniformBuffers(stage, shader->GetEntries().enabled_uniform_buffers); - - buffer_cache.UnbindGraphicsStorageBuffers(stage); - u32 ssbo_index = 0; - for (const auto& buffer : shader->GetEntries().global_memory_entries) { - buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index, - buffer.cbuf_offset, buffer.is_written); - ++ssbo_index; - } - - // Workaround for Intel drivers. - // When a clip distance is enabled but not set in the shader it crops parts of the screen - // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the - // clip distances only when it's written by a shader stage. - clip_distances |= shader->GetEntries().clip_distances; - - // When VertexA is enabled, we have dual vertex shaders - if (program == Maxwell::ShaderProgram::VertexA) { - // VertexB was combined with VertexA, so we skip the VertexB iteration - ++index; - } - } - SyncClipEnabled(clip_distances); - maxwell3d.dirty.flags[Dirty::Shaders] = false; - - buffer_cache.UpdateGraphicsBuffers(is_indexed); - - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); - texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); - - buffer_cache.BindHostGeometryBuffers(is_indexed); - - size_t image_view_index = 0; - size_t texture_index = 0; - size_t image_index = 0; - for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { - const Shader* const shader = shaders[stage]; - if (!shader) { - continue; - } - buffer_cache.BindHostStageBuffers(stage); - const auto& base = device.GetBaseBindings(stage); - BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index, - texture_index, image_index); - } -} - void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_loading, const VideoCore::DiskResourceLoadCallback& callback) { - shader_cache.LoadDiskCache(title_id, stop_loading, callback); + shader_cache.LoadDiskResources(title_id, stop_loading, callback); } void RasterizerOpenGL::Clear() { @@ -432,16 +218,15 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { SyncState(); - // Setup shaders and their used resources. 
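In the new Draw() path just below, shader_cache.CurrentGraphicsPipeline() can return nullptr while a pipeline is still being built on a worker thread, and the draw is simply skipped for that call; the actual heuristics (depth usage, small index counts) live in ShaderCache::BuiltPipeline later in this patch. Here is a minimal sketch of that control flow only, with toy stand-in types rather than the real yuzu classes and everything reduced to a single readiness flag; it is illustrative, not part of the patch.

#include <atomic>
#include <cstdio>

// Stand-in for the real GraphicsPipeline; only the "is it built yet?" flag matters here.
struct ToyPipeline {
    std::atomic_bool is_built{false};
};

// Mirrors the spirit of ShaderCache::BuiltPipeline: hand back the pipeline only when it is
// ready, or nullptr so the caller skips the draw while an asynchronous build finishes.
ToyPipeline* CurrentPipeline(ToyPipeline& cached, bool use_asynchronous_shaders) {
    if (cached.is_built.load(std::memory_order_relaxed)) {
        return &cached;
    }
    return use_asynchronous_shaders ? nullptr : &cached; // synchronous path waits instead
}

void Draw(ToyPipeline& cached, bool use_asynchronous_shaders) {
    ToyPipeline* const pipeline = CurrentPipeline(cached, use_asynchronous_shaders);
    if (!pipeline) {
        std::printf("pipeline still compiling, draw skipped this call\n");
        return;
    }
    std::printf("pipeline ready, configuring and drawing\n");
}

int main() {
    ToyPipeline pipeline;
    Draw(pipeline, true);                                  // skipped: async build pending
    pipeline.is_built.store(true, std::memory_order_relaxed);
    Draw(pipeline, true);                                  // now issued
    return 0;
}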
+ GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()}; + if (!pipeline) { + return; + } std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; - SetupShaders(is_indexed); - - texture_cache.UpdateRenderTargets(false); - state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); - program_manager.BindGraphicsPipeline(); + pipeline->Configure(is_indexed); const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology); - BeginTransformFeedback(primitive_mode); + BeginTransformFeedback(pipeline, primitive_mode); const GLuint base_instance = static_cast<GLuint>(maxwell3d.regs.vb_base_instance); const GLsizei num_instances = @@ -480,35 +265,24 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { num_instances, base_instance); } } - EndTransformFeedback(); ++num_queued_commands; + has_written_global_memory |= pipeline->WritesGlobalMemory(); gpu.TickWork(); } -void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { - Shader* const kernel = shader_cache.GetComputeKernel(code_addr); - - std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; - BindComputeTextures(kernel); - - const auto& entries = kernel->GetEntries(); - buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers); - buffer_cache.UnbindComputeStorageBuffers(); - u32 ssbo_index = 0; - for (const auto& buffer : entries.global_memory_entries) { - buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset, - buffer.is_written); - ++ssbo_index; - } - buffer_cache.UpdateComputeBuffers(); - buffer_cache.BindHostComputeBuffers(); - - const auto& launch_desc = kepler_compute.launch_description; - glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); +void RasterizerOpenGL::DispatchCompute() { + ComputePipeline* const pipeline{shader_cache.CurrentComputePipeline()}; + if (!pipeline) { + return; + } + pipeline->Configure(); + const auto& qmd{kepler_compute.launch_description}; + glDispatchCompute(qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z); ++num_queued_commands; + has_written_global_memory |= pipeline->WritesGlobalMemory(); } void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { @@ -661,7 +435,7 @@ void RasterizerOpenGL::WaitForIdle() { } void RasterizerOpenGL::FragmentBarrier() { - glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT); + glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT); } void RasterizerOpenGL::TiledCacheBarrier() { @@ -674,6 +448,13 @@ void RasterizerOpenGL::FlushCommands() { return; } num_queued_commands = 0; + + // Make sure memory stored from the previous GL command stream is visible + // This is only needed on assembly shaders where we write to GPU memory with raw pointers + if (has_written_global_memory) { + has_written_global_memory = false; + glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); + } glFlush(); } @@ -721,111 +502,11 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, // ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different"); // ASSERT_MSG(image_view->size.height == config.height, "Framebuffer height is different"); - screen_info.display_texture = image_view->Handle(ImageViewType::e2D); + screen_info.display_texture = image_view->Handle(Shader::TextureType::Color2D); screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format); return true; } -void RasterizerOpenGL::BindComputeTextures(Shader* kernel) { - 
image_view_indices.clear(); - sampler_handles.clear(); - - texture_cache.SynchronizeComputeDescriptors(); - - SetupComputeTextures(kernel); - SetupComputeImages(kernel); - - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); - texture_cache.FillComputeImageViews(indices_span, image_view_ids); - - program_manager.BindCompute(kernel->GetHandle()); - size_t image_view_index = 0; - size_t texture_index = 0; - size_t image_index = 0; - BindTextures(kernel->GetEntries(), 0, 0, image_view_index, texture_index, image_index); -} - -void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_texture, - GLuint base_image, size_t& image_view_index, - size_t& texture_index, size_t& image_index) { - const GLuint* const samplers = sampler_handles.data() + texture_index; - const GLuint* const textures = texture_handles.data() + texture_index; - const GLuint* const images = image_handles.data() + image_index; - - const size_t num_samplers = entries.samplers.size(); - for (const auto& sampler : entries.samplers) { - for (size_t i = 0; i < sampler.size; ++i) { - const ImageViewId image_view_id = image_view_ids[image_view_index++]; - const ImageView& image_view = texture_cache.GetImageView(image_view_id); - const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(sampler)); - texture_handles[texture_index++] = handle; - } - } - const size_t num_images = entries.images.size(); - for (size_t unit = 0; unit < num_images; ++unit) { - // TODO: Mark as modified - const ImageViewId image_view_id = image_view_ids[image_view_index++]; - const ImageView& image_view = texture_cache.GetImageView(image_view_id); - const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(entries.images[unit])); - image_handles[image_index] = handle; - ++image_index; - } - if (num_samplers > 0) { - glBindSamplers(base_texture, static_cast<GLsizei>(num_samplers), samplers); - glBindTextures(base_texture, static_cast<GLsizei>(num_samplers), textures); - } - if (num_images > 0) { - glBindImageTextures(base_image, static_cast<GLsizei>(num_images), images); - } -} - -void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) { - const bool via_header_index = - maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; - for (const auto& entry : shader->GetEntries().samplers) { - const auto shader_type = static_cast<ShaderType>(stage_index); - for (size_t index = 0; index < entry.size; ++index) { - const auto handle = - GetTextureInfo(maxwell3d, via_header_index, entry, shader_type, index); - const Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler); - sampler_handles.push_back(sampler->Handle()); - image_view_indices.push_back(handle.image); - } - } -} - -void RasterizerOpenGL::SetupComputeTextures(const Shader* kernel) { - const bool via_header_index = kepler_compute.launch_description.linked_tsc; - for (const auto& entry : kernel->GetEntries().samplers) { - for (size_t i = 0; i < entry.size; ++i) { - const auto handle = - GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute, i); - const Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); - sampler_handles.push_back(sampler->Handle()); - image_view_indices.push_back(handle.image); - } - } -} - -void RasterizerOpenGL::SetupDrawImages(const Shader* shader, size_t stage_index) { - const bool via_header_index = - maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; - for (const auto& entry : 
shader->GetEntries().images) { - const auto shader_type = static_cast<ShaderType>(stage_index); - const auto handle = GetTextureInfo(maxwell3d, via_header_index, entry, shader_type); - image_view_indices.push_back(handle.image); - } -} - -void RasterizerOpenGL::SetupComputeImages(const Shader* shader) { - const bool via_header_index = kepler_compute.launch_description.linked_tsc; - for (const auto& entry : shader->GetEntries().images) { - const auto handle = - GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute); - image_view_indices.push_back(handle.image); - } -} - void RasterizerOpenGL::SyncState() { SyncViewport(); SyncRasterizeEnable(); @@ -941,7 +622,7 @@ void RasterizerOpenGL::SyncDepthClamp() { void RasterizerOpenGL::SyncClipEnabled(u32 clip_mask) { auto& flags = maxwell3d.dirty.flags; - if (!flags[Dirty::ClipDistances] && !flags[Dirty::Shaders]) { + if (!flags[Dirty::ClipDistances] && !flags[VideoCommon::Dirty::Shaders]) { return; } flags[Dirty::ClipDistances] = false; @@ -1318,68 +999,13 @@ void RasterizerOpenGL::SyncFramebufferSRGB() { oglEnable(GL_FRAMEBUFFER_SRGB, maxwell3d.regs.framebuffer_srgb); } -void RasterizerOpenGL::SyncTransformFeedback() { - // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal - // when this is required. - const auto& regs = maxwell3d.regs; - - static constexpr std::size_t STRIDE = 3; - std::array<GLint, 128 * STRIDE * Maxwell::NumTransformFeedbackBuffers> attribs; - std::array<GLint, Maxwell::NumTransformFeedbackBuffers> streams; - - GLint* cursor = attribs.data(); - GLint* current_stream = streams.data(); - - for (std::size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) { - const auto& layout = regs.tfb_layouts[feedback]; - UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding"); - if (layout.varying_count == 0) { - continue; - } - - *current_stream = static_cast<GLint>(feedback); - if (current_stream != streams.data()) { - // When stepping one stream, push the expected token - cursor[0] = GL_NEXT_BUFFER_NV; - cursor[1] = 0; - cursor[2] = 0; - cursor += STRIDE; - } - ++current_stream; - - const auto& locations = regs.tfb_varying_locs[feedback]; - std::optional<u8> current_index; - for (u32 offset = 0; offset < layout.varying_count; ++offset) { - const u8 location = locations[offset]; - const u8 index = location / 4; - - if (current_index == index) { - // Increase number of components of the previous attachment - ++cursor[-2]; - continue; - } - current_index = index; - - std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location); - cursor[1] = 1; - cursor += STRIDE; - } - } - - const GLsizei num_attribs = static_cast<GLsizei>((cursor - attribs.data()) / STRIDE); - const GLsizei num_strides = static_cast<GLsizei>(current_stream - streams.data()); - glTransformFeedbackStreamAttribsNV(num_attribs, attribs.data(), num_strides, streams.data(), - GL_INTERLEAVED_ATTRIBS); -} - -void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) { +void RasterizerOpenGL::BeginTransformFeedback(GraphicsPipeline* program, GLenum primitive_mode) { const auto& regs = maxwell3d.regs; if (regs.tfb_enabled == 0) { return; } - if (device.UseAssemblyShaders()) { - SyncTransformFeedback(); - } + program->ConfigureTransformFeedback(); + UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || 
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); @@ -1393,11 +1019,9 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) { } void RasterizerOpenGL::EndTransformFeedback() { - const auto& regs = maxwell3d.regs; - if (regs.tfb_enabled == 0) { - return; + if (maxwell3d.regs.tfb_enabled != 0) { + glEndTransformFeedback(); } - glEndTransformFeedback(); } AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_) : buffer_cache{buffer_cache_} {} diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index d30ad698f..d0397b745 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -28,11 +28,9 @@ #include "video_core/renderer_opengl/gl_query_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_cache.h" -#include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/renderer_opengl/gl_texture_cache.h" -#include "video_core/shader/async_shaders.h" #include "video_core/textures/texture.h" namespace Core::Memory { @@ -81,7 +79,7 @@ public: void Draw(bool is_indexed, bool is_instanced) override; void Clear() override; - void DispatchCompute(GPUVAddr code_addr) override; + void DispatchCompute() override; void ResetCounter(VideoCore::QueryType type) override; void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; @@ -118,36 +116,11 @@ public: return num_queued_commands > 0; } - VideoCommon::Shader::AsyncShaders& GetAsyncShaders() { - return async_shaders; - } - - const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const { - return async_shaders; - } - private: static constexpr size_t MAX_TEXTURES = 192; static constexpr size_t MAX_IMAGES = 48; static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES; - void BindComputeTextures(Shader* kernel); - - void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image, - size_t& image_view_index, size_t& texture_index, size_t& image_index); - - /// Configures the current textures to use for the draw command. - void SetupDrawTextures(const Shader* shader, size_t stage_index); - - /// Configures the textures used in a compute shader. - void SetupComputeTextures(const Shader* kernel); - - /// Configures images in a graphics shader. - void SetupDrawImages(const Shader* shader, size_t stage_index); - - /// Configures images in a compute shader. 
- void SetupComputeImages(const Shader* shader); - /// Syncs state to match guest's void SyncState(); @@ -220,18 +193,12 @@ private: /// Syncs vertex instances to match the guest state void SyncVertexInstances(); - /// Syncs transform feedback state to match guest state - /// @note Only valid on assembly shaders - void SyncTransformFeedback(); - /// Begin a transform feedback - void BeginTransformFeedback(GLenum primitive_mode); + void BeginTransformFeedback(GraphicsPipeline* pipeline, GLenum primitive_mode); /// End a transform feedback void EndTransformFeedback(); - void SetupShaders(bool is_indexed); - Tegra::GPU& gpu; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::Engines::KeplerCompute& kepler_compute; @@ -246,13 +213,11 @@ private: TextureCache texture_cache; BufferCacheRuntime buffer_cache_runtime; BufferCache buffer_cache; - ShaderCacheOpenGL shader_cache; + ShaderCache shader_cache; QueryCache query_cache; AccelerateDMA accelerate_dma; FenceManagerOpenGL fence_manager; - VideoCommon::Shader::AsyncShaders async_shaders; - boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles; @@ -260,7 +225,8 @@ private: std::array<GLuint, MAX_IMAGES> image_handles{}; /// Number of commands queued to the OpenGL driver. Resetted on flush. - std::size_t num_queued_commands = 0; + size_t num_queued_commands = 0; + bool has_written_global_memory = false; u32 last_clip_distance_mask = 0; }; diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index 3428e5e21..8695c29e3 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp @@ -83,18 +83,6 @@ void OGLSampler::Release() { handle = 0; } -void OGLShader::Create(std::string_view source, GLenum type) { - if (handle != 0) { - return; - } - if (source.empty()) { - return; - } - - MICROPROFILE_SCOPE(OpenGL_ResourceCreation); - handle = GLShader::LoadShader(source, type); -} - void OGLShader::Release() { if (handle == 0) return; @@ -104,21 +92,6 @@ void OGLShader::Release() { handle = 0; } -void OGLProgram::CreateFromSource(const char* vert_shader, const char* geo_shader, - const char* frag_shader, bool separable_program, - bool hint_retrievable) { - OGLShader vert, geo, frag; - if (vert_shader) - vert.Create(vert_shader, GL_VERTEX_SHADER); - if (geo_shader) - geo.Create(geo_shader, GL_GEOMETRY_SHADER); - if (frag_shader) - frag.Create(frag_shader, GL_FRAGMENT_SHADER); - - MICROPROFILE_SCOPE(OpenGL_ResourceCreation); - Create(separable_program, hint_retrievable, vert.handle, geo.handle, frag.handle); -} - void OGLProgram::Release() { if (handle == 0) return; diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index 552d79db4..b2d5bfd3b 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -8,7 +8,6 @@ #include <utility> #include <glad/glad.h> #include "common/common_types.h" -#include "video_core/renderer_opengl/gl_shader_util.h" namespace OpenGL { @@ -128,8 +127,6 @@ public: return *this; } - void Create(std::string_view source, GLenum type); - void Release(); GLuint handle = 0; @@ -151,17 +148,6 @@ public: return *this; } - template <typename... T> - void Create(bool separable_program, bool hint_retrievable, T... 
shaders) { - if (handle != 0) - return; - handle = GLShader::LoadProgram(separable_program, hint_retrievable, shaders...); - } - - /// Creates a new internal OpenGL resource and stores the handle - void CreateFromSource(const char* vert_shader, const char* geo_shader, const char* frag_shader, - bool separable_program = false, bool hint_retrievable = false); - /// Deletes the internal OpenGL resource void Release(); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 5a01c59ec..8d6cc074c 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -3,606 +3,544 @@ // Refer to the license.txt file included. #include <atomic> +#include <fstream> #include <functional> #include <mutex> -#include <optional> #include <string> #include <thread> -#include <unordered_set> #include "common/alignment.h" #include "common/assert.h" +#include "common/fs/fs.h" +#include "common/fs/path_util.h" #include "common/logging/log.h" #include "common/scope_exit.h" +#include "common/settings.h" +#include "common/thread_worker.h" #include "core/core.h" -#include "core/frontend/emu_window.h" +#include "shader_recompiler/backend/glasm/emit_glasm.h" +#include "shader_recompiler/backend/glsl/emit_glsl.h" +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/frontend/maxwell/translate_program.h" +#include "shader_recompiler/profile.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" #include "video_core/memory_manager.h" -#include "video_core/renderer_opengl/gl_arb_decompiler.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_cache.h" -#include "video_core/renderer_opengl/gl_shader_decompiler.h" -#include "video_core/renderer_opengl/gl_shader_disk_cache.h" +#include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_state_tracker.h" -#include "video_core/shader/memory_util.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" #include "video_core/shader_cache.h" +#include "video_core/shader_environment.h" #include "video_core/shader_notify.h" namespace OpenGL { - -using Tegra::Engines::ShaderType; -using VideoCommon::Shader::GetShaderAddress; -using VideoCommon::Shader::GetShaderCode; -using VideoCommon::Shader::GetUniqueIdentifier; -using VideoCommon::Shader::KERNEL_MAIN_OFFSET; -using VideoCommon::Shader::ProgramCode; -using VideoCommon::Shader::Registry; -using VideoCommon::Shader::ShaderIR; -using VideoCommon::Shader::STAGE_MAIN_OFFSET; - namespace { - -constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{}; - -/// Gets the shader type from a Maxwell program type -constexpr GLenum GetGLShaderType(ShaderType shader_type) { - switch (shader_type) { - case ShaderType::Vertex: - return GL_VERTEX_SHADER; - case ShaderType::Geometry: - return GL_GEOMETRY_SHADER; - case ShaderType::Fragment: - return GL_FRAGMENT_SHADER; - case ShaderType::Compute: - return GL_COMPUTE_SHADER; - default: - return GL_NONE; - } +using Shader::Backend::GLASM::EmitGLASM; +using Shader::Backend::GLSL::EmitGLSL; +using Shader::Backend::SPIRV::EmitSPIRV; +using 
Shader::Maxwell::MergeDualVertexPrograms; +using Shader::Maxwell::TranslateProgram; +using VideoCommon::ComputeEnvironment; +using VideoCommon::FileEnvironment; +using VideoCommon::GenericEnvironment; +using VideoCommon::GraphicsEnvironment; +using VideoCommon::LoadPipelines; +using VideoCommon::SerializePipeline; +using Context = ShaderContext::Context; + +constexpr u32 CACHE_VERSION = 5; + +template <typename Container> +auto MakeSpan(Container& container) { + return std::span(container.data(), container.size()); } -constexpr const char* GetShaderTypeName(ShaderType shader_type) { - switch (shader_type) { - case ShaderType::Vertex: - return "VS"; - case ShaderType::TesselationControl: - return "HS"; - case ShaderType::TesselationEval: - return "DS"; - case ShaderType::Geometry: - return "GS"; - case ShaderType::Fragment: - return "FS"; - case ShaderType::Compute: - return "CS"; - } - return "UNK"; +Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, + const Shader::IR::Program& program, + const Shader::IR::Program* previous_program, + bool glasm_use_storage_buffers, bool use_assembly_shaders) { + Shader::RuntimeInfo info; + if (previous_program) { + info.previous_stage_stores = previous_program->info.stores; + } else { + // Mark all stores as available for vertex shaders + info.previous_stage_stores.mask.set(); + } + switch (program.stage) { + case Shader::Stage::VertexB: + case Shader::Stage::Geometry: + if (!use_assembly_shaders && key.xfb_enabled != 0) { + info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.xfb_state); + } + break; + case Shader::Stage::TessellationEval: + info.tess_clockwise = key.tessellation_clockwise != 0; + info.tess_primitive = [&key] { + switch (key.tessellation_primitive) { + case Maxwell::TessellationPrimitive::Isolines: + return Shader::TessPrimitive::Isolines; + case Maxwell::TessellationPrimitive::Triangles: + return Shader::TessPrimitive::Triangles; + case Maxwell::TessellationPrimitive::Quads: + return Shader::TessPrimitive::Quads; + } + UNREACHABLE(); + return Shader::TessPrimitive::Triangles; + }(); + info.tess_spacing = [&] { + switch (key.tessellation_spacing) { + case Maxwell::TessellationSpacing::Equal: + return Shader::TessSpacing::Equal; + case Maxwell::TessellationSpacing::FractionalOdd: + return Shader::TessSpacing::FractionalOdd; + case Maxwell::TessellationSpacing::FractionalEven: + return Shader::TessSpacing::FractionalEven; + } + UNREACHABLE(); + return Shader::TessSpacing::Equal; + }(); + break; + case Shader::Stage::Fragment: + info.force_early_z = key.early_z != 0; + break; + default: + break; + } + switch (key.gs_input_topology) { + case Maxwell::PrimitiveTopology::Points: + info.input_topology = Shader::InputTopology::Points; + break; + case Maxwell::PrimitiveTopology::Lines: + case Maxwell::PrimitiveTopology::LineLoop: + case Maxwell::PrimitiveTopology::LineStrip: + info.input_topology = Shader::InputTopology::Lines; + break; + case Maxwell::PrimitiveTopology::Triangles: + case Maxwell::PrimitiveTopology::TriangleStrip: + case Maxwell::PrimitiveTopology::TriangleFan: + case Maxwell::PrimitiveTopology::Quads: + case Maxwell::PrimitiveTopology::QuadStrip: + case Maxwell::PrimitiveTopology::Polygon: + case Maxwell::PrimitiveTopology::Patches: + info.input_topology = Shader::InputTopology::Triangles; + break; + case Maxwell::PrimitiveTopology::LinesAdjacency: + case Maxwell::PrimitiveTopology::LineStripAdjacency: + info.input_topology = Shader::InputTopology::LinesAdjacency; + break; + case 
Maxwell::PrimitiveTopology::TrianglesAdjacency: + case Maxwell::PrimitiveTopology::TriangleStripAdjacency: + info.input_topology = Shader::InputTopology::TrianglesAdjacency; + break; + } + info.glasm_use_storage_buffers = glasm_use_storage_buffers; + return info; } -constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) { - switch (program_type) { - case Maxwell::ShaderProgram::VertexA: - case Maxwell::ShaderProgram::VertexB: - return ShaderType::Vertex; - case Maxwell::ShaderProgram::TesselationControl: - return ShaderType::TesselationControl; - case Maxwell::ShaderProgram::TesselationEval: - return ShaderType::TesselationEval; - case Maxwell::ShaderProgram::Geometry: - return ShaderType::Geometry; - case Maxwell::ShaderProgram::Fragment: - return ShaderType::Fragment; - } - return {}; +void SetXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs) { + std::ranges::transform(regs.tfb_layouts, state.layouts.begin(), [](const auto& layout) { + return VideoCommon::TransformFeedbackState::Layout{ + .stream = layout.stream, + .varying_count = layout.varying_count, + .stride = layout.stride, + }; + }); + state.varyings = regs.tfb_varying_locs; } +} // Anonymous namespace -constexpr GLenum AssemblyEnum(ShaderType shader_type) { - switch (shader_type) { - case ShaderType::Vertex: - return GL_VERTEX_PROGRAM_NV; - case ShaderType::TesselationControl: - return GL_TESS_CONTROL_PROGRAM_NV; - case ShaderType::TesselationEval: - return GL_TESS_EVALUATION_PROGRAM_NV; - case ShaderType::Geometry: - return GL_GEOMETRY_PROGRAM_NV; - case ShaderType::Fragment: - return GL_FRAGMENT_PROGRAM_NV; - case ShaderType::Compute: - return GL_COMPUTE_PROGRAM_NV; +ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, + Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, const Device& device_, + TextureCache& texture_cache_, BufferCache& buffer_cache_, + ProgramManager& program_manager_, StateTracker& state_tracker_, + VideoCore::ShaderNotify& shader_notify_) + : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, + emu_window{emu_window_}, device{device_}, texture_cache{texture_cache_}, + buffer_cache{buffer_cache_}, program_manager{program_manager_}, state_tracker{state_tracker_}, + shader_notify{shader_notify_}, use_asynchronous_shaders{device.UseAsynchronousShaders()}, + profile{ + .supported_spirv = 0x00010000, + + .unified_descriptor_binding = false, + .support_descriptor_aliasing = false, + .support_int8 = false, + .support_int16 = false, + .support_int64 = device.HasShaderInt64(), + .support_vertex_instance_id = true, + .support_float_controls = false, + .support_separate_denorm_behavior = false, + .support_separate_rounding_mode = false, + .support_fp16_denorm_preserve = false, + .support_fp32_denorm_preserve = false, + .support_fp16_denorm_flush = false, + .support_fp32_denorm_flush = false, + .support_fp16_signed_zero_nan_preserve = false, + .support_fp32_signed_zero_nan_preserve = false, + .support_fp64_signed_zero_nan_preserve = false, + .support_explicit_workgroup_layout = false, + .support_vote = true, + .support_viewport_index_layer_non_geometry = + device.HasNvViewportArray2() || device.HasVertexViewportLayer(), + .support_viewport_mask = device.HasNvViewportArray2(), + .support_typeless_image_loads = device.HasImageLoadFormatted(), + .support_demote_to_helper_invocation = false, + .support_int64_atomics = false, + 
.support_derivative_control = device.HasDerivativeControl(), + .support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(), + .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(), + .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), + .support_gl_texture_shadow_lod = device.HasTextureShadowLod(), + .support_gl_warp_intrinsics = false, + .support_gl_variable_aoffi = device.HasVariableAoffi(), + .support_gl_sparse_textures = device.HasSparseTexture2(), + .support_gl_derivative_control = device.HasDerivativeControl(), + + .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(), + + .lower_left_origin_mode = true, + .need_declared_frag_colors = true, + .need_fastmath_off = device.NeedsFastmathOff(), + + .has_broken_spirv_clamp = true, + .has_broken_unsigned_image_offsets = true, + .has_broken_signed_operations = true, + .has_broken_fp16_float_controls = false, + .has_gl_component_indexing_bug = device.HasComponentIndexingBug(), + .has_gl_precise_bug = device.HasPreciseBug(), + .ignore_nan_fp_comparisons = true, + .gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(), + }, + host_info{ + .support_float16 = false, + .support_int64 = device.HasShaderInt64(), + } { + if (use_asynchronous_shaders) { + workers = CreateWorkers(); } - return {}; } -std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) { - return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier); -} +ShaderCache::~ShaderCache() = default; -std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) { - const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size}; - const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer, - entry.graphics_info, entry.compute_info}; - auto registry = std::make_shared<Registry>(entry.type, info); - for (const auto& [address, value] : entry.keys) { - const auto [buffer, offset] = address; - registry->InsertKey(buffer, offset, value); - } - for (const auto& [offset, sampler] : entry.bound_samplers) { - registry->InsertBoundSampler(offset, sampler); +void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, + const VideoCore::DiskResourceLoadCallback& callback) { + if (title_id == 0) { + return; } - for (const auto& [key, sampler] : entry.bindless_samplers) { - const auto [buffer, offset] = key; - registry->InsertBindlessSampler(buffer, offset, sampler); + const auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)}; + const auto base_dir{shader_dir / fmt::format("{:016x}", title_id)}; + if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir)) { + LOG_ERROR(Common_Filesystem, "Failed to create shader cache directories"); + return; } - return registry; -} - -std::unordered_set<GLenum> GetSupportedFormats() { - GLint num_formats; - glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); + shader_cache_filename = base_dir / "opengl.bin"; + + if (!workers) { + workers = CreateWorkers(); + } + struct { + std::mutex mutex; + size_t total{}; + size_t built{}; + bool has_loaded{}; + } state; + + const auto load_compute{[&](std::ifstream& file, FileEnvironment env) { + ComputePipelineKey key; + file.read(reinterpret_cast<char*>(&key), sizeof(key)); + workers->QueueWork( + [this, key, env = std::move(env), &state, &callback](Context* ctx) mutable { + ctx->pools.ReleaseContents(); + auto pipeline{CreateComputePipeline(ctx->pools, key, env)}; + std::lock_guard 
lock{state.mutex}; + if (pipeline) { + compute_cache.emplace(key, std::move(pipeline)); + } + ++state.built; + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); + } + }); + ++state.total; + }}; + const auto load_graphics{[&](std::ifstream& file, std::vector<FileEnvironment> envs) { + GraphicsPipelineKey key; + file.read(reinterpret_cast<char*>(&key), sizeof(key)); + workers->QueueWork( + [this, key, envs = std::move(envs), &state, &callback](Context* ctx) mutable { + boost::container::static_vector<Shader::Environment*, 5> env_ptrs; + for (auto& env : envs) { + env_ptrs.push_back(&env); + } + ctx->pools.ReleaseContents(); + auto pipeline{CreateGraphicsPipeline(ctx->pools, key, MakeSpan(env_ptrs), false)}; + std::lock_guard lock{state.mutex}; + if (pipeline) { + graphics_cache.emplace(key, std::move(pipeline)); + } + ++state.built; + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); + } + }); + ++state.total; + }}; + LoadPipelines(stop_loading, shader_cache_filename, CACHE_VERSION, load_compute, load_graphics); - std::vector<GLint> formats(num_formats); - glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data()); + std::unique_lock lock{state.mutex}; + callback(VideoCore::LoadCallbackStage::Build, 0, state.total); + state.has_loaded = true; + lock.unlock(); - std::unordered_set<GLenum> supported_formats; - for (const GLint format : formats) { - supported_formats.insert(static_cast<GLenum>(format)); + workers->WaitForRequests(); + if (!use_asynchronous_shaders) { + workers.reset(); } - return supported_formats; } -} // Anonymous namespace - -ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier, - const ShaderIR& ir, const Registry& registry, bool hint_retrievable) { - if (device.UseDriverCache()) { - // Ignore hint retrievable if we are using the driver cache - hint_retrievable = false; - } - const std::string shader_id = MakeShaderID(unique_identifier, shader_type); - LOG_INFO(Render_OpenGL, "{}", shader_id); - - auto program = std::make_shared<ProgramHandle>(); - - if (device.UseAssemblyShaders()) { - const std::string arb = - DecompileAssemblyShader(device, ir, registry, shader_type, shader_id); - - GLuint& arb_prog = program->assembly_program.handle; - -// Commented out functions signal OpenGL errors but are compatible with apitrace. -// Use them only to capture and replay on apitrace. 
-#if 0 - glGenProgramsNV(1, &arb_prog); - glLoadProgramNV(AssemblyEnum(shader_type), arb_prog, static_cast<GLsizei>(arb.size()), - reinterpret_cast<const GLubyte*>(arb.data())); -#else - glGenProgramsARB(1, &arb_prog); - glNamedProgramStringEXT(arb_prog, AssemblyEnum(shader_type), GL_PROGRAM_FORMAT_ASCII_ARB, - static_cast<GLsizei>(arb.size()), arb.data()); -#endif - const auto err = reinterpret_cast<const char*>(glGetString(GL_PROGRAM_ERROR_STRING_NV)); - if (err && *err) { - LOG_CRITICAL(Render_OpenGL, "{}", err); - LOG_INFO(Render_OpenGL, "\n{}", arb); - } - } else { - const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id); - OGLShader shader; - shader.Create(glsl.c_str(), GetGLShaderType(shader_type)); - - program->source_program.Create(true, hint_retrievable, shader.handle); - } - - return program; +GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() { + if (!RefreshStages(graphics_key.unique_hashes)) { + current_pipeline = nullptr; + return nullptr; + } + const auto& regs{maxwell3d.regs}; + graphics_key.raw = 0; + graphics_key.early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0); + graphics_key.gs_input_topology.Assign(graphics_key.unique_hashes[4] != 0 + ? regs.draw.topology.Value() + : Maxwell::PrimitiveTopology{}); + graphics_key.tessellation_primitive.Assign(regs.tess_mode.prim.Value()); + graphics_key.tessellation_spacing.Assign(regs.tess_mode.spacing.Value()); + graphics_key.tessellation_clockwise.Assign(regs.tess_mode.cw.Value()); + graphics_key.xfb_enabled.Assign(regs.tfb_enabled != 0 ? 1 : 0); + if (graphics_key.xfb_enabled) { + SetXfbState(graphics_key.xfb_state, regs); + } + if (current_pipeline && graphics_key == current_pipeline->Key()) { + return BuiltPipeline(current_pipeline); + } + return CurrentGraphicsPipelineSlowPath(); } -Shader::Shader(std::shared_ptr<Registry> registry_, ShaderEntries entries_, - ProgramSharedPtr program_, bool is_built_) - : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)}, - is_built{is_built_} { - handle = program->assembly_program.handle; - if (handle == 0) { - handle = program->source_program.handle; +GraphicsPipeline* ShaderCache::CurrentGraphicsPipelineSlowPath() { + const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; + auto& pipeline{pair->second}; + if (is_new) { + pipeline = CreateGraphicsPipeline(); } - if (is_built) { - ASSERT(handle != 0); + if (!pipeline) { + return nullptr; } + current_pipeline = pipeline.get(); + return BuiltPipeline(current_pipeline); } -Shader::~Shader() = default; - -GLuint Shader::GetHandle() const { - DEBUG_ASSERT(registry->IsConsistent()); - return handle; -} - -bool Shader::IsBuilt() const { - return is_built; -} - -void Shader::AsyncOpenGLBuilt(OGLProgram new_program) { - program->source_program = std::move(new_program); - handle = program->source_program.handle; - is_built = true; -} - -void Shader::AsyncGLASMBuilt(OGLAssemblyProgram new_program) { - program->assembly_program = std::move(new_program); - handle = program->assembly_program.handle; - is_built = true; -} - -std::unique_ptr<Shader> Shader::CreateStageFromMemory( - const ShaderParameters& params, Maxwell::ShaderProgram program_type, ProgramCode code, - ProgramCode code_b, VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr) { - const auto shader_type = GetShaderType(program_type); - - auto& gpu = params.gpu; - gpu.ShaderNotify().MarkSharderBuilding(); - - auto registry = std::make_shared<Registry>(shader_type, gpu.Maxwell3D()); 
- if (!async_shaders.IsShaderAsync(gpu) || !params.device.UseAsynchronousShaders()) { - const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); - // TODO(Rodrigo): Handle VertexA shaders - // std::optional<ShaderIR> ir_b; - // if (!code_b.empty()) { - // ir_b.emplace(code_b, STAGE_MAIN_OFFSET); - // } - auto program = - BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry); - ShaderDiskCacheEntry entry; - entry.type = shader_type; - entry.code = std::move(code); - entry.code_b = std::move(code_b); - entry.unique_identifier = params.unique_identifier; - entry.bound_buffer = registry->GetBoundBuffer(); - entry.graphics_info = registry->GetGraphicsInfo(); - entry.keys = registry->GetKeys(); - entry.bound_samplers = registry->GetBoundSamplers(); - entry.bindless_samplers = registry->GetBindlessSamplers(); - params.disk_cache.SaveEntry(std::move(entry)); - - gpu.ShaderNotify().MarkShaderComplete(); - - return std::unique_ptr<Shader>(new Shader(std::move(registry), - MakeEntries(params.device, ir, shader_type), - std::move(program), true)); - } else { - // Required for entries - const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); - auto entries = MakeEntries(params.device, ir, shader_type); - - async_shaders.QueueOpenGLShader(params.device, shader_type, params.unique_identifier, - std::move(code), std::move(code_b), STAGE_MAIN_OFFSET, - COMPILER_SETTINGS, *registry, cpu_addr); - - auto program = std::make_shared<ProgramHandle>(); - return std::unique_ptr<Shader>( - new Shader(std::move(registry), std::move(entries), std::move(program), false)); +GraphicsPipeline* ShaderCache::BuiltPipeline(GraphicsPipeline* pipeline) const noexcept { + if (pipeline->IsBuilt()) { + return pipeline; } -} - -std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params, - ProgramCode code) { - auto& gpu = params.gpu; - gpu.ShaderNotify().MarkSharderBuilding(); - - auto registry = std::make_shared<Registry>(ShaderType::Compute, params.engine); - const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry); - const u64 uid = params.unique_identifier; - auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry); - - ShaderDiskCacheEntry entry; - entry.type = ShaderType::Compute; - entry.code = std::move(code); - entry.unique_identifier = uid; - entry.bound_buffer = registry->GetBoundBuffer(); - entry.compute_info = registry->GetComputeInfo(); - entry.keys = registry->GetKeys(); - entry.bound_samplers = registry->GetBoundSamplers(); - entry.bindless_samplers = registry->GetBindlessSamplers(); - params.disk_cache.SaveEntry(std::move(entry)); - - gpu.ShaderNotify().MarkShaderComplete(); - - return std::unique_ptr<Shader>(new Shader(std::move(registry), - MakeEntries(params.device, ir, ShaderType::Compute), - std::move(program))); -} - -std::unique_ptr<Shader> Shader::CreateFromCache(const ShaderParameters& params, - const PrecompiledShader& precompiled_shader) { - return std::unique_ptr<Shader>(new Shader( - precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program)); -} - -ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_, - Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, - Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, const Device& device_) - : ShaderCache{rasterizer_}, emu_window{emu_window_}, gpu{gpu_}, gpu_memory{gpu_memory_}, - maxwell3d{maxwell3d_}, 
kepler_compute{kepler_compute_}, device{device_} {} - -ShaderCacheOpenGL::~ShaderCacheOpenGL() = default; - -void ShaderCacheOpenGL::LoadDiskCache(u64 title_id, std::stop_token stop_loading, - const VideoCore::DiskResourceLoadCallback& callback) { - disk_cache.BindTitleID(title_id); - const std::optional transferable = disk_cache.LoadTransferable(); - - LOG_INFO(Render_OpenGL, "Total Shader Count: {}", - transferable.has_value() ? transferable->size() : 0); - - if (!transferable) { - return; + if (!use_asynchronous_shaders) { + return pipeline; } - - std::vector<ShaderDiskCachePrecompiled> gl_cache; - if (!device.UseAssemblyShaders() && !device.UseDriverCache()) { - // Only load precompiled cache when we are not using assembly shaders - gl_cache = disk_cache.LoadPrecompiled(); + // If something is using depth, we can assume that games are not rendering anything which + // will be used one time. + if (maxwell3d.regs.zeta_enable) { + return nullptr; } - const auto supported_formats = GetSupportedFormats(); - - // Track if precompiled cache was altered during loading to know if we have to - // serialize the virtual precompiled cache file back to the hard drive - bool precompiled_cache_altered = false; - - // Inform the frontend about shader build initialization - if (callback) { - callback(VideoCore::LoadCallbackStage::Build, 0, transferable->size()); + // If games are using a small index count, we can assume these are full screen quads. + // Usually these shaders are only used once for building textures so we can assume they + // can't be built async + if (maxwell3d.regs.index_array.count <= 6 || maxwell3d.regs.vertex_buffer.count <= 6) { + return pipeline; } + return nullptr; +} - std::mutex mutex; - std::size_t built_shaders = 0; // It doesn't have be atomic since it's used behind a mutex - std::atomic_bool gl_cache_failed = false; - - const auto find_precompiled = [&gl_cache](u64 id) { - return std::ranges::find(gl_cache, id, &ShaderDiskCachePrecompiled::unique_identifier); - }; - - const auto worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin, - std::size_t end) { - const auto scope = context->Acquire(); - - for (std::size_t i = begin; i < end; ++i) { - if (stop_loading.stop_requested()) { - return; - } - const auto& entry = (*transferable)[i]; - const u64 uid = entry.unique_identifier; - const auto it = find_precompiled(uid); - const auto precompiled_entry = it != gl_cache.end() ? &*it : nullptr; - - const bool is_compute = entry.type == ShaderType::Compute; - const u32 main_offset = is_compute ? 
KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; - auto registry = MakeRegistry(entry); - const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry); - - ProgramSharedPtr program; - if (precompiled_entry) { - // If the shader is precompiled, attempt to load it with - program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats); - if (!program) { - gl_cache_failed = true; - } - } - if (!program) { - // Otherwise compile it from GLSL - program = BuildShader(device, entry.type, uid, ir, *registry, true); - } - - PrecompiledShader shader; - shader.program = std::move(program); - shader.registry = std::move(registry); - shader.entries = MakeEntries(device, ir, entry.type); - - std::scoped_lock lock{mutex}; - if (callback) { - callback(VideoCore::LoadCallbackStage::Build, ++built_shaders, - transferable->size()); - } - runtime_cache.emplace(entry.unique_identifier, std::move(shader)); - } - }; - - const std::size_t num_workers{std::max(1U, std::thread::hardware_concurrency())}; - const std::size_t bucket_size{transferable->size() / num_workers}; - std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers); - std::vector<std::thread> threads(num_workers); - for (std::size_t i = 0; i < num_workers; ++i) { - const bool is_last_worker = i + 1 == num_workers; - const std::size_t start{bucket_size * i}; - const std::size_t end{is_last_worker ? transferable->size() : start + bucket_size}; - - // On some platforms the shared context has to be created from the GUI thread - contexts[i] = emu_window.CreateSharedContext(); - threads[i] = std::thread(worker, contexts[i].get(), start, end); +ComputePipeline* ShaderCache::CurrentComputePipeline() { + const VideoCommon::ShaderInfo* const shader{ComputeShader()}; + if (!shader) { + return nullptr; } - for (auto& thread : threads) { - thread.join(); + const auto& qmd{kepler_compute.launch_description}; + const ComputePipelineKey key{ + .unique_hash = shader->unique_hash, + .shared_memory_size = qmd.shared_alloc, + .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, + }; + const auto [pair, is_new]{compute_cache.try_emplace(key)}; + auto& pipeline{pair->second}; + if (!is_new) { + return pipeline.get(); } + pipeline = CreateComputePipeline(key, shader); + return pipeline.get(); +} - if (gl_cache_failed) { - // Invalidate the precompiled cache if a shader dumped shader was rejected - disk_cache.InvalidatePrecompiled(); - precompiled_cache_altered = true; - return; - } - if (stop_loading.stop_requested()) { - return; - } +std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline() { + GraphicsEnvironments environments; + GetGraphicsEnvironments(environments, graphics_key.unique_hashes); - if (device.UseAssemblyShaders() || device.UseDriverCache()) { - // Don't store precompiled binaries for assembly shaders or when using the driver cache - return; + main_pools.ReleaseContents(); + auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), + use_asynchronous_shaders)}; + if (!pipeline || shader_cache_filename.empty()) { + return pipeline; } - - // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw - // before precompiling them - - for (std::size_t i = 0; i < transferable->size(); ++i) { - const u64 id = (*transferable)[i].unique_identifier; - const auto it = find_precompiled(id); - if (it == gl_cache.end()) { - const GLuint program = runtime_cache.at(id).program->source_program.handle; - disk_cache.SavePrecompiled(id, program); - 
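
// Illustrative sketch (not part of this change): the work-bucketing scheme used by the
// removed ShaderCacheOpenGL::LoadDiskCache code above. The shader list is split into
// num_workers contiguous buckets, the last worker also takes the remainder, and every
// worker runs the same callable over its [begin, end) range. The per-worker shared GL
// context from the original code is omitted; 'RunBucketed' and 'worker' are hypothetical.
#include <algorithm>
#include <cstddef>
#include <thread>
#include <vector>

template <typename Worker>
void RunBucketed(std::size_t num_items, Worker worker) {
    const std::size_t num_workers =
        std::max<std::size_t>(1, std::thread::hardware_concurrency());
    const std::size_t bucket_size = num_items / num_workers;

    std::vector<std::thread> threads;
    threads.reserve(num_workers);
    for (std::size_t i = 0; i < num_workers; ++i) {
        const bool is_last = i + 1 == num_workers;
        const std::size_t begin = bucket_size * i;
        const std::size_t end = is_last ? num_items : begin + bucket_size;
        threads.emplace_back([=] { worker(begin, end); });
    }
    for (auto& thread : threads) {
        thread.join();
    }
}
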
precompiled_cache_altered = true; + boost::container::static_vector<const GenericEnvironment*, Maxwell::MaxShaderProgram> env_ptrs; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (graphics_key.unique_hashes[index] != 0) { + env_ptrs.push_back(&environments.envs[index]); } } - - if (precompiled_cache_altered) { - disk_cache.SaveVirtualPrecompiledFile(); - } -} - -ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram( - const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, - const std::unordered_set<GLenum>& supported_formats) { - if (!supported_formats.contains(precompiled_entry.binary_format)) { - LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format, removing"); - return {}; - } - - auto program = std::make_shared<ProgramHandle>(); - GLuint& handle = program->source_program.handle; - handle = glCreateProgram(); - glProgramParameteri(handle, GL_PROGRAM_SEPARABLE, GL_TRUE); - glProgramBinary(handle, precompiled_entry.binary_format, precompiled_entry.binary.data(), - static_cast<GLsizei>(precompiled_entry.binary.size())); - - GLint link_status; - glGetProgramiv(handle, GL_LINK_STATUS, &link_status); - if (link_status == GL_FALSE) { - LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing"); - return {}; - } - - return program; + SerializePipeline(graphics_key, env_ptrs, shader_cache_filename, CACHE_VERSION); + return pipeline; } -Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program, - VideoCommon::Shader::AsyncShaders& async_shaders) { - if (!maxwell3d.dirty.flags[Dirty::Shaders]) { - auto* last_shader = last_shaders[static_cast<std::size_t>(program)]; - if (last_shader->IsBuilt()) { - return last_shader; +std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( + ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key, + std::span<Shader::Environment* const> envs, bool build_in_parallel) try { + LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); + size_t env_index{}; + u32 total_storage_buffers{}; + std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs; + const bool uses_vertex_a{key.unique_hashes[0] != 0}; + const bool uses_vertex_b{key.unique_hashes[1] != 0}; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (key.unique_hashes[index] == 0) { + continue; } - } + Shader::Environment& env{*envs[env_index]}; + ++env_index; - const GPUVAddr address{GetShaderAddress(maxwell3d, program)}; + const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))}; + Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); + if (!uses_vertex_a || index != 1) { + // Normal path + programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info); - if (device.UseAsynchronousShaders() && async_shaders.HasCompletedWork()) { - auto completed_work = async_shaders.GetCompletedWork(); - for (auto& work : completed_work) { - Shader* shader = TryGet(work.cpu_address); - gpu.ShaderNotify().MarkShaderComplete(); - if (shader == nullptr) { - continue; + for (const auto& desc : programs[index].info.storage_buffers_descriptors) { + total_storage_buffers += desc.count; } - using namespace VideoCommon::Shader; - if (work.backend == AsyncShaders::Backend::OpenGL) { - shader->AsyncOpenGLBuilt(std::move(work.program.opengl)); - } else if (work.backend == AsyncShaders::Backend::GLASM) { - shader->AsyncGLASMBuilt(std::move(work.program.glasm)); + } else { + // VertexB path 
when VertexA is present. + auto& program_va{programs[0]}; + auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; + for (const auto& desc : program_vb.info.storage_buffers_descriptors) { + total_storage_buffers += desc.count; } - - auto& registry = shader->GetRegistry(); - - ShaderDiskCacheEntry entry; - entry.type = work.shader_type; - entry.code = std::move(work.code); - entry.code_b = std::move(work.code_b); - entry.unique_identifier = work.uid; - entry.bound_buffer = registry.GetBoundBuffer(); - entry.graphics_info = registry.GetGraphicsInfo(); - entry.keys = registry.GetKeys(); - entry.bound_samplers = registry.GetBoundSamplers(); - entry.bindless_samplers = registry.GetBindlessSamplers(); - disk_cache.SaveEntry(std::move(entry)); + programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); } } - - // Look up shader in the cache based on address - const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(address)}; - if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) { - return last_shaders[static_cast<std::size_t>(program)] = shader; - } - - const u8* const host_ptr{gpu_memory.GetPointer(address)}; - - // No shader found - create a new one - ProgramCode code{GetShaderCode(gpu_memory, address, host_ptr, false)}; - ProgramCode code_b; - if (program == Maxwell::ShaderProgram::VertexA) { - const GPUVAddr address_b{GetShaderAddress(maxwell3d, Maxwell::ShaderProgram::VertexB)}; - const u8* host_ptr_b = gpu_memory.GetPointer(address_b); - code_b = GetShaderCode(gpu_memory, address_b, host_ptr_b, false); - } - const std::size_t code_size = code.size() * sizeof(u64); - - const u64 unique_identifier = GetUniqueIdentifier( - GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); - - const ShaderParameters params{gpu, maxwell3d, disk_cache, device, - *cpu_addr, host_ptr, unique_identifier}; - - std::unique_ptr<Shader> shader; - const auto found = runtime_cache.find(unique_identifier); - if (found == runtime_cache.end()) { - shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b), - async_shaders, cpu_addr.value_or(0)); - } else { - shader = Shader::CreateFromCache(params, found->second); - } - - Shader* const result = shader.get(); - if (cpu_addr) { - Register(std::move(shader), *cpu_addr, code_size); - } else { - null_shader = std::move(shader); + const u32 glasm_storage_buffer_limit{device.GetMaxGLASMStorageBufferBlocks()}; + const bool glasm_use_storage_buffers{total_storage_buffers <= glasm_storage_buffer_limit}; + + std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{}; + + OGLProgram source_program; + std::array<std::string, 5> sources; + std::array<std::vector<u32>, 5> sources_spirv; + Shader::Backend::Bindings binding; + Shader::IR::Program* previous_program{}; + const bool use_glasm{device.UseAssemblyShaders()}; + const size_t first_index = uses_vertex_a && uses_vertex_b ? 
1 : 0; + for (size_t index = first_index; index < Maxwell::MaxShaderProgram; ++index) { + if (key.unique_hashes[index] == 0) { + continue; + } + UNIMPLEMENTED_IF(index == 0); + + Shader::IR::Program& program{programs[index]}; + const size_t stage_index{index - 1}; + infos[stage_index] = &program.info; + + const auto runtime_info{ + MakeRuntimeInfo(key, program, previous_program, glasm_use_storage_buffers, use_glasm)}; + switch (device.GetShaderBackend()) { + case Settings::ShaderBackend::GLSL: + sources[stage_index] = EmitGLSL(profile, runtime_info, program, binding); + break; + case Settings::ShaderBackend::GLASM: + sources[stage_index] = EmitGLASM(profile, runtime_info, program, binding); + break; + case Settings::ShaderBackend::SPIRV: + sources_spirv[stage_index] = EmitSPIRV(profile, runtime_info, program, binding); + break; + } + previous_program = &program; } + auto* const thread_worker{build_in_parallel ? workers.get() : nullptr}; + return std::make_unique<GraphicsPipeline>( + device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, + thread_worker, &shader_notify, sources, sources_spirv, infos, key); - return last_shaders[static_cast<std::size_t>(program)] = result; +} catch (Shader::Exception& exception) { + LOG_ERROR(Render_OpenGL, "{}", exception.what()); + return nullptr; } -Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { - const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(code_addr)}; - - if (Shader* const kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get()) { - return kernel; - } - - // No kernel found, create a new one - const u8* host_ptr{gpu_memory.GetPointer(code_addr)}; - ProgramCode code{GetShaderCode(gpu_memory, code_addr, host_ptr, true)}; - const std::size_t code_size{code.size() * sizeof(u64)}; - const u64 unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; - - const ShaderParameters params{gpu, kepler_compute, disk_cache, device, - *cpu_addr, host_ptr, unique_identifier}; +std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline( + const ComputePipelineKey& key, const VideoCommon::ShaderInfo* shader) { + const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; + const auto& qmd{kepler_compute.launch_description}; + ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; + env.SetCachedSize(shader->size_bytes); + + main_pools.ReleaseContents(); + auto pipeline{CreateComputePipeline(main_pools, key, env)}; + if (!pipeline || shader_cache_filename.empty()) { + return pipeline; + } + SerializePipeline(key, std::array<const GenericEnvironment*, 1>{&env}, shader_cache_filename, + CACHE_VERSION); + return pipeline; +} - std::unique_ptr<Shader> kernel; - const auto found = runtime_cache.find(unique_identifier); - if (found == runtime_cache.end()) { - kernel = Shader::CreateKernelFromMemory(params, std::move(code)); - } else { - kernel = Shader::CreateFromCache(params, found->second); - } +std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline( + ShaderContext::ShaderPools& pools, const ComputePipelineKey& key, + Shader::Environment& env) try { + LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); + + Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; + auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; + + u32 num_storage_buffers{}; + for (const auto& desc : program.info.storage_buffers_descriptors) { + num_storage_buffers += desc.count; + } + Shader::RuntimeInfo info; 
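
// Illustrative sketch (not part of this change): the function-try-block pattern that the
// new CreateGraphicsPipeline and CreateComputePipeline overloads above and below rely on.
// The whole function body is wrapped so an exception thrown during translation or code
// emission is caught and turned into a null pipeline instead of propagating; callers
// treat nullptr as "skip this draw/dispatch". TranslationError and BuildStep are
// hypothetical placeholders, not yuzu symbols.
#include <memory>
#include <stdexcept>

struct Pipeline {};

struct TranslationError : std::runtime_error {
    using std::runtime_error::runtime_error;
};

std::unique_ptr<Pipeline> BuildStep(bool fail) {
    if (fail) {
        throw TranslationError("unimplemented instruction");
    }
    return std::make_unique<Pipeline>();
}

std::unique_ptr<Pipeline> CreatePipeline(bool fail) try {
    return BuildStep(fail);
} catch (const TranslationError&) {
    // Log the error here in a real implementation, then fall back to a null pipeline.
    return nullptr;
}
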
+ info.glasm_use_storage_buffers = num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); + + std::string code{}; + std::vector<u32> code_spirv; + switch (device.GetShaderBackend()) { + case Settings::ShaderBackend::GLSL: + code = EmitGLSL(profile, program); + break; + case Settings::ShaderBackend::GLASM: + code = EmitGLASM(profile, info, program); + break; + case Settings::ShaderBackend::SPIRV: + code_spirv = EmitSPIRV(profile, program); + break; + } + + return std::make_unique<ComputePipeline>(device, texture_cache, buffer_cache, gpu_memory, + kepler_compute, program_manager, program.info, code, + code_spirv); +} catch (Shader::Exception& exception) { + LOG_ERROR(Render_OpenGL, "{}", exception.what()); + return nullptr; +} - Shader* const result = kernel.get(); - if (cpu_addr) { - Register(std::move(kernel), *cpu_addr, code_size); - } else { - null_kernel = std::move(kernel); - } - return result; +std::unique_ptr<ShaderWorker> ShaderCache::CreateWorkers() const { + return std::make_unique<ShaderWorker>(std::max(std::thread::hardware_concurrency(), 2U) - 1, + "yuzu:ShaderBuilder", + [this] { return Context{emu_window}; }); } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index b30308b6f..a34110b37 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -5,157 +5,93 @@ #pragma once #include <array> -#include <atomic> -#include <bitset> -#include <memory> -#include <string> -#include <tuple> +#include <filesystem> +#include <stop_token> #include <unordered_map> -#include <unordered_set> -#include <vector> #include <glad/glad.h> #include "common/common_types.h" -#include "video_core/engines/shader_type.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_shader_decompiler.h" -#include "video_core/renderer_opengl/gl_shader_disk_cache.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" +#include "common/thread_worker.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/host_translate_info.h" +#include "shader_recompiler/object_pool.h" +#include "shader_recompiler/profile.h" +#include "video_core/renderer_opengl/gl_compute_pipeline.h" +#include "video_core/renderer_opengl/gl_graphics_pipeline.h" +#include "video_core/renderer_opengl/gl_shader_context.h" #include "video_core/shader_cache.h" namespace Tegra { class MemoryManager; } -namespace Core::Frontend { -class EmuWindow; -} - -namespace VideoCommon::Shader { -class AsyncShaders; -} - namespace OpenGL { class Device; +class ProgramManager; class RasterizerOpenGL; +using ShaderWorker = Common::StatefulThreadWorker<ShaderContext::Context>; -using Maxwell = Tegra::Engines::Maxwell3D::Regs; - -struct ProgramHandle { - OGLProgram source_program; - OGLAssemblyProgram assembly_program; -}; -using ProgramSharedPtr = std::shared_ptr<ProgramHandle>; - -struct PrecompiledShader { - ProgramSharedPtr program; - std::shared_ptr<VideoCommon::Shader::Registry> registry; - ShaderEntries entries; -}; - -struct ShaderParameters { - Tegra::GPU& gpu; - Tegra::Engines::ConstBufferEngineInterface& engine; - ShaderDiskCacheOpenGL& disk_cache; - const Device& device; - VAddr cpu_addr; - const u8* host_ptr; - u64 unique_identifier; -}; - -ProgramSharedPtr BuildShader(const Device& device, Tegra::Engines::ShaderType shader_type, - u64 unique_identifier, const VideoCommon::Shader::ShaderIR& ir, - 
const VideoCommon::Shader::Registry& registry, - bool hint_retrievable = false); - -class Shader final { +class ShaderCache : public VideoCommon::ShaderCache { public: - ~Shader(); - - /// Gets the GL program handle for the shader - GLuint GetHandle() const; - - bool IsBuilt() const; - - /// Gets the shader entries for the shader - const ShaderEntries& GetEntries() const { - return entries; - } - - const VideoCommon::Shader::Registry& GetRegistry() const { - return *registry; - } - - /// Mark a OpenGL shader as built - void AsyncOpenGLBuilt(OGLProgram new_program); + explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, + Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, const Device& device_, + TextureCache& texture_cache_, BufferCache& buffer_cache_, + ProgramManager& program_manager_, StateTracker& state_tracker_, + VideoCore::ShaderNotify& shader_notify_); + ~ShaderCache(); - /// Mark a GLASM shader as built - void AsyncGLASMBuilt(OGLAssemblyProgram new_program); + void LoadDiskResources(u64 title_id, std::stop_token stop_loading, + const VideoCore::DiskResourceLoadCallback& callback); - static std::unique_ptr<Shader> CreateStageFromMemory( - const ShaderParameters& params, Maxwell::ShaderProgram program_type, - ProgramCode program_code, ProgramCode program_code_b, - VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr); + [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline(); - static std::unique_ptr<Shader> CreateKernelFromMemory(const ShaderParameters& params, - ProgramCode code); - - static std::unique_ptr<Shader> CreateFromCache(const ShaderParameters& params, - const PrecompiledShader& precompiled_shader); + [[nodiscard]] ComputePipeline* CurrentComputePipeline(); private: - explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries, - ProgramSharedPtr program, bool is_built_ = true); - - std::shared_ptr<VideoCommon::Shader::Registry> registry; - ShaderEntries entries; - ProgramSharedPtr program; - GLuint handle = 0; - bool is_built{}; -}; + GraphicsPipeline* CurrentGraphicsPipelineSlowPath(); -class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> { -public: - explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_, - Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu, - Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, const Device& device_); - ~ShaderCacheOpenGL() override; + [[nodiscard]] GraphicsPipeline* BuiltPipeline(GraphicsPipeline* pipeline) const noexcept; - /// Loads disk cache for the current game - void LoadDiskCache(u64 title_id, std::stop_token stop_loading, - const VideoCore::DiskResourceLoadCallback& callback); + std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline(); - /// Gets the current specified shader stage program - Shader* GetStageProgram(Maxwell::ShaderProgram program, - VideoCommon::Shader::AsyncShaders& async_shaders); + std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline( + ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key, + std::span<Shader::Environment* const> envs, bool build_in_parallel); - /// Gets a compute kernel in the passed address - Shader* GetComputeKernel(GPUVAddr code_addr); + std::unique_ptr<ComputePipeline> CreateComputePipeline(const ComputePipelineKey& key, + const VideoCommon::ShaderInfo* shader); -private: - ProgramSharedPtr 
GeneratePrecompiledProgram( - const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, - const std::unordered_set<GLenum>& supported_formats); + std::unique_ptr<ComputePipeline> CreateComputePipeline(ShaderContext::ShaderPools& pools, + const ComputePipelineKey& key, + Shader::Environment& env); + + std::unique_ptr<ShaderWorker> CreateWorkers() const; Core::Frontend::EmuWindow& emu_window; - Tegra::GPU& gpu; - Tegra::MemoryManager& gpu_memory; - Tegra::Engines::Maxwell3D& maxwell3d; - Tegra::Engines::KeplerCompute& kepler_compute; const Device& device; + TextureCache& texture_cache; + BufferCache& buffer_cache; + ProgramManager& program_manager; + StateTracker& state_tracker; + VideoCore::ShaderNotify& shader_notify; + const bool use_asynchronous_shaders; + + GraphicsPipelineKey graphics_key{}; + GraphicsPipeline* current_pipeline{}; - ShaderDiskCacheOpenGL disk_cache; - std::unordered_map<u64, PrecompiledShader> runtime_cache; + ShaderContext::ShaderPools main_pools; + std::unordered_map<GraphicsPipelineKey, std::unique_ptr<GraphicsPipeline>> graphics_cache; + std::unordered_map<ComputePipelineKey, std::unique_ptr<ComputePipeline>> compute_cache; - std::unique_ptr<Shader> null_shader; - std::unique_ptr<Shader> null_kernel; + Shader::Profile profile; + Shader::HostTranslateInfo host_info; - std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{}; + std::filesystem::path shader_cache_filename; + std::unique_ptr<ShaderWorker> workers; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_context.h b/src/video_core/renderer_opengl/gl_shader_context.h new file mode 100644 index 000000000..6ff34e5d6 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_shader_context.h @@ -0,0 +1,33 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "core/frontend/emu_window.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" + +namespace OpenGL::ShaderContext { +struct ShaderPools { + void ReleaseContents() { + flow_block.ReleaseContents(); + block.ReleaseContents(); + inst.ReleaseContents(); + } + + Shader::ObjectPool<Shader::IR::Inst> inst; + Shader::ObjectPool<Shader::IR::Block> block; + Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block; +}; + +struct Context { + explicit Context(Core::Frontend::EmuWindow& emu_window) + : gl_context{emu_window.CreateSharedContext()}, scoped{*gl_context} {} + + std::unique_ptr<Core::Frontend::GraphicsContext> gl_context; + Core::Frontend::GraphicsContext::Scoped scoped; + ShaderPools pools; +}; + +} // namespace OpenGL::ShaderContext diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp deleted file mode 100644 index 9c28498e8..000000000 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ /dev/null @@ -1,2986 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
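
// Illustrative sketch (not part of this change): the "stateful worker" idea behind
// Common::StatefulThreadWorker<ShaderContext::Context> and ShaderCache::CreateWorkers()
// above. Each thread builds its own context from a factory (in yuzu, a shared GL context
// plus the shader object pools) and then drains a queue of tasks that receive that
// per-thread state. This condensed, single-queue version is illustrative only.
#include <condition_variable>
#include <functional>
#include <mutex>
#include <queue>
#include <thread>
#include <vector>

template <typename State>
class StatefulWorkerPool {
public:
    using Task = std::function<void(State&)>;

    template <typename StateFactory>
    StatefulWorkerPool(std::size_t num_threads, StateFactory make_state) {
        for (std::size_t i = 0; i < num_threads; ++i) {
            threads.emplace_back([this, make_state] {
                State state{make_state()}; // per-thread state, built on the worker thread
                Loop(state);
            });
        }
    }

    ~StatefulWorkerPool() {
        {
            std::scoped_lock lock{mutex};
            stop = true;
        }
        condition.notify_all();
        for (auto& thread : threads) {
            thread.join();
        }
    }

    void Push(Task task) {
        {
            std::scoped_lock lock{mutex};
            tasks.push(std::move(task));
        }
        condition.notify_one();
    }

private:
    void Loop(State& state) {
        while (true) {
            Task task;
            {
                std::unique_lock lock{mutex};
                condition.wait(lock, [this] { return stop || !tasks.empty(); });
                if (stop && tasks.empty()) {
                    return;
                }
                task = std::move(tasks.front());
                tasks.pop();
            }
            task(state);
        }
    }

    std::vector<std::thread> threads;
    std::queue<Task> tasks;
    std::mutex mutex;
    std::condition_variable condition;
    bool stop = false;
};
// In the diff above, the factory is '[this] { return Context{emu_window}; }' and the
// per-thread state carries a shared GraphicsContext plus the ShaderPools.
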
- -#include <array> -#include <string> -#include <string_view> -#include <utility> -#include <variant> -#include <vector> - -#include <fmt/format.h> - -#include "common/alignment.h" -#include "common/assert.h" -#include "common/common_types.h" -#include "common/div_ceil.h" -#include "common/logging/log.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" -#include "video_core/renderer_opengl/gl_device.h" -#include "video_core/renderer_opengl/gl_rasterizer.h" -#include "video_core/renderer_opengl/gl_shader_decompiler.h" -#include "video_core/shader/ast.h" -#include "video_core/shader/node.h" -#include "video_core/shader/shader_ir.h" -#include "video_core/shader/transform_feedback.h" - -namespace OpenGL { - -namespace { - -using Tegra::Engines::ShaderType; -using Tegra::Shader::Attribute; -using Tegra::Shader::Header; -using Tegra::Shader::IpaInterpMode; -using Tegra::Shader::IpaMode; -using Tegra::Shader::IpaSampleMode; -using Tegra::Shader::PixelImap; -using Tegra::Shader::Register; -using Tegra::Shader::TextureType; - -using namespace VideoCommon::Shader; -using namespace std::string_literals; - -using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using Operation = const OperationNode&; - -class ASTDecompiler; -class ExprDecompiler; - -enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat }; - -constexpr std::array FLOAT_TYPES{"float", "vec2", "vec3", "vec4"}; - -constexpr std::string_view INPUT_ATTRIBUTE_NAME = "in_attr"; -constexpr std::string_view OUTPUT_ATTRIBUTE_NAME = "out_attr"; - -struct TextureOffset {}; -struct TextureDerivates {}; -using TextureArgument = std::pair<Type, Node>; -using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>; - -constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast<u32>(Maxwell::MaxConstBufferSize) / sizeof(u32); -constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / sizeof(u32); - -constexpr std::string_view COMMON_DECLARATIONS = R"(#define ftoi floatBitsToInt -#define ftou floatBitsToUint -#define itof intBitsToFloat -#define utof uintBitsToFloat - -bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{ - bvec2 is_nan1 = isnan(pair1); - bvec2 is_nan2 = isnan(pair2); - return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y); -}} - -const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f ); -const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f ); -)"; - -class ShaderWriter final { -public: - void AddExpression(std::string_view text) { - DEBUG_ASSERT(scope >= 0); - if (!text.empty()) { - AppendIndentation(); - } - shader_source += text; - } - - // Forwards all arguments directly to libfmt. - // Note that all formatting requirements for fmt must be - // obeyed when using this function. (e.g. {{ must be used - // printing the character '{' is desirable. Ditto for }} and '}', - // etc). - template <typename... Args> - void AddLine(std::string_view text, Args&&... 
args) { - AddExpression(fmt::format(fmt::runtime(text), std::forward<Args>(args)...)); - AddNewLine(); - } - - void AddNewLine() { - DEBUG_ASSERT(scope >= 0); - shader_source += '\n'; - } - - std::string GenerateTemporary() { - return fmt::format("tmp{}", temporary_index++); - } - - std::string GetResult() { - return std::move(shader_source); - } - - s32 scope = 0; - -private: - void AppendIndentation() { - shader_source.append(static_cast<std::size_t>(scope) * 4, ' '); - } - - std::string shader_source; - u32 temporary_index = 1; -}; - -class Expression final { -public: - Expression(std::string code_, Type type_) : code{std::move(code_)}, type{type_} { - ASSERT(type != Type::Void); - } - Expression() : type{Type::Void} {} - - Type GetType() const { - return type; - } - - std::string GetCode() const { - return code; - } - - void CheckVoid() const { - ASSERT(type == Type::Void); - } - - std::string As(Type type_) const { - switch (type_) { - case Type::Bool: - return AsBool(); - case Type::Bool2: - return AsBool2(); - case Type::Float: - return AsFloat(); - case Type::Int: - return AsInt(); - case Type::Uint: - return AsUint(); - case Type::HalfFloat: - return AsHalfFloat(); - default: - UNREACHABLE_MSG("Invalid type"); - return code; - } - } - - std::string AsBool() const { - switch (type) { - case Type::Bool: - return code; - default: - UNREACHABLE_MSG("Incompatible types"); - return code; - } - } - - std::string AsBool2() const { - switch (type) { - case Type::Bool2: - return code; - default: - UNREACHABLE_MSG("Incompatible types"); - return code; - } - } - - std::string AsFloat() const { - switch (type) { - case Type::Float: - return code; - case Type::Uint: - return fmt::format("utof({})", code); - case Type::Int: - return fmt::format("itof({})", code); - case Type::HalfFloat: - return fmt::format("utof(packHalf2x16({}))", code); - default: - UNREACHABLE_MSG("Incompatible types"); - return code; - } - } - - std::string AsInt() const { - switch (type) { - case Type::Float: - return fmt::format("ftoi({})", code); - case Type::Uint: - return fmt::format("int({})", code); - case Type::Int: - return code; - case Type::HalfFloat: - return fmt::format("int(packHalf2x16({}))", code); - default: - UNREACHABLE_MSG("Incompatible types"); - return code; - } - } - - std::string AsUint() const { - switch (type) { - case Type::Float: - return fmt::format("ftou({})", code); - case Type::Uint: - return code; - case Type::Int: - return fmt::format("uint({})", code); - case Type::HalfFloat: - return fmt::format("packHalf2x16({})", code); - default: - UNREACHABLE_MSG("Incompatible types"); - return code; - } - } - - std::string AsHalfFloat() const { - switch (type) { - case Type::Float: - return fmt::format("unpackHalf2x16(ftou({}))", code); - case Type::Uint: - return fmt::format("unpackHalf2x16({})", code); - case Type::Int: - return fmt::format("unpackHalf2x16(int({}))", code); - case Type::HalfFloat: - return code; - default: - UNREACHABLE_MSG("Incompatible types"); - return code; - } - } - -private: - std::string code; - Type type{}; -}; - -const char* GetTypeString(Type type) { - switch (type) { - case Type::Bool: - return "bool"; - case Type::Bool2: - return "bvec2"; - case Type::Float: - return "float"; - case Type::Int: - return "int"; - case Type::Uint: - return "uint"; - case Type::HalfFloat: - return "vec2"; - default: - UNREACHABLE_MSG("Invalid type"); - return "<invalid type>"; - } -} - -const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) { - switch (image_type) { - case 
Tegra::Shader::ImageType::Texture1D: - return "1D"; - case Tegra::Shader::ImageType::TextureBuffer: - return "Buffer"; - case Tegra::Shader::ImageType::Texture1DArray: - return "1DArray"; - case Tegra::Shader::ImageType::Texture2D: - return "2D"; - case Tegra::Shader::ImageType::Texture2DArray: - return "2DArray"; - case Tegra::Shader::ImageType::Texture3D: - return "3D"; - default: - UNREACHABLE(); - return "1D"; - } -} - -/// Describes primitive behavior on geometry shaders -std::pair<const char*, u32> GetPrimitiveDescription(Maxwell::PrimitiveTopology topology) { - switch (topology) { - case Maxwell::PrimitiveTopology::Points: - return {"points", 1}; - case Maxwell::PrimitiveTopology::Lines: - case Maxwell::PrimitiveTopology::LineStrip: - return {"lines", 2}; - case Maxwell::PrimitiveTopology::LinesAdjacency: - case Maxwell::PrimitiveTopology::LineStripAdjacency: - return {"lines_adjacency", 4}; - case Maxwell::PrimitiveTopology::Triangles: - case Maxwell::PrimitiveTopology::TriangleStrip: - case Maxwell::PrimitiveTopology::TriangleFan: - return {"triangles", 3}; - case Maxwell::PrimitiveTopology::TrianglesAdjacency: - case Maxwell::PrimitiveTopology::TriangleStripAdjacency: - return {"triangles_adjacency", 6}; - default: - UNIMPLEMENTED_MSG("topology={}", topology); - return {"points", 1}; - } -} - -/// Generates code to use for a swizzle operation. -constexpr const char* GetSwizzle(std::size_t element) { - constexpr std::array swizzle = {".x", ".y", ".z", ".w"}; - return swizzle.at(element); -} - -constexpr const char* GetColorSwizzle(std::size_t element) { - constexpr std::array swizzle = {".r", ".g", ".b", ".a"}; - return swizzle.at(element); -} - -/// Translate topology -std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { - switch (topology) { - case Tegra::Shader::OutputTopology::PointList: - return "points"; - case Tegra::Shader::OutputTopology::LineStrip: - return "line_strip"; - case Tegra::Shader::OutputTopology::TriangleStrip: - return "triangle_strip"; - default: - UNIMPLEMENTED_MSG("Unknown output topology: {}", topology); - return "points"; - } -} - -/// Returns true if an object has to be treated as precise -bool IsPrecise(Operation operand) { - const auto& meta{operand.GetMeta()}; - if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) { - return arithmetic->precise; - } - return false; -} - -bool IsPrecise(const Node& node) { - if (const auto operation = std::get_if<OperationNode>(&*node)) { - return IsPrecise(*operation); - } - return false; -} - -constexpr bool IsGenericAttribute(Attribute::Index index) { - return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31; -} - -constexpr bool IsLegacyTexCoord(Attribute::Index index) { - return static_cast<int>(index) >= static_cast<int>(Attribute::Index::TexCoord_0) && - static_cast<int>(index) <= static_cast<int>(Attribute::Index::TexCoord_7); -} - -constexpr Attribute::Index ToGenericAttribute(u64 value) { - return static_cast<Attribute::Index>(value + static_cast<u64>(Attribute::Index::Attribute_0)); -} - -constexpr int GetLegacyTexCoordIndex(Attribute::Index index) { - return static_cast<int>(index) - static_cast<int>(Attribute::Index::TexCoord_0); -} - -u32 GetGenericAttributeIndex(Attribute::Index index) { - ASSERT(IsGenericAttribute(index)); - return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0); -} - -constexpr const char* GetFlowStackPrefix(MetaStackClass stack) { - switch (stack) { - case MetaStackClass::Ssy: - return "ssy"; - 
case MetaStackClass::Pbk: - return "pbk"; - } - return {}; -} - -std::string FlowStackName(MetaStackClass stack) { - return fmt::format("{}_flow_stack", GetFlowStackPrefix(stack)); -} - -std::string FlowStackTopName(MetaStackClass stack) { - return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); -} - -struct GenericVaryingDescription { - std::string name; - u8 first_element = 0; - bool is_scalar = false; -}; - -class GLSLDecompiler final { -public: - explicit GLSLDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_, - ShaderType stage_, std::string_view identifier_, - std::string_view suffix_) - : device{device_}, ir{ir_}, registry{registry_}, stage{stage_}, - identifier{identifier_}, suffix{suffix_}, header{ir.GetHeader()} { - if (stage != ShaderType::Compute) { - transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo()); - } - } - - void Decompile() { - DeclareHeader(); - DeclareVertex(); - DeclareGeometry(); - DeclareFragment(); - DeclareCompute(); - DeclareInputAttributes(); - DeclareOutputAttributes(); - DeclareImages(); - DeclareSamplers(); - DeclareGlobalMemory(); - DeclareConstantBuffers(); - DeclareLocalMemory(); - DeclareRegisters(); - DeclarePredicates(); - DeclareInternalFlags(); - DeclareCustomVariables(); - DeclarePhysicalAttributeReader(); - - code.AddLine("void main() {{"); - ++code.scope; - - if (stage == ShaderType::Vertex) { - code.AddLine("gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);"); - } - - if (ir.IsDecompiled()) { - DecompileAST(); - } else { - DecompileBranchMode(); - } - - --code.scope; - code.AddLine("}}"); - } - - std::string GetResult() { - return code.GetResult(); - } - -private: - friend class ASTDecompiler; - friend class ExprDecompiler; - - void DecompileBranchMode() { - // VM's program counter - const auto first_address = ir.GetBasicBlocks().begin()->first; - code.AddLine("uint jmp_to = {}U;", first_address); - - // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems - // unlikely that shaders will use 20 nested SSYs and PBKs. - constexpr u32 FLOW_STACK_SIZE = 20; - if (!ir.IsFlowStackDisabled()) { - for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) { - code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE); - code.AddLine("uint {} = 0U;", FlowStackTopName(stack)); - } - } - - code.AddLine("while (true) {{"); - ++code.scope; - - code.AddLine("switch (jmp_to) {{"); - - for (const auto& pair : ir.GetBasicBlocks()) { - const auto& [address, bb] = pair; - code.AddLine("case 0x{:X}U: {{", address); - ++code.scope; - - VisitBlock(bb); - - --code.scope; - code.AddLine("}}"); - } - - code.AddLine("default: return;"); - code.AddLine("}}"); - - --code.scope; - code.AddLine("}}"); - } - - void DecompileAST(); - - void DeclareHeader() { - if (!identifier.empty()) { - code.AddLine("// {}", identifier); - } - const bool use_compatibility = ir.UsesLegacyVaryings() || ir.UsesYNegate(); - code.AddLine("#version 440 {}", use_compatibility ? 
"compatibility" : "core"); - code.AddLine("#extension GL_ARB_separate_shader_objects : enable"); - if (device.HasShaderBallot()) { - code.AddLine("#extension GL_ARB_shader_ballot : require"); - } - if (device.HasVertexViewportLayer()) { - code.AddLine("#extension GL_ARB_shader_viewport_layer_array : require"); - } - if (device.HasImageLoadFormatted()) { - code.AddLine("#extension GL_EXT_shader_image_load_formatted : require"); - } - if (device.HasTextureShadowLod()) { - code.AddLine("#extension GL_EXT_texture_shadow_lod : require"); - } - if (device.HasWarpIntrinsics()) { - code.AddLine("#extension GL_NV_gpu_shader5 : require"); - code.AddLine("#extension GL_NV_shader_thread_group : require"); - code.AddLine("#extension GL_NV_shader_thread_shuffle : require"); - } - // This pragma stops Nvidia's driver from over optimizing math (probably using fp16 - // operations) on places where we don't want to. - // Thanks to Ryujinx for finding this workaround. - code.AddLine("#pragma optionNV(fastmath off)"); - - code.AddNewLine(); - - code.AddLine(COMMON_DECLARATIONS); - } - - void DeclareVertex() { - if (stage != ShaderType::Vertex) { - return; - } - - DeclareVertexRedeclarations(); - } - - void DeclareGeometry() { - if (stage != ShaderType::Geometry) { - return; - } - - const auto& info = registry.GetGraphicsInfo(); - const auto input_topology = info.primitive_topology; - const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(input_topology); - max_input_vertices = max_vertices; - code.AddLine("layout ({}) in;", glsl_topology); - - const auto topology = GetTopologyName(header.common3.output_topology); - const auto max_output_vertices = header.common4.max_output_vertices.Value(); - code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_output_vertices); - code.AddNewLine(); - - code.AddLine("in gl_PerVertex {{"); - ++code.scope; - code.AddLine("vec4 gl_Position;"); - --code.scope; - code.AddLine("}} gl_in[];"); - - DeclareVertexRedeclarations(); - } - - void DeclareFragment() { - if (stage != ShaderType::Fragment) { - return; - } - if (ir.UsesLegacyVaryings()) { - code.AddLine("in gl_PerFragment {{"); - ++code.scope; - code.AddLine("vec4 gl_TexCoord[8];"); - code.AddLine("vec4 gl_Color;"); - code.AddLine("vec4 gl_SecondaryColor;"); - --code.scope; - code.AddLine("}};"); - } - - for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { - code.AddLine("layout (location = {}) out vec4 frag_color{};", rt, rt); - } - } - - void DeclareCompute() { - if (stage != ShaderType::Compute) { - return; - } - const auto& info = registry.GetComputeInfo(); - if (u32 size = info.shared_memory_size_in_words * 4; size > 0) { - const u32 limit = device.GetMaxComputeSharedMemorySize(); - if (size > limit) { - LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}", - size, limit); - size = limit; - } - - code.AddLine("shared uint smem[{}];", size / 4); - code.AddNewLine(); - } - code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;", - info.workgroup_size[0], info.workgroup_size[1], info.workgroup_size[2]); - code.AddNewLine(); - } - - void DeclareVertexRedeclarations() { - code.AddLine("out gl_PerVertex {{"); - ++code.scope; - - auto pos_xfb = GetTransformFeedbackDecoration(Attribute::Index::Position); - if (!pos_xfb.empty()) { - pos_xfb = fmt::format("layout ({}) ", pos_xfb); - } - const char* pos_type = - FLOAT_TYPES.at(GetNumComponents(Attribute::Index::Position).value_or(4) - 1); - code.AddLine("{}{} gl_Position;", pos_xfb, pos_type); - - for 
(const auto attribute : ir.GetOutputAttributes()) { - if (attribute == Attribute::Index::ClipDistances0123 || - attribute == Attribute::Index::ClipDistances4567) { - code.AddLine("float gl_ClipDistance[];"); - break; - } - } - - if (stage != ShaderType::Geometry && - (stage != ShaderType::Vertex || device.HasVertexViewportLayer())) { - if (ir.UsesLayer()) { - code.AddLine("int gl_Layer;"); - } - if (ir.UsesViewportIndex()) { - code.AddLine("int gl_ViewportIndex;"); - } - } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && stage == ShaderType::Vertex && - !device.HasVertexViewportLayer()) { - LOG_ERROR( - Render_OpenGL, - "GL_ARB_shader_viewport_layer_array is not available and its required by a shader"); - } - - if (ir.UsesPointSize()) { - code.AddLine("float gl_PointSize;"); - } - - if (ir.UsesLegacyVaryings()) { - code.AddLine("vec4 gl_TexCoord[8];"); - code.AddLine("vec4 gl_FrontColor;"); - code.AddLine("vec4 gl_FrontSecondaryColor;"); - code.AddLine("vec4 gl_BackColor;"); - code.AddLine("vec4 gl_BackSecondaryColor;"); - } - - --code.scope; - code.AddLine("}};"); - code.AddNewLine(); - - if (stage == ShaderType::Geometry) { - if (ir.UsesLayer()) { - code.AddLine("out int gl_Layer;"); - } - if (ir.UsesViewportIndex()) { - code.AddLine("out int gl_ViewportIndex;"); - } - } - code.AddNewLine(); - } - - void DeclareRegisters() { - const auto& registers = ir.GetRegisters(); - for (const u32 gpr : registers) { - code.AddLine("float {} = 0.0f;", GetRegister(gpr)); - } - if (!registers.empty()) { - code.AddNewLine(); - } - } - - void DeclareCustomVariables() { - const u32 num_custom_variables = ir.GetNumCustomVariables(); - for (u32 i = 0; i < num_custom_variables; ++i) { - code.AddLine("float {} = 0.0f;", GetCustomVariable(i)); - } - if (num_custom_variables > 0) { - code.AddNewLine(); - } - } - - void DeclarePredicates() { - const auto& predicates = ir.GetPredicates(); - for (const auto pred : predicates) { - code.AddLine("bool {} = false;", GetPredicate(pred)); - } - if (!predicates.empty()) { - code.AddNewLine(); - } - } - - void DeclareLocalMemory() { - u64 local_memory_size = 0; - if (stage == ShaderType::Compute) { - local_memory_size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL; - } else { - local_memory_size = header.GetLocalMemorySize(); - } - if (local_memory_size == 0) { - return; - } - const u64 element_count = Common::AlignUp(local_memory_size, 4) / 4; - code.AddLine("uint {}[{}];", GetLocalMemory(), element_count); - code.AddNewLine(); - } - - void DeclareInternalFlags() { - for (u32 flag = 0; flag < static_cast<u32>(InternalFlag::Amount); flag++) { - const auto flag_code = static_cast<InternalFlag>(flag); - code.AddLine("bool {} = false;", GetInternalFlag(flag_code)); - } - code.AddNewLine(); - } - - const char* GetInputFlags(PixelImap attribute) { - switch (attribute) { - case PixelImap::Perspective: - return "smooth"; - case PixelImap::Constant: - return "flat"; - case PixelImap::ScreenLinear: - return "noperspective"; - case PixelImap::Unused: - break; - } - UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute); - return {}; - } - - void DeclareInputAttributes() { - if (ir.HasPhysicalAttributes()) { - const u32 num_inputs{GetNumPhysicalInputAttributes()}; - for (u32 i = 0; i < num_inputs; ++i) { - DeclareInputAttribute(ToGenericAttribute(i), true); - } - code.AddNewLine(); - return; - } - - const auto& attributes = ir.GetInputAttributes(); - for (const auto index : attributes) { - if (IsGenericAttribute(index)) { - DeclareInputAttribute(index, 
false); - } - } - if (!attributes.empty()) { - code.AddNewLine(); - } - } - - void DeclareInputAttribute(Attribute::Index index, bool skip_unused) { - const u32 location{GetGenericAttributeIndex(index)}; - - std::string name{GetGenericInputAttribute(index)}; - if (stage == ShaderType::Geometry) { - name = "gs_" + name + "[]"; - } - - std::string suffix_; - if (stage == ShaderType::Fragment) { - const auto input_mode{header.ps.GetPixelImap(location)}; - if (input_mode == PixelImap::Unused) { - return; - } - suffix_ = GetInputFlags(input_mode); - } - - code.AddLine("layout (location = {}) {} in vec4 {};", location, suffix_, name); - } - - void DeclareOutputAttributes() { - if (ir.HasPhysicalAttributes() && stage != ShaderType::Fragment) { - for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) { - DeclareOutputAttribute(ToGenericAttribute(i)); - } - code.AddNewLine(); - return; - } - - const auto& attributes = ir.GetOutputAttributes(); - for (const auto index : attributes) { - if (IsGenericAttribute(index)) { - DeclareOutputAttribute(index); - } - } - if (!attributes.empty()) { - code.AddNewLine(); - } - } - - std::optional<std::size_t> GetNumComponents(Attribute::Index index, u8 element = 0) const { - const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element); - const auto it = transform_feedback.find(location); - if (it == transform_feedback.end()) { - return std::nullopt; - } - return it->second.components; - } - - std::string GetTransformFeedbackDecoration(Attribute::Index index, u8 element = 0) const { - const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element); - const auto it = transform_feedback.find(location); - if (it == transform_feedback.end()) { - return {}; - } - - const VaryingTFB& tfb = it->second; - return fmt::format("xfb_buffer = {}, xfb_offset = {}, xfb_stride = {}", tfb.buffer, - tfb.offset, tfb.stride); - } - - void DeclareOutputAttribute(Attribute::Index index) { - static constexpr std::string_view swizzle = "xyzw"; - u8 element = 0; - while (element < 4) { - auto xfb = GetTransformFeedbackDecoration(index, element); - if (!xfb.empty()) { - xfb = fmt::format(", {}", xfb); - } - const std::size_t remainder = 4 - element; - const std::size_t num_components = GetNumComponents(index, element).value_or(remainder); - const char* const type = FLOAT_TYPES.at(num_components - 1); - - const u32 location = GetGenericAttributeIndex(index); - - GenericVaryingDescription description; - description.first_element = static_cast<u8>(element); - description.is_scalar = num_components == 1; - description.name = AppendSuffix(location, OUTPUT_ATTRIBUTE_NAME); - if (element != 0 || num_components != 4) { - const std::string_view name_swizzle = swizzle.substr(element, num_components); - description.name = fmt::format("{}_{}", description.name, name_swizzle); - } - for (std::size_t i = 0; i < num_components; ++i) { - const u8 offset = static_cast<u8>(location * 4 + element + i); - varying_description.insert({offset, description}); - } - - code.AddLine("layout (location = {}, component = {}{}) out {} {};", location, element, - xfb, type, description.name); - - element = static_cast<u8>(static_cast<std::size_t>(element) + num_components); - } - } - - void DeclareConstantBuffers() { - u32 binding = device.GetBaseBindings(stage).uniform_buffer; - for (const auto& [index, info] : ir.GetConstantBuffers()) { - const u32 num_elements = Common::DivCeil(info.GetSize(), 4 * sizeof(u32)); - const u32 size = info.IsIndirect() ? 
MAX_CONSTBUFFER_ELEMENTS : num_elements; - code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++, - GetConstBufferBlock(index)); - code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), size); - code.AddLine("}};"); - code.AddNewLine(); - } - } - - void DeclareGlobalMemory() { - u32 binding = device.GetBaseBindings(stage).shader_storage_buffer; - for (const auto& [base, usage] : ir.GetGlobalMemory()) { - // Since we don't know how the shader will use the shader, hint the driver to disable as - // much optimizations as possible - std::string qualifier = "coherent volatile"; - if (usage.is_read && !usage.is_written) { - qualifier += " readonly"; - } else if (usage.is_written && !usage.is_read) { - qualifier += " writeonly"; - } - - code.AddLine("layout (std430, binding = {}) {} buffer {} {{", binding++, qualifier, - GetGlobalMemoryBlock(base)); - code.AddLine(" uint {}[];", GetGlobalMemory(base)); - code.AddLine("}};"); - code.AddNewLine(); - } - } - - void DeclareSamplers() { - u32 binding = device.GetBaseBindings(stage).sampler; - for (const auto& sampler : ir.GetSamplers()) { - const std::string name = GetSampler(sampler); - const std::string description = fmt::format("layout (binding = {}) uniform", binding); - binding += sampler.is_indexed ? sampler.size : 1; - - std::string sampler_type = [&]() { - if (sampler.is_buffer) { - return "samplerBuffer"; - } - switch (sampler.type) { - case TextureType::Texture1D: - return "sampler1D"; - case TextureType::Texture2D: - return "sampler2D"; - case TextureType::Texture3D: - return "sampler3D"; - case TextureType::TextureCube: - return "samplerCube"; - default: - UNREACHABLE(); - return "sampler2D"; - } - }(); - if (sampler.is_array) { - sampler_type += "Array"; - } - if (sampler.is_shadow) { - sampler_type += "Shadow"; - } - - if (!sampler.is_indexed) { - code.AddLine("{} {} {};", description, sampler_type, name); - } else { - code.AddLine("{} {} {}[{}];", description, sampler_type, name, sampler.size); - } - } - if (!ir.GetSamplers().empty()) { - code.AddNewLine(); - } - } - - void DeclarePhysicalAttributeReader() { - if (!ir.HasPhysicalAttributes()) { - return; - } - code.AddLine("float ReadPhysicalAttribute(uint physical_address) {{"); - ++code.scope; - code.AddLine("switch (physical_address) {{"); - - // Just declare generic attributes for now. - const auto num_attributes{static_cast<u32>(GetNumPhysicalInputAttributes())}; - for (u32 index = 0; index < num_attributes; ++index) { - const auto attribute{ToGenericAttribute(index)}; - for (u32 element = 0; element < 4; ++element) { - constexpr u32 generic_base = 0x80; - constexpr u32 generic_stride = 16; - constexpr u32 element_stride = 4; - const u32 address{generic_base + index * generic_stride + element * element_stride}; - - const bool declared = stage != ShaderType::Fragment || - header.ps.GetPixelImap(index) != PixelImap::Unused; - const std::string value = - declared ? 
ReadAttribute(attribute, element).AsFloat() : "0.0f"; - code.AddLine("case 0x{:X}U: return {};", address, value); - } - } - - code.AddLine("default: return 0;"); - - code.AddLine("}}"); - --code.scope; - code.AddLine("}}"); - code.AddNewLine(); - } - - void DeclareImages() { - u32 binding = device.GetBaseBindings(stage).image; - for (const auto& image : ir.GetImages()) { - std::string qualifier = "coherent volatile"; - if (image.is_read && !image.is_written) { - qualifier += " readonly"; - } else if (image.is_written && !image.is_read) { - qualifier += " writeonly"; - } - - const char* format = image.is_atomic ? "r32ui, " : ""; - const char* type_declaration = GetImageTypeDeclaration(image.type); - code.AddLine("layout ({}binding = {}) {} uniform uimage{} {};", format, binding++, - qualifier, type_declaration, GetImage(image)); - } - if (!ir.GetImages().empty()) { - code.AddNewLine(); - } - } - - void VisitBlock(const NodeBlock& bb) { - for (const auto& node : bb) { - Visit(node).CheckVoid(); - } - } - - Expression Visit(const Node& node) { - if (const auto operation = std::get_if<OperationNode>(&*node)) { - if (const auto amend_index = operation->GetAmendIndex()) { - Visit(ir.GetAmendNode(*amend_index)).CheckVoid(); - } - const auto operation_index = static_cast<std::size_t>(operation->GetCode()); - if (operation_index >= operation_decompilers.size()) { - UNREACHABLE_MSG("Out of bounds operation: {}", operation_index); - return {}; - } - const auto decompiler = operation_decompilers[operation_index]; - if (decompiler == nullptr) { - UNREACHABLE_MSG("Undefined operation: {}", operation_index); - return {}; - } - return (this->*decompiler)(*operation); - } - - if (const auto gpr = std::get_if<GprNode>(&*node)) { - const u32 index = gpr->GetIndex(); - if (index == Register::ZeroIndex) { - return {"0U", Type::Uint}; - } - return {GetRegister(index), Type::Float}; - } - - if (const auto cv = std::get_if<CustomVarNode>(&*node)) { - const u32 index = cv->GetIndex(); - return {GetCustomVariable(index), Type::Float}; - } - - if (const auto immediate = std::get_if<ImmediateNode>(&*node)) { - const u32 value = immediate->GetValue(); - if (value < 10) { - // For eyecandy avoid using hex numbers on single digits - return {fmt::format("{}U", immediate->GetValue()), Type::Uint}; - } - return {fmt::format("0x{:X}U", immediate->GetValue()), Type::Uint}; - } - - if (const auto predicate = std::get_if<PredicateNode>(&*node)) { - const auto value = [&]() -> std::string { - switch (const auto index = predicate->GetIndex(); index) { - case Tegra::Shader::Pred::UnusedIndex: - return "true"; - case Tegra::Shader::Pred::NeverExecute: - return "false"; - default: - return GetPredicate(index); - } - }(); - if (predicate->IsNegated()) { - return {fmt::format("!({})", value), Type::Bool}; - } - return {value, Type::Bool}; - } - - if (const auto abuf = std::get_if<AbufNode>(&*node)) { - UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderType::Geometry, - "Physical attributes in geometry shaders are not implemented"); - if (abuf->IsPhysicalBuffer()) { - return {fmt::format("ReadPhysicalAttribute({})", - Visit(abuf->GetPhysicalAddress()).AsUint()), - Type::Float}; - } - return ReadAttribute(abuf->GetIndex(), abuf->GetElement(), abuf->GetBuffer()); - } - - if (const auto cbuf = std::get_if<CbufNode>(&*node)) { - const Node offset = cbuf->GetOffset(); - - if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { - // Direct access - const u32 offset_imm = immediate->GetValue(); - 
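
// Illustrative sketch (not part of this change): the index math of the const-buffer
// direct-access case here. Constant buffers are declared as arrays of uvec4, so a byte
// offset splits into a vec4 index (offset / 16) and a component index ((offset / 4) % 4);
// the indirect case below does the equivalent on a word index with '>> 2' and '& 3'.
// CbufIndices is a hypothetical helper showing the same arithmetic in plain C++.
#include <cstdint>
#include <utility>

// Maps a 4-byte-aligned byte offset to a (uvec4 element, component) pair.
constexpr std::pair<std::uint32_t, std::uint32_t> CbufIndices(std::uint32_t byte_offset) {
    return {byte_offset / 16, (byte_offset / 4) % 4};
}

static_assert(CbufIndices(0).first == 0 && CbufIndices(0).second == 0);
static_assert(CbufIndices(20).first == 1 && CbufIndices(20).second == 1);
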
ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access"); - return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()), - offset_imm / (4 * 4), (offset_imm / 4) % 4), - Type::Uint}; - } - - // Indirect access - const std::string final_offset = code.GenerateTemporary(); - code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint()); - - if (!device.HasComponentIndexingBug()) { - return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()), - final_offset, final_offset), - Type::Uint}; - } - - // AMD's proprietary GLSL compiler emits ill code for variable component access. - // To bypass this driver bug generate 4 ifs, one per each component. - const std::string pack = code.GenerateTemporary(); - code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()), - final_offset); - - const std::string result = code.GenerateTemporary(); - code.AddLine("uint {};", result); - for (u32 swizzle = 0; swizzle < 4; ++swizzle) { - code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result, pack, - GetSwizzle(swizzle)); - } - return {result, Type::Uint}; - } - - if (const auto gmem = std::get_if<GmemNode>(&*node)) { - const std::string real = Visit(gmem->GetRealAddress()).AsUint(); - const std::string base = Visit(gmem->GetBaseAddress()).AsUint(); - const std::string final_offset = fmt::format("({} - {}) >> 2", real, base); - return {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset), - Type::Uint}; - } - - if (const auto lmem = std::get_if<LmemNode>(&*node)) { - return { - fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()), - Type::Uint}; - } - - if (const auto smem = std::get_if<SmemNode>(&*node)) { - return {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint}; - } - - if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { - return {GetInternalFlag(internal_flag->GetFlag()), Type::Bool}; - } - - if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { - if (const auto amend_index = conditional->GetAmendIndex()) { - Visit(ir.GetAmendNode(*amend_index)).CheckVoid(); - } - // It's invalid to call conditional on nested nodes, use an operation instead - code.AddLine("if ({}) {{", Visit(conditional->GetCondition()).AsBool()); - ++code.scope; - - VisitBlock(conditional->GetCode()); - - --code.scope; - code.AddLine("}}"); - return {}; - } - - if (const auto comment = std::get_if<CommentNode>(&*node)) { - code.AddLine("// " + comment->GetText()); - return {}; - } - - UNREACHABLE(); - return {}; - } - - Expression ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) { - const auto GeometryPass = [&](std::string_view name) { - if (stage == ShaderType::Geometry && buffer) { - // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games - // set an 0x80000000 index for those and the shader fails to build. Find out why - // this happens and what's its intent. 
- return fmt::format("gs_{}[{} % {}]", name, Visit(buffer).AsUint(), - max_input_vertices.value()); - } - return std::string(name); - }; - - switch (attribute) { - case Attribute::Index::Position: - switch (stage) { - case ShaderType::Geometry: - return {fmt::format("gl_in[{}].gl_Position{}", Visit(buffer).AsUint(), - GetSwizzle(element)), - Type::Float}; - case ShaderType::Fragment: - return {"gl_FragCoord"s + GetSwizzle(element), Type::Float}; - default: - UNREACHABLE(); - return {"0", Type::Int}; - } - case Attribute::Index::FrontColor: - return {"gl_Color"s + GetSwizzle(element), Type::Float}; - case Attribute::Index::FrontSecondaryColor: - return {"gl_SecondaryColor"s + GetSwizzle(element), Type::Float}; - case Attribute::Index::PointCoord: - switch (element) { - case 0: - return {"gl_PointCoord.x", Type::Float}; - case 1: - return {"gl_PointCoord.y", Type::Float}; - case 2: - case 3: - return {"0.0f", Type::Float}; - } - UNREACHABLE(); - return {"0", Type::Int}; - case Attribute::Index::TessCoordInstanceIDVertexID: - // TODO(Subv): Find out what the values are for the first two elements when inside a - // vertex shader, and what's the value of the fourth element when inside a Tess Eval - // shader. - ASSERT(stage == ShaderType::Vertex); - switch (element) { - case 2: - // Config pack's first value is instance_id. - return {"gl_InstanceID", Type::Int}; - case 3: - return {"gl_VertexID", Type::Int}; - } - UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); - return {"0", Type::Int}; - case Attribute::Index::FrontFacing: - // TODO(Subv): Find out what the values are for the other elements. - ASSERT(stage == ShaderType::Fragment); - switch (element) { - case 3: - return {"(gl_FrontFacing ? -1 : 0)", Type::Int}; - } - UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element); - return {"0", Type::Int}; - default: - if (IsGenericAttribute(attribute)) { - return {GeometryPass(GetGenericInputAttribute(attribute)) + GetSwizzle(element), - Type::Float}; - } - if (IsLegacyTexCoord(attribute)) { - UNIMPLEMENTED_IF(stage == ShaderType::Geometry); - return {fmt::format("gl_TexCoord[{}]{}", GetLegacyTexCoordIndex(attribute), - GetSwizzle(element)), - Type::Float}; - } - break; - } - UNIMPLEMENTED_MSG("Unhandled input attribute: {}", attribute); - return {"0", Type::Int}; - } - - Expression ApplyPrecise(Operation operation, std::string value, Type type) { - if (!IsPrecise(operation)) { - return {std::move(value), type}; - } - // Old Nvidia drivers have a bug with precise and texture sampling. These are more likely to - // be found in fragment shaders, so we disable precise there. There are vertex shaders that - // also fail to build but nobody seems to care about those. - // Note: Only bugged drivers will skip precise. - const bool disable_precise = device.HasPreciseBug() && stage == ShaderType::Fragment; - - std::string temporary = code.GenerateTemporary(); - code.AddLine("{}{} {} = {};", disable_precise ? 
"" : "precise ", GetTypeString(type), - temporary, value); - return {std::move(temporary), type}; - } - - Expression VisitOperand(Operation operation, std::size_t operand_index) { - const auto& operand = operation[operand_index]; - const bool parent_precise = IsPrecise(operation); - const bool child_precise = IsPrecise(operand); - const bool child_trivial = !std::holds_alternative<OperationNode>(*operand); - if (!parent_precise || child_precise || child_trivial) { - return Visit(operand); - } - - Expression value = Visit(operand); - std::string temporary = code.GenerateTemporary(); - code.AddLine("{} {} = {};", GetTypeString(value.GetType()), temporary, value.GetCode()); - return {std::move(temporary), value.GetType()}; - } - - std::optional<Expression> GetOutputAttribute(const AbufNode* abuf) { - const u32 element = abuf->GetElement(); - switch (const auto attribute = abuf->GetIndex()) { - case Attribute::Index::Position: - return {{"gl_Position"s + GetSwizzle(element), Type::Float}}; - case Attribute::Index::LayerViewportPointSize: - switch (element) { - case 0: - UNIMPLEMENTED(); - return std::nullopt; - case 1: - if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) { - return std::nullopt; - } - return {{"gl_Layer", Type::Int}}; - case 2: - if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) { - return std::nullopt; - } - return {{"gl_ViewportIndex", Type::Int}}; - case 3: - return {{"gl_PointSize", Type::Float}}; - } - return std::nullopt; - case Attribute::Index::FrontColor: - return {{"gl_FrontColor"s + GetSwizzle(element), Type::Float}}; - case Attribute::Index::FrontSecondaryColor: - return {{"gl_FrontSecondaryColor"s + GetSwizzle(element), Type::Float}}; - case Attribute::Index::BackColor: - return {{"gl_BackColor"s + GetSwizzle(element), Type::Float}}; - case Attribute::Index::BackSecondaryColor: - return {{"gl_BackSecondaryColor"s + GetSwizzle(element), Type::Float}}; - case Attribute::Index::ClipDistances0123: - return {{fmt::format("gl_ClipDistance[{}]", element), Type::Float}}; - case Attribute::Index::ClipDistances4567: - return {{fmt::format("gl_ClipDistance[{}]", element + 4), Type::Float}}; - default: - if (IsGenericAttribute(attribute)) { - return {{GetGenericOutputAttribute(attribute, element), Type::Float}}; - } - if (IsLegacyTexCoord(attribute)) { - return {{fmt::format("gl_TexCoord[{}]{}", GetLegacyTexCoordIndex(attribute), - GetSwizzle(element)), - Type::Float}}; - } - UNIMPLEMENTED_MSG("Unhandled output attribute: {}", attribute); - return std::nullopt; - } - } - - Expression GenerateUnary(Operation operation, std::string_view func, Type result_type, - Type type_a) { - std::string op_str = fmt::format("{}({})", func, VisitOperand(operation, 0).As(type_a)); - return ApplyPrecise(operation, std::move(op_str), result_type); - } - - Expression GenerateBinaryInfix(Operation operation, std::string_view func, Type result_type, - Type type_a, Type type_b) { - const std::string op_a = VisitOperand(operation, 0).As(type_a); - const std::string op_b = VisitOperand(operation, 1).As(type_b); - std::string op_str = fmt::format("({} {} {})", op_a, func, op_b); - - return ApplyPrecise(operation, std::move(op_str), result_type); - } - - Expression GenerateBinaryCall(Operation operation, std::string_view func, Type result_type, - Type type_a, Type type_b) { - const std::string op_a = VisitOperand(operation, 0).As(type_a); - const std::string op_b = VisitOperand(operation, 1).As(type_b); - std::string op_str = fmt::format("{}({}, {})", func, op_a, 
op_b); - - return ApplyPrecise(operation, std::move(op_str), result_type); - } - - Expression GenerateTernary(Operation operation, std::string_view func, Type result_type, - Type type_a, Type type_b, Type type_c) { - const std::string op_a = VisitOperand(operation, 0).As(type_a); - const std::string op_b = VisitOperand(operation, 1).As(type_b); - const std::string op_c = VisitOperand(operation, 2).As(type_c); - std::string op_str = fmt::format("{}({}, {}, {})", func, op_a, op_b, op_c); - - return ApplyPrecise(operation, std::move(op_str), result_type); - } - - Expression GenerateQuaternary(Operation operation, const std::string& func, Type result_type, - Type type_a, Type type_b, Type type_c, Type type_d) { - const std::string op_a = VisitOperand(operation, 0).As(type_a); - const std::string op_b = VisitOperand(operation, 1).As(type_b); - const std::string op_c = VisitOperand(operation, 2).As(type_c); - const std::string op_d = VisitOperand(operation, 3).As(type_d); - std::string op_str = fmt::format("{}({}, {}, {}, {})", func, op_a, op_b, op_c, op_d); - - return ApplyPrecise(operation, std::move(op_str), result_type); - } - - std::string GenerateTexture(Operation operation, const std::string& function_suffix, - const std::vector<TextureIR>& extras, bool separate_dc = false) { - constexpr std::array coord_constructors = {"float", "vec2", "vec3", "vec4"}; - - const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); - ASSERT(meta); - - const std::size_t count = operation.GetOperandsCount(); - const bool has_array = meta->sampler.is_array; - const bool has_shadow = meta->sampler.is_shadow; - const bool workaround_lod_array_shadow_as_grad = - !device.HasTextureShadowLod() && function_suffix == "Lod" && meta->sampler.is_shadow && - ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) || - meta->sampler.type == TextureType::TextureCube); - - std::string expr = "texture"; - - if (workaround_lod_array_shadow_as_grad) { - expr += "Grad"; - } else { - expr += function_suffix; - } - - if (!meta->aoffi.empty()) { - expr += "Offset"; - } else if (!meta->ptp.empty()) { - expr += "Offsets"; - } - if (!meta->sampler.is_indexed) { - expr += '(' + GetSampler(meta->sampler) + ", "; - } else { - expr += '(' + GetSampler(meta->sampler) + '[' + Visit(meta->index).AsUint() + "], "; - } - expr += coord_constructors.at(count + (has_array ? 1 : 0) + - (has_shadow && !separate_dc ? 
1 : 0) - 1); - expr += '('; - for (std::size_t i = 0; i < count; ++i) { - expr += Visit(operation[i]).AsFloat(); - - const std::size_t next = i + 1; - if (next < count) - expr += ", "; - } - if (has_array) { - expr += ", float(" + Visit(meta->array).AsInt() + ')'; - } - if (has_shadow) { - if (separate_dc) { - expr += "), " + Visit(meta->depth_compare).AsFloat(); - } else { - expr += ", " + Visit(meta->depth_compare).AsFloat() + ')'; - } - } else { - expr += ')'; - } - - if (workaround_lod_array_shadow_as_grad) { - switch (meta->sampler.type) { - case TextureType::Texture2D: - return expr + ", vec2(0.0), vec2(0.0))"; - case TextureType::TextureCube: - return expr + ", vec3(0.0), vec3(0.0))"; - default: - UNREACHABLE(); - break; - } - } - - for (const auto& variant : extras) { - if (const auto argument = std::get_if<TextureArgument>(&variant)) { - expr += GenerateTextureArgument(*argument); - } else if (std::holds_alternative<TextureOffset>(variant)) { - if (!meta->aoffi.empty()) { - expr += GenerateTextureAoffi(meta->aoffi); - } else if (!meta->ptp.empty()) { - expr += GenerateTexturePtp(meta->ptp); - } - } else if (std::holds_alternative<TextureDerivates>(variant)) { - expr += GenerateTextureDerivates(meta->derivates); - } else { - UNREACHABLE(); - } - } - - return expr + ')'; - } - - std::string GenerateTextureArgument(const TextureArgument& argument) { - const auto& [type, operand] = argument; - if (operand == nullptr) { - return {}; - } - - std::string expr = ", "; - switch (type) { - case Type::Int: - if (const auto immediate = std::get_if<ImmediateNode>(&*operand)) { - // Inline the string as an immediate integer in GLSL (some extra arguments are - // required to be constant) - expr += std::to_string(static_cast<s32>(immediate->GetValue())); - } else { - expr += Visit(operand).AsInt(); - } - break; - case Type::Float: - expr += Visit(operand).AsFloat(); - break; - default: { - const auto type_int = static_cast<u32>(type); - UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int); - expr += '0'; - break; - } - } - return expr; - } - - std::string ReadTextureOffset(const Node& value) { - if (const auto immediate = std::get_if<ImmediateNode>(&*value)) { - // Inline the string as an immediate integer in GLSL (AOFFI arguments are required - // to be constant by the standard). - return std::to_string(static_cast<s32>(immediate->GetValue())); - } else if (device.HasVariableAoffi()) { - // Avoid using variable AOFFI on unsupported devices. - return Visit(value).AsInt(); - } else { - // Insert 0 on devices not supporting variable AOFFI. - return "0"; - } - } - - std::string GenerateTextureAoffi(const std::vector<Node>& aoffi) { - if (aoffi.empty()) { - return {}; - } - constexpr std::array coord_constructors = {"int", "ivec2", "ivec3"}; - std::string expr = ", "; - expr += coord_constructors.at(aoffi.size() - 1); - expr += '('; - - for (std::size_t index = 0; index < aoffi.size(); ++index) { - expr += ReadTextureOffset(aoffi.at(index)); - if (index + 1 < aoffi.size()) { - expr += ", "; - } - } - expr += ')'; - - return expr; - } - - std::string GenerateTexturePtp(const std::vector<Node>& ptp) { - static constexpr std::size_t num_vectors = 4; - ASSERT(ptp.size() == num_vectors * 2); - - std::string expr = ", ivec2[]("; - for (std::size_t vector = 0; vector < num_vectors; ++vector) { - const bool has_next = vector + 1 < num_vectors; - expr += fmt::format("ivec2({}, {}){}", ReadTextureOffset(ptp.at(vector * 2)), - ReadTextureOffset(ptp.at(vector * 2 + 1)), has_next ? 
", " : ""); - } - expr += ')'; - return expr; - } - - std::string GenerateTextureDerivates(const std::vector<Node>& derivates) { - if (derivates.empty()) { - return {}; - } - constexpr std::array coord_constructors = {"float", "vec2", "vec3"}; - std::string expr = ", "; - const std::size_t components = derivates.size() / 2; - std::string dx = coord_constructors.at(components - 1); - std::string dy = coord_constructors.at(components - 1); - dx += '('; - dy += '('; - - for (std::size_t index = 0; index < components; ++index) { - const auto& operand_x{derivates.at(index * 2)}; - const auto& operand_y{derivates.at(index * 2 + 1)}; - dx += Visit(operand_x).AsFloat(); - dy += Visit(operand_y).AsFloat(); - - if (index + 1 < components) { - dx += ", "; - dy += ", "; - } - } - dx += ')'; - dy += ')'; - expr += dx + ", " + dy; - - return expr; - } - - std::string BuildIntegerCoordinates(Operation operation) { - constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("}; - const std::size_t coords_count{operation.GetOperandsCount()}; - std::string expr = constructors.at(coords_count - 1); - for (std::size_t i = 0; i < coords_count; ++i) { - expr += VisitOperand(operation, i).AsInt(); - if (i + 1 < coords_count) { - expr += ", "; - } - } - expr += ')'; - return expr; - } - - std::string BuildImageValues(Operation operation) { - constexpr std::array constructors{"uint", "uvec2", "uvec3", "uvec4"}; - const auto& meta{std::get<MetaImage>(operation.GetMeta())}; - - const std::size_t values_count{meta.values.size()}; - std::string expr = fmt::format("{}(", constructors.at(values_count - 1)); - for (std::size_t i = 0; i < values_count; ++i) { - expr += Visit(meta.values.at(i)).AsUint(); - if (i + 1 < values_count) { - expr += ", "; - } - } - expr += ')'; - return expr; - } - - Expression Assign(Operation operation) { - const Node& dest = operation[0]; - const Node& src = operation[1]; - - Expression target; - if (const auto gpr = std::get_if<GprNode>(&*dest)) { - if (gpr->GetIndex() == Register::ZeroIndex) { - // Writing to Register::ZeroIndex is a no op but we still have to visit the source - // as it might have side effects. 
- code.AddLine("{};", Visit(src).GetCode()); - return {}; - } - target = {GetRegister(gpr->GetIndex()), Type::Float}; - } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) { - UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer()); - auto output = GetOutputAttribute(abuf); - if (!output) { - return {}; - } - target = std::move(*output); - } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { - target = { - fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()), - Type::Uint}; - } else if (const auto smem = std::get_if<SmemNode>(&*dest)) { - ASSERT(stage == ShaderType::Compute); - target = {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint}; - } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { - const std::string real = Visit(gmem->GetRealAddress()).AsUint(); - const std::string base = Visit(gmem->GetBaseAddress()).AsUint(); - const std::string final_offset = fmt::format("({} - {}) >> 2", real, base); - target = {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset), - Type::Uint}; - } else if (const auto cv = std::get_if<CustomVarNode>(&*dest)) { - target = {GetCustomVariable(cv->GetIndex()), Type::Float}; - } else { - UNREACHABLE_MSG("Assign called without a proper target"); - } - - code.AddLine("{} = {};", target.GetCode(), Visit(src).As(target.GetType())); - return {}; - } - - template <Type type> - Expression Add(Operation operation) { - return GenerateBinaryInfix(operation, "+", type, type, type); - } - - template <Type type> - Expression Mul(Operation operation) { - return GenerateBinaryInfix(operation, "*", type, type, type); - } - - template <Type type> - Expression Div(Operation operation) { - return GenerateBinaryInfix(operation, "/", type, type, type); - } - - template <Type type> - Expression Fma(Operation operation) { - return GenerateTernary(operation, "fma", type, type, type, type); - } - - template <Type type> - Expression Negate(Operation operation) { - return GenerateUnary(operation, "-", type, type); - } - - template <Type type> - Expression Absolute(Operation operation) { - return GenerateUnary(operation, "abs", type, type); - } - - Expression FClamp(Operation operation) { - return GenerateTernary(operation, "clamp", Type::Float, Type::Float, Type::Float, - Type::Float); - } - - Expression FCastHalf0(Operation operation) { - return {fmt::format("({})[0]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float}; - } - - Expression FCastHalf1(Operation operation) { - return {fmt::format("({})[1]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float}; - } - - template <Type type> - Expression Min(Operation operation) { - return GenerateBinaryCall(operation, "min", type, type, type); - } - - template <Type type> - Expression Max(Operation operation) { - return GenerateBinaryCall(operation, "max", type, type, type); - } - - Expression Select(Operation operation) { - const std::string condition = Visit(operation[0]).AsBool(); - const std::string true_case = Visit(operation[1]).AsUint(); - const std::string false_case = Visit(operation[2]).AsUint(); - std::string op_str = fmt::format("({} ? 
{} : {})", condition, true_case, false_case); - - return ApplyPrecise(operation, std::move(op_str), Type::Uint); - } - - Expression FCos(Operation operation) { - return GenerateUnary(operation, "cos", Type::Float, Type::Float); - } - - Expression FSin(Operation operation) { - return GenerateUnary(operation, "sin", Type::Float, Type::Float); - } - - Expression FExp2(Operation operation) { - return GenerateUnary(operation, "exp2", Type::Float, Type::Float); - } - - Expression FLog2(Operation operation) { - return GenerateUnary(operation, "log2", Type::Float, Type::Float); - } - - Expression FInverseSqrt(Operation operation) { - return GenerateUnary(operation, "inversesqrt", Type::Float, Type::Float); - } - - Expression FSqrt(Operation operation) { - return GenerateUnary(operation, "sqrt", Type::Float, Type::Float); - } - - Expression FRoundEven(Operation operation) { - return GenerateUnary(operation, "roundEven", Type::Float, Type::Float); - } - - Expression FFloor(Operation operation) { - return GenerateUnary(operation, "floor", Type::Float, Type::Float); - } - - Expression FCeil(Operation operation) { - return GenerateUnary(operation, "ceil", Type::Float, Type::Float); - } - - Expression FTrunc(Operation operation) { - return GenerateUnary(operation, "trunc", Type::Float, Type::Float); - } - - template <Type type> - Expression FCastInteger(Operation operation) { - return GenerateUnary(operation, "float", Type::Float, type); - } - - Expression FSwizzleAdd(Operation operation) { - const std::string op_a = VisitOperand(operation, 0).AsFloat(); - const std::string op_b = VisitOperand(operation, 1).AsFloat(); - - if (!device.HasShaderBallot()) { - LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader"); - return {fmt::format("{} + {}", op_a, op_b), Type::Float}; - } - - const std::string instr_mask = VisitOperand(operation, 2).AsUint(); - const std::string mask = code.GenerateTemporary(); - code.AddLine("uint {} = ({} >> ((gl_SubGroupInvocationARB & 3) << 1)) & 3;", mask, - instr_mask); - - const std::string modifier_a = fmt::format("fswzadd_modifiers_a[{}]", mask); - const std::string modifier_b = fmt::format("fswzadd_modifiers_b[{}]", mask); - return {fmt::format("(({} * {}) + ({} * {}))", op_a, modifier_a, op_b, modifier_b), - Type::Float}; - } - - Expression ICastFloat(Operation operation) { - return GenerateUnary(operation, "int", Type::Int, Type::Float); - } - - Expression ICastUnsigned(Operation operation) { - return GenerateUnary(operation, "int", Type::Int, Type::Uint); - } - - template <Type type> - Expression LogicalShiftLeft(Operation operation) { - return GenerateBinaryInfix(operation, "<<", type, type, Type::Uint); - } - - Expression ILogicalShiftRight(Operation operation) { - const std::string op_a = VisitOperand(operation, 0).AsUint(); - const std::string op_b = VisitOperand(operation, 1).AsUint(); - std::string op_str = fmt::format("int({} >> {})", op_a, op_b); - - return ApplyPrecise(operation, std::move(op_str), Type::Int); - } - - Expression IArithmeticShiftRight(Operation operation) { - return GenerateBinaryInfix(operation, ">>", Type::Int, Type::Int, Type::Uint); - } - - template <Type type> - Expression BitwiseAnd(Operation operation) { - return GenerateBinaryInfix(operation, "&", type, type, type); - } - - template <Type type> - Expression BitwiseOr(Operation operation) { - return GenerateBinaryInfix(operation, "|", type, type, type); - } - - template <Type type> - Expression BitwiseXor(Operation operation) { - return 
GenerateBinaryInfix(operation, "^", type, type, type); - } - - template <Type type> - Expression BitwiseNot(Operation operation) { - return GenerateUnary(operation, "~", type, type); - } - - Expression UCastFloat(Operation operation) { - return GenerateUnary(operation, "uint", Type::Uint, Type::Float); - } - - Expression UCastSigned(Operation operation) { - return GenerateUnary(operation, "uint", Type::Uint, Type::Int); - } - - Expression UShiftRight(Operation operation) { - return GenerateBinaryInfix(operation, ">>", Type::Uint, Type::Uint, Type::Uint); - } - - template <Type type> - Expression BitfieldInsert(Operation operation) { - return GenerateQuaternary(operation, "bitfieldInsert", type, type, type, Type::Int, - Type::Int); - } - - template <Type type> - Expression BitfieldExtract(Operation operation) { - return GenerateTernary(operation, "bitfieldExtract", type, type, Type::Int, Type::Int); - } - - template <Type type> - Expression BitCount(Operation operation) { - return GenerateUnary(operation, "bitCount", type, type); - } - - template <Type type> - Expression BitMSB(Operation operation) { - return GenerateUnary(operation, "findMSB", type, type); - } - - Expression HNegate(Operation operation) { - const auto GetNegate = [&](std::size_t index) { - return VisitOperand(operation, index).AsBool() + " ? -1 : 1"; - }; - return {fmt::format("({} * vec2({}, {}))", VisitOperand(operation, 0).AsHalfFloat(), - GetNegate(1), GetNegate(2)), - Type::HalfFloat}; - } - - Expression HClamp(Operation operation) { - const std::string value = VisitOperand(operation, 0).AsHalfFloat(); - const std::string min = VisitOperand(operation, 1).AsFloat(); - const std::string max = VisitOperand(operation, 2).AsFloat(); - std::string clamped = fmt::format("clamp({}, vec2({}), vec2({}))", value, min, max); - - return ApplyPrecise(operation, std::move(clamped), Type::HalfFloat); - } - - Expression HCastFloat(Operation operation) { - return {fmt::format("vec2({}, 0.0f)", VisitOperand(operation, 0).AsFloat()), - Type::HalfFloat}; - } - - Expression HUnpack(Operation operation) { - Expression operand = VisitOperand(operation, 0); - switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) { - case Tegra::Shader::HalfType::H0_H1: - return operand; - case Tegra::Shader::HalfType::F32: - return {fmt::format("vec2({})", operand.AsFloat()), Type::HalfFloat}; - case Tegra::Shader::HalfType::H0_H0: - return {fmt::format("vec2({}[0])", operand.AsHalfFloat()), Type::HalfFloat}; - case Tegra::Shader::HalfType::H1_H1: - return {fmt::format("vec2({}[1])", operand.AsHalfFloat()), Type::HalfFloat}; - } - UNREACHABLE(); - return {"0", Type::Int}; - } - - Expression HMergeF32(Operation operation) { - return {fmt::format("float({}[0])", VisitOperand(operation, 0).AsHalfFloat()), Type::Float}; - } - - Expression HMergeH0(Operation operation) { - const std::string dest = VisitOperand(operation, 0).AsUint(); - const std::string src = VisitOperand(operation, 1).AsUint(); - return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", src, dest), - Type::HalfFloat}; - } - - Expression HMergeH1(Operation operation) { - const std::string dest = VisitOperand(operation, 0).AsUint(); - const std::string src = VisitOperand(operation, 1).AsUint(); - return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", dest, src), - Type::HalfFloat}; - } - - Expression HPack2(Operation operation) { - return {fmt::format("vec2({}, {})", VisitOperand(operation, 0).AsFloat(), - VisitOperand(operation, 1).AsFloat()), - 
Type::HalfFloat}; - } - - template <const std::string_view& op, Type type, bool unordered = false> - Expression Comparison(Operation operation) { - static_assert(!unordered || type == Type::Float); - - Expression expr = GenerateBinaryInfix(operation, op, Type::Bool, type, type); - - if constexpr (op.compare("!=") == 0 && type == Type::Float && !unordered) { - // GLSL's operator!=(float, float) doesn't seem to be ordered. This happens on both AMD's - // and Nvidia's proprietary stacks. Manually force an ordered comparison. - return {fmt::format("({} && !isnan({}) && !isnan({}))", expr.AsBool(), - VisitOperand(operation, 0).AsFloat(), - VisitOperand(operation, 1).AsFloat()), - Type::Bool}; - } - if constexpr (!unordered) { - return expr; - } - // Unordered comparisons are always true for NaN operands. - return {fmt::format("({} || isnan({}) || isnan({}))", expr.AsBool(), - VisitOperand(operation, 0).AsFloat(), - VisitOperand(operation, 1).AsFloat()), - Type::Bool}; - } - - Expression FOrdered(Operation operation) { - return {fmt::format("(!isnan({}) && !isnan({}))", VisitOperand(operation, 0).AsFloat(), - VisitOperand(operation, 1).AsFloat()), - Type::Bool}; - } - - Expression FUnordered(Operation operation) { - return {fmt::format("(isnan({}) || isnan({}))", VisitOperand(operation, 0).AsFloat(), - VisitOperand(operation, 1).AsFloat()), - Type::Bool}; - } - - Expression LogicalAddCarry(Operation operation) { - const std::string carry = code.GenerateTemporary(); - code.AddLine("uint {};", carry); - code.AddLine("uaddCarry({}, {}, {});", VisitOperand(operation, 0).AsUint(), - VisitOperand(operation, 1).AsUint(), carry); - return {fmt::format("({} != 0)", carry), Type::Bool}; - } - - Expression LogicalAssign(Operation operation) { - const Node& dest = operation[0]; - const Node& src = operation[1]; - - std::string target; - - if (const auto pred = std::get_if<PredicateNode>(&*dest)) { - ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment"); - - const auto index = pred->GetIndex(); - switch (index) { - case Tegra::Shader::Pred::NeverExecute: - case Tegra::Shader::Pred::UnusedIndex: - // Writing to these predicates is a no-op - return {}; - } - target = GetPredicate(index); - } else if (const auto flag = std::get_if<InternalFlagNode>(&*dest)) { - target = GetInternalFlag(flag->GetFlag()); - } - - code.AddLine("{} = {};", target, Visit(src).AsBool()); - return {}; - } - - Expression LogicalAnd(Operation operation) { - return GenerateBinaryInfix(operation, "&&", Type::Bool, Type::Bool, Type::Bool); - } - - Expression LogicalOr(Operation operation) { - return GenerateBinaryInfix(operation, "||", Type::Bool, Type::Bool, Type::Bool); - } - - Expression LogicalXor(Operation operation) { - return GenerateBinaryInfix(operation, "^^", Type::Bool, Type::Bool, Type::Bool); - } - - Expression LogicalNegate(Operation operation) { - return GenerateUnary(operation, "!", Type::Bool, Type::Bool); - } - - Expression LogicalPick2(Operation operation) { - return {fmt::format("{}[{}]", VisitOperand(operation, 0).AsBool2(), - VisitOperand(operation, 1).AsUint()), - Type::Bool}; - } - - Expression LogicalAnd2(Operation operation) { - return GenerateUnary(operation, "all", Type::Bool, Type::Bool2); - } - - template <bool with_nan> - Expression GenerateHalfComparison(Operation operation, std::string_view compare_op) { - Expression comparison = GenerateBinaryCall(operation, compare_op, Type::Bool2, - Type::HalfFloat, Type::HalfFloat); - if constexpr (!with_nan) { - return comparison; - } - return
{fmt::format("HalfFloatNanComparison({}, {}, {})", comparison.AsBool2(), - VisitOperand(operation, 0).AsHalfFloat(), - VisitOperand(operation, 1).AsHalfFloat()), - Type::Bool2}; - } - - template <bool with_nan> - Expression Logical2HLessThan(Operation operation) { - return GenerateHalfComparison<with_nan>(operation, "lessThan"); - } - - template <bool with_nan> - Expression Logical2HEqual(Operation operation) { - return GenerateHalfComparison<with_nan>(operation, "equal"); - } - - template <bool with_nan> - Expression Logical2HLessEqual(Operation operation) { - return GenerateHalfComparison<with_nan>(operation, "lessThanEqual"); - } - - template <bool with_nan> - Expression Logical2HGreaterThan(Operation operation) { - return GenerateHalfComparison<with_nan>(operation, "greaterThan"); - } - - template <bool with_nan> - Expression Logical2HNotEqual(Operation operation) { - return GenerateHalfComparison<with_nan>(operation, "notEqual"); - } - - template <bool with_nan> - Expression Logical2HGreaterEqual(Operation operation) { - return GenerateHalfComparison<with_nan>(operation, "greaterThanEqual"); - } - - Expression Texture(Operation operation) { - const auto meta = std::get<MetaTexture>(operation.GetMeta()); - const bool separate_dc = meta.sampler.type == TextureType::TextureCube && - meta.sampler.is_array && meta.sampler.is_shadow; - // TODO: Replace this with an array and make GenerateTexture use C++20 std::span - const std::vector<TextureIR> extras{ - TextureOffset{}, - TextureArgument{Type::Float, meta.bias}, - }; - std::string expr = GenerateTexture(operation, "", extras, separate_dc); - if (meta.sampler.is_shadow) { - expr = fmt::format("vec4({})", expr); - } - return {expr + GetSwizzle(meta.element), Type::Float}; - } - - Expression TextureLod(Operation operation) { - const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); - ASSERT(meta); - - std::string expr{}; - - if (!device.HasTextureShadowLod() && meta->sampler.is_shadow && - ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) || - meta->sampler.type == TextureType::TextureCube)) { - LOG_ERROR(Render_OpenGL, - "Device lacks GL_EXT_texture_shadow_lod, using textureGrad as a workaround"); - expr = GenerateTexture(operation, "Lod", {}); - } else { - expr = GenerateTexture(operation, "Lod", - {TextureArgument{Type::Float, meta->lod}, TextureOffset{}}); - } - - if (meta->sampler.is_shadow) { - expr = "vec4(" + expr + ')'; - } - return {expr + GetSwizzle(meta->element), Type::Float}; - } - - Expression TextureGather(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - - const auto type = meta.sampler.is_shadow ? 
Type::Float : Type::Int; - const bool separate_dc = meta.sampler.is_shadow; - - std::vector<TextureIR> ir_; - if (meta.sampler.is_shadow) { - ir_ = {TextureOffset{}}; - } else { - ir_ = {TextureOffset{}, TextureArgument{type, meta.component}}; - } - return {GenerateTexture(operation, "Gather", ir_, separate_dc) + GetSwizzle(meta.element), - Type::Float}; - } - - Expression TextureQueryDimensions(Operation operation) { - const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); - ASSERT(meta); - - const std::string sampler = GetSampler(meta->sampler); - const std::string lod = VisitOperand(operation, 0).AsInt(); - - switch (meta->element) { - case 0: - case 1: - return {fmt::format("textureSize({}, {}){}", sampler, lod, GetSwizzle(meta->element)), - Type::Int}; - case 3: - return {fmt::format("textureQueryLevels({})", sampler), Type::Int}; - } - UNREACHABLE(); - return {"0", Type::Int}; - } - - Expression TextureQueryLod(Operation operation) { - const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); - ASSERT(meta); - - if (meta->element < 2) { - return {fmt::format("int(({} * vec2(256)){})", - GenerateTexture(operation, "QueryLod", {}), - GetSwizzle(meta->element)), - Type::Int}; - } - return {"0", Type::Int}; - } - - Expression TexelFetch(Operation operation) { - constexpr std::array constructors = {"int", "ivec2", "ivec3", "ivec4"}; - const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); - ASSERT(meta); - UNIMPLEMENTED_IF(meta->sampler.is_array); - const std::size_t count = operation.GetOperandsCount(); - - std::string expr = "texelFetch("; - expr += GetSampler(meta->sampler); - expr += ", "; - - expr += constructors.at(operation.GetOperandsCount() + (meta->array ? 1 : 0) - 1); - expr += '('; - for (std::size_t i = 0; i < count; ++i) { - if (i > 0) { - expr += ", "; - } - expr += VisitOperand(operation, i).AsInt(); - } - if (meta->array) { - expr += ", "; - expr += Visit(meta->array).AsInt(); - } - expr += ')'; - - if (meta->lod && !meta->sampler.is_buffer) { - expr += ", "; - expr += Visit(meta->lod).AsInt(); - } - expr += ')'; - expr += GetSwizzle(meta->element); - - return {std::move(expr), Type::Float}; - } - - Expression TextureGradient(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - std::string expr = - GenerateTexture(operation, "Grad", {TextureDerivates{}, TextureOffset{}}); - return {std::move(expr) + GetSwizzle(meta.element), Type::Float}; - } - - Expression ImageLoad(Operation operation) { - if (!device.HasImageLoadFormatted()) { - LOG_ERROR(Render_OpenGL, - "Device lacks GL_EXT_shader_image_load_formatted, stubbing image load"); - return {"0", Type::Int}; - } - - const auto& meta{std::get<MetaImage>(operation.GetMeta())}; - return {fmt::format("imageLoad({}, {}){}", GetImage(meta.image), - BuildIntegerCoordinates(operation), GetSwizzle(meta.element)), - Type::Uint}; - } - - Expression ImageStore(Operation operation) { - const auto& meta{std::get<MetaImage>(operation.GetMeta())}; - code.AddLine("imageStore({}, {}, {});", GetImage(meta.image), - BuildIntegerCoordinates(operation), BuildImageValues(operation)); - return {}; - } - - template <const std::string_view& opname> - Expression AtomicImage(Operation operation) { - const auto& meta{std::get<MetaImage>(operation.GetMeta())}; - ASSERT(meta.values.size() == 1); - - return {fmt::format("imageAtomic{}({}, {}, {})", opname, GetImage(meta.image), - BuildIntegerCoordinates(operation), Visit(meta.values[0]).AsUint()), - Type::Uint}; - } - - template <const 
std::string_view& opname, Type type> - Expression Atomic(Operation operation) { - if ((opname == Func::Min || opname == Func::Max) && type == Type::Int) { - UNIMPLEMENTED_MSG("Unimplemented Min & Max for atomic operations"); - return {}; - } - return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(), - Visit(operation[1]).AsUint()), - Type::Uint}; - } - - template <const std::string_view& opname, Type type> - Expression Reduce(Operation operation) { - code.AddLine("{};", Atomic<opname, type>(operation).GetCode()); - return {}; - } - - Expression Branch(Operation operation) { - const auto target = std::get_if<ImmediateNode>(&*operation[0]); - UNIMPLEMENTED_IF(!target); - - code.AddLine("jmp_to = 0x{:X}U;", target->GetValue()); - code.AddLine("break;"); - return {}; - } - - Expression BranchIndirect(Operation operation) { - const std::string op_a = VisitOperand(operation, 0).AsUint(); - - code.AddLine("jmp_to = {};", op_a); - code.AddLine("break;"); - return {}; - } - - Expression PushFlowStack(Operation operation) { - const auto stack = std::get<MetaStackClass>(operation.GetMeta()); - const auto target = std::get_if<ImmediateNode>(&*operation[0]); - UNIMPLEMENTED_IF(!target); - - code.AddLine("{}[{}++] = 0x{:X}U;", FlowStackName(stack), FlowStackTopName(stack), - target->GetValue()); - return {}; - } - - Expression PopFlowStack(Operation operation) { - const auto stack = std::get<MetaStackClass>(operation.GetMeta()); - code.AddLine("jmp_to = {}[--{}];", FlowStackName(stack), FlowStackTopName(stack)); - code.AddLine("break;"); - return {}; - } - - void PreExit() { - if (stage != ShaderType::Fragment) { - return; - } - const auto& used_registers = ir.GetRegisters(); - const auto SafeGetRegister = [&](u32 reg) -> Expression { - // TODO(Rodrigo): Replace with contains once C++20 releases - if (used_registers.find(reg) != used_registers.end()) { - return {GetRegister(reg), Type::Float}; - } - return {"0.0f", Type::Float}; - }; - - UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Sample mask write is unimplemented"); - - // Write the color outputs using the data in the shader registers, disabled - // rendertargets/components are skipped in the register assignment. - u32 current_reg = 0; - for (u32 render_target = 0; render_target < Maxwell::NumRenderTargets; ++render_target) { - // TODO(Subv): Figure out how dual-source blending is configured in the Switch. - for (u32 component = 0; component < 4; ++component) { - if (header.ps.IsColorComponentOutputEnabled(render_target, component)) { - code.AddLine("frag_color{}{} = {};", render_target, GetColorSwizzle(component), - SafeGetRegister(current_reg).AsFloat()); - ++current_reg; - } - } - } - if (header.ps.omap.depth) { - // The depth output is always 2 registers after the last color output, and current_reg - // already contains one past the last color register. - code.AddLine("gl_FragDepth = {};", SafeGetRegister(current_reg + 1).AsFloat()); - } - } - - Expression Exit(Operation operation) { - PreExit(); - code.AddLine("return;"); - return {}; - } - - Expression Discard(Operation operation) { - // Enclose "discard" in a conditional, so that GLSL compilation does not complain - // about unexecuted instructions that may follow this. 
- code.AddLine("if (true) {{"); - ++code.scope; - code.AddLine("discard;"); - --code.scope; - code.AddLine("}}"); - return {}; - } - - Expression EmitVertex(Operation operation) { - ASSERT_MSG(stage == ShaderType::Geometry, - "EmitVertex is expected to be used in a geometry shader."); - code.AddLine("EmitVertex();"); - return {}; - } - - Expression EndPrimitive(Operation operation) { - ASSERT_MSG(stage == ShaderType::Geometry, - "EndPrimitive is expected to be used in a geometry shader."); - code.AddLine("EndPrimitive();"); - return {}; - } - - Expression InvocationId(Operation operation) { - return {"gl_InvocationID", Type::Int}; - } - - Expression YNegate(Operation operation) { - // Y_NEGATE is mapped to this uniform value - return {"gl_FrontMaterial.ambient.a", Type::Float}; - } - - template <u32 element> - Expression LocalInvocationId(Operation) { - return {"gl_LocalInvocationID"s + GetSwizzle(element), Type::Uint}; - } - - template <u32 element> - Expression WorkGroupId(Operation) { - return {"gl_WorkGroupID"s + GetSwizzle(element), Type::Uint}; - } - - Expression BallotThread(Operation operation) { - const std::string value = VisitOperand(operation, 0).AsBool(); - if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader"); - // Stub on non-Nvidia devices by simulating all threads voting the same as the active - // one. - return {fmt::format("({} ? 0xFFFFFFFFU : 0U)", value), Type::Uint}; - } - return {fmt::format("ballotThreadNV({})", value), Type::Uint}; - } - - Expression Vote(Operation operation, const char* func) { - const std::string value = VisitOperand(operation, 0).AsBool(); - if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader"); - // Stub with a warp size of one. - return {value, Type::Bool}; - } - return {fmt::format("{}({})", func, value), Type::Bool}; - } - - Expression VoteAll(Operation operation) { - return Vote(operation, "allThreadsNV"); - } - - Expression VoteAny(Operation operation) { - return Vote(operation, "anyThreadNV"); - } - - Expression VoteEqual(Operation operation) { - if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader"); - // We must return true here since a stub for a theoretical warp size of 1. - // This will always return an equal result across all votes. 
- return {"true", Type::Bool}; - } - return Vote(operation, "allThreadsEqualNV"); - } - - Expression ThreadId(Operation operation) { - if (!device.HasShaderBallot()) { - LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader"); - return {"0U", Type::Uint}; - } - return {"gl_SubGroupInvocationARB", Type::Uint}; - } - - template <const std::string_view& comparison> - Expression ThreadMask(Operation) { - if (device.HasWarpIntrinsics()) { - return {fmt::format("gl_Thread{}MaskNV", comparison), Type::Uint}; - } - if (device.HasShaderBallot()) { - return {fmt::format("uint(gl_SubGroup{}MaskARB)", comparison), Type::Uint}; - } - LOG_ERROR(Render_OpenGL, "Thread mask intrinsics are required by the shader"); - return {"0U", Type::Uint}; - } - - Expression ShuffleIndexed(Operation operation) { - std::string value = VisitOperand(operation, 0).AsFloat(); - - if (!device.HasShaderBallot()) { - LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader"); - return {std::move(value), Type::Float}; - } - - const std::string index = VisitOperand(operation, 1).AsUint(); - return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float}; - } - - Expression Barrier(Operation) { - if (!ir.IsDecompiled()) { - LOG_ERROR(Render_OpenGL, "barrier() used but shader is not decompiled"); - return {}; - } - code.AddLine("barrier();"); - return {}; - } - - Expression MemoryBarrierGroup(Operation) { - code.AddLine("groupMemoryBarrier();"); - return {}; - } - - Expression MemoryBarrierGlobal(Operation) { - code.AddLine("memoryBarrier();"); - return {}; - } - - struct Func final { - Func() = delete; - ~Func() = delete; - - static constexpr std::string_view LessThan = "<"; - static constexpr std::string_view Equal = "=="; - static constexpr std::string_view LessEqual = "<="; - static constexpr std::string_view GreaterThan = ">"; - static constexpr std::string_view NotEqual = "!="; - static constexpr std::string_view GreaterEqual = ">="; - - static constexpr std::string_view Eq = "Eq"; - static constexpr std::string_view Ge = "Ge"; - static constexpr std::string_view Gt = "Gt"; - static constexpr std::string_view Le = "Le"; - static constexpr std::string_view Lt = "Lt"; - - static constexpr std::string_view Add = "Add"; - static constexpr std::string_view Min = "Min"; - static constexpr std::string_view Max = "Max"; - static constexpr std::string_view And = "And"; - static constexpr std::string_view Or = "Or"; - static constexpr std::string_view Xor = "Xor"; - static constexpr std::string_view Exchange = "Exchange"; - }; - - static constexpr std::array operation_decompilers = { - &GLSLDecompiler::Assign, - - &GLSLDecompiler::Select, - - &GLSLDecompiler::Add<Type::Float>, - &GLSLDecompiler::Mul<Type::Float>, - &GLSLDecompiler::Div<Type::Float>, - &GLSLDecompiler::Fma<Type::Float>, - &GLSLDecompiler::Negate<Type::Float>, - &GLSLDecompiler::Absolute<Type::Float>, - &GLSLDecompiler::FClamp, - &GLSLDecompiler::FCastHalf0, - &GLSLDecompiler::FCastHalf1, - &GLSLDecompiler::Min<Type::Float>, - &GLSLDecompiler::Max<Type::Float>, - &GLSLDecompiler::FCos, - &GLSLDecompiler::FSin, - &GLSLDecompiler::FExp2, - &GLSLDecompiler::FLog2, - &GLSLDecompiler::FInverseSqrt, - &GLSLDecompiler::FSqrt, - &GLSLDecompiler::FRoundEven, - &GLSLDecompiler::FFloor, - &GLSLDecompiler::FCeil, - &GLSLDecompiler::FTrunc, - &GLSLDecompiler::FCastInteger<Type::Int>, - &GLSLDecompiler::FCastInteger<Type::Uint>, - &GLSLDecompiler::FSwizzleAdd, - - &GLSLDecompiler::Add<Type::Int>, - 
&GLSLDecompiler::Mul<Type::Int>, - &GLSLDecompiler::Div<Type::Int>, - &GLSLDecompiler::Negate<Type::Int>, - &GLSLDecompiler::Absolute<Type::Int>, - &GLSLDecompiler::Min<Type::Int>, - &GLSLDecompiler::Max<Type::Int>, - - &GLSLDecompiler::ICastFloat, - &GLSLDecompiler::ICastUnsigned, - &GLSLDecompiler::LogicalShiftLeft<Type::Int>, - &GLSLDecompiler::ILogicalShiftRight, - &GLSLDecompiler::IArithmeticShiftRight, - &GLSLDecompiler::BitwiseAnd<Type::Int>, - &GLSLDecompiler::BitwiseOr<Type::Int>, - &GLSLDecompiler::BitwiseXor<Type::Int>, - &GLSLDecompiler::BitwiseNot<Type::Int>, - &GLSLDecompiler::BitfieldInsert<Type::Int>, - &GLSLDecompiler::BitfieldExtract<Type::Int>, - &GLSLDecompiler::BitCount<Type::Int>, - &GLSLDecompiler::BitMSB<Type::Int>, - - &GLSLDecompiler::Add<Type::Uint>, - &GLSLDecompiler::Mul<Type::Uint>, - &GLSLDecompiler::Div<Type::Uint>, - &GLSLDecompiler::Min<Type::Uint>, - &GLSLDecompiler::Max<Type::Uint>, - &GLSLDecompiler::UCastFloat, - &GLSLDecompiler::UCastSigned, - &GLSLDecompiler::LogicalShiftLeft<Type::Uint>, - &GLSLDecompiler::UShiftRight, - &GLSLDecompiler::UShiftRight, - &GLSLDecompiler::BitwiseAnd<Type::Uint>, - &GLSLDecompiler::BitwiseOr<Type::Uint>, - &GLSLDecompiler::BitwiseXor<Type::Uint>, - &GLSLDecompiler::BitwiseNot<Type::Uint>, - &GLSLDecompiler::BitfieldInsert<Type::Uint>, - &GLSLDecompiler::BitfieldExtract<Type::Uint>, - &GLSLDecompiler::BitCount<Type::Uint>, - &GLSLDecompiler::BitMSB<Type::Uint>, - - &GLSLDecompiler::Add<Type::HalfFloat>, - &GLSLDecompiler::Mul<Type::HalfFloat>, - &GLSLDecompiler::Fma<Type::HalfFloat>, - &GLSLDecompiler::Absolute<Type::HalfFloat>, - &GLSLDecompiler::HNegate, - &GLSLDecompiler::HClamp, - &GLSLDecompiler::HCastFloat, - &GLSLDecompiler::HUnpack, - &GLSLDecompiler::HMergeF32, - &GLSLDecompiler::HMergeH0, - &GLSLDecompiler::HMergeH1, - &GLSLDecompiler::HPack2, - - &GLSLDecompiler::LogicalAssign, - &GLSLDecompiler::LogicalAnd, - &GLSLDecompiler::LogicalOr, - &GLSLDecompiler::LogicalXor, - &GLSLDecompiler::LogicalNegate, - &GLSLDecompiler::LogicalPick2, - &GLSLDecompiler::LogicalAnd2, - - &GLSLDecompiler::Comparison<Func::LessThan, Type::Float, false>, - &GLSLDecompiler::Comparison<Func::Equal, Type::Float, false>, - &GLSLDecompiler::Comparison<Func::LessEqual, Type::Float, false>, - &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Float, false>, - &GLSLDecompiler::Comparison<Func::NotEqual, Type::Float, false>, - &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Float, false>, - &GLSLDecompiler::FOrdered, - &GLSLDecompiler::FUnordered, - &GLSLDecompiler::Comparison<Func::LessThan, Type::Float, true>, - &GLSLDecompiler::Comparison<Func::Equal, Type::Float, true>, - &GLSLDecompiler::Comparison<Func::LessEqual, Type::Float, true>, - &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Float, true>, - &GLSLDecompiler::Comparison<Func::NotEqual, Type::Float, true>, - &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Float, true>, - - &GLSLDecompiler::Comparison<Func::LessThan, Type::Int>, - &GLSLDecompiler::Comparison<Func::Equal, Type::Int>, - &GLSLDecompiler::Comparison<Func::LessEqual, Type::Int>, - &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Int>, - &GLSLDecompiler::Comparison<Func::NotEqual, Type::Int>, - &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Int>, - - &GLSLDecompiler::Comparison<Func::LessThan, Type::Uint>, - &GLSLDecompiler::Comparison<Func::Equal, Type::Uint>, - &GLSLDecompiler::Comparison<Func::LessEqual, Type::Uint>, - &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Uint>, - 
&GLSLDecompiler::Comparison<Func::NotEqual, Type::Uint>, - &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Uint>, - - &GLSLDecompiler::LogicalAddCarry, - - &GLSLDecompiler::Logical2HLessThan<false>, - &GLSLDecompiler::Logical2HEqual<false>, - &GLSLDecompiler::Logical2HLessEqual<false>, - &GLSLDecompiler::Logical2HGreaterThan<false>, - &GLSLDecompiler::Logical2HNotEqual<false>, - &GLSLDecompiler::Logical2HGreaterEqual<false>, - &GLSLDecompiler::Logical2HLessThan<true>, - &GLSLDecompiler::Logical2HEqual<true>, - &GLSLDecompiler::Logical2HLessEqual<true>, - &GLSLDecompiler::Logical2HGreaterThan<true>, - &GLSLDecompiler::Logical2HNotEqual<true>, - &GLSLDecompiler::Logical2HGreaterEqual<true>, - - &GLSLDecompiler::Texture, - &GLSLDecompiler::TextureLod, - &GLSLDecompiler::TextureGather, - &GLSLDecompiler::TextureQueryDimensions, - &GLSLDecompiler::TextureQueryLod, - &GLSLDecompiler::TexelFetch, - &GLSLDecompiler::TextureGradient, - - &GLSLDecompiler::ImageLoad, - &GLSLDecompiler::ImageStore, - - &GLSLDecompiler::AtomicImage<Func::Add>, - &GLSLDecompiler::AtomicImage<Func::And>, - &GLSLDecompiler::AtomicImage<Func::Or>, - &GLSLDecompiler::AtomicImage<Func::Xor>, - &GLSLDecompiler::AtomicImage<Func::Exchange>, - - &GLSLDecompiler::Atomic<Func::Exchange, Type::Uint>, - &GLSLDecompiler::Atomic<Func::Add, Type::Uint>, - &GLSLDecompiler::Atomic<Func::Min, Type::Uint>, - &GLSLDecompiler::Atomic<Func::Max, Type::Uint>, - &GLSLDecompiler::Atomic<Func::And, Type::Uint>, - &GLSLDecompiler::Atomic<Func::Or, Type::Uint>, - &GLSLDecompiler::Atomic<Func::Xor, Type::Uint>, - - &GLSLDecompiler::Atomic<Func::Exchange, Type::Int>, - &GLSLDecompiler::Atomic<Func::Add, Type::Int>, - &GLSLDecompiler::Atomic<Func::Min, Type::Int>, - &GLSLDecompiler::Atomic<Func::Max, Type::Int>, - &GLSLDecompiler::Atomic<Func::And, Type::Int>, - &GLSLDecompiler::Atomic<Func::Or, Type::Int>, - &GLSLDecompiler::Atomic<Func::Xor, Type::Int>, - - &GLSLDecompiler::Reduce<Func::Add, Type::Uint>, - &GLSLDecompiler::Reduce<Func::Min, Type::Uint>, - &GLSLDecompiler::Reduce<Func::Max, Type::Uint>, - &GLSLDecompiler::Reduce<Func::And, Type::Uint>, - &GLSLDecompiler::Reduce<Func::Or, Type::Uint>, - &GLSLDecompiler::Reduce<Func::Xor, Type::Uint>, - - &GLSLDecompiler::Reduce<Func::Add, Type::Int>, - &GLSLDecompiler::Reduce<Func::Min, Type::Int>, - &GLSLDecompiler::Reduce<Func::Max, Type::Int>, - &GLSLDecompiler::Reduce<Func::And, Type::Int>, - &GLSLDecompiler::Reduce<Func::Or, Type::Int>, - &GLSLDecompiler::Reduce<Func::Xor, Type::Int>, - - &GLSLDecompiler::Branch, - &GLSLDecompiler::BranchIndirect, - &GLSLDecompiler::PushFlowStack, - &GLSLDecompiler::PopFlowStack, - &GLSLDecompiler::Exit, - &GLSLDecompiler::Discard, - - &GLSLDecompiler::EmitVertex, - &GLSLDecompiler::EndPrimitive, - - &GLSLDecompiler::InvocationId, - &GLSLDecompiler::YNegate, - &GLSLDecompiler::LocalInvocationId<0>, - &GLSLDecompiler::LocalInvocationId<1>, - &GLSLDecompiler::LocalInvocationId<2>, - &GLSLDecompiler::WorkGroupId<0>, - &GLSLDecompiler::WorkGroupId<1>, - &GLSLDecompiler::WorkGroupId<2>, - - &GLSLDecompiler::BallotThread, - &GLSLDecompiler::VoteAll, - &GLSLDecompiler::VoteAny, - &GLSLDecompiler::VoteEqual, - - &GLSLDecompiler::ThreadId, - &GLSLDecompiler::ThreadMask<Func::Eq>, - &GLSLDecompiler::ThreadMask<Func::Ge>, - &GLSLDecompiler::ThreadMask<Func::Gt>, - &GLSLDecompiler::ThreadMask<Func::Le>, - &GLSLDecompiler::ThreadMask<Func::Lt>, - &GLSLDecompiler::ShuffleIndexed, - - &GLSLDecompiler::Barrier, - &GLSLDecompiler::MemoryBarrierGroup, - 
&GLSLDecompiler::MemoryBarrierGlobal, - }; - static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); - - std::string GetRegister(u32 index) const { - return AppendSuffix(index, "gpr"); - } - - std::string GetCustomVariable(u32 index) const { - return AppendSuffix(index, "custom_var"); - } - - std::string GetPredicate(Tegra::Shader::Pred pred) const { - return AppendSuffix(static_cast<u32>(pred), "pred"); - } - - std::string GetGenericInputAttribute(Attribute::Index attribute) const { - return AppendSuffix(GetGenericAttributeIndex(attribute), INPUT_ATTRIBUTE_NAME); - } - - std::unordered_map<u8, GenericVaryingDescription> varying_description; - - std::string GetGenericOutputAttribute(Attribute::Index attribute, std::size_t element) const { - const u8 offset = static_cast<u8>(GetGenericAttributeIndex(attribute) * 4 + element); - const auto& description = varying_description.at(offset); - if (description.is_scalar) { - return description.name; - } - return fmt::format("{}[{}]", description.name, element - description.first_element); - } - - std::string GetConstBuffer(u32 index) const { - return AppendSuffix(index, "cbuf"); - } - - std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const { - return fmt::format("gmem_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset, suffix); - } - - std::string GetGlobalMemoryBlock(const GlobalMemoryBase& descriptor) const { - return fmt::format("gmem_block_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset, - suffix); - } - - std::string GetConstBufferBlock(u32 index) const { - return AppendSuffix(index, "cbuf_block"); - } - - std::string GetLocalMemory() const { - if (suffix.empty()) { - return "lmem"; - } else { - return "lmem_" + std::string{suffix}; - } - } - - std::string GetInternalFlag(InternalFlag flag) const { - constexpr std::array InternalFlagNames = {"zero_flag", "sign_flag", "carry_flag", - "overflow_flag"}; - const auto index = static_cast<u32>(flag); - ASSERT(index < static_cast<u32>(InternalFlag::Amount)); - - if (suffix.empty()) { - return InternalFlagNames[index]; - } else { - return fmt::format("{}_{}", InternalFlagNames[index], suffix); - } - } - - std::string GetSampler(const SamplerEntry& sampler) const { - return AppendSuffix(sampler.index, "sampler"); - } - - std::string GetImage(const ImageEntry& image) const { - return AppendSuffix(image.index, "image"); - } - - std::string AppendSuffix(u32 index, std::string_view name) const { - if (suffix.empty()) { - return fmt::format("{}{}", name, index); - } else { - return fmt::format("{}{}_{}", name, index, suffix); - } - } - - u32 GetNumPhysicalInputAttributes() const { - return stage == ShaderType::Vertex ? 
GetNumPhysicalAttributes() : GetNumPhysicalVaryings(); - } - - u32 GetNumPhysicalAttributes() const { - return std::min<u32>(device.GetMaxVertexAttributes(), Maxwell::NumVertexAttributes); - } - - u32 GetNumPhysicalVaryings() const { - return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings); - } - - const Device& device; - const ShaderIR& ir; - const Registry& registry; - const ShaderType stage; - const std::string_view identifier; - const std::string_view suffix; - const Header header; - std::unordered_map<u8, VaryingTFB> transform_feedback; - - ShaderWriter code; - - std::optional<u32> max_input_vertices; -}; - -std::string GetFlowVariable(u32 index) { - return fmt::format("flow_var{}", index); -} - -class ExprDecompiler { -public: - explicit ExprDecompiler(GLSLDecompiler& decomp_) : decomp{decomp_} {} - - void operator()(const ExprAnd& expr) { - inner += '('; - std::visit(*this, *expr.operand1); - inner += " && "; - std::visit(*this, *expr.operand2); - inner += ')'; - } - - void operator()(const ExprOr& expr) { - inner += '('; - std::visit(*this, *expr.operand1); - inner += " || "; - std::visit(*this, *expr.operand2); - inner += ')'; - } - - void operator()(const ExprNot& expr) { - inner += '!'; - std::visit(*this, *expr.operand1); - } - - void operator()(const ExprPredicate& expr) { - const auto pred = static_cast<Tegra::Shader::Pred>(expr.predicate); - inner += decomp.GetPredicate(pred); - } - - void operator()(const ExprCondCode& expr) { - inner += decomp.Visit(decomp.ir.GetConditionCode(expr.cc)).AsBool(); - } - - void operator()(const ExprVar& expr) { - inner += GetFlowVariable(expr.var_index); - } - - void operator()(const ExprBoolean& expr) { - inner += expr.value ? "true" : "false"; - } - - void operator()(VideoCommon::Shader::ExprGprEqual& expr) { - inner += fmt::format("(ftou({}) == {})", decomp.GetRegister(expr.gpr), expr.value); - } - - const std::string& GetResult() const { - return inner; - } - -private: - GLSLDecompiler& decomp; - std::string inner; -}; - -class ASTDecompiler { -public: - explicit ASTDecompiler(GLSLDecompiler& decomp_) : decomp{decomp_} {} - - void operator()(const ASTProgram& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(const ASTIfThen& ast) { - ExprDecompiler expr_parser{decomp}; - std::visit(expr_parser, *ast.condition); - decomp.code.AddLine("if ({}) {{", expr_parser.GetResult()); - decomp.code.scope++; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - decomp.code.scope--; - decomp.code.AddLine("}}"); - } - - void operator()(const ASTIfElse& ast) { - decomp.code.AddLine("else {{"); - decomp.code.scope++; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - decomp.code.scope--; - decomp.code.AddLine("}}"); - } - - void operator()([[maybe_unused]] const ASTBlockEncoded& ast) { - UNREACHABLE(); - } - - void operator()(const ASTBlockDecoded& ast) { - decomp.VisitBlock(ast.nodes); - } - - void operator()(const ASTVarSet& ast) { - ExprDecompiler expr_parser{decomp}; - std::visit(expr_parser, *ast.condition); - decomp.code.AddLine("{} = {};", GetFlowVariable(ast.index), expr_parser.GetResult()); - } - - void operator()(const ASTLabel& ast) { - decomp.code.AddLine("// Label_{}:", ast.index); - } - - void operator()([[maybe_unused]] const ASTGoto& ast) { - UNREACHABLE(); - } - - void operator()(const ASTDoWhile& ast) 
{ - ExprDecompiler expr_parser{decomp}; - std::visit(expr_parser, *ast.condition); - decomp.code.AddLine("do {{"); - decomp.code.scope++; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - decomp.code.scope--; - decomp.code.AddLine("}} while({});", expr_parser.GetResult()); - } - - void operator()(const ASTReturn& ast) { - const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition); - if (!is_true) { - ExprDecompiler expr_parser{decomp}; - std::visit(expr_parser, *ast.condition); - decomp.code.AddLine("if ({}) {{", expr_parser.GetResult()); - decomp.code.scope++; - } - if (ast.kills) { - decomp.code.AddLine("discard;"); - } else { - decomp.PreExit(); - decomp.code.AddLine("return;"); - } - if (!is_true) { - decomp.code.scope--; - decomp.code.AddLine("}}"); - } - } - - void operator()(const ASTBreak& ast) { - const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition); - if (!is_true) { - ExprDecompiler expr_parser{decomp}; - std::visit(expr_parser, *ast.condition); - decomp.code.AddLine("if ({}) {{", expr_parser.GetResult()); - decomp.code.scope++; - } - decomp.code.AddLine("break;"); - if (!is_true) { - decomp.code.scope--; - decomp.code.AddLine("}}"); - } - } - - void Visit(const ASTNode& node) { - std::visit(*this, *node->GetInnerData()); - } - -private: - GLSLDecompiler& decomp; -}; - -void GLSLDecompiler::DecompileAST() { - const u32 num_flow_variables = ir.GetASTNumVariables(); - for (u32 i = 0; i < num_flow_variables; i++) { - code.AddLine("bool {} = false;", GetFlowVariable(i)); - } - - ASTDecompiler decompiler{*this}; - decompiler.Visit(ir.GetASTProgram()); -} - -} // Anonymous namespace - -ShaderEntries MakeEntries(const Device& device, const ShaderIR& ir, ShaderType stage) { - ShaderEntries entries; - for (const auto& cbuf : ir.GetConstantBuffers()) { - entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(), - cbuf.first); - } - for (const auto& [base, usage] : ir.GetGlobalMemory()) { - entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, usage.is_read, - usage.is_written); - } - for (const auto& sampler : ir.GetSamplers()) { - entries.samplers.emplace_back(sampler); - } - for (const auto& image : ir.GetImages()) { - entries.images.emplace_back(image); - } - const auto clip_distances = ir.GetClipDistances(); - for (std::size_t i = 0; i < std::size(clip_distances); ++i) { - entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i; - } - for (const auto& buffer : entries.const_buffers) { - entries.enabled_uniform_buffers |= 1U << buffer.GetIndex(); - } - entries.shader_length = ir.GetLength(); - return entries; -} - -std::string DecompileShader(const Device& device, const ShaderIR& ir, const Registry& registry, - ShaderType stage, std::string_view identifier, - std::string_view suffix) { - GLSLDecompiler decompiler(device, ir, registry, stage, identifier, suffix); - decompiler.Decompile(); - return decompiler.GetResult(); -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h deleted file mode 100644 index 0397a000c..000000000 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
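The ExprDecompiler and ASTDecompiler removed above emit structured GLSL control flow by visiting a variant-based expression tree and appending text to the output as they recurse. A minimal, self-contained sketch of that visitor pattern follows; ExprData, ExprVar, ExprNot, ExprAnd and ExprPrinter are illustrative stand-ins for this note, not the removed types.

#include <memory>
#include <string>
#include <variant>

// Illustrative stand-ins for the removed expression node types.
struct ExprData;
struct ExprVar { unsigned index; };
struct ExprNot { std::shared_ptr<ExprData> operand; };
struct ExprAnd { std::shared_ptr<ExprData> lhs, rhs; };
struct ExprData { std::variant<ExprVar, ExprNot, ExprAnd> data; };

// Accumulates a GLSL boolean expression while visiting the tree, the same
// shape as the removed ExprDecompiler::operator() overloads.
struct ExprPrinter {
    std::string result;
    void operator()(const ExprVar& e) { result += "flow_var" + std::to_string(e.index); }
    void operator()(const ExprNot& e) {
        result += '!';
        std::visit(*this, e.operand->data);
    }
    void operator()(const ExprAnd& e) {
        result += '(';
        std::visit(*this, e.lhs->data);
        result += " && ";
        std::visit(*this, e.rhs->data);
        result += ')';
    }
};

int main() {
    const auto var = std::make_shared<ExprData>(ExprData{ExprVar{3}});
    const auto expr = std::make_shared<ExprData>(ExprData{ExprNot{var}});
    ExprPrinter printer;
    std::visit(printer, expr->data);
    // printer.result is now "!flow_var3"
}

The same visit-and-append approach extends to the remaining node kinds the removed decompiler handles (ExprOr, ExprPredicate, ExprCondCode, ExprGprEqual), each overload emitting its own GLSL fragment.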
- -#pragma once - -#include <array> -#include <string> -#include <string_view> -#include <utility> -#include <vector> -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -namespace OpenGL { - -class Device; - -using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using SamplerEntry = VideoCommon::Shader::SamplerEntry; -using ImageEntry = VideoCommon::Shader::ImageEntry; - -class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer { -public: - explicit ConstBufferEntry(u32 max_offset_, bool is_indirect_, u32 index_) - : ConstBuffer{max_offset_, is_indirect_}, index{index_} {} - - u32 GetIndex() const { - return index; - } - -private: - u32 index = 0; -}; - -struct GlobalMemoryEntry { - constexpr explicit GlobalMemoryEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_read_, - bool is_written_) - : cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_read{is_read_}, is_written{ - is_written_} {} - - u32 cbuf_index = 0; - u32 cbuf_offset = 0; - bool is_read = false; - bool is_written = false; -}; - -struct ShaderEntries { - std::vector<ConstBufferEntry> const_buffers; - std::vector<GlobalMemoryEntry> global_memory_entries; - std::vector<SamplerEntry> samplers; - std::vector<ImageEntry> images; - std::size_t shader_length{}; - u32 clip_distances{}; - u32 enabled_uniform_buffers{}; -}; - -ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir, - Tegra::Engines::ShaderType stage); - -std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, - const VideoCommon::Shader::Registry& registry, - Tegra::Engines::ShaderType stage, std::string_view identifier, - std::string_view suffix = {}); - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp deleted file mode 100644 index 0deb86517..000000000 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ /dev/null @@ -1,482 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
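The removed disk cache below writes shaders to the transferable file as flat binary records behind a u32 version header (NativeVersion), and invalidates the whole file whenever any record fails to read. A minimal, self-contained sketch of that length-prefixed pattern is shown here; Entry, SaveEntry and LoadEntry are hypothetical stand-ins for illustration, not the removed API.

#include <cstdint>
#include <fstream>
#include <vector>

// Hypothetical stand-in for one cached record; the real ShaderDiskCacheEntry
// also stores sampler, const-buffer and specialization data.
struct Entry {
    std::uint32_t type = 0;
    std::vector<std::uint64_t> code;
};

// Each record is written as fixed-width fields followed by a length-prefixed payload.
bool SaveEntry(std::ofstream& out, const Entry& e) {
    const auto count = static_cast<std::uint32_t>(e.code.size());
    out.write(reinterpret_cast<const char*>(&e.type), sizeof(e.type));
    out.write(reinterpret_cast<const char*>(&count), sizeof(count));
    out.write(reinterpret_cast<const char*>(e.code.data()), count * sizeof(std::uint64_t));
    return out.good();
}

bool LoadEntry(std::ifstream& in, Entry& e) {
    std::uint32_t count = 0;
    if (!in.read(reinterpret_cast<char*>(&e.type), sizeof(e.type)) ||
        !in.read(reinterpret_cast<char*>(&count), sizeof(count))) {
        return false;  // truncated file: the caller drops the whole cache
    }
    e.code.resize(count);
    return static_cast<bool>(
        in.read(reinterpret_cast<char*>(e.code.data()), count * sizeof(std::uint64_t)));
}

Keeping every field a trivially copyable, fixed-width value is what lets the removed implementation read and write whole vectors of entries (keys, bound samplers, bindless samplers) with single file.Read and file.Write calls.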
- -#include <cstring> - -#include <fmt/format.h> - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/fs/file.h" -#include "common/fs/fs.h" -#include "common/fs/path_util.h" -#include "common/logging/log.h" -#include "common/scm_rev.h" -#include "common/settings.h" -#include "common/zstd_compression.h" -#include "core/core.h" -#include "core/hle/kernel/k_process.h" -#include "video_core/engines/shader_type.h" -#include "video_core/renderer_opengl/gl_shader_cache.h" -#include "video_core/renderer_opengl/gl_shader_disk_cache.h" - -namespace OpenGL { - -using Tegra::Engines::ShaderType; -using VideoCommon::Shader::BindlessSamplerMap; -using VideoCommon::Shader::BoundSamplerMap; -using VideoCommon::Shader::KeyMap; -using VideoCommon::Shader::SeparateSamplerKey; -using ShaderCacheVersionHash = std::array<u8, 64>; - -struct ConstBufferKey { - u32 cbuf = 0; - u32 offset = 0; - u32 value = 0; -}; - -struct BoundSamplerEntry { - u32 offset = 0; - Tegra::Engines::SamplerDescriptor sampler; -}; - -struct SeparateSamplerEntry { - u32 cbuf1 = 0; - u32 cbuf2 = 0; - u32 offset1 = 0; - u32 offset2 = 0; - Tegra::Engines::SamplerDescriptor sampler; -}; - -struct BindlessSamplerEntry { - u32 cbuf = 0; - u32 offset = 0; - Tegra::Engines::SamplerDescriptor sampler; -}; - -namespace { - -constexpr u32 NativeVersion = 21; - -ShaderCacheVersionHash GetShaderCacheVersionHash() { - ShaderCacheVersionHash hash{}; - const std::size_t length = std::min(std::strlen(Common::g_shader_cache_version), hash.size()); - std::memcpy(hash.data(), Common::g_shader_cache_version, length); - return hash; -} - -} // Anonymous namespace - -ShaderDiskCacheEntry::ShaderDiskCacheEntry() = default; - -ShaderDiskCacheEntry::~ShaderDiskCacheEntry() = default; - -bool ShaderDiskCacheEntry::Load(Common::FS::IOFile& file) { - if (!file.ReadObject(type)) { - return false; - } - u32 code_size; - u32 code_size_b; - if (!file.ReadObject(code_size) || !file.ReadObject(code_size_b)) { - return false; - } - code.resize(code_size); - code_b.resize(code_size_b); - if (file.Read(code) != code_size) { - return false; - } - if (HasProgramA() && file.Read(code_b) != code_size_b) { - return false; - } - - u8 is_texture_handler_size_known; - u32 texture_handler_size_value; - u32 num_keys; - u32 num_bound_samplers; - u32 num_separate_samplers; - u32 num_bindless_samplers; - if (!file.ReadObject(unique_identifier) || !file.ReadObject(bound_buffer) || - !file.ReadObject(is_texture_handler_size_known) || - !file.ReadObject(texture_handler_size_value) || !file.ReadObject(graphics_info) || - !file.ReadObject(compute_info) || !file.ReadObject(num_keys) || - !file.ReadObject(num_bound_samplers) || !file.ReadObject(num_separate_samplers) || - !file.ReadObject(num_bindless_samplers)) { - return false; - } - if (is_texture_handler_size_known) { - texture_handler_size = texture_handler_size_value; - } - - std::vector<ConstBufferKey> flat_keys(num_keys); - std::vector<BoundSamplerEntry> flat_bound_samplers(num_bound_samplers); - std::vector<SeparateSamplerEntry> flat_separate_samplers(num_separate_samplers); - std::vector<BindlessSamplerEntry> flat_bindless_samplers(num_bindless_samplers); - if (file.Read(flat_keys) != flat_keys.size() || - file.Read(flat_bound_samplers) != flat_bound_samplers.size() || - file.Read(flat_separate_samplers) != flat_separate_samplers.size() || - file.Read(flat_bindless_samplers) != flat_bindless_samplers.size()) { - return false; - } - for (const auto& entry : flat_keys) { - keys.insert({{entry.cbuf, 
entry.offset}, entry.value}); - } - for (const auto& entry : flat_bound_samplers) { - bound_samplers.emplace(entry.offset, entry.sampler); - } - for (const auto& entry : flat_separate_samplers) { - SeparateSamplerKey key; - key.buffers = {entry.cbuf1, entry.cbuf2}; - key.offsets = {entry.offset1, entry.offset2}; - separate_samplers.emplace(key, entry.sampler); - } - for (const auto& entry : flat_bindless_samplers) { - bindless_samplers.insert({{entry.cbuf, entry.offset}, entry.sampler}); - } - - return true; -} - -bool ShaderDiskCacheEntry::Save(Common::FS::IOFile& file) const { - if (!file.WriteObject(static_cast<u32>(type)) || - !file.WriteObject(static_cast<u32>(code.size())) || - !file.WriteObject(static_cast<u32>(code_b.size()))) { - return false; - } - if (file.Write(code) != code.size()) { - return false; - } - if (HasProgramA() && file.Write(code_b) != code_b.size()) { - return false; - } - - if (!file.WriteObject(unique_identifier) || !file.WriteObject(bound_buffer) || - !file.WriteObject(static_cast<u8>(texture_handler_size.has_value())) || - !file.WriteObject(texture_handler_size.value_or(0)) || !file.WriteObject(graphics_info) || - !file.WriteObject(compute_info) || !file.WriteObject(static_cast<u32>(keys.size())) || - !file.WriteObject(static_cast<u32>(bound_samplers.size())) || - !file.WriteObject(static_cast<u32>(separate_samplers.size())) || - !file.WriteObject(static_cast<u32>(bindless_samplers.size()))) { - return false; - } - - std::vector<ConstBufferKey> flat_keys; - flat_keys.reserve(keys.size()); - for (const auto& [address, value] : keys) { - flat_keys.push_back(ConstBufferKey{address.first, address.second, value}); - } - - std::vector<BoundSamplerEntry> flat_bound_samplers; - flat_bound_samplers.reserve(bound_samplers.size()); - for (const auto& [address, sampler] : bound_samplers) { - flat_bound_samplers.push_back(BoundSamplerEntry{address, sampler}); - } - - std::vector<SeparateSamplerEntry> flat_separate_samplers; - flat_separate_samplers.reserve(separate_samplers.size()); - for (const auto& [key, sampler] : separate_samplers) { - SeparateSamplerEntry entry; - std::tie(entry.cbuf1, entry.cbuf2) = key.buffers; - std::tie(entry.offset1, entry.offset2) = key.offsets; - entry.sampler = sampler; - flat_separate_samplers.push_back(entry); - } - - std::vector<BindlessSamplerEntry> flat_bindless_samplers; - flat_bindless_samplers.reserve(bindless_samplers.size()); - for (const auto& [address, sampler] : bindless_samplers) { - flat_bindless_samplers.push_back( - BindlessSamplerEntry{address.first, address.second, sampler}); - } - - return file.Write(flat_keys) == flat_keys.size() && - file.Write(flat_bound_samplers) == flat_bound_samplers.size() && - file.Write(flat_separate_samplers) == flat_separate_samplers.size() && - file.Write(flat_bindless_samplers) == flat_bindless_samplers.size(); -} - -ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL() = default; - -ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default; - -void ShaderDiskCacheOpenGL::BindTitleID(u64 title_id_) { - title_id = title_id_; -} - -std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTransferable() { - // Skip games without title id - const bool has_title_id = title_id != 0; - if (!Settings::values.use_disk_shader_cache.GetValue() || !has_title_id) { - return std::nullopt; - } - - Common::FS::IOFile file{GetTransferablePath(), Common::FS::FileAccessMode::Read, - Common::FS::FileType::BinaryFile}; - if (!file.IsOpen()) { - LOG_INFO(Render_OpenGL, "No transferable shader cache 
found"); - is_usable = true; - return std::nullopt; - } - - u32 version{}; - if (!file.ReadObject(version)) { - LOG_ERROR(Render_OpenGL, "Failed to get transferable cache version, skipping it"); - return std::nullopt; - } - - if (version < NativeVersion) { - LOG_INFO(Render_OpenGL, "Transferable shader cache is old, removing"); - file.Close(); - InvalidateTransferable(); - is_usable = true; - return std::nullopt; - } - if (version > NativeVersion) { - LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version " - "of the emulator, skipping"); - return std::nullopt; - } - - // Version is valid, load the shaders - std::vector<ShaderDiskCacheEntry> entries; - while (static_cast<u64>(file.Tell()) < file.GetSize()) { - ShaderDiskCacheEntry& entry = entries.emplace_back(); - if (!entry.Load(file)) { - LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry, skipping"); - return std::nullopt; - } - } - - is_usable = true; - return {std::move(entries)}; -} - -std::vector<ShaderDiskCachePrecompiled> ShaderDiskCacheOpenGL::LoadPrecompiled() { - if (!is_usable) { - return {}; - } - - Common::FS::IOFile file{GetPrecompiledPath(), Common::FS::FileAccessMode::Read, - Common::FS::FileType::BinaryFile}; - if (!file.IsOpen()) { - LOG_INFO(Render_OpenGL, "No precompiled shader cache found"); - return {}; - } - - if (const auto result = LoadPrecompiledFile(file)) { - return *result; - } - - LOG_INFO(Render_OpenGL, "Failed to load precompiled cache"); - file.Close(); - InvalidatePrecompiled(); - return {}; -} - -std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::LoadPrecompiledFile( - Common::FS::IOFile& file) { - // Read compressed file from disk and decompress to virtual precompiled cache file - std::vector<u8> compressed(file.GetSize()); - if (file.Read(compressed) != file.GetSize()) { - return std::nullopt; - } - const std::vector<u8> decompressed = Common::Compression::DecompressDataZSTD(compressed); - SaveArrayToPrecompiled(decompressed.data(), decompressed.size()); - precompiled_cache_virtual_file_offset = 0; - - ShaderCacheVersionHash file_hash{}; - if (!LoadArrayFromPrecompiled(file_hash.data(), file_hash.size())) { - precompiled_cache_virtual_file_offset = 0; - return std::nullopt; - } - if (GetShaderCacheVersionHash() != file_hash) { - LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator"); - precompiled_cache_virtual_file_offset = 0; - return std::nullopt; - } - - std::vector<ShaderDiskCachePrecompiled> entries; - while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) { - u32 binary_size; - auto& entry = entries.emplace_back(); - if (!LoadObjectFromPrecompiled(entry.unique_identifier) || - !LoadObjectFromPrecompiled(entry.binary_format) || - !LoadObjectFromPrecompiled(binary_size)) { - return std::nullopt; - } - - entry.binary.resize(binary_size); - if (!LoadArrayFromPrecompiled(entry.binary.data(), entry.binary.size())) { - return std::nullopt; - } - } - return entries; -} - -void ShaderDiskCacheOpenGL::InvalidateTransferable() { - if (!Common::FS::RemoveFile(GetTransferablePath())) { - LOG_ERROR(Render_OpenGL, "Failed to invalidate transferable file={}", - Common::FS::PathToUTF8String(GetTransferablePath())); - } - InvalidatePrecompiled(); -} - -void ShaderDiskCacheOpenGL::InvalidatePrecompiled() { - // Clear virtaul precompiled cache file - precompiled_cache_virtual_file.Resize(0); - - if (!Common::FS::RemoveFile(GetPrecompiledPath())) { - LOG_ERROR(Render_OpenGL, 
"Failed to invalidate precompiled file={}", - Common::FS::PathToUTF8String(GetPrecompiledPath())); - } -} - -void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) { - if (!is_usable) { - return; - } - - const u64 id = entry.unique_identifier; - if (stored_transferable.contains(id)) { - // The shader already exists - return; - } - - Common::FS::IOFile file = AppendTransferableFile(); - if (!file.IsOpen()) { - return; - } - if (!entry.Save(file)) { - LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing"); - file.Close(); - InvalidateTransferable(); - return; - } - - stored_transferable.insert(id); -} - -void ShaderDiskCacheOpenGL::SavePrecompiled(u64 unique_identifier, GLuint program) { - if (!is_usable) { - return; - } - - // TODO(Rodrigo): This is a design smell. I shouldn't be having to manually write the header - // when writing the dump. This should be done the moment I get access to write to the virtual - // file. - if (precompiled_cache_virtual_file.GetSize() == 0) { - SavePrecompiledHeaderToVirtualPrecompiledCache(); - } - - GLint binary_length; - glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length); - - GLenum binary_format; - std::vector<u8> binary(binary_length); - glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); - - if (!SaveObjectToPrecompiled(unique_identifier) || !SaveObjectToPrecompiled(binary_format) || - !SaveObjectToPrecompiled(static_cast<u32>(binary.size())) || - !SaveArrayToPrecompiled(binary.data(), binary.size())) { - LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing", - unique_identifier); - InvalidatePrecompiled(); - } -} - -Common::FS::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const { - if (!EnsureDirectories()) { - return {}; - } - - const auto transferable_path{GetTransferablePath()}; - const bool existed = Common::FS::Exists(transferable_path); - - Common::FS::IOFile file{transferable_path, Common::FS::FileAccessMode::Append, - Common::FS::FileType::BinaryFile}; - if (!file.IsOpen()) { - LOG_ERROR(Render_OpenGL, "Failed to open transferable cache in path={}", - Common::FS::PathToUTF8String(transferable_path)); - return {}; - } - if (!existed || file.GetSize() == 0) { - // If the file didn't exist, write its version - if (!file.WriteObject(NativeVersion)) { - LOG_ERROR(Render_OpenGL, "Failed to write transferable cache version in path={}", - Common::FS::PathToUTF8String(transferable_path)); - return {}; - } - } - return file; -} - -void ShaderDiskCacheOpenGL::SavePrecompiledHeaderToVirtualPrecompiledCache() { - const auto hash{GetShaderCacheVersionHash()}; - if (!SaveArrayToPrecompiled(hash.data(), hash.size())) { - LOG_ERROR( - Render_OpenGL, - "Failed to write precompiled cache version hash to virtual precompiled cache file"); - } -} - -void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() { - precompiled_cache_virtual_file_offset = 0; - const std::vector<u8> uncompressed = precompiled_cache_virtual_file.ReadAllBytes(); - const std::vector<u8> compressed = - Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size()); - - const auto precompiled_path = GetPrecompiledPath(); - Common::FS::IOFile file{precompiled_path, Common::FS::FileAccessMode::Write, - Common::FS::FileType::BinaryFile}; - - if (!file.IsOpen()) { - LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}", - Common::FS::PathToUTF8String(precompiled_path)); - return; - } - if (file.Write(compressed) != 
compressed.size()) { - LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}", - Common::FS::PathToUTF8String(precompiled_path)); - } -} - -bool ShaderDiskCacheOpenGL::EnsureDirectories() const { - const auto CreateDir = [](const std::filesystem::path& dir) { - if (!Common::FS::CreateDir(dir)) { - LOG_ERROR(Render_OpenGL, "Failed to create directory={}", - Common::FS::PathToUTF8String(dir)); - return false; - } - return true; - }; - - return CreateDir(Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)) && - CreateDir(GetBaseDir()) && CreateDir(GetTransferableDir()) && - CreateDir(GetPrecompiledDir()); -} - -std::filesystem::path ShaderDiskCacheOpenGL::GetTransferablePath() const { - return GetTransferableDir() / fmt::format("{}.bin", GetTitleID()); -} - -std::filesystem::path ShaderDiskCacheOpenGL::GetPrecompiledPath() const { - return GetPrecompiledDir() / fmt::format("{}.bin", GetTitleID()); -} - -std::filesystem::path ShaderDiskCacheOpenGL::GetTransferableDir() const { - return GetBaseDir() / "transferable"; -} - -std::filesystem::path ShaderDiskCacheOpenGL::GetPrecompiledDir() const { - return GetBaseDir() / "precompiled"; -} - -std::filesystem::path ShaderDiskCacheOpenGL::GetBaseDir() const { - return Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir) / "opengl"; -} - -std::string ShaderDiskCacheOpenGL::GetTitleID() const { - return fmt::format("{:016X}", title_id); -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h deleted file mode 100644 index f8bc23868..000000000 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ /dev/null @@ -1,176 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <filesystem> -#include <optional> -#include <string> -#include <tuple> -#include <type_traits> -#include <unordered_map> -#include <unordered_set> -#include <utility> -#include <vector> - -#include <glad/glad.h> - -#include "common/assert.h" -#include "common/common_types.h" -#include "core/file_sys/vfs_vector.h" -#include "video_core/engines/shader_type.h" -#include "video_core/shader/registry.h" - -namespace Common::FS { -class IOFile; -} - -namespace OpenGL { - -using ProgramCode = std::vector<u64>; - -/// Describes a shader and how it's used by the guest GPU -struct ShaderDiskCacheEntry { - ShaderDiskCacheEntry(); - ~ShaderDiskCacheEntry(); - - bool Load(Common::FS::IOFile& file); - - bool Save(Common::FS::IOFile& file) const; - - bool HasProgramA() const { - return !code.empty() && !code_b.empty(); - } - - Tegra::Engines::ShaderType type{}; - ProgramCode code; - ProgramCode code_b; - - u64 unique_identifier = 0; - std::optional<u32> texture_handler_size; - u32 bound_buffer = 0; - VideoCommon::Shader::GraphicsInfo graphics_info; - VideoCommon::Shader::ComputeInfo compute_info; - VideoCommon::Shader::KeyMap keys; - VideoCommon::Shader::BoundSamplerMap bound_samplers; - VideoCommon::Shader::SeparateSamplerMap separate_samplers; - VideoCommon::Shader::BindlessSamplerMap bindless_samplers; -}; - -/// Contains an OpenGL dumped binary program -struct ShaderDiskCachePrecompiled { - u64 unique_identifier = 0; - GLenum binary_format = 0; - std::vector<u8> binary; -}; - -class ShaderDiskCacheOpenGL { -public: - explicit ShaderDiskCacheOpenGL(); - ~ShaderDiskCacheOpenGL(); - - /// Binds a title ID for all future operations. 
- void BindTitleID(u64 title_id); - - /// Loads transferable cache. If file has a old version or on failure, it deletes the file. - std::optional<std::vector<ShaderDiskCacheEntry>> LoadTransferable(); - - /// Loads current game's precompiled cache. Invalidates on failure. - std::vector<ShaderDiskCachePrecompiled> LoadPrecompiled(); - - /// Removes the transferable (and precompiled) cache file. - void InvalidateTransferable(); - - /// Removes the precompiled cache file and clears virtual precompiled cache file. - void InvalidatePrecompiled(); - - /// Saves a raw dump to the transferable file. Checks for collisions. - void SaveEntry(const ShaderDiskCacheEntry& entry); - - /// Saves a dump entry to the precompiled file. Does not check for collisions. - void SavePrecompiled(u64 unique_identifier, GLuint program); - - /// Serializes virtual precompiled shader cache file to real file - void SaveVirtualPrecompiledFile(); - -private: - /// Loads the transferable cache. Returns empty on failure. - std::optional<std::vector<ShaderDiskCachePrecompiled>> LoadPrecompiledFile( - Common::FS::IOFile& file); - - /// Opens current game's transferable file and write it's header if it doesn't exist - Common::FS::IOFile AppendTransferableFile() const; - - /// Save precompiled header to precompiled_cache_in_memory - void SavePrecompiledHeaderToVirtualPrecompiledCache(); - - /// Create shader disk cache directories. Returns true on success. - bool EnsureDirectories() const; - - /// Gets current game's transferable file path - std::filesystem::path GetTransferablePath() const; - - /// Gets current game's precompiled file path - std::filesystem::path GetPrecompiledPath() const; - - /// Get user's transferable directory path - std::filesystem::path GetTransferableDir() const; - - /// Get user's precompiled directory path - std::filesystem::path GetPrecompiledDir() const; - - /// Get user's shader directory path - std::filesystem::path GetBaseDir() const; - - /// Get current game's title id - std::string GetTitleID() const; - - template <typename T> - bool SaveArrayToPrecompiled(const T* data, std::size_t length) { - const std::size_t write_length = precompiled_cache_virtual_file.WriteArray( - data, length, precompiled_cache_virtual_file_offset); - precompiled_cache_virtual_file_offset += write_length; - return write_length == sizeof(T) * length; - } - - template <typename T> - bool LoadArrayFromPrecompiled(T* data, std::size_t length) { - const std::size_t read_length = precompiled_cache_virtual_file.ReadArray( - data, length, precompiled_cache_virtual_file_offset); - precompiled_cache_virtual_file_offset += read_length; - return read_length == sizeof(T) * length; - } - - template <typename T> - bool SaveObjectToPrecompiled(const T& object) { - return SaveArrayToPrecompiled(&object, 1); - } - - bool SaveObjectToPrecompiled(bool object) { - const auto value = static_cast<u8>(object); - return SaveArrayToPrecompiled(&value, 1); - } - - template <typename T> - bool LoadObjectFromPrecompiled(T& object) { - return LoadArrayFromPrecompiled(&object, 1); - } - - // Stores whole precompiled cache which will be read from or saved to the precompiled chache - // file - FileSys::VectorVfsFile precompiled_cache_virtual_file; - // Stores the current offset of the precompiled cache file for IO purposes - std::size_t precompiled_cache_virtual_file_offset = 0; - - // Stored transferable shaders - std::unordered_set<u64> stored_transferable; - - /// Title ID to operate on - u64 title_id = 0; - - // The cache has been loaded at boot - 
bool is_usable = false; -}; - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 553e6e8d6..399959afb 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -1,149 +1,3 @@ // Copyright 2018 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. - -#include <glad/glad.h> - -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_opengl/gl_device.h" -#include "video_core/renderer_opengl/gl_shader_manager.h" - -namespace OpenGL { - -namespace { - -void BindProgram(GLenum stage, GLuint current, GLuint old, bool& enabled) { - if (current == old) { - return; - } - if (current == 0) { - if (enabled) { - enabled = false; - glDisable(stage); - } - return; - } - if (!enabled) { - enabled = true; - glEnable(stage); - } - glBindProgramARB(stage, current); -} - -} // Anonymous namespace - -ProgramManager::ProgramManager(const Device& device) - : use_assembly_programs{device.UseAssemblyShaders()} { - if (use_assembly_programs) { - glEnable(GL_COMPUTE_PROGRAM_NV); - } else { - graphics_pipeline.Create(); - glBindProgramPipeline(graphics_pipeline.handle); - } -} - -ProgramManager::~ProgramManager() = default; - -void ProgramManager::BindCompute(GLuint program) { - if (use_assembly_programs) { - glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program); - } else { - is_graphics_bound = false; - glUseProgram(program); - } -} - -void ProgramManager::BindGraphicsPipeline() { - if (!use_assembly_programs) { - UpdateSourcePrograms(); - } -} - -void ProgramManager::BindHostPipeline(GLuint pipeline) { - if (use_assembly_programs) { - if (geometry_enabled) { - geometry_enabled = false; - old_state.geometry = 0; - glDisable(GL_GEOMETRY_PROGRAM_NV); - } - } else { - if (!is_graphics_bound) { - glUseProgram(0); - } - } - glBindProgramPipeline(pipeline); -} - -void ProgramManager::RestoreGuestPipeline() { - if (use_assembly_programs) { - glBindProgramPipeline(0); - } else { - glBindProgramPipeline(graphics_pipeline.handle); - } -} - -void ProgramManager::BindHostCompute(GLuint program) { - if (use_assembly_programs) { - glDisable(GL_COMPUTE_PROGRAM_NV); - } - glUseProgram(program); - is_graphics_bound = false; -} - -void ProgramManager::RestoreGuestCompute() { - if (use_assembly_programs) { - glEnable(GL_COMPUTE_PROGRAM_NV); - glUseProgram(0); - } -} - -void ProgramManager::UseVertexShader(GLuint program) { - if (use_assembly_programs) { - BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled); - } - current_state.vertex = program; -} - -void ProgramManager::UseGeometryShader(GLuint program) { - if (use_assembly_programs) { - BindProgram(GL_GEOMETRY_PROGRAM_NV, program, current_state.vertex, geometry_enabled); - } - current_state.geometry = program; -} - -void ProgramManager::UseFragmentShader(GLuint program) { - if (use_assembly_programs) { - BindProgram(GL_FRAGMENT_PROGRAM_NV, program, current_state.vertex, fragment_enabled); - } - current_state.fragment = program; -} - -void ProgramManager::UpdateSourcePrograms() { - if (!is_graphics_bound) { - is_graphics_bound = true; - glUseProgram(0); - } - - const GLuint handle = graphics_pipeline.handle; - const auto update_state = [handle](GLenum stage, GLuint current, GLuint old) { - if (current == old) { - return; - } - glUseProgramStages(handle, stage, current); - }; - 
update_state(GL_VERTEX_SHADER_BIT, current_state.vertex, old_state.vertex); - update_state(GL_GEOMETRY_SHADER_BIT, current_state.geometry, old_state.geometry); - update_state(GL_FRAGMENT_SHADER_BIT, current_state.fragment, old_state.fragment); - - old_state = current_state; -} - -void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) { - const auto& regs = maxwell.regs; - - // Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value. - y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f; -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index ad42cce74..d7ef0775d 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -4,79 +4,142 @@ #pragma once -#include <cstddef> +#include <array> +#include <span> #include <glad/glad.h> +#include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/maxwell_to_gl.h" namespace OpenGL { -class Device; - -/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned -/// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at -/// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. -/// Not following that rule will cause problems on some AMD drivers. -struct alignas(16) MaxwellUniformData { - void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell); - - GLfloat y_direction; -}; -static_assert(sizeof(MaxwellUniformData) == 16, "MaxwellUniformData structure size is incorrect"); -static_assert(sizeof(MaxwellUniformData) < 16384, - "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec"); - class ProgramManager { -public: - explicit ProgramManager(const Device& device); - ~ProgramManager(); - - /// Binds a compute program - void BindCompute(GLuint program); - - /// Updates bound programs. - void BindGraphicsPipeline(); - - /// Binds an OpenGL pipeline object unsynchronized with the guest state. - void BindHostPipeline(GLuint pipeline); - - /// Rewinds BindHostPipeline state changes. - void RestoreGuestPipeline(); - - /// Binds an OpenGL GLSL program object unsynchronized with the guest state. - void BindHostCompute(GLuint program); + static constexpr size_t NUM_STAGES = 5; - /// Rewinds BindHostCompute state changes. - void RestoreGuestCompute(); - - void UseVertexShader(GLuint program); - void UseGeometryShader(GLuint program); - void UseFragmentShader(GLuint program); - -private: - struct PipelineState { - GLuint vertex = 0; - GLuint geometry = 0; - GLuint fragment = 0; + static constexpr std::array ASSEMBLY_PROGRAM_ENUMS{ + GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, + GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, }; - /// Update GLSL programs. 
- void UpdateSourcePrograms(); - - OGLPipeline graphics_pipeline; - - PipelineState current_state; - PipelineState old_state; - - bool use_assembly_programs = false; - - bool is_graphics_bound = true; +public: + explicit ProgramManager(const Device& device) { + glCreateProgramPipelines(1, &pipeline.handle); + if (device.UseAssemblyShaders()) { + glEnable(GL_COMPUTE_PROGRAM_NV); + } + } + + void BindComputeProgram(GLuint program) { + glUseProgram(program); + is_compute_bound = true; + } + + void BindComputeAssemblyProgram(GLuint program) { + if (current_assembly_compute_program != program) { + current_assembly_compute_program = program; + glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program); + } + UnbindPipeline(); + } + + void BindSourcePrograms(std::span<const OGLProgram, NUM_STAGES> programs) { + static constexpr std::array<GLenum, 5> stage_enums{ + GL_VERTEX_SHADER_BIT, GL_TESS_CONTROL_SHADER_BIT, GL_TESS_EVALUATION_SHADER_BIT, + GL_GEOMETRY_SHADER_BIT, GL_FRAGMENT_SHADER_BIT, + }; + for (size_t stage = 0; stage < NUM_STAGES; ++stage) { + if (current_programs[stage] != programs[stage].handle) { + current_programs[stage] = programs[stage].handle; + glUseProgramStages(pipeline.handle, stage_enums[stage], programs[stage].handle); + } + } + BindPipeline(); + } + + void BindPresentPrograms(GLuint vertex, GLuint fragment) { + if (current_programs[0] != vertex) { + current_programs[0] = vertex; + glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex); + } + if (current_programs[4] != fragment) { + current_programs[4] = fragment; + glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment); + } + glUseProgramStages( + pipeline.handle, + GL_TESS_CONTROL_SHADER_BIT | GL_TESS_EVALUATION_SHADER_BIT | GL_GEOMETRY_SHADER_BIT, 0); + current_programs[1] = 0; + current_programs[2] = 0; + current_programs[3] = 0; + + if (current_stage_mask != 0) { + current_stage_mask = 0; + for (const GLenum program_type : ASSEMBLY_PROGRAM_ENUMS) { + glDisable(program_type); + } + } + BindPipeline(); + } + + void BindAssemblyPrograms(std::span<const OGLAssemblyProgram, NUM_STAGES> programs, + u32 stage_mask) { + const u32 changed_mask = current_stage_mask ^ stage_mask; + current_stage_mask = stage_mask; + + if (changed_mask != 0) { + for (size_t stage = 0; stage < NUM_STAGES; ++stage) { + if (((changed_mask >> stage) & 1) != 0) { + if (((stage_mask >> stage) & 1) != 0) { + glEnable(ASSEMBLY_PROGRAM_ENUMS[stage]); + } else { + glDisable(ASSEMBLY_PROGRAM_ENUMS[stage]); + } + } + } + } + for (size_t stage = 0; stage < NUM_STAGES; ++stage) { + if (current_programs[stage] != programs[stage].handle) { + current_programs[stage] = programs[stage].handle; + glBindProgramARB(ASSEMBLY_PROGRAM_ENUMS[stage], programs[stage].handle); + } + } + UnbindPipeline(); + } + + void RestoreGuestCompute() {} - bool vertex_enabled = false; - bool geometry_enabled = false; - bool fragment_enabled = false; +private: + void BindPipeline() { + if (!is_pipeline_bound) { + is_pipeline_bound = true; + glBindProgramPipeline(pipeline.handle); + } + UnbindCompute(); + } + + void UnbindPipeline() { + if (is_pipeline_bound) { + is_pipeline_bound = false; + glBindProgramPipeline(0); + } + UnbindCompute(); + } + + void UnbindCompute() { + if (is_compute_bound) { + is_compute_bound = false; + glUseProgram(0); + } + } + + OGLPipeline pipeline; + bool is_pipeline_bound{}; + bool is_compute_bound{}; + + u32 current_stage_mask = 0; + std::array<GLuint, NUM_STAGES> current_programs{}; + GLuint current_assembly_compute_program = 0; }; } // 
namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index 4bf0d6090..d432072ad 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp @@ -5,57 +5,108 @@ #include <string_view> #include <vector> #include <glad/glad.h> + #include "common/assert.h" #include "common/logging/log.h" +#include "common/settings.h" #include "video_core/renderer_opengl/gl_shader_util.h" -namespace OpenGL::GLShader { +namespace OpenGL { -namespace { +static OGLProgram LinkSeparableProgram(GLuint shader) { + OGLProgram program; + program.handle = glCreateProgram(); + glProgramParameteri(program.handle, GL_PROGRAM_SEPARABLE, GL_TRUE); + glAttachShader(program.handle, shader); + glLinkProgram(program.handle); + if (!Settings::values.renderer_debug) { + return program; + } + GLint link_status{}; + glGetProgramiv(program.handle, GL_LINK_STATUS, &link_status); -std::string_view StageDebugName(GLenum type) { - switch (type) { - case GL_VERTEX_SHADER: - return "vertex"; - case GL_GEOMETRY_SHADER: - return "geometry"; - case GL_FRAGMENT_SHADER: - return "fragment"; - case GL_COMPUTE_SHADER: - return "compute"; + GLint log_length{}; + glGetProgramiv(program.handle, GL_INFO_LOG_LENGTH, &log_length); + if (log_length == 0) { + return program; + } + std::string log(log_length, 0); + glGetProgramInfoLog(program.handle, log_length, nullptr, log.data()); + if (link_status == GL_FALSE) { + LOG_ERROR(Render_OpenGL, "{}", log); + } else { + LOG_WARNING(Render_OpenGL, "{}", log); } - UNIMPLEMENTED(); - return "unknown"; + return program; } -} // Anonymous namespace +static void LogShader(GLuint shader, std::string_view code = {}) { + GLint shader_status{}; + glGetShaderiv(shader, GL_COMPILE_STATUS, &shader_status); + if (shader_status == GL_FALSE) { + LOG_ERROR(Render_OpenGL, "Failed to build shader"); + } + GLint log_length{}; + glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length); + if (log_length == 0) { + return; + } + std::string log(log_length, 0); + glGetShaderInfoLog(shader, log_length, nullptr, log.data()); + if (shader_status == GL_FALSE) { + LOG_ERROR(Render_OpenGL, "{}", log); + if (!code.empty()) { + LOG_INFO(Render_OpenGL, "\n{}", code); + } + } else { + LOG_WARNING(Render_OpenGL, "{}", log); + } +} -GLuint LoadShader(std::string_view source, GLenum type) { - const std::string_view debug_type = StageDebugName(type); - const GLuint shader_id = glCreateShader(type); +OGLProgram CreateProgram(std::string_view code, GLenum stage) { + OGLShader shader; + shader.handle = glCreateShader(stage); - const GLchar* source_string = source.data(); - const GLint source_length = static_cast<GLint>(source.size()); + const GLint length = static_cast<GLint>(code.size()); + const GLchar* const code_ptr = code.data(); + glShaderSource(shader.handle, 1, &code_ptr, &length); + glCompileShader(shader.handle); + if (Settings::values.renderer_debug) { + LogShader(shader.handle, code); + } + return LinkSeparableProgram(shader.handle); +} - glShaderSource(shader_id, 1, &source_string, &source_length); - LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type); - glCompileShader(shader_id); +OGLProgram CreateProgram(std::span<const u32> code, GLenum stage) { + OGLShader shader; + shader.handle = glCreateShader(stage); - GLint result = GL_FALSE; - GLint info_log_length; - glGetShaderiv(shader_id, GL_COMPILE_STATUS, &result); - glGetShaderiv(shader_id, GL_INFO_LOG_LENGTH, &info_log_length); + 
glShaderBinary(1, &shader.handle, GL_SHADER_BINARY_FORMAT_SPIR_V_ARB, code.data(), + static_cast<GLsizei>(code.size_bytes())); + glSpecializeShader(shader.handle, "main", 0, nullptr, nullptr); + if (Settings::values.renderer_debug) { + LogShader(shader.handle); + } + return LinkSeparableProgram(shader.handle); +} - if (info_log_length > 1) { - std::string shader_error(info_log_length, ' '); - glGetShaderInfoLog(shader_id, info_log_length, nullptr, &shader_error[0]); - if (result == GL_TRUE) { - LOG_DEBUG(Render_OpenGL, "{}", shader_error); - } else { - LOG_ERROR(Render_OpenGL, "Error compiling {} shader:\n{}", debug_type, shader_error); +OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target) { + OGLAssemblyProgram program; + glGenProgramsARB(1, &program.handle); + glNamedProgramStringEXT(program.handle, target, GL_PROGRAM_FORMAT_ASCII_ARB, + static_cast<GLsizei>(code.size()), code.data()); + if (Settings::values.renderer_debug) { + const auto err = reinterpret_cast<const char*>(glGetString(GL_PROGRAM_ERROR_STRING_NV)); + if (err && *err) { + if (std::strstr(err, "error")) { + LOG_CRITICAL(Render_OpenGL, "\n{}", err); + LOG_INFO(Render_OpenGL, "\n{}", code); + } else { + LOG_WARNING(Render_OpenGL, "\n{}", err); + } } } - return shader_id; + return program; } -} // namespace OpenGL::GLShader +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h index 1b770532e..4e1a2a8e1 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.h +++ b/src/video_core/renderer_opengl/gl_shader_util.h @@ -4,92 +4,23 @@ #pragma once +#include <span> #include <string> +#include <string_view> #include <vector> + #include <glad/glad.h> + #include "common/assert.h" #include "common/logging/log.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" -namespace OpenGL::GLShader { - -/** - * Utility function to log the source code of a list of shaders. - * @param shaders The OpenGL shaders whose source we will print. - */ -template <typename... T> -void LogShaderSource(T... shaders) { - auto shader_list = {shaders...}; - - for (const auto& shader : shader_list) { - if (shader == 0) - continue; - - GLint source_length; - glGetShaderiv(shader, GL_SHADER_SOURCE_LENGTH, &source_length); - - std::string source(source_length, ' '); - glGetShaderSource(shader, source_length, nullptr, &source[0]); - LOG_INFO(Render_OpenGL, "Shader source {}", source); - } -} - -/** - * Utility function to create and compile an OpenGL GLSL shader - * @param source String of the GLSL shader program - * @param type Type of the shader (GL_VERTEX_SHADER, GL_GEOMETRY_SHADER or GL_FRAGMENT_SHADER) - */ -GLuint LoadShader(std::string_view source, GLenum type); - -/** - * Utility function to create and compile an OpenGL GLSL shader program (vertex + fragment shader) - * @param separable_program whether to create a separable program - * @param shaders ID of shaders to attach to the program - * @returns Handle of the newly created OpenGL program object - */ -template <typename... T> -GLuint LoadProgram(bool separable_program, bool hint_retrievable, T... shaders) { - // Link the program - LOG_DEBUG(Render_OpenGL, "Linking program..."); - - GLuint program_id = glCreateProgram(); - - ((shaders == 0 ? 
(void)0 : glAttachShader(program_id, shaders)), ...); - - if (separable_program) { - glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE); - } - if (hint_retrievable) { - glProgramParameteri(program_id, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE); - } - - glLinkProgram(program_id); - - // Check the program - GLint result = GL_FALSE; - GLint info_log_length; - glGetProgramiv(program_id, GL_LINK_STATUS, &result); - glGetProgramiv(program_id, GL_INFO_LOG_LENGTH, &info_log_length); - - if (info_log_length > 1) { - std::string program_error(info_log_length, ' '); - glGetProgramInfoLog(program_id, info_log_length, nullptr, &program_error[0]); - if (result == GL_TRUE) { - LOG_DEBUG(Render_OpenGL, "{}", program_error); - } else { - LOG_ERROR(Render_OpenGL, "Error linking shader:\n{}", program_error); - } - } - - if (result == GL_FALSE) { - // There was a problem linking the shader, print the source for debugging purposes. - LogShaderSource(shaders...); - } +namespace OpenGL { - ASSERT_MSG(result == GL_TRUE, "Shader not linked"); +OGLProgram CreateProgram(std::string_view code, GLenum stage); - ((shaders == 0 ? (void)0 : glDetachShader(program_id, shaders)), ...); +OGLProgram CreateProgram(std::span<const u32> code, GLenum stage); - return program_id; -} +OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target); -} // namespace OpenGL::GLShader +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp index dbdf5230f..586da84e3 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.cpp +++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp @@ -83,11 +83,6 @@ void SetupDirtyScissors(Tables& tables) { FillBlock(tables[1], OFF(scissor_test), NUM(scissor_test), Scissors); } -void SetupDirtyShaders(Tables& tables) { - FillBlock(tables[0], OFF(shader_config[0]), NUM(shader_config[0]) * Regs::MaxShaderProgram, - Shaders); -} - void SetupDirtyPolygonModes(Tables& tables) { tables[0][OFF(polygon_mode_front)] = PolygonModeFront; tables[0][OFF(polygon_mode_back)] = PolygonModeBack; @@ -217,7 +212,6 @@ StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags} SetupDirtyScissors(tables); SetupDirtyVertexInstances(tables); SetupDirtyVertexFormat(tables); - SetupDirtyShaders(tables); SetupDirtyPolygonModes(tables); SetupDirtyDepthTest(tables); SetupDirtyStencilTest(tables); diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h index 94c905116..5864c7c07 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.h +++ b/src/video_core/renderer_opengl/gl_state_tracker.h @@ -52,7 +52,6 @@ enum : u8 { BlendState0, BlendState7 = BlendState0 + 7, - Shaders, ClipDistances, PolygonModes, diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index ff0f03e99..c373c9cb4 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -24,9 +24,7 @@ #include "video_core/textures/decoders.h" namespace OpenGL { - namespace { - using Tegra::Texture::SwizzleSource; using Tegra::Texture::TextureMipmapFilter; using Tegra::Texture::TextureType; @@ -59,107 +57,6 @@ struct CopyRegion { GLsizei depth; }; -struct FormatTuple { - GLenum internal_format; - GLenum format = GL_NONE; - GLenum type = GL_NONE; -}; - -constexpr std::array<FormatTuple, MaxPixelFormat> FORMAT_TABLE = {{ - {GL_RGBA8, GL_RGBA, 
GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM - {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM - {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT - {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT - {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM - {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM - {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM - {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM - {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT - {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM - {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM - {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM - {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT - {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT - {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT - {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM - {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM - {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT - {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT - {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT - {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT - {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM - {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM - {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM - {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM - {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM - {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM - {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM - {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM - {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT - {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT - {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM - {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT - {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT - {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT - {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT - {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT - {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT - {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM - {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM - {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT - {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT - {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM - {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT - {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT - {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT - {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM - {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB - {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM - {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM - {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT - {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT - {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT - {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT - {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT - {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT - {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM - {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM - {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM - {GL_SRGB8_ALPHA8, GL_RGBA, 
GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB - {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB - {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB - {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB - {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB - {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB - {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB - {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB - {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB - {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB - {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT - {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT - {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM - {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, - GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT -}}; - constexpr std::array ACCELERATED_FORMATS{ GL_RGBA32F, GL_RGBA16F, GL_RG32F, GL_RG16F, GL_R11F_G11F_B10F, GL_R32F, GL_R16F, GL_RGBA32UI, GL_RGBA16UI, GL_RGB10_A2UI, GL_RGBA8UI, GL_RG32UI, @@ -170,11 +67,6 @@ constexpr std::array ACCELERATED_FORMATS{ GL_RG8_SNORM, GL_R16_SNORM, GL_R8_SNORM, }; -const FormatTuple& GetFormatTuple(PixelFormat pixel_format) { - ASSERT(static_cast<size_t>(pixel_format) < FORMAT_TABLE.size()); - return FORMAT_TABLE[static_cast<size_t>(pixel_format)]; -} - GLenum ImageTarget(const VideoCommon::ImageInfo& info) { switch (info.type) { case ImageType::e1D: @@ -195,26 +87,24 @@ GLenum ImageTarget(const VideoCommon::ImageInfo& info) { return GL_NONE; } -GLenum ImageTarget(ImageViewType type, int num_samples = 1) { +GLenum ImageTarget(Shader::TextureType type, int num_samples = 1) { const bool is_multisampled = num_samples > 1; switch (type) { - case ImageViewType::e1D: + case Shader::TextureType::Color1D: return GL_TEXTURE_1D; - case ImageViewType::e2D: + case Shader::TextureType::Color2D: return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D; - case ImageViewType::Cube: + case Shader::TextureType::ColorCube: return GL_TEXTURE_CUBE_MAP; - case ImageViewType::e3D: + case Shader::TextureType::Color3D: return GL_TEXTURE_3D; - case ImageViewType::e1DArray: + case Shader::TextureType::ColorArray1D: return GL_TEXTURE_1D_ARRAY; - case ImageViewType::e2DArray: + case Shader::TextureType::ColorArray2D: return is_multisampled ? 
GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY; - case ImageViewType::CubeArray: + case Shader::TextureType::ColorArrayCube: return GL_TEXTURE_CUBE_MAP_ARRAY; - case ImageViewType::Rect: - return GL_TEXTURE_RECTANGLE; - case ImageViewType::Buffer: + case Shader::TextureType::Buffer: return GL_TEXTURE_BUFFER; } UNREACHABLE_MSG("Invalid image view type={}", type); @@ -322,7 +212,7 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4 default: return false; } - const GLenum internal_format = GetFormatTuple(info.format).internal_format; + const GLenum internal_format = MaxwellToGL::GetFormatTuple(info.format).internal_format; const auto& format_info = runtime.FormatInfo(info.type, internal_format); if (format_info.is_compressed) { return false; @@ -414,11 +304,10 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4 void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) { if (False(image_view->flags & VideoCommon::ImageViewFlagBits::Slice)) { - const GLuint texture = image_view->DefaultHandle(); - glNamedFramebufferTexture(fbo, attachment, texture, 0); + glNamedFramebufferTexture(fbo, attachment, image_view->DefaultHandle(), 0); return; } - const GLuint texture = image_view->Handle(ImageViewType::e3D); + const GLuint texture = image_view->Handle(Shader::TextureType::Color3D); if (image_view->range.extent.layers > 1) { // TODO: OpenGL doesn't support rendering to a fixed number of slices glNamedFramebufferTexture(fbo, attachment, texture, 0); @@ -439,6 +328,28 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) { } } +[[nodiscard]] GLenum ShaderFormat(Shader::ImageFormat format) { + switch (format) { + case Shader::ImageFormat::Typeless: + break; + case Shader::ImageFormat::R8_SINT: + return GL_R8I; + case Shader::ImageFormat::R8_UINT: + return GL_R8UI; + case Shader::ImageFormat::R16_UINT: + return GL_R16UI; + case Shader::ImageFormat::R16_SINT: + return GL_R16I; + case Shader::ImageFormat::R32_UINT: + return GL_R32UI; + case Shader::ImageFormat::R32G32_UINT: + return GL_RG32UI; + case Shader::ImageFormat::R32G32B32A32_UINT: + return GL_RGBA32UI; + } + UNREACHABLE_MSG("Invalid image format={}", format); + return GL_R32UI; +} } // Anonymous namespace ImageBufferMap::~ImageBufferMap() { @@ -453,7 +364,7 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D}; for (size_t i = 0; i < TARGETS.size(); ++i) { const GLenum target = TARGETS[i]; - for (const FormatTuple& tuple : FORMAT_TABLE) { + for (const MaxwellToGL::FormatTuple& tuple : MaxwellToGL::FORMAT_TABLE) { const GLenum format = tuple.internal_format; GLint compat_class; GLint compat_type; @@ -475,11 +386,9 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& null_image_1d_array.Create(GL_TEXTURE_1D_ARRAY); null_image_cube_array.Create(GL_TEXTURE_CUBE_MAP_ARRAY); null_image_3d.Create(GL_TEXTURE_3D); - null_image_rect.Create(GL_TEXTURE_RECTANGLE); glTextureStorage2D(null_image_1d_array.handle, 1, GL_R8, 1, 1); glTextureStorage3D(null_image_cube_array.handle, 1, GL_R8, 1, 1, 6); glTextureStorage3D(null_image_3d.handle, 1, GL_R8, 1, 1, 1); - glTextureStorage2D(null_image_rect.handle, 1, GL_R8, 1, 1); std::array<GLuint, 4> new_handles; glGenTextures(static_cast<GLsizei>(new_handles.size()), new_handles.data()); @@ -496,29 +405,28 @@ TextureCacheRuntime::TextureCacheRuntime(const 
Device& device_, ProgramManager& glTextureView(null_image_view_cube.handle, GL_TEXTURE_CUBE_MAP, null_image_cube_array.handle, GL_R8, 0, 1, 0, 6); const std::array texture_handles{ - null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle, - null_image_rect.handle, null_image_view_1d.handle, null_image_view_2d.handle, - null_image_view_2d_array.handle, null_image_view_cube.handle, + null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle, + null_image_view_1d.handle, null_image_view_2d.handle, null_image_view_2d_array.handle, + null_image_view_cube.handle, }; for (const GLuint handle : texture_handles) { static constexpr std::array NULL_SWIZZLE{GL_ZERO, GL_ZERO, GL_ZERO, GL_ZERO}; glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, NULL_SWIZZLE.data()); } - const auto set_view = [this](ImageViewType type, GLuint handle) { + const auto set_view = [this](Shader::TextureType type, GLuint handle) { if (device.HasDebuggingToolAttached()) { const std::string name = fmt::format("NullImage {}", type); glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data()); } null_image_views[static_cast<size_t>(type)] = handle; }; - set_view(ImageViewType::e1D, null_image_view_1d.handle); - set_view(ImageViewType::e2D, null_image_view_2d.handle); - set_view(ImageViewType::Cube, null_image_view_cube.handle); - set_view(ImageViewType::e3D, null_image_3d.handle); - set_view(ImageViewType::e1DArray, null_image_1d_array.handle); - set_view(ImageViewType::e2DArray, null_image_view_2d_array.handle); - set_view(ImageViewType::CubeArray, null_image_cube_array.handle); - set_view(ImageViewType::Rect, null_image_rect.handle); + set_view(Shader::TextureType::Color1D, null_image_view_1d.handle); + set_view(Shader::TextureType::Color2D, null_image_view_2d.handle); + set_view(Shader::TextureType::ColorCube, null_image_view_cube.handle); + set_view(Shader::TextureType::Color3D, null_image_3d.handle); + set_view(Shader::TextureType::ColorArray1D, null_image_1d_array.handle); + set_view(Shader::TextureType::ColorArray2D, null_image_view_2d_array.handle); + set_view(Shader::TextureType::ColorArrayCube, null_image_cube_array.handle); } TextureCacheRuntime::~TextureCacheRuntime() = default; @@ -710,7 +618,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, gl_format = GL_RGBA; gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; } else { - const auto& tuple = GetFormatTuple(info.format); + const auto& tuple = MaxwellToGL::GetFormatTuple(info.format); gl_internal_format = tuple.internal_format; gl_format = tuple.format; gl_type = tuple.type; @@ -750,8 +658,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth); break; case GL_TEXTURE_BUFFER: - buffer.Create(); - glNamedBufferStorage(buffer.handle, guest_size_bytes, nullptr, 0); + UNREACHABLE(); break; default: UNREACHABLE_MSG("Invalid target=0x{:x}", target); @@ -789,14 +696,6 @@ void Image::UploadMemory(const ImageBufferMap& map, } } -void Image::UploadMemory(const ImageBufferMap& map, - std::span<const VideoCommon::BufferCopy> copies) { - for (const VideoCommon::BufferCopy& copy : copies) { - glCopyNamedBufferSubData(map.buffer, buffer.handle, copy.src_offset + map.offset, - copy.dst_offset, copy.size); - } -} - void Image::DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies) { glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to 
its own API @@ -958,23 +857,30 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI if (True(image.flags & ImageFlagBits::Converted)) { internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8; } else { - internal_format = GetFormatTuple(format).internal_format; + internal_format = MaxwellToGL::GetFormatTuple(format).internal_format; + } + full_range = info.range; + flat_range = info.range; + set_object_label = device.HasDebuggingToolAttached(); + is_render_target = info.IsRenderTarget(); + original_texture = image.texture.handle; + num_samples = image.info.num_samples; + if (!is_render_target) { + swizzle[0] = info.x_source; + swizzle[1] = info.y_source; + swizzle[2] = info.z_source; + swizzle[3] = info.w_source; } - VideoCommon::SubresourceRange flatten_range = info.range; - std::array<GLuint, 2> handles; - stored_views.reserve(2); - switch (info.type) { case ImageViewType::e1DArray: - flatten_range.extent.layers = 1; + flat_range.extent.layers = 1; [[fallthrough]]; case ImageViewType::e1D: - glGenTextures(2, handles.data()); - SetupView(device, image, ImageViewType::e1D, handles[0], info, flatten_range); - SetupView(device, image, ImageViewType::e1DArray, handles[1], info, info.range); + SetupView(Shader::TextureType::Color1D); + SetupView(Shader::TextureType::ColorArray1D); break; case ImageViewType::e2DArray: - flatten_range.extent.layers = 1; + flat_range.extent.layers = 1; [[fallthrough]]; case ImageViewType::e2D: if (True(flags & VideoCommon::ImageViewFlagBits::Slice)) { @@ -984,63 +890,126 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI .base = {.level = info.range.base.level, .layer = 0}, .extent = {.levels = 1, .layers = 1}, }; - glGenTextures(1, handles.data()); - SetupView(device, image, ImageViewType::e3D, handles[0], info, slice_range); - break; + full_range = slice_range; + + SetupView(Shader::TextureType::Color3D); + } else { + SetupView(Shader::TextureType::Color2D); + SetupView(Shader::TextureType::ColorArray2D); } - glGenTextures(2, handles.data()); - SetupView(device, image, ImageViewType::e2D, handles[0], info, flatten_range); - SetupView(device, image, ImageViewType::e2DArray, handles[1], info, info.range); break; case ImageViewType::e3D: - glGenTextures(1, handles.data()); - SetupView(device, image, ImageViewType::e3D, handles[0], info, info.range); + SetupView(Shader::TextureType::Color3D); break; case ImageViewType::CubeArray: - flatten_range.extent.layers = 6; + flat_range.extent.layers = 6; [[fallthrough]]; case ImageViewType::Cube: - glGenTextures(2, handles.data()); - SetupView(device, image, ImageViewType::Cube, handles[0], info, flatten_range); - SetupView(device, image, ImageViewType::CubeArray, handles[1], info, info.range); + SetupView(Shader::TextureType::ColorCube); + SetupView(Shader::TextureType::ColorArrayCube); break; case ImageViewType::Rect: - glGenTextures(1, handles.data()); - SetupView(device, image, ImageViewType::Rect, handles[0], info, info.range); + UNIMPLEMENTED(); break; case ImageViewType::Buffer: - glCreateTextures(GL_TEXTURE_BUFFER, 1, handles.data()); - SetupView(device, image, ImageViewType::Buffer, handles[0], info, info.range); + UNREACHABLE(); + break; + } + switch (info.type) { + case ImageViewType::e1D: + default_handle = Handle(Shader::TextureType::Color1D); + break; + case ImageViewType::e1DArray: + default_handle = Handle(Shader::TextureType::ColorArray1D); + break; + case ImageViewType::e2D: + default_handle = 
Handle(Shader::TextureType::Color2D); + break; + case ImageViewType::e2DArray: + default_handle = Handle(Shader::TextureType::ColorArray2D); + break; + case ImageViewType::e3D: + default_handle = Handle(Shader::TextureType::Color3D); + break; + case ImageViewType::Cube: + default_handle = Handle(Shader::TextureType::ColorCube); + break; + case ImageViewType::CubeArray: + default_handle = Handle(Shader::TextureType::ColorArrayCube); + break; + default: break; } - default_handle = Handle(info.type); } +ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, + const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_) + : VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_}, + buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {} + +ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, + const VideoCommon::ImageViewInfo& view_info) + : VideoCommon::ImageViewBase{info, view_info} {} + ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params) : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {} -void ImageView::SetupView(const Device& device, Image& image, ImageViewType view_type, - GLuint handle, const VideoCommon::ImageViewInfo& info, - VideoCommon::SubresourceRange view_range) { - if (info.type == ImageViewType::Buffer) { - // TODO: Take offset from buffer cache - glTextureBufferRange(handle, internal_format, image.buffer.handle, 0, - image.guest_size_bytes); - } else { - const GLuint parent = image.texture.handle; - const GLenum target = ImageTarget(view_type, image.info.num_samples); - glTextureView(handle, target, parent, internal_format, view_range.base.level, - view_range.extent.levels, view_range.base.layer, view_range.extent.layers); - if (!info.IsRenderTarget()) { - ApplySwizzle(handle, format, info.Swizzle()); - } +GLuint ImageView::StorageView(Shader::TextureType texture_type, Shader::ImageFormat image_format) { + if (image_format == Shader::ImageFormat::Typeless) { + return Handle(texture_type); + } + const bool is_signed{image_format == Shader::ImageFormat::R8_SINT || + image_format == Shader::ImageFormat::R16_SINT}; + if (!storage_views) { + storage_views = std::make_unique<StorageViews>(); + } + auto& type_views{is_signed ? 
storage_views->signeds : storage_views->unsigneds}; + GLuint& view{type_views[static_cast<size_t>(texture_type)]}; + if (view == 0) { + view = MakeView(texture_type, ShaderFormat(image_format)); + } + return view; +} + +void ImageView::SetupView(Shader::TextureType view_type) { + views[static_cast<size_t>(view_type)] = MakeView(view_type, internal_format); +} + +GLuint ImageView::MakeView(Shader::TextureType view_type, GLenum view_format) { + VideoCommon::SubresourceRange view_range; + switch (view_type) { + case Shader::TextureType::Color1D: + case Shader::TextureType::Color2D: + case Shader::TextureType::ColorCube: + view_range = flat_range; + break; + case Shader::TextureType::ColorArray1D: + case Shader::TextureType::ColorArray2D: + case Shader::TextureType::Color3D: + case Shader::TextureType::ColorArrayCube: + view_range = full_range; + break; + default: + UNREACHABLE(); } - if (device.HasDebuggingToolAttached()) { - const std::string name = VideoCommon::Name(*this, view_type); - glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data()); + OGLTextureView& view = stored_views.emplace_back(); + view.Create(); + + const GLenum target = ImageTarget(view_type, num_samples); + glTextureView(view.handle, target, original_texture, view_format, view_range.base.level, + view_range.extent.levels, view_range.base.layer, view_range.extent.layers); + if (!is_render_target) { + std::array<SwizzleSource, 4> casted_swizzle; + std::ranges::transform(swizzle, casted_swizzle.begin(), [](u8 component_swizzle) { + return static_cast<SwizzleSource>(component_swizzle); + }); + ApplySwizzle(view.handle, format, casted_swizzle); + } + if (set_object_label) { + const std::string name = VideoCommon::Name(*this); + glObjectLabel(GL_TEXTURE, view.handle, static_cast<GLsizei>(name.size()), name.data()); } - stored_views.emplace_back().handle = handle; - views[static_cast<size_t>(view_type)] = handle; + return view.handle; } Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) { diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index cf3b789e3..921072ebe 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -9,6 +9,7 @@ #include <glad/glad.h> +#include "shader_recompiler/shader_info.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/util_shaders.h" #include "video_core/texture_cache/texture_cache.h" @@ -127,13 +128,12 @@ private: OGLTexture null_image_1d_array; OGLTexture null_image_cube_array; OGLTexture null_image_3d; - OGLTexture null_image_rect; OGLTextureView null_image_view_1d; OGLTextureView null_image_view_2d; OGLTextureView null_image_view_2d_array; OGLTextureView null_image_view_cube; - std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> null_image_views; + std::array<GLuint, Shader::NUM_TEXTURE_TYPES> null_image_views{}; }; class Image : public VideoCommon::ImageBase { @@ -154,8 +154,6 @@ public: void UploadMemory(const ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies); - void UploadMemory(const ImageBufferMap& map, std::span<const VideoCommon::BufferCopy> copies); - void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies); GLuint StorageHandle() noexcept; @@ -170,7 +168,6 @@ private: void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); OGLTexture texture; - OGLBuffer buffer; 
OGLTextureView store_view; GLenum gl_internal_format = GL_NONE; GLenum gl_format = GL_NONE; @@ -182,10 +179,17 @@ class ImageView : public VideoCommon::ImageViewBase { public: explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); + explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&, + const VideoCommon::ImageViewInfo&, GPUVAddr); + explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, + const VideoCommon::ImageViewInfo& view_info); explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); - [[nodiscard]] GLuint Handle(ImageViewType query_type) const noexcept { - return views[static_cast<size_t>(query_type)]; + [[nodiscard]] GLuint StorageView(Shader::TextureType texture_type, + Shader::ImageFormat image_format); + + [[nodiscard]] GLuint Handle(Shader::TextureType handle_type) const noexcept { + return views[static_cast<size_t>(handle_type)]; } [[nodiscard]] GLuint DefaultHandle() const noexcept { @@ -196,15 +200,38 @@ public: return internal_format; } + [[nodiscard]] GPUVAddr GpuAddr() const noexcept { + return gpu_addr; + } + + [[nodiscard]] u32 BufferSize() const noexcept { + return buffer_size; + } + private: - void SetupView(const Device& device, Image& image, ImageViewType view_type, GLuint handle, - const VideoCommon::ImageViewInfo& info, - VideoCommon::SubresourceRange view_range); + struct StorageViews { + std::array<GLuint, Shader::NUM_TEXTURE_TYPES> signeds{}; + std::array<GLuint, Shader::NUM_TEXTURE_TYPES> unsigneds{}; + }; + + void SetupView(Shader::TextureType view_type); + + GLuint MakeView(Shader::TextureType view_type, GLenum view_format); - std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> views{}; + std::array<GLuint, Shader::NUM_TEXTURE_TYPES> views{}; std::vector<OGLTextureView> stored_views; - GLuint default_handle = 0; + std::unique_ptr<StorageViews> storage_views; GLenum internal_format = GL_NONE; + GLuint default_handle = 0; + GPUVAddr gpu_addr = 0; + u32 buffer_size = 0; + GLuint original_texture = 0; + int num_samples = 0; + VideoCommon::SubresourceRange flat_range; + VideoCommon::SubresourceRange full_range; + std::array<u8, 4> swizzle{}; + bool set_object_label = false; + bool is_render_target = false; }; class ImageAlloc : public VideoCommon::ImageAllocBase {}; diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index f7ad8f370..672f94bfc 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -5,12 +5,120 @@ #pragma once #include <glad/glad.h> + #include "video_core/engines/maxwell_3d.h" +#include "video_core/surface.h" namespace OpenGL::MaxwellToGL { using Maxwell = Tegra::Engines::Maxwell3D::Regs; +struct FormatTuple { + GLenum internal_format; + GLenum format = GL_NONE; + GLenum type = GL_NONE; +}; + +constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TABLE = {{ + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM + {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM + {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT + {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT + {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM + {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM + {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM + {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM + {GL_RGB10_A2UI, 
GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT + {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM + {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM + {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM + {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT + {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT + {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT + {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM + {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM + {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT + {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT + {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT + {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT + {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM + {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM + {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM + {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM + {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM + {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM + {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM + {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM + {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT + {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT + {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM + {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT + {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT + {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT + {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT + {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT + {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT + {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM + {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM + {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT + {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT + {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM + {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT + {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT + {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT + {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM + {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB + {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM + {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM + {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT + {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT + {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT + {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT + {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT + {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT + {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM + {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM + {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB + {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB + {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB + 
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB + {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB + {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB + {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB + {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB + {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB + {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB + {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB + {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT + {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT + {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM + {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, + GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT +}}; + +inline const FormatTuple& GetFormatTuple(VideoCore::Surface::PixelFormat pixel_format) { + ASSERT(static_cast<size_t>(pixel_format) < FORMAT_TABLE.size()); + return FORMAT_TABLE[static_cast<size_t>(pixel_format)]; +} + inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) { switch (attrib.type) { case Maxwell::VertexAttribute::Type::UnsignedNorm: diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index c12929de6..285e78384 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -25,6 +25,7 @@ #include "video_core/host_shaders/opengl_present_vert.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/renderer_opengl.h" #include "video_core/textures/decoders.h" @@ -139,6 +140,26 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, } AddTelemetryFields(); InitOpenGLObjects(); + + // Initialize default attributes to match hardware's disabled attributes + GLint max_attribs{}; + glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &max_attribs); + for (GLint attrib = 0; attrib < max_attribs; ++attrib) { + glVertexAttrib4f(attrib, 0.0f, 0.0f, 0.0f, 1.0f); + } + // Enable seamless cubemaps when per texture parameters are not available + if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) { + glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS); + } + // Enable unified vertex attributes and query vertex buffer address when the driver supports it + if (device.HasVertexBufferUnifiedMemory()) { + glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); + glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV); + + glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY); + glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, + &vertex_buffer_address); + } } RendererOpenGL::~RendererOpenGL() = default; @@ -230,18 +251,8 @@ void 
RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color void RendererOpenGL::InitOpenGLObjects() { // Create shader programs - OGLShader vertex_shader; - vertex_shader.Create(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER); - - OGLShader fragment_shader; - fragment_shader.Create(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER); - - vertex_program.Create(true, false, vertex_shader.handle); - fragment_program.Create(true, false, fragment_shader.handle); - - pipeline.Create(); - glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle); - glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle); + present_vertex = CreateProgram(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER); + present_fragment = CreateProgram(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER); // Generate presentation sampler present_sampler.Create(); @@ -263,21 +274,6 @@ void RendererOpenGL::InitOpenGLObjects() { // Clear screen to black LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); - - // Enable seamless cubemaps when per texture parameters are not available - if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) { - glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS); - } - - // Enable unified vertex attributes and query vertex buffer address when the driver supports it - if (device.HasVertexBufferUnifiedMemory()) { - glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); - glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV); - - glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY); - glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, - &vertex_buffer_address); - } } void RendererOpenGL::AddTelemetryFields() { @@ -342,8 +338,9 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { // Set projection matrix const std::array ortho_matrix = MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height)); - glProgramUniformMatrix3x2fv(vertex_program.handle, ModelViewMatrixLocation, 1, GL_FALSE, - std::data(ortho_matrix)); + program_manager.BindPresentPrograms(present_vertex.handle, present_fragment.handle); + glProgramUniformMatrix3x2fv(present_vertex.handle, ModelViewMatrixLocation, 1, GL_FALSE, + ortho_matrix.data()); const auto& texcoords = screen_info.display_texcoords; auto left = texcoords.left; @@ -404,8 +401,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { state_tracker.NotifyClipControl(); state_tracker.NotifyAlphaTest(); - program_manager.BindHostPipeline(pipeline.handle); - state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE); glEnable(GL_CULL_FACE); if (screen_info.display_srgb) { @@ -453,7 +448,8 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { glClear(GL_COLOR_BUFFER_BIT); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); - program_manager.RestoreGuestPipeline(); + // TODO + // program_manager.RestoreGuestPipeline(); } void RendererOpenGL::RenderScreenshot() { diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 0b66f8332..d455f572f 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -12,7 +12,6 @@ #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_shader_manager.h" 
#include "video_core/renderer_opengl/gl_state_tracker.h" namespace Core { @@ -111,9 +110,8 @@ private: // OpenGL object IDs OGLSampler present_sampler; OGLBuffer vertex_buffer; - OGLProgram vertex_program; - OGLProgram fragment_program; - OGLPipeline pipeline; + OGLProgram present_vertex; + OGLProgram present_fragment; OGLFramebuffer screenshot_framebuffer; // GPU address of the vertex buffer diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 8fb5be393..37a4d1d9d 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -16,8 +16,8 @@ #include "video_core/host_shaders/opengl_copy_bc4_comp.h" #include "video_core/host_shaders/opengl_copy_bgra_comp.h" #include "video_core/host_shaders/pitch_unswizzle_comp.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/util_shaders.h" #include "video_core/texture_cache/accelerated_swizzle.h" @@ -41,21 +41,14 @@ using VideoCommon::Accelerated::MakeBlockLinearSwizzle3DParams; using VideoCore::Surface::BytesPerBlock; namespace { - OGLProgram MakeProgram(std::string_view source) { - OGLShader shader; - shader.Create(source, GL_COMPUTE_SHADER); - - OGLProgram program; - program.Create(true, false, shader.handle); - return program; + return CreateProgram(source, GL_COMPUTE_SHADER); } size_t NumPixelsInCopy(const VideoCommon::ImageCopy& copy) { return static_cast<size_t>(copy.extent.width * copy.extent.height * copy.src_subresource.num_layers); } - } // Anonymous namespace UtilShaders::UtilShaders(ProgramManager& program_manager_) @@ -86,7 +79,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, .width = VideoCore::Surface::DefaultBlockWidth(image.info.format), .height = VideoCore::Surface::DefaultBlockHeight(image.info.format), }; - program_manager.BindHostCompute(astc_decoder_program.handle); + program_manager.BindComputeProgram(astc_decoder_program.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle); @@ -134,7 +127,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, static constexpr GLuint BINDING_INPUT_BUFFER = 1; static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; - program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle); + program_manager.BindComputeProgram(block_linear_unswizzle_2d_program.handle); glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); @@ -173,7 +166,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); - program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle); + program_manager.BindComputeProgram(block_linear_unswizzle_3d_program.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); @@ -222,7 +215,7 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, 
UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block), "Non-power of two images are not implemented"); - program_manager.BindHostCompute(pitch_unswizzle_program.handle); + program_manager.BindComputeProgram(pitch_unswizzle_program.handle); glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); glUniform2ui(LOC_ORIGIN, 0, 0); glUniform2i(LOC_DESTINATION, 0, 0); @@ -250,7 +243,7 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im static constexpr GLuint LOC_SRC_OFFSET = 0; static constexpr GLuint LOC_DST_OFFSET = 1; - program_manager.BindHostCompute(copy_bc4_program.handle); + program_manager.BindComputeProgram(copy_bc4_program.handle); for (const ImageCopy& copy : copies) { ASSERT(copy.src_subresource.base_layer == 0); @@ -286,7 +279,7 @@ void UtilShaders::CopyBGR(Image& dst_image, Image& src_image, break; case 4: { // BGRA8 copy - program_manager.BindHostCompute(copy_bgra_program.handle); + program_manager.BindComputeProgram(copy_bgra_program.handle); constexpr GLenum FORMAT = GL_RGBA8; for (const ImageCopy& copy : copies) { ASSERT(copy.src_offset == zero_offset); diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index b7f5b8bc2..6c1b2f063 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -49,6 +49,16 @@ constexpr VkDescriptorSetLayoutCreateInfo ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREA .bindingCount = 1, .pBindings = &TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<0>, }; +template <u32 num_textures> +inline constexpr DescriptorBankInfo TEXTURE_DESCRIPTOR_BANK_INFO{ + .uniform_buffers = 0, + .storage_buffers = 0, + .texture_buffers = 0, + .image_buffers = 0, + .textures = num_textures, + .images = 0, + .score = 2, +}; constexpr VkDescriptorSetLayoutCreateInfo TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO{ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, .pNext = nullptr, @@ -323,18 +333,19 @@ void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Regi cmdbuf.SetScissor(0, scissor); cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants); } - } // Anonymous namespace BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_, - StateTracker& state_tracker_, VKDescriptorPool& descriptor_pool) + StateTracker& state_tracker_, DescriptorPool& descriptor_pool) : device{device_}, scheduler{scheduler_}, state_tracker{state_tracker_}, one_texture_set_layout(device.GetLogical().CreateDescriptorSetLayout( ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)), two_textures_set_layout(device.GetLogical().CreateDescriptorSetLayout( TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)), - one_texture_descriptor_allocator(descriptor_pool, *one_texture_set_layout), - two_textures_descriptor_allocator(descriptor_pool, *two_textures_set_layout), + one_texture_descriptor_allocator{ + descriptor_pool.Allocator(*one_texture_set_layout, TEXTURE_DESCRIPTOR_BANK_INFO<1>)}, + two_textures_descriptor_allocator{ + descriptor_pool.Allocator(*two_textures_set_layout, TEXTURE_DESCRIPTOR_BANK_INFO<2>)}, one_texture_pipeline_layout(device.GetLogical().CreatePipelineLayout( PipelineLayoutCreateInfo(one_texture_set_layout.address()))), two_textures_pipeline_layout(device.GetLogical().CreatePipelineLayout( @@ -362,14 +373,14 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageV .operation = operation, }; const VkPipelineLayout layout = *one_texture_pipeline_layout; - 
const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D); + const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D); const VkSampler sampler = is_linear ? *linear_sampler : *nearest_sampler; const VkPipeline pipeline = FindOrEmplacePipeline(key); - const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); scheduler.RequestRenderpass(dst_framebuffer); - scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_view, descriptor_set, - &device = device](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, dst_region, src_region, pipeline, layout, sampler, + src_view](vk::CommandBuffer cmdbuf) { // TODO: Barriers + const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view); cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, @@ -391,12 +402,11 @@ void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer, const VkPipelineLayout layout = *two_textures_pipeline_layout; const VkSampler sampler = *nearest_sampler; const VkPipeline pipeline = BlitDepthStencilPipeline(dst_framebuffer->RenderPass()); - const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit(); scheduler.RequestRenderpass(dst_framebuffer); scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_depth_view, - src_stencil_view, descriptor_set, - &device = device](vk::CommandBuffer cmdbuf) { + src_stencil_view, this](vk::CommandBuffer cmdbuf) { // TODO: Barriers + const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit(); UpdateTwoTexturesDescriptorSet(device, descriptor_set, sampler, src_depth_view, src_stencil_view); cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); @@ -416,7 +426,6 @@ void BlitImageHelper::ConvertD32ToR32(const Framebuffer* dst_framebuffer, void BlitImageHelper::ConvertR32ToD32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view) { - ConvertColorToDepthPipeline(convert_r32_to_d32_pipeline, dst_framebuffer->RenderPass()); Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view); } @@ -436,16 +445,14 @@ void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer, void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, const ImageView& src_image_view) { const VkPipelineLayout layout = *one_texture_pipeline_layout; - const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D); + const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D); const VkSampler sampler = *nearest_sampler; - const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); const VkExtent2D extent{ .width = src_image_view.size.width, .height = src_image_view.size.height, }; scheduler.RequestRenderpass(dst_framebuffer); - scheduler.Record([pipeline, layout, sampler, src_view, descriptor_set, extent, - &device = device](vk::CommandBuffer cmdbuf) { + scheduler.Record([pipeline, layout, sampler, src_view, extent, this](vk::CommandBuffer cmdbuf) { const VkOffset2D offset{ .x = 0, .y = 0, @@ -466,6 +473,7 @@ void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_frameb .tex_scale = {viewport.width, viewport.height}, .tex_offset = {0.0f, 0.0f}, }; + const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); 
UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view); // TODO: Barriers diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index 0d81a06ed..33ee095c1 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -31,7 +31,7 @@ struct BlitImagePipelineKey { class BlitImageHelper { public: explicit BlitImageHelper(const Device& device, VKScheduler& scheduler, - StateTracker& state_tracker, VKDescriptorPool& descriptor_pool); + StateTracker& state_tracker, DescriptorPool& descriptor_pool); ~BlitImageHelper(); void BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 362278f01..d70153df3 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -15,9 +15,7 @@ #include "video_core/renderer_vulkan/vk_state_tracker.h" namespace Vulkan { - namespace { - constexpr size_t POINT = 0; constexpr size_t LINE = 1; constexpr size_t POLYGON = 2; @@ -39,10 +37,20 @@ constexpr std::array POLYGON_OFFSET_ENABLE_LUT = { POLYGON, // Patches }; +void RefreshXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs) { + std::ranges::transform(regs.tfb_layouts, state.layouts.begin(), [](const auto& layout) { + return VideoCommon::TransformFeedbackState::Layout{ + .stream = layout.stream, + .varying_count = layout.varying_count, + .stride = layout.stride, + }; + }); + state.varyings = regs.tfb_varying_locs; +} } // Anonymous namespace void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, - bool has_extended_dynamic_state) { + bool has_extended_dynamic_state, bool has_dynamic_vertex_input) { const Maxwell& regs = maxwell3d.regs; const std::array enabled_lut{ regs.polygon_offset_point_enable, @@ -52,6 +60,9 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, const u32 topology_index = static_cast<u32>(regs.draw.topology.Value()); raw1 = 0; + extended_dynamic_state.Assign(has_extended_dynamic_state ? 1 : 0); + dynamic_vertex_input.Assign(has_dynamic_vertex_input ? 1 : 0); + xfb_enabled.Assign(regs.tfb_enabled != 0); primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0); depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 1 : 0); depth_clamp_disabled.Assign(regs.view_volume_clip_control.depth_clamp_disabled.Value()); @@ -63,37 +74,66 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, tessellation_clockwise.Assign(regs.tess_mode.cw.Value()); logic_op_enable.Assign(regs.logic_op.enable != 0 ? 1 : 0); logic_op.Assign(PackLogicOp(regs.logic_op.operation)); - rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0); topology.Assign(regs.draw.topology); msaa_mode.Assign(regs.multisample_mode); raw2 = 0; + rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0); const auto test_func = regs.alpha_test_enabled != 0 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always; alpha_test_func.Assign(PackComparisonOp(test_func)); early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0); - + depth_enabled.Assign(regs.zeta_enable != 0 ? 1 : 0); + depth_format.Assign(static_cast<u32>(regs.zeta.format)); + y_negate.Assign(regs.screen_y_control.y_negate != 0 ? 1 : 0); + provoking_vertex_last.Assign(regs.provoking_vertex_last != 0 ? 
1 : 0); + conservative_raster_enable.Assign(regs.conservative_raster_enable != 0 ? 1 : 0); + smooth_lines.Assign(regs.line_smooth_enable != 0 ? 1 : 0); + + for (size_t i = 0; i < regs.rt.size(); ++i) { + color_formats[i] = static_cast<u8>(regs.rt[i].format); + } alpha_test_ref = Common::BitCast<u32>(regs.alpha_test_ref); point_size = Common::BitCast<u32>(regs.point_size); - if (maxwell3d.dirty.flags[Dirty::InstanceDivisors]) { - maxwell3d.dirty.flags[Dirty::InstanceDivisors] = false; - for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { - const bool is_enabled = regs.instanced_arrays.IsInstancingEnabled(index); - binding_divisors[index] = is_enabled ? regs.vertex_array[index].divisor : 0; - } - } - if (maxwell3d.dirty.flags[Dirty::VertexAttributes]) { - maxwell3d.dirty.flags[Dirty::VertexAttributes] = false; - for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { - const auto& input = regs.vertex_attrib_format[index]; - auto& attribute = attributes[index]; - attribute.raw = 0; - attribute.enabled.Assign(input.IsConstant() ? 0 : 1); - attribute.buffer.Assign(input.buffer); - attribute.offset.Assign(input.offset); - attribute.type.Assign(static_cast<u32>(input.type.Value())); - attribute.size.Assign(static_cast<u32>(input.size.Value())); + if (maxwell3d.dirty.flags[Dirty::VertexInput]) { + if (has_dynamic_vertex_input) { + // Dirty flag will be reset by the command buffer update + static constexpr std::array LUT{ + 0u, // Invalid + 1u, // SignedNorm + 1u, // UnsignedNorm + 2u, // SignedInt + 3u, // UnsignedInt + 1u, // UnsignedScaled + 1u, // SignedScaled + 1u, // Float + }; + const auto& attrs = regs.vertex_attrib_format; + attribute_types = 0; + for (size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { + const u32 mask = attrs[i].constant != 0 ? 0 : 3; + const u32 type = LUT[static_cast<size_t>(attrs[i].type.Value())]; + attribute_types |= static_cast<u64>(type & mask) << (i * 2); + } + } else { + maxwell3d.dirty.flags[Dirty::VertexInput] = false; + enabled_divisors = 0; + for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { + const bool is_enabled = regs.instanced_arrays.IsInstancingEnabled(index); + binding_divisors[index] = is_enabled ? regs.vertex_array[index].divisor : 0; + enabled_divisors |= (is_enabled ? u64{1} : 0) << index; + } + for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { + const auto& input = regs.vertex_attrib_format[index]; + auto& attribute = attributes[index]; + attribute.raw = 0; + attribute.enabled.Assign(input.constant ? 
0 : 1); + attribute.buffer.Assign(input.buffer); + attribute.offset.Assign(input.offset); + attribute.type.Assign(static_cast<u32>(input.type.Value())); + attribute.size.Assign(static_cast<u32>(input.size.Value())); + } } } if (maxwell3d.dirty.flags[Dirty::Blending]) { @@ -109,10 +149,12 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, return static_cast<u16>(viewport.swizzle.raw); }); } - if (!has_extended_dynamic_state) { - no_extended_dynamic_state.Assign(1); + if (!extended_dynamic_state) { dynamic_state.Refresh(regs); } + if (xfb_enabled) { + RefreshXfbState(xfb_state, regs); + } } void FixedPipelineState::BlendingAttachment::Refresh(const Maxwell& regs, size_t index) { diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index a0eb83a68..c9be37935 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -12,6 +12,7 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/surface.h" +#include "video_core/transform_feedback.h" namespace Vulkan { @@ -60,7 +61,7 @@ struct FixedPipelineState { void Refresh(const Maxwell& regs, size_t index); - constexpr std::array<bool, 4> Mask() const noexcept { + std::array<bool, 4> Mask() const noexcept { return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0}; } @@ -97,11 +98,11 @@ struct FixedPipelineState { BitField<20, 3, u32> type; BitField<23, 6, u32> size; - constexpr Maxwell::VertexAttribute::Type Type() const noexcept { + Maxwell::VertexAttribute::Type Type() const noexcept { return static_cast<Maxwell::VertexAttribute::Type>(type.Value()); } - constexpr Maxwell::VertexAttribute::Size Size() const noexcept { + Maxwell::VertexAttribute::Size Size() const noexcept { return static_cast<Maxwell::VertexAttribute::Size>(size.Value()); } }; @@ -167,37 +168,53 @@ struct FixedPipelineState { union { u32 raw1; - BitField<0, 1, u32> no_extended_dynamic_state; - BitField<2, 1, u32> primitive_restart_enable; - BitField<3, 1, u32> depth_bias_enable; - BitField<4, 1, u32> depth_clamp_disabled; - BitField<5, 1, u32> ndc_minus_one_to_one; - BitField<6, 2, u32> polygon_mode; - BitField<8, 5, u32> patch_control_points_minus_one; - BitField<13, 2, u32> tessellation_primitive; - BitField<15, 2, u32> tessellation_spacing; - BitField<17, 1, u32> tessellation_clockwise; - BitField<18, 1, u32> logic_op_enable; - BitField<19, 4, u32> logic_op; - BitField<23, 1, u32> rasterize_enable; + BitField<0, 1, u32> extended_dynamic_state; + BitField<1, 1, u32> dynamic_vertex_input; + BitField<2, 1, u32> xfb_enabled; + BitField<3, 1, u32> primitive_restart_enable; + BitField<4, 1, u32> depth_bias_enable; + BitField<5, 1, u32> depth_clamp_disabled; + BitField<6, 1, u32> ndc_minus_one_to_one; + BitField<7, 2, u32> polygon_mode; + BitField<9, 5, u32> patch_control_points_minus_one; + BitField<14, 2, u32> tessellation_primitive; + BitField<16, 2, u32> tessellation_spacing; + BitField<18, 1, u32> tessellation_clockwise; + BitField<19, 1, u32> logic_op_enable; + BitField<20, 4, u32> logic_op; BitField<24, 4, Maxwell::PrimitiveTopology> topology; BitField<28, 4, Tegra::Texture::MsaaMode> msaa_mode; }; union { u32 raw2; - BitField<0, 3, u32> alpha_test_func; - BitField<3, 1, u32> early_z; + BitField<0, 1, u32> rasterize_enable; + BitField<1, 3, u32> alpha_test_func; + BitField<4, 1, u32> early_z; + BitField<5, 1, u32> depth_enabled; + BitField<6, 5, u32> depth_format; + BitField<11, 1, u32> y_negate; + 
BitField<12, 1, u32> provoking_vertex_last; + BitField<13, 1, u32> conservative_raster_enable; + BitField<14, 1, u32> smooth_lines; }; + std::array<u8, Maxwell::NumRenderTargets> color_formats; u32 alpha_test_ref; u32 point_size; - std::array<u32, Maxwell::NumVertexArrays> binding_divisors; - std::array<VertexAttribute, Maxwell::NumVertexAttributes> attributes; std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments; std::array<u16, Maxwell::NumViewports> viewport_swizzles; + union { + u64 attribute_types; // Used with VK_EXT_vertex_input_dynamic_state + u64 enabled_divisors; + }; + std::array<VertexAttribute, Maxwell::NumVertexAttributes> attributes; + std::array<u32, Maxwell::NumVertexArrays> binding_divisors; + DynamicState dynamic_state; + VideoCommon::TransformFeedbackState xfb_state; - void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state); + void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state, + bool has_dynamic_vertex_input); size_t Hash() const noexcept; @@ -208,8 +225,24 @@ struct FixedPipelineState { } size_t Size() const noexcept { - const size_t total_size = sizeof *this; - return total_size - (no_extended_dynamic_state != 0 ? 0 : sizeof(DynamicState)); + if (xfb_enabled) { + // When transform feedback is enabled, use the whole struct + return sizeof(*this); + } + if (dynamic_vertex_input) { + // Exclude dynamic state and attributes + return offsetof(FixedPipelineState, attributes); + } + if (extended_dynamic_state) { + // Exclude dynamic state + return offsetof(FixedPipelineState, dynamic_state); + } + // Default + return offsetof(FixedPipelineState, xfb_state); + } + + u32 DynamicAttributeType(size_t index) const noexcept { + return (attribute_types >> (index * 2)) & 0b11; } }; static_assert(std::has_unique_object_representations_v<FixedPipelineState>); diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index f088447e9..68a23b602 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -157,7 +157,7 @@ struct FormatTuple { {VK_FORMAT_R32_SFLOAT, Attachable | Storage}, // R32_FLOAT {VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16_FLOAT {VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16_UNORM - {VK_FORMAT_UNDEFINED}, // R16_SNORM + {VK_FORMAT_R16_SNORM, Attachable | Storage}, // R16_SNORM {VK_FORMAT_R16_UINT, Attachable | Storage}, // R16_UINT {VK_FORMAT_UNDEFINED}, // R16_SINT {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // R16G16_UNORM @@ -266,19 +266,20 @@ FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with return {device.GetSupportedFormat(tuple.format, usage, format_type), attachable, storage}; } -VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) { +VkShaderStageFlagBits ShaderStage(Shader::Stage stage) { switch (stage) { - case Tegra::Engines::ShaderType::Vertex: + case Shader::Stage::VertexA: + case Shader::Stage::VertexB: return VK_SHADER_STAGE_VERTEX_BIT; - case Tegra::Engines::ShaderType::TesselationControl: + case Shader::Stage::TessellationControl: return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; - case Tegra::Engines::ShaderType::TesselationEval: + case Shader::Stage::TessellationEval: return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT; - case Tegra::Engines::ShaderType::Geometry: + case Shader::Stage::Geometry: return VK_SHADER_STAGE_GEOMETRY_BIT; - case Tegra::Engines::ShaderType::Fragment: + case 
Shader::Stage::Fragment: return VK_SHADER_STAGE_FRAGMENT_BIT; - case Tegra::Engines::ShaderType::Compute: + case Shader::Stage::Compute: return VK_SHADER_STAGE_COMPUTE_BIT; } UNIMPLEMENTED_MSG("Unimplemented shader stage={}", stage); @@ -685,6 +686,19 @@ VkCullModeFlagBits CullFace(Maxwell::CullFace cull_face) { return {}; } +VkPolygonMode PolygonMode(Maxwell::PolygonMode polygon_mode) { + switch (polygon_mode) { + case Maxwell::PolygonMode::Point: + return VK_POLYGON_MODE_POINT; + case Maxwell::PolygonMode::Line: + return VK_POLYGON_MODE_LINE; + case Maxwell::PolygonMode::Fill: + return VK_POLYGON_MODE_FILL; + } + UNIMPLEMENTED_MSG("Unimplemented polygon mode={}", polygon_mode); + return {}; +} + VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) { switch (swizzle) { case Tegra::Texture::SwizzleSource::Zero: @@ -741,4 +755,28 @@ VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reducti return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT; } +VkSampleCountFlagBits MsaaMode(Tegra::Texture::MsaaMode msaa_mode) { + switch (msaa_mode) { + case Tegra::Texture::MsaaMode::Msaa1x1: + return VK_SAMPLE_COUNT_1_BIT; + case Tegra::Texture::MsaaMode::Msaa2x1: + case Tegra::Texture::MsaaMode::Msaa2x1_D3D: + return VK_SAMPLE_COUNT_2_BIT; + case Tegra::Texture::MsaaMode::Msaa2x2: + case Tegra::Texture::MsaaMode::Msaa2x2_VC4: + case Tegra::Texture::MsaaMode::Msaa2x2_VC12: + return VK_SAMPLE_COUNT_4_BIT; + case Tegra::Texture::MsaaMode::Msaa4x2: + case Tegra::Texture::MsaaMode::Msaa4x2_D3D: + case Tegra::Texture::MsaaMode::Msaa4x2_VC8: + case Tegra::Texture::MsaaMode::Msaa4x2_VC24: + return VK_SAMPLE_COUNT_8_BIT; + case Tegra::Texture::MsaaMode::Msaa4x4: + return VK_SAMPLE_COUNT_16_BIT; + default: + UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast<int>(msaa_mode)); + return VK_SAMPLE_COUNT_1_BIT; + } +} + } // namespace Vulkan::MaxwellToVK diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h index e3e06ba38..8a9616039 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.h +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h @@ -5,6 +5,7 @@ #pragma once #include "common/common_types.h" +#include "shader_recompiler/stage.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/surface.h" #include "video_core/textures/texture.h" @@ -45,7 +46,7 @@ struct FormatInfo { [[nodiscard]] FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with_srgb, PixelFormat pixel_format); -VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage); +VkShaderStageFlagBits ShaderStage(Shader::Stage stage); VkPrimitiveTopology PrimitiveTopology(const Device& device, Maxwell::PrimitiveTopology topology); @@ -65,10 +66,14 @@ VkFrontFace FrontFace(Maxwell::FrontFace front_face); VkCullModeFlagBits CullFace(Maxwell::CullFace cull_face); +VkPolygonMode PolygonMode(Maxwell::PolygonMode polygon_mode); + VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle); VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle); VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction); +VkSampleCountFlagBits MsaaMode(Tegra::Texture::MsaaMode msaa_mode); + } // namespace Vulkan::MaxwellToVK diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h new file mode 100644 index 000000000..4847db6b6 --- /dev/null +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -0,0 +1,154 @@ +// 
Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <cstddef> + +#include <boost/container/small_vector.hpp> + +#include "common/assert.h" +#include "common/common_types.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" +#include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/texture_cache/texture_cache.h" +#include "video_core/texture_cache/types.h" +#include "video_core/textures/texture.h" +#include "video_core/vulkan_common/vulkan_device.h" + +namespace Vulkan { + +class DescriptorLayoutBuilder { +public: + DescriptorLayoutBuilder(const Device& device_) : device{&device_} {} + + bool CanUsePushDescriptor() const noexcept { + return device->IsKhrPushDescriptorSupported() && + num_descriptors <= device->MaxPushDescriptors(); + } + + vk::DescriptorSetLayout CreateDescriptorSetLayout(bool use_push_descriptor) const { + if (bindings.empty()) { + return nullptr; + } + const VkDescriptorSetLayoutCreateFlags flags = + use_push_descriptor ? VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR : 0; + return device->GetLogical().CreateDescriptorSetLayout({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = flags, + .bindingCount = static_cast<u32>(bindings.size()), + .pBindings = bindings.data(), + }); + } + + vk::DescriptorUpdateTemplateKHR CreateTemplate(VkDescriptorSetLayout descriptor_set_layout, + VkPipelineLayout pipeline_layout, + bool use_push_descriptor) const { + if (entries.empty()) { + return nullptr; + } + const VkDescriptorUpdateTemplateType type = + use_push_descriptor ? VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR + : VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR; + return device->GetLogical().CreateDescriptorUpdateTemplateKHR({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, + .pNext = nullptr, + .flags = 0, + .descriptorUpdateEntryCount = static_cast<u32>(entries.size()), + .pDescriptorUpdateEntries = entries.data(), + .templateType = type, + .descriptorSetLayout = descriptor_set_layout, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .pipelineLayout = pipeline_layout, + .set = 0, + }); + } + + vk::PipelineLayout CreatePipelineLayout(VkDescriptorSetLayout descriptor_set_layout) const { + return device->GetLogical().CreatePipelineLayout({ + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .setLayoutCount = descriptor_set_layout ? 1U : 0U, + .pSetLayouts = bindings.empty() ? 
nullptr : &descriptor_set_layout, + .pushConstantRangeCount = 0, + .pPushConstantRanges = nullptr, + }); + } + + void Add(const Shader::Info& info, VkShaderStageFlags stage) { + Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage, info.constant_buffer_descriptors); + Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage, info.storage_buffers_descriptors); + Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage, info.texture_buffer_descriptors); + Add(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, stage, info.image_buffer_descriptors); + Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage, info.texture_descriptors); + Add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, stage, info.image_descriptors); + } + +private: + template <typename Descriptors> + void Add(VkDescriptorType type, VkShaderStageFlags stage, const Descriptors& descriptors) { + const size_t num{descriptors.size()}; + for (size_t i = 0; i < num; ++i) { + bindings.push_back({ + .binding = binding, + .descriptorType = type, + .descriptorCount = descriptors[i].count, + .stageFlags = stage, + .pImmutableSamplers = nullptr, + }); + entries.push_back({ + .dstBinding = binding, + .dstArrayElement = 0, + .descriptorCount = descriptors[i].count, + .descriptorType = type, + .offset = offset, + .stride = sizeof(DescriptorUpdateEntry), + }); + ++binding; + num_descriptors += descriptors[i].count; + offset += sizeof(DescriptorUpdateEntry); + } + } + + const Device* device{}; + boost::container::small_vector<VkDescriptorSetLayoutBinding, 32> bindings; + boost::container::small_vector<VkDescriptorUpdateTemplateEntryKHR, 32> entries; + u32 binding{}; + u32 num_descriptors{}; + size_t offset{}; +}; + +inline void PushImageDescriptors(const Shader::Info& info, const VkSampler*& samplers, + const ImageId*& image_view_ids, TextureCache& texture_cache, + VKUpdateDescriptorQueue& update_descriptor_queue) { + for (const auto& desc : info.texture_buffer_descriptors) { + image_view_ids += desc.count; + } + for (const auto& desc : info.image_buffer_descriptors) { + image_view_ids += desc.count; + } + for (const auto& desc : info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const VkSampler sampler{*(samplers++)}; + ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; + const VkImageView vk_image_view{image_view.Handle(desc.type)}; + update_descriptor_queue.AddSampledImage(vk_image_view, sampler); + } + } + for (const auto& desc : info.image_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; + if (desc.is_written) { + texture_cache.MarkModification(image_view.image_id); + } + const VkImageView vk_image_view{image_view.StorageView(desc.type, desc.format)}; + update_descriptor_queue.AddImage(vk_image_view); + } + } +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index bec3a81d9..a8d04dc61 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -130,35 +130,45 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { if (!framebuffer) { return; } - const auto& layout = render_window.GetFramebufferLayout(); - if (layout.width > 0 && layout.height > 0 && render_window.IsShown()) { - const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; - const bool use_accelerated = - rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, 
framebuffer->stride); - const bool is_srgb = use_accelerated && screen_info.is_srgb; - if (swapchain.HasFramebufferChanged(layout) || swapchain.GetSrgbState() != is_srgb) { - swapchain.Create(layout.width, layout.height, is_srgb); - blit_screen.Recreate(); - } - - scheduler.WaitWorker(); - - while (!swapchain.AcquireNextImage()) { - swapchain.Create(layout.width, layout.height, is_srgb); - blit_screen.Recreate(); + SCOPE_EXIT({ render_window.OnFrameDisplayed(); }); + if (!render_window.IsShown()) { + return; + } + const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; + const bool use_accelerated = + rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); + const bool is_srgb = use_accelerated && screen_info.is_srgb; + + bool has_been_recreated = false; + const auto recreate_swapchain = [&] { + if (!has_been_recreated) { + has_been_recreated = true; + scheduler.WaitWorker(); } - const VkSemaphore render_semaphore = blit_screen.Draw(*framebuffer, use_accelerated); - - scheduler.Flush(render_semaphore); - - if (swapchain.Present(render_semaphore)) { - blit_screen.Recreate(); + const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout(); + swapchain.Create(layout.width, layout.height, is_srgb); + }; + if (swapchain.IsSubOptimal() || swapchain.HasColorSpaceChanged(is_srgb)) { + recreate_swapchain(); + } + bool is_outdated; + do { + swapchain.AcquireNextImage(); + is_outdated = swapchain.IsOutDated(); + if (is_outdated) { + recreate_swapchain(); } - gpu.RendererFrameEndNotify(); - rasterizer.TickFrame(); + } while (is_outdated); + if (has_been_recreated) { + blit_screen.Recreate(); } + const VkSemaphore render_semaphore = blit_screen.Draw(*framebuffer, use_accelerated); + scheduler.Flush(render_semaphore); + scheduler.WaitWorker(); + swapchain.Present(render_semaphore); - render_window.OnFrameDisplayed(); + gpu.RendererFrameEndNotify(); + rasterizer.TickFrame(); } void RendererVulkan::Report() const { diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 363134129..516f428e7 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -184,47 +184,43 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool .depth = 1, }, }; - scheduler.Record( - [buffer = *buffer, image = *raw_images[image_index], copy](vk::CommandBuffer cmdbuf) { - const VkImageMemoryBarrier base_barrier{ - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .pNext = nullptr, - .srcAccessMask = 0, - .dstAccessMask = 0, - .oldLayout = VK_IMAGE_LAYOUT_GENERAL, - .newLayout = VK_IMAGE_LAYOUT_GENERAL, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = image, - .subresourceRange = - { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }, - }; - VkImageMemoryBarrier read_barrier = base_barrier; - read_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; - read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; - - VkImageMemoryBarrier write_barrier = base_barrier; - write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; - - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, - 0, read_barrier); - cmdbuf.CopyBufferToImage(buffer, image, 
VK_IMAGE_LAYOUT_GENERAL, copy); - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier); - }); + scheduler.Record([this, copy, image_index](vk::CommandBuffer cmdbuf) { + const VkImage image = *raw_images[image_index]; + const VkImageMemoryBarrier base_barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = 0, + .dstAccessMask = 0, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange{ + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }; + VkImageMemoryBarrier read_barrier = base_barrier; + read_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; + read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + + VkImageMemoryBarrier write_barrier = base_barrier; + write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, + read_barrier); + cmdbuf.CopyBufferToImage(*buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier); + }); } - scheduler.Record([renderpass = *renderpass, framebuffer = *framebuffers[image_index], - descriptor_set = descriptor_sets[image_index], buffer = *buffer, - size = swapchain.GetSize(), pipeline = *pipeline, - layout = *pipeline_layout](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, image_index, size = swapchain.GetSize()](vk::CommandBuffer cmdbuf) { const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f; const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f; const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f; @@ -234,8 +230,8 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool const VkRenderPassBeginInfo renderpass_bi{ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, .pNext = nullptr, - .renderPass = renderpass, - .framebuffer = framebuffer, + .renderPass = *renderpass, + .framebuffer = *framebuffers[image_index], .renderArea = { .offset = {0, 0}, @@ -257,12 +253,13 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool .extent = size, }; cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); cmdbuf.SetViewport(0, viewport); cmdbuf.SetScissor(0, scissor); - cmdbuf.BindVertexBuffer(0, buffer, offsetof(BufferData, vertices)); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, {}); + cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices)); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0, + descriptor_sets[image_index], {}); cmdbuf.Draw(4, 1, 0, 0); cmdbuf.EndRenderPass(); }); @@ -304,8 +301,7 @@ void VKBlitScreen::CreateShaders() { void VKBlitScreen::CreateSemaphores() { semaphores.resize(image_count); - std::generate(semaphores.begin(), semaphores.end(), - [this] { return device.GetLogical().CreateSemaphore(); }); + std::ranges::generate(semaphores, [this] { return 
device.GetLogical().CreateSemaphore(); }); } void VKBlitScreen::CreateDescriptorPool() { @@ -633,8 +629,8 @@ void VKBlitScreen::CreateFramebuffers() { } void VKBlitScreen::ReleaseRawImages() { - for (std::size_t i = 0; i < raw_images.size(); ++i) { - scheduler.Wait(resource_ticks.at(i)); + for (const u64 tick : resource_ticks) { + scheduler.Wait(tick); } raw_images.clear(); raw_buffer_commits.clear(); diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 0def1e769..f4b3ee95c 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -60,38 +60,74 @@ std::array<T, 6> MakeQuadIndices(u32 quad, u32 first) { } return indices; } -} // Anonymous namespace - -Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) - : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {} -Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, - VAddr cpu_addr_, u64 size_bytes_) - : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) { - buffer = runtime.device.GetLogical().CreateBuffer(VkBufferCreateInfo{ +vk::Buffer CreateBuffer(const Device& device, u64 size) { + VkBufferUsageFlags flags = + VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; + if (device.IsExtTransformFeedbackSupported()) { + flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT; + } + return device.GetLogical().CreateBuffer({ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, .flags = 0, - .size = SizeBytes(), - .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | - VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | - VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | - VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, + .size = size, + .usage = flags, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, }); +} +} // Anonymous namespace + +Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) + : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {} + +Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, + VAddr cpu_addr_, u64 size_bytes_) + : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_), + device{&runtime.device}, buffer{CreateBuffer(*device, SizeBytes())}, + commit{runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal)} { if (runtime.device.HasDebuggingToolAttached()) { buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str()); } - commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); +} + +VkBufferView Buffer::View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format) { + if (!device) { + // Null buffer, return a null descriptor + return VK_NULL_HANDLE; + } + const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) { + return offset == view.offset && size == view.size && format == view.format; + })}; + if (it != views.end()) { + return *it->handle; + } + 
views.push_back({ + .offset = offset, + .size = size, + .format = format, + .handle = device->GetLogical().CreateBufferView({ + .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .buffer = *buffer, + .format = MaxwellToVK::SurfaceFormat(*device, FormatType::Buffer, false, format).format, + .offset = offset, + .range = size, + }), + }); + return *views.back().handle; } BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_, VKScheduler& scheduler_, StagingBufferPool& staging_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_, - VKDescriptorPool& descriptor_pool) + DescriptorPool& descriptor_pool) : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_}, staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_}, uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 3bb81d5b3..c27402ff0 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -9,13 +9,14 @@ #include "video_core/renderer_vulkan/vk_compute_pass.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/surface.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { class Device; -class VKDescriptorPool; +class DescriptorPool; class VKScheduler; class BufferCacheRuntime; @@ -26,6 +27,8 @@ public: explicit Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, VAddr cpu_addr_, u64 size_bytes_); + [[nodiscard]] VkBufferView View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format); + [[nodiscard]] VkBuffer Handle() const noexcept { return *buffer; } @@ -35,8 +38,17 @@ public: } private: + struct BufferView { + u32 offset; + u32 size; + VideoCore::Surface::PixelFormat format; + vk::BufferView handle; + }; + + const Device* device{}; vk::Buffer buffer; MemoryCommit commit; + std::vector<BufferView> views; }; class BufferCacheRuntime { @@ -49,7 +61,7 @@ public: explicit BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_manager_, VKScheduler& scheduler_, StagingBufferPool& staging_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_, - VKDescriptorPool& descriptor_pool); + DescriptorPool& descriptor_pool); void Finish(); @@ -87,6 +99,11 @@ public: BindBuffer(buffer, offset, size); } + void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size, + VideoCore::Surface::PixelFormat format) { + update_descriptor_queue.AddTexelBuffer(buffer.View(offset, size, format)); + } + private: void BindBuffer(VkBuffer buffer, u32 offset, u32 size) { update_descriptor_queue.AddBuffer(buffer, offset, size); @@ -124,6 +141,7 @@ struct BufferCacheParams { static constexpr bool NEEDS_BIND_UNIFORM_INDEX = false; static constexpr bool NEEDS_BIND_STORAGE_INDEX = false; static constexpr bool USE_MEMORY_MAPS = true; + static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false; }; using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 4181d83ee..8e426ce2c 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ 
b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -41,80 +41,92 @@ constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 2; constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 3; constexpr size_t ASTC_NUM_BINDINGS = 4; -VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { - return { - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .offset = 0, - .size = static_cast<u32>(size), - }; -} - -std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBindings() { - return {{ - { - .binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }, - { - .binding = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }, - }}; -} +template <size_t size> +inline constexpr VkPushConstantRange COMPUTE_PUSH_CONSTANT_RANGE{ + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .offset = 0, + .size = static_cast<u32>(size), +}; -std::array<VkDescriptorSetLayoutBinding, ASTC_NUM_BINDINGS> BuildASTCDescriptorSetBindings() { - return {{ - { - .binding = ASTC_BINDING_INPUT_BUFFER, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }, - { - .binding = ASTC_BINDING_ENC_BUFFER, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }, - { - .binding = ASTC_BINDING_SWIZZLE_BUFFER, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }, - { - .binding = ASTC_BINDING_OUTPUT_IMAGE, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }, - }}; -} +constexpr std::array<VkDescriptorSetLayoutBinding, 2> INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS{{ + { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }, + { + .binding = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }, +}}; + +constexpr DescriptorBankInfo INPUT_OUTPUT_BANK_INFO{ + .uniform_buffers = 0, + .storage_buffers = 2, + .texture_buffers = 0, + .image_buffers = 0, + .textures = 0, + .images = 0, + .score = 2, +}; -VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() { - return { - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 2, +constexpr std::array<VkDescriptorSetLayoutBinding, 4> ASTC_DESCRIPTOR_SET_BINDINGS{{ + { + .binding = ASTC_BINDING_INPUT_BUFFER, .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .offset = 0, - .stride = sizeof(DescriptorUpdateEntry), - }; -} + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }, + { + .binding = ASTC_BINDING_ENC_BUFFER, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }, + { + .binding = ASTC_BINDING_SWIZZLE_BUFFER, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + 
.pImmutableSamplers = nullptr, + }, + { + .binding = ASTC_BINDING_OUTPUT_IMAGE, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }, +}}; + +constexpr DescriptorBankInfo ASTC_BANK_INFO{ + .uniform_buffers = 0, + .storage_buffers = 3, + .texture_buffers = 0, + .image_buffers = 0, + .textures = 0, + .images = 1, + .score = 4, +}; -std::array<VkDescriptorUpdateTemplateEntryKHR, ASTC_NUM_BINDINGS> -BuildASTCPassDescriptorUpdateTemplateEntry() { - return {{ +constexpr VkDescriptorUpdateTemplateEntryKHR INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE{ + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 2, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .offset = 0, + .stride = sizeof(DescriptorUpdateEntry), +}; + +constexpr std::array<VkDescriptorUpdateTemplateEntryKHR, ASTC_NUM_BINDINGS> + ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY{{ { .dstBinding = ASTC_BINDING_INPUT_BUFFER, .dstArrayElement = 0, @@ -148,7 +160,6 @@ BuildASTCPassDescriptorUpdateTemplateEntry() { .stride = sizeof(DescriptorUpdateEntry), }, }}; -} struct AstcPushConstants { std::array<u32, 2> blocks_dims; @@ -159,14 +170,14 @@ struct AstcPushConstants { u32 block_height; u32 block_height_mask; }; - } // Anonymous namespace -VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool, - vk::Span<VkDescriptorSetLayoutBinding> bindings, - vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, - vk::Span<VkPushConstantRange> push_constants, - std::span<const u32> code) { +ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool, + vk::Span<VkDescriptorSetLayoutBinding> bindings, + vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, + const DescriptorBankInfo& bank_info, + vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code) + : device{device_} { descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, .pNext = nullptr, @@ -196,8 +207,7 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_ .pipelineLayout = *layout, .set = 0, }); - - descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout); + descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, bank_info); } module = device.GetLogical().CreateShaderModule({ .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, @@ -206,43 +216,34 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_ .codeSize = static_cast<u32>(code.size_bytes()), .pCode = code.data(), }); + device.SaveShader(code); pipeline = device.GetLogical().CreateComputePipeline({ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .stage = - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = VK_SHADER_STAGE_COMPUTE_BIT, - .module = *module, - .pName = "main", - .pSpecializationInfo = nullptr, - }, + .stage{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = *module, + .pName = "main", + .pSpecializationInfo = nullptr, + }, .layout = *layout, .basePipelineHandle = nullptr, .basePipelineIndex = 0, }); } -VKComputePass::~VKComputePass() = default; +ComputePass::~ComputePass() = default; -VkDescriptorSet VKComputePass::CommitDescriptorSet( - VKUpdateDescriptorQueue& 
update_descriptor_queue) { - if (!descriptor_template) { - return nullptr; - } - const VkDescriptorSet set = descriptor_allocator->Commit(); - update_descriptor_queue.Send(*descriptor_template, set); - return set; -} - -Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_, +Uint8Pass::Uint8Pass(const Device& device_, VKScheduler& scheduler_, + DescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_) - : VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(), - BuildInputOutputDescriptorUpdateTemplate(), {}, VULKAN_UINT8_COMP_SPV), + : ComputePass(device_, descriptor_pool, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS, + INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, {}, + VULKAN_UINT8_COMP_SPV), scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, update_descriptor_queue{update_descriptor_queue_} {} @@ -256,11 +257,11 @@ std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer update_descriptor_queue.Acquire(); update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices); update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size); - const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); + const void* const descriptor_data{update_descriptor_queue.UpdateData()}; + const VkBuffer buffer{staging.buffer}; scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set, - num_vertices](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, buffer, descriptor_data, num_vertices](vk::CommandBuffer cmdbuf) { static constexpr u32 DISPATCH_SIZE = 1024; static constexpr VkMemoryBarrier WRITE_BARRIER{ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, @@ -268,8 +269,10 @@ std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, }; - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); + const VkDescriptorSet set = descriptor_allocator.Commit(); + device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {}); cmdbuf.Dispatch(Common::DivCeil(num_vertices, DISPATCH_SIZE), 1, 1); cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER); @@ -278,12 +281,12 @@ std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer } QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, + DescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_) - : VKComputePass(device_, descriptor_pool_, BuildInputOutputDescriptorSetBindings(), - BuildInputOutputDescriptorUpdateTemplate(), - BuildComputePushConstantRange(sizeof(u32) * 2), VULKAN_QUAD_INDEXED_COMP_SPV), + : ComputePass(device_, descriptor_pool_, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS, + INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, + COMPUTE_PUSH_CONSTANT_RANGE<sizeof(u32) * 2>, VULKAN_QUAD_INDEXED_COMP_SPV), 
scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, update_descriptor_queue{update_descriptor_queue_} {} @@ -313,11 +316,11 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble( update_descriptor_queue.Acquire(); update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size); update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size); - const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); + const void* const descriptor_data{update_descriptor_queue.UpdateData()}; scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set, - num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, buffer = staging.buffer, descriptor_data, num_tri_vertices, base_vertex, + index_shift](vk::CommandBuffer cmdbuf) { static constexpr u32 DISPATCH_SIZE = 1024; static constexpr VkMemoryBarrier WRITE_BARRIER{ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, @@ -325,10 +328,12 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble( .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, }; - const std::array push_constants = {base_vertex, index_shift}; - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); - cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), + const std::array push_constants{base_vertex, index_shift}; + const VkDescriptorSet set = descriptor_allocator.Commit(); + device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {}); + cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), &push_constants); cmdbuf.Dispatch(Common::DivCeil(num_tri_vertices, DISPATCH_SIZE), 1, 1); cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, @@ -338,15 +343,14 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble( } ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, + DescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_, MemoryAllocator& memory_allocator_) - : VKComputePass(device_, descriptor_pool_, BuildASTCDescriptorSetBindings(), - BuildASTCPassDescriptorUpdateTemplateEntry(), - BuildComputePushConstantRange(sizeof(AstcPushConstants)), - ASTC_DECODER_COMP_SPV), - device{device_}, scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, + : ComputePass(device_, descriptor_pool_, ASTC_DESCRIPTOR_SET_BINDINGS, + ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY, ASTC_BANK_INFO, + COMPUTE_PUSH_CONSTANT_RANGE<sizeof(AstcPushConstants)>, ASTC_DECODER_COMP_SPV), + scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, update_descriptor_queue{update_descriptor_queue_}, memory_allocator{memory_allocator_} {} ASTCDecoderPass::~ASTCDecoderPass() = default; @@ -444,16 +448,14 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, update_descriptor_queue.AddBuffer(*data_buffer, sizeof(ASTC_ENCODINGS_VALUES), sizeof(SWIZZLE_TABLE)); update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level)); - - const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); - 
const VkPipelineLayout vk_layout = *layout; + const void* const descriptor_data{update_descriptor_queue.UpdateData()}; // To unswizzle the ASTC data const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0})); ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0})); - scheduler.Record([vk_layout, num_dispatches_x, num_dispatches_y, num_dispatches_z, - block_dims, params, set](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, num_dispatches_x, num_dispatches_y, num_dispatches_z, block_dims, + params, descriptor_data](vk::CommandBuffer cmdbuf) { const AstcPushConstants uniforms{ .blocks_dims = block_dims, .bytes_per_block_log2 = params.bytes_per_block_log2, @@ -463,8 +465,10 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, .block_height = params.block_height, .block_height_mask = params.block_height_mask, }; - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, vk_layout, 0, set, {}); - cmdbuf.PushConstants(vk_layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms); + const VkDescriptorSet set = descriptor_allocator.Commit(); + device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {}); + cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms); cmdbuf.Dispatch(num_dispatches_x, num_dispatches_y, num_dispatches_z); }); } diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index 5ea187c30..114aef2bd 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h @@ -4,7 +4,6 @@ #pragma once -#include <optional> #include <span> #include <utility> @@ -27,31 +26,31 @@ class VKUpdateDescriptorQueue; class Image; struct StagingBufferRef; -class VKComputePass { +class ComputePass { public: - explicit VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool, - vk::Span<VkDescriptorSetLayoutBinding> bindings, - vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, - vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code); - ~VKComputePass(); + explicit ComputePass(const Device& device, DescriptorPool& descriptor_pool, + vk::Span<VkDescriptorSetLayoutBinding> bindings, + vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, + const DescriptorBankInfo& bank_info, + vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code); + ~ComputePass(); protected: - VkDescriptorSet CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue); - + const Device& device; vk::DescriptorUpdateTemplateKHR descriptor_template; vk::PipelineLayout layout; vk::Pipeline pipeline; + vk::DescriptorSetLayout descriptor_set_layout; + DescriptorAllocator descriptor_allocator; private: - vk::DescriptorSetLayout descriptor_set_layout; - std::optional<DescriptorAllocator> descriptor_allocator; vk::ShaderModule module; }; -class Uint8Pass final : public VKComputePass { +class Uint8Pass final : public ComputePass { public: explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_, + DescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_); ~Uint8Pass(); @@ -66,10 +65,10 @@ private: VKUpdateDescriptorQueue& update_descriptor_queue; }; -class QuadIndexedPass final : public VKComputePass { +class 
QuadIndexedPass final : public ComputePass { public: explicit QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, + DescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_); ~QuadIndexedPass(); @@ -84,10 +83,10 @@ private: VKUpdateDescriptorQueue& update_descriptor_queue; }; -class ASTCDecoderPass final : public VKComputePass { +class ASTCDecoderPass final : public ComputePass { public: explicit ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, + DescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_, MemoryAllocator& memory_allocator_); @@ -99,7 +98,6 @@ public: private: void MakeDataBuffer(); - const Device& device; VKScheduler& scheduler; StagingBufferPool& staging_buffer_pool; VKUpdateDescriptorQueue& update_descriptor_queue; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 3a48219b7..70b84c7a6 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -2,152 +2,198 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <algorithm> #include <vector> +#include <boost/container/small_vector.hpp> + +#include "video_core/renderer_vulkan/pipeline_helper.h" +#include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" -#include "video_core/renderer_vulkan/vk_shader_decompiler.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/shader_notify.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { -VKComputePipeline::VKComputePipeline(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, - VKUpdateDescriptorQueue& update_descriptor_queue_, - const SPIRVShader& shader_) - : device{device_}, scheduler{scheduler_}, entries{shader_.entries}, - descriptor_set_layout{CreateDescriptorSetLayout()}, - descriptor_allocator{descriptor_pool_, *descriptor_set_layout}, - update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()}, - descriptor_template{CreateDescriptorUpdateTemplate()}, - shader_module{CreateShaderModule(shader_.code)}, pipeline{CreatePipeline()} {} - -VKComputePipeline::~VKComputePipeline() = default; - -VkDescriptorSet VKComputePipeline::CommitDescriptorSet() { - if (!descriptor_template) { - return {}; - } - const VkDescriptorSet set = descriptor_allocator.Commit(); - update_descriptor_queue.Send(*descriptor_template, set); - return set; -} - -vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const { - std::vector<VkDescriptorSetLayoutBinding> bindings; - u32 binding = 0; - const auto add_bindings = [&](VkDescriptorType descriptor_type, std::size_t num_entries) { - // TODO(Rodrigo): Maybe make individual bindings here? 
- for (u32 bindpoint = 0; bindpoint < static_cast<u32>(num_entries); ++bindpoint) { - bindings.push_back({ - .binding = binding++, - .descriptorType = descriptor_type, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }); - } - }; - add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size()); - add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, entries.global_buffers.size()); - add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.uniform_texels.size()); - add_bindings(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, entries.samplers.size()); - add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, entries.storage_texels.size()); - add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size()); - - return device.GetLogical().CreateDescriptorSetLayout({ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .bindingCount = static_cast<u32>(bindings.size()), - .pBindings = bindings.data(), - }); -} - -vk::PipelineLayout VKComputePipeline::CreatePipelineLayout() const { - return device.GetLogical().CreatePipelineLayout({ - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .setLayoutCount = 1, - .pSetLayouts = descriptor_set_layout.address(), - .pushConstantRangeCount = 0, - .pPushConstantRanges = nullptr, - }); -} - -vk::DescriptorUpdateTemplateKHR VKComputePipeline::CreateDescriptorUpdateTemplate() const { - std::vector<VkDescriptorUpdateTemplateEntryKHR> template_entries; - u32 binding = 0; - u32 offset = 0; - FillDescriptorUpdateTemplateEntries(entries, binding, offset, template_entries); - if (template_entries.empty()) { - // If the shader doesn't use descriptor sets, skip template creation. 
- return {}; +using Shader::ImageBufferDescriptor; +using Tegra::Texture::TexturePair; + +ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue_, + Common::ThreadWorker* thread_worker, + VideoCore::ShaderNotify* shader_notify, const Shader::Info& info_, + vk::ShaderModule spv_module_) + : device{device_}, update_descriptor_queue{update_descriptor_queue_}, info{info_}, + spv_module(std::move(spv_module_)) { + if (shader_notify) { + shader_notify->MarkShaderBuilding(); } - - return device.GetLogical().CreateDescriptorUpdateTemplateKHR({ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, - .pNext = nullptr, - .flags = 0, - .descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()), - .pDescriptorUpdateEntries = template_entries.data(), - .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, - .descriptorSetLayout = *descriptor_set_layout, - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .pipelineLayout = *layout, - .set = DESCRIPTOR_SET, - }); -} - -vk::ShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const { - device.SaveShader(code); - - return device.GetLogical().CreateShaderModule({ - .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .codeSize = code.size() * sizeof(u32), - .pCode = code.data(), - }); -} - -vk::Pipeline VKComputePipeline::CreatePipeline() const { - - VkComputePipelineCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = - { + std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(), + uniform_buffer_sizes.begin()); + + auto func{[this, &descriptor_pool, shader_notify] { + DescriptorLayoutBuilder builder{device}; + builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT); + + descriptor_set_layout = builder.CreateDescriptorSetLayout(false); + pipeline_layout = builder.CreatePipelineLayout(*descriptor_set_layout); + descriptor_update_template = + builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout, false); + descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, info); + const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, + .pNext = nullptr, + .requiredSubgroupSize = GuestWarpSize, + }; + pipeline = device.GetLogical().CreateComputePipeline({ + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage{ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, + .pNext = device.IsExtSubgroupSizeControlSupported() ? 
&subgroup_size_ci : nullptr, .flags = 0, .stage = VK_SHADER_STAGE_COMPUTE_BIT, - .module = *shader_module, + .module = *spv_module, .pName = "main", .pSpecializationInfo = nullptr, }, - .layout = *layout, - .basePipelineHandle = nullptr, - .basePipelineIndex = 0, - }; - - const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, - .pNext = nullptr, - .requiredSubgroupSize = GuestWarpSize, - }; - - if (entries.uses_warps && device.IsGuestWarpSizeSupported(VK_SHADER_STAGE_COMPUTE_BIT)) { - ci.stage.pNext = &subgroup_size_ci; + .layout = *pipeline_layout, + .basePipelineHandle = 0, + .basePipelineIndex = 0, + }); + std::lock_guard lock{build_mutex}; + is_built = true; + build_condvar.notify_one(); + if (shader_notify) { + shader_notify->MarkShaderComplete(); + } + }}; + if (thread_worker) { + thread_worker->QueueWork(std::move(func)); + } else { + func(); + } +} + +void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, + Tegra::MemoryManager& gpu_memory, VKScheduler& scheduler, + BufferCache& buffer_cache, TextureCache& texture_cache) { + update_descriptor_queue.Acquire(); + + buffer_cache.SetComputeUniformBufferState(info.constant_buffer_mask, &uniform_buffer_sizes); + buffer_cache.UnbindComputeStorageBuffers(); + size_t ssbo_index{}; + for (const auto& desc : info.storage_buffers_descriptors) { + ASSERT(desc.count == 1); + buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset, + desc.is_written); + ++ssbo_index; } - return device.GetLogical().CreateComputePipeline(ci); + texture_cache.SynchronizeComputeDescriptors(); + + static constexpr size_t max_elements = 64; + std::array<ImageId, max_elements> image_view_ids; + boost::container::static_vector<u32, max_elements> image_view_indices; + boost::container::static_vector<VkSampler, max_elements> samplers; + + const auto& qmd{kepler_compute.launch_description}; + const auto& cbufs{qmd.const_buffer_config}; + const bool via_header_index{qmd.linked_tsc != 0}; + const auto read_handle{[&](const auto& desc, u32 index) { + ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0); + const u32 index_offset{index << desc.size_shift}; + const u32 offset{desc.cbuf_offset + index_offset}; + const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + offset}; + if constexpr (std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> || + std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) { + if (desc.has_secondary) { + ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0); + const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset}; + const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() + + secondary_offset}; + const u32 lhs_raw{gpu_memory.Read<u32>(addr)}; + const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)}; + return TexturePair(lhs_raw | rhs_raw, via_header_index); + } + } + return TexturePair(gpu_memory.Read<u32>(addr), via_header_index); + }}; + const auto add_image{[&](const auto& desc) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); + } + }}; + std::ranges::for_each(info.texture_buffer_descriptors, add_image); + std::ranges::for_each(info.image_buffer_descriptors, add_image); + for (const auto& desc : info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const auto 
handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); + + Sampler* const sampler = texture_cache.GetComputeSampler(handle.second); + samplers.push_back(sampler->Handle()); + } + } + std::ranges::for_each(info.image_descriptors, add_image); + + const std::span indices_span(image_view_indices.data(), image_view_indices.size()); + texture_cache.FillComputeImageViews(indices_span, image_view_ids); + + buffer_cache.UnbindComputeTextureBuffers(); + ImageId* texture_buffer_ids{image_view_ids.data()}; + size_t index{}; + const auto add_buffer{[&](const auto& desc) { + constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>; + for (u32 i = 0; i < desc.count; ++i) { + bool is_written{false}; + if constexpr (is_image) { + is_written = desc.is_written; + } + ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids); + buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(), + image_view.BufferSize(), image_view.format, + is_written, is_image); + ++texture_buffer_ids; + ++index; + } + }}; + std::ranges::for_each(info.texture_buffer_descriptors, add_buffer); + std::ranges::for_each(info.image_buffer_descriptors, add_buffer); + + buffer_cache.UpdateComputeBuffers(); + buffer_cache.BindHostComputeBuffers(); + + const VkSampler* samplers_it{samplers.data()}; + const ImageId* views_it{image_view_ids.data()}; + PushImageDescriptors(info, samplers_it, views_it, texture_cache, update_descriptor_queue); + + if (!is_built.load(std::memory_order::relaxed)) { + // Wait for the pipeline to be built + scheduler.Record([this](vk::CommandBuffer) { + std::unique_lock lock{build_mutex}; + build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); }); + }); + } + const void* const descriptor_data{update_descriptor_queue.UpdateData()}; + scheduler.Record([this, descriptor_data](vk::CommandBuffer cmdbuf) { + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); + if (!descriptor_set_layout) { + return; + } + const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; + const vk::Device& dev{device.GetLogical()}; + dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0, + descriptor_set, nullptr); + }); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 7e16575ac..52fec04d3 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -4,61 +4,63 @@ #pragma once +#include <atomic> +#include <condition_variable> +#include <mutex> + #include "common/common_types.h" +#include "common/thread_worker.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/memory_manager.h" +#include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_shader_decompiler.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" +#include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/vulkan_common/vulkan_wrapper.h" +namespace VideoCore { +class ShaderNotify; +} + namespace Vulkan { class Device; class VKScheduler; -class VKUpdateDescriptorQueue; -class VKComputePipeline final { +class ComputePipeline { public: - explicit VKComputePipeline(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& 
descriptor_pool_, - VKUpdateDescriptorQueue& update_descriptor_queue_, - const SPIRVShader& shader_); - ~VKComputePipeline(); - - VkDescriptorSet CommitDescriptorSet(); + explicit ComputePipeline(const Device& device, DescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue, + Common::ThreadWorker* thread_worker, + VideoCore::ShaderNotify* shader_notify, const Shader::Info& info, + vk::ShaderModule spv_module); - VkPipeline GetHandle() const { - return *pipeline; - } + ComputePipeline& operator=(ComputePipeline&&) noexcept = delete; + ComputePipeline(ComputePipeline&&) noexcept = delete; - VkPipelineLayout GetLayout() const { - return *layout; - } + ComputePipeline& operator=(const ComputePipeline&) = delete; + ComputePipeline(const ComputePipeline&) = delete; - const ShaderEntries& GetEntries() const { - return entries; - } + void Configure(Tegra::Engines::KeplerCompute& kepler_compute, Tegra::MemoryManager& gpu_memory, + VKScheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache); private: - vk::DescriptorSetLayout CreateDescriptorSetLayout() const; - - vk::PipelineLayout CreatePipelineLayout() const; - - vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate() const; - - vk::ShaderModule CreateShaderModule(const std::vector<u32>& code) const; - - vk::Pipeline CreatePipeline() const; - const Device& device; - VKScheduler& scheduler; - ShaderEntries entries; + VKUpdateDescriptorQueue& update_descriptor_queue; + Shader::Info info; + VideoCommon::ComputeUniformBufferSizes uniform_buffer_sizes{}; + + vk::ShaderModule spv_module; vk::DescriptorSetLayout descriptor_set_layout; DescriptorAllocator descriptor_allocator; - VKUpdateDescriptorQueue& update_descriptor_queue; - vk::PipelineLayout layout; - vk::DescriptorUpdateTemplateKHR descriptor_template; - vk::ShaderModule shader_module; + vk::PipelineLayout pipeline_layout; + vk::DescriptorUpdateTemplateKHR descriptor_update_template; vk::Pipeline pipeline; + + std::condition_variable build_condvar; + std::mutex build_mutex; + std::atomic_bool is_built{false}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp index ef9fb5910..8e77e4796 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <mutex> +#include <span> #include <vector> #include "common/common_types.h" @@ -13,79 +15,149 @@ namespace Vulkan { -// Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines. 
-constexpr std::size_t SETS_GROW_RATE = 0x20; +// Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines +constexpr size_t SETS_GROW_RATE = 16; +constexpr s32 SCORE_THRESHOLD = 3; +constexpr u32 SETS_PER_POOL = 64; -DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool_, - VkDescriptorSetLayout layout_) - : ResourcePool(descriptor_pool_.master_semaphore, SETS_GROW_RATE), - descriptor_pool{descriptor_pool_}, layout{layout_} {} +struct DescriptorBank { + DescriptorBankInfo info; + std::vector<vk::DescriptorPool> pools; +}; -DescriptorAllocator::~DescriptorAllocator() = default; +bool DescriptorBankInfo::IsSuperset(const DescriptorBankInfo& subset) const noexcept { + return uniform_buffers >= subset.uniform_buffers && storage_buffers >= subset.storage_buffers && + texture_buffers >= subset.texture_buffers && image_buffers >= subset.image_buffers && + textures >= subset.textures && images >= subset.image_buffers; +} -VkDescriptorSet DescriptorAllocator::Commit() { - const std::size_t index = CommitResource(); - return descriptors_allocations[index / SETS_GROW_RATE][index % SETS_GROW_RATE]; +template <typename Descriptors> +static u32 Accumulate(const Descriptors& descriptors) { + u32 count = 0; + for (const auto& descriptor : descriptors) { + count += descriptor.count; + } + return count; } -void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) { - descriptors_allocations.push_back(descriptor_pool.AllocateDescriptors(layout, end - begin)); +static DescriptorBankInfo MakeBankInfo(std::span<const Shader::Info> infos) { + DescriptorBankInfo bank; + for (const Shader::Info& info : infos) { + bank.uniform_buffers += Accumulate(info.constant_buffer_descriptors); + bank.storage_buffers += Accumulate(info.storage_buffers_descriptors); + bank.texture_buffers += Accumulate(info.texture_buffer_descriptors); + bank.image_buffers += Accumulate(info.image_buffer_descriptors); + bank.textures += Accumulate(info.texture_descriptors); + bank.images += Accumulate(info.image_descriptors); + } + bank.score = bank.uniform_buffers + bank.storage_buffers + bank.texture_buffers + + bank.image_buffers + bank.textures + bank.images; + return bank; } -VKDescriptorPool::VKDescriptorPool(const Device& device_, VKScheduler& scheduler) - : device{device_}, master_semaphore{scheduler.GetMasterSemaphore()}, active_pool{ - AllocateNewPool()} {} - -VKDescriptorPool::~VKDescriptorPool() = default; - -vk::DescriptorPool* VKDescriptorPool::AllocateNewPool() { - static constexpr u32 num_sets = 0x20000; - static constexpr VkDescriptorPoolSize pool_sizes[] = { - {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, num_sets * 90}, - {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, num_sets * 60}, - {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, num_sets * 64}, - {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, num_sets * 64}, - {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, num_sets * 64}, - {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40}, +static void AllocatePool(const Device& device, DescriptorBank& bank) { + std::array<VkDescriptorPoolSize, 6> pool_sizes; + size_t pool_cursor{}; + const auto add = [&](VkDescriptorType type, u32 count) { + if (count > 0) { + pool_sizes[pool_cursor++] = { + .type = type, + .descriptorCount = count * SETS_PER_POOL, + }; + } }; - - const VkDescriptorPoolCreateInfo ci{ + const auto& info{bank.info}; + add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, info.uniform_buffers); + add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, info.storage_buffers); + add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 
info.texture_buffers); + add(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, info.image_buffers); + add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, info.textures); + add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, info.images); + bank.pools.push_back(device.GetLogical().CreateDescriptorPool({ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, .pNext = nullptr, .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, - .maxSets = num_sets, - .poolSizeCount = static_cast<u32>(std::size(pool_sizes)), + .maxSets = SETS_PER_POOL, + .poolSizeCount = static_cast<u32>(pool_cursor), .pPoolSizes = std::data(pool_sizes), - }; - return &pools.emplace_back(device.GetLogical().CreateDescriptorPool(ci)); + })); +} + +DescriptorAllocator::DescriptorAllocator(const Device& device_, MasterSemaphore& master_semaphore_, + DescriptorBank& bank_, VkDescriptorSetLayout layout_) + : ResourcePool(master_semaphore_, SETS_GROW_RATE), device{&device_}, bank{&bank_}, + layout{layout_} {} + +VkDescriptorSet DescriptorAllocator::Commit() { + const size_t index = CommitResource(); + return sets[index / SETS_GROW_RATE][index % SETS_GROW_RATE]; } -vk::DescriptorSets VKDescriptorPool::AllocateDescriptors(VkDescriptorSetLayout layout, - std::size_t count) { - const std::vector layout_copies(count, layout); - VkDescriptorSetAllocateInfo ai{ +void DescriptorAllocator::Allocate(size_t begin, size_t end) { + sets.push_back(AllocateDescriptors(end - begin)); +} + +vk::DescriptorSets DescriptorAllocator::AllocateDescriptors(size_t count) { + const std::vector<VkDescriptorSetLayout> layouts(count, layout); + VkDescriptorSetAllocateInfo allocate_info{ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, .pNext = nullptr, - .descriptorPool = **active_pool, + .descriptorPool = *bank->pools.back(), .descriptorSetCount = static_cast<u32>(count), - .pSetLayouts = layout_copies.data(), + .pSetLayouts = layouts.data(), }; - - vk::DescriptorSets sets = active_pool->Allocate(ai); - if (!sets.IsOutOfPoolMemory()) { - return sets; + vk::DescriptorSets new_sets = bank->pools.back().Allocate(allocate_info); + if (!new_sets.IsOutOfPoolMemory()) { + return new_sets; } - // Our current pool is out of memory. Allocate a new one and retry - active_pool = AllocateNewPool(); - ai.descriptorPool = **active_pool; - sets = active_pool->Allocate(ai); - if (!sets.IsOutOfPoolMemory()) { - return sets; + AllocatePool(*device, *bank); + allocate_info.descriptorPool = *bank->pools.back(); + new_sets = bank->pools.back().Allocate(allocate_info); + if (!new_sets.IsOutOfPoolMemory()) { + return new_sets; } - // After allocating a new pool, we are out of memory again. We can't handle this from here. 
throw vk::Exception(VK_ERROR_OUT_OF_POOL_MEMORY); } +DescriptorPool::DescriptorPool(const Device& device_, VKScheduler& scheduler) + : device{device_}, master_semaphore{scheduler.GetMasterSemaphore()} {} + +DescriptorPool::~DescriptorPool() = default; + +DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout, + std::span<const Shader::Info> infos) { + return Allocator(layout, MakeBankInfo(infos)); +} + +DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout, + const Shader::Info& info) { + return Allocator(layout, MakeBankInfo(std::array{info})); +} + +DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout, + const DescriptorBankInfo& info) { + return DescriptorAllocator(device, master_semaphore, Bank(info), layout); +} + +DescriptorBank& DescriptorPool::Bank(const DescriptorBankInfo& reqs) { + std::shared_lock read_lock{banks_mutex}; + const auto it = std::ranges::find_if(bank_infos, [&reqs](const DescriptorBankInfo& bank) { + return std::abs(bank.score - reqs.score) < SCORE_THRESHOLD && bank.IsSuperset(reqs); + }); + if (it != bank_infos.end()) { + return *banks[std::distance(bank_infos.begin(), it)].get(); + } + read_lock.unlock(); + + std::unique_lock write_lock{banks_mutex}; + bank_infos.push_back(reqs); + + auto& bank = *banks.emplace_back(std::make_unique<DescriptorBank>()); + bank.info = reqs; + AllocatePool(device, bank); + return bank; +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.h b/src/video_core/renderer_vulkan/vk_descriptor_pool.h index f892be7be..59466aac5 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.h +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.h @@ -4,57 +4,85 @@ #pragma once +#include <shared_mutex> +#include <span> #include <vector> +#include "shader_recompiler/shader_info.h" #include "video_core/renderer_vulkan/vk_resource_pool.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { class Device; -class VKDescriptorPool; class VKScheduler; +struct DescriptorBank; + +struct DescriptorBankInfo { + [[nodiscard]] bool IsSuperset(const DescriptorBankInfo& subset) const noexcept; + + u32 uniform_buffers{}; ///< Number of uniform buffer descriptors + u32 storage_buffers{}; ///< Number of storage buffer descriptors + u32 texture_buffers{}; ///< Number of texture buffer descriptors + u32 image_buffers{}; ///< Number of image buffer descriptors + u32 textures{}; ///< Number of texture descriptors + u32 images{}; ///< Number of image descriptors + s32 score{}; ///< Number of descriptors in total +}; + class DescriptorAllocator final : public ResourcePool { + friend class DescriptorPool; + public: - explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, VkDescriptorSetLayout layout); - ~DescriptorAllocator() override; + explicit DescriptorAllocator() = default; + ~DescriptorAllocator() override = default; + + DescriptorAllocator& operator=(DescriptorAllocator&&) noexcept = default; + DescriptorAllocator(DescriptorAllocator&&) noexcept = default; DescriptorAllocator& operator=(const DescriptorAllocator&) = delete; DescriptorAllocator(const DescriptorAllocator&) = delete; VkDescriptorSet Commit(); -protected: - void Allocate(std::size_t begin, std::size_t end) override; - private: - VKDescriptorPool& descriptor_pool; - const VkDescriptorSetLayout layout; + explicit DescriptorAllocator(const Device& device_, MasterSemaphore& master_semaphore_, + DescriptorBank& bank_, VkDescriptorSetLayout layout_); - 
std::vector<vk::DescriptorSets> descriptors_allocations; -}; + void Allocate(size_t begin, size_t end) override; + + vk::DescriptorSets AllocateDescriptors(size_t count); + + const Device* device{}; + DescriptorBank* bank{}; + VkDescriptorSetLayout layout{}; -class VKDescriptorPool final { - friend DescriptorAllocator; + std::vector<vk::DescriptorSets> sets; +}; +class DescriptorPool { public: - explicit VKDescriptorPool(const Device& device, VKScheduler& scheduler); - ~VKDescriptorPool(); + explicit DescriptorPool(const Device& device, VKScheduler& scheduler); + ~DescriptorPool(); - VKDescriptorPool(const VKDescriptorPool&) = delete; - VKDescriptorPool& operator=(const VKDescriptorPool&) = delete; + DescriptorPool& operator=(const DescriptorPool&) = delete; + DescriptorPool(const DescriptorPool&) = delete; -private: - vk::DescriptorPool* AllocateNewPool(); + DescriptorAllocator Allocator(VkDescriptorSetLayout layout, + std::span<const Shader::Info> infos); + DescriptorAllocator Allocator(VkDescriptorSetLayout layout, const Shader::Info& info); + DescriptorAllocator Allocator(VkDescriptorSetLayout layout, const DescriptorBankInfo& info); - vk::DescriptorSets AllocateDescriptors(VkDescriptorSetLayout layout, std::size_t count); +private: + DescriptorBank& Bank(const DescriptorBankInfo& reqs); const Device& device; MasterSemaphore& master_semaphore; - std::vector<vk::DescriptorPool> pools; - vk::DescriptorPool* active_pool; + std::shared_mutex banks_mutex; + std::vector<DescriptorBankInfo> bank_infos; + std::vector<std::unique_ptr<DescriptorBank>> banks; }; } // namespace Vulkan
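
The DescriptorPool/DescriptorBank design declared above replaces the single ever-growing VKDescriptorPool: each pipeline requests a DescriptorAllocator sized from its shaders' Shader::Info, and Bank() reuses an existing DescriptorBank whenever that bank's per-type counts are a superset of the request and the total descriptor count ("score") differs by less than SCORE_THRESHOLD, so similarly shaped pipelines share VkDescriptorPool objects. A minimal, self-contained sketch of that matching heuristic, reduced to two descriptor categories with illustrative names rather than the actual yuzu types:

#include <cstddef>
#include <cstdlib>
#include <memory>
#include <vector>

// Illustrative stand-ins for DescriptorBankInfo/DescriptorBank; the real structs in the
// diff above track six descriptor categories plus a score.
struct BankInfo {
    int uniform_buffers{};
    int textures{};
    int score{}; // total number of descriptors requested

    bool IsSuperset(const BankInfo& subset) const noexcept {
        return uniform_buffers >= subset.uniform_buffers && textures >= subset.textures;
    }
};

struct Bank {
    BankInfo info;
    // The real DescriptorBank also owns the vk::DescriptorPool objects it allocates from.
};

constexpr int SCORE_THRESHOLD = 3; // mirrors the constant introduced above

// Return an existing bank that can serve `reqs`, or create a new one; this is the same
// lookup DescriptorPool::Bank() performs, minus the shared_mutex used for thread safety.
Bank& GetBank(std::vector<BankInfo>& infos, std::vector<std::unique_ptr<Bank>>& banks,
              const BankInfo& reqs) {
    for (std::size_t i = 0; i < infos.size(); ++i) {
        if (std::abs(infos[i].score - reqs.score) < SCORE_THRESHOLD && infos[i].IsSuperset(reqs)) {
            return *banks[i];
        }
    }
    infos.push_back(reqs);
    banks.push_back(std::make_unique<Bank>());
    banks.back()->info = reqs;
    return *banks.back();
}

DescriptorAllocator::Commit() then hands out one VkDescriptorSet per draw from the matched bank, growing it SETS_GROW_RATE sets at a time and only calling AllocatePool() again when the current VkDescriptorPool reports VK_ERROR_OUT_OF_POOL_MEMORY.
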
\ No newline at end of file diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index fc6dd83eb..18482e1d0 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -1,29 +1,58 @@ -// Copyright 2019 yuzu Emulator Project +// Copyright 2021 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. #include <algorithm> -#include <array> -#include <cstring> -#include <vector> +#include <span> -#include "common/common_types.h" -#include "common/microprofile.h" -#include "video_core/renderer_vulkan/fixed_pipeline_state.h" +#include <boost/container/small_vector.hpp> +#include <boost/container/static_vector.hpp> + +#include "common/bit_field.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" -#include "video_core/renderer_vulkan/vk_descriptor_pool.h" +#include "video_core/renderer_vulkan/pipeline_helper.h" +#include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" -#include "video_core/renderer_vulkan/vk_pipeline_cache.h" +#include "video_core/renderer_vulkan/vk_render_pass_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/shader_notify.h" #include "video_core/vulkan_common/vulkan_device.h" -#include "video_core/vulkan_common/vulkan_wrapper.h" - -namespace Vulkan { -MICROPROFILE_DECLARE(Vulkan_PipelineCache); +#if defined(_MSC_VER) && defined(NDEBUG) +#define LAMBDA_FORCEINLINE [[msvc::forceinline]] +#else +#define LAMBDA_FORCEINLINE +#endif +namespace Vulkan { namespace { +using boost::container::small_vector; +using boost::container::static_vector; +using Shader::ImageBufferDescriptor; +using Tegra::Texture::TexturePair; +using VideoCore::Surface::PixelFormat; +using VideoCore::Surface::PixelFormatFromDepthFormat; +using VideoCore::Surface::PixelFormatFromRenderTargetFormat; + +constexpr size_t NUM_STAGES = Maxwell::MaxShaderStage; +constexpr size_t MAX_IMAGE_ELEMENTS = 64; + +DescriptorLayoutBuilder MakeBuilder(const Device& device, std::span<const Shader::Info> infos) { + DescriptorLayoutBuilder builder{device}; + for (size_t index = 0; index < infos.size(); ++index) { + static constexpr std::array stages{ + VK_SHADER_STAGE_VERTEX_BIT, + VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT, + VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT, + VK_SHADER_STAGE_GEOMETRY_BIT, + VK_SHADER_STAGE_FRAGMENT_BIT, + }; + builder.Add(infos[index], stages.at(index)); + } + return builder; +} template <class StencilFace> VkStencilOpState GetStencilFaceState(const StencilFace& face) { @@ -39,15 +68,24 @@ VkStencilOpState GetStencilFaceState(const StencilFace& face) { } bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) { - static constexpr std::array unsupported_topologies = { + static constexpr std::array unsupported_topologies{ VK_PRIMITIVE_TOPOLOGY_POINT_LIST, VK_PRIMITIVE_TOPOLOGY_LINE_LIST, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY, - VK_PRIMITIVE_TOPOLOGY_PATCH_LIST}; - return std::find(std::begin(unsupported_topologies), std::end(unsupported_topologies), - topology) == std::end(unsupported_topologies); + VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, + // VK_PRIMITIVE_TOPOLOGY_QUAD_LIST_EXT, + }; + return 
std::ranges::find(unsupported_topologies, topology) == unsupported_topologies.end(); +} + +bool IsLine(VkPrimitiveTopology topology) { + static constexpr std::array line_topologies{ + VK_PRIMITIVE_TOPOLOGY_LINE_LIST, VK_PRIMITIVE_TOPOLOGY_LINE_STRIP, + // VK_PRIMITIVE_TOPOLOGY_LINE_LOOP_EXT, + }; + return std::ranges::find(line_topologies, topology) == line_topologies.end(); } VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) { @@ -59,8 +97,7 @@ VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) { BitField<12, 3, Maxwell::ViewportSwizzle> w; }; const Swizzle unpacked{swizzle}; - - return { + return VkViewportSwizzleNV{ .x = MaxwellToVK::ViewportSwizzle(unpacked.x), .y = MaxwellToVK::ViewportSwizzle(unpacked.y), .z = MaxwellToVK::ViewportSwizzle(unpacked.z), @@ -68,193 +105,446 @@ VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) { }; } -VkSampleCountFlagBits ConvertMsaaMode(Tegra::Texture::MsaaMode msaa_mode) { - switch (msaa_mode) { - case Tegra::Texture::MsaaMode::Msaa1x1: - return VK_SAMPLE_COUNT_1_BIT; - case Tegra::Texture::MsaaMode::Msaa2x1: - case Tegra::Texture::MsaaMode::Msaa2x1_D3D: - return VK_SAMPLE_COUNT_2_BIT; - case Tegra::Texture::MsaaMode::Msaa2x2: - case Tegra::Texture::MsaaMode::Msaa2x2_VC4: - case Tegra::Texture::MsaaMode::Msaa2x2_VC12: - return VK_SAMPLE_COUNT_4_BIT; - case Tegra::Texture::MsaaMode::Msaa4x2: - case Tegra::Texture::MsaaMode::Msaa4x2_D3D: - case Tegra::Texture::MsaaMode::Msaa4x2_VC8: - case Tegra::Texture::MsaaMode::Msaa4x2_VC24: - return VK_SAMPLE_COUNT_8_BIT; - case Tegra::Texture::MsaaMode::Msaa4x4: - return VK_SAMPLE_COUNT_16_BIT; - default: - UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast<int>(msaa_mode)); - return VK_SAMPLE_COUNT_1_BIT; +PixelFormat DecodeFormat(u8 encoded_format) { + const auto format{static_cast<Tegra::RenderTargetFormat>(encoded_format)}; + if (format == Tegra::RenderTargetFormat::NONE) { + return PixelFormat::Invalid; } + return PixelFormatFromRenderTargetFormat(format); } -} // Anonymous namespace +RenderPassKey MakeRenderPassKey(const FixedPipelineState& state) { + RenderPassKey key; + std::ranges::transform(state.color_formats, key.color_formats.begin(), DecodeFormat); + if (state.depth_enabled != 0) { + const auto depth_format{static_cast<Tegra::DepthFormat>(state.depth_format.Value())}; + key.depth_format = PixelFormatFromDepthFormat(depth_format); + } else { + key.depth_format = PixelFormat::Invalid; + } + key.samples = MaxwellToVK::MsaaMode(state.msaa_mode); + return key; +} -VKGraphicsPipeline::VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, - VKUpdateDescriptorQueue& update_descriptor_queue_, - const GraphicsPipelineCacheKey& key, - vk::Span<VkDescriptorSetLayoutBinding> bindings, - const SPIRVProgram& program, u32 num_color_buffers) - : device{device_}, scheduler{scheduler_}, cache_key{key}, hash{cache_key.Hash()}, - descriptor_set_layout{CreateDescriptorSetLayout(bindings)}, - descriptor_allocator{descriptor_pool_, *descriptor_set_layout}, - update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()}, - descriptor_template{CreateDescriptorUpdateTemplate(program)}, - modules(CreateShaderModules(program)), - pipeline(CreatePipeline(program, cache_key.renderpass, num_color_buffers)) {} - -VKGraphicsPipeline::~VKGraphicsPipeline() = default; - -VkDescriptorSet VKGraphicsPipeline::CommitDescriptorSet() { - if (!descriptor_template) { - return {}; - } - const VkDescriptorSet set = descriptor_allocator.Commit(); - 
update_descriptor_queue.Send(*descriptor_template, set); - return set; +size_t NumAttachments(const FixedPipelineState& state) { + size_t num{}; + for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { + const auto format{static_cast<Tegra::RenderTargetFormat>(state.color_formats[index])}; + if (format != Tegra::RenderTargetFormat::NONE) { + num = index + 1; + } + } + return num; } -vk::DescriptorSetLayout VKGraphicsPipeline::CreateDescriptorSetLayout( - vk::Span<VkDescriptorSetLayoutBinding> bindings) const { - const VkDescriptorSetLayoutCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .bindingCount = bindings.size(), - .pBindings = bindings.data(), - }; - return device.GetLogical().CreateDescriptorSetLayout(ci); +template <typename Spec> +bool Passes(const std::array<vk::ShaderModule, NUM_STAGES>& modules, + const std::array<Shader::Info, NUM_STAGES>& stage_infos) { + for (size_t stage = 0; stage < NUM_STAGES; ++stage) { + if (!Spec::enabled_stages[stage] && modules[stage]) { + return false; + } + const auto& info{stage_infos[stage]}; + if constexpr (!Spec::has_storage_buffers) { + if (!info.storage_buffers_descriptors.empty()) { + return false; + } + } + if constexpr (!Spec::has_texture_buffers) { + if (!info.texture_buffer_descriptors.empty()) { + return false; + } + } + if constexpr (!Spec::has_image_buffers) { + if (!info.image_buffer_descriptors.empty()) { + return false; + } + } + if constexpr (!Spec::has_images) { + if (!info.image_descriptors.empty()) { + return false; + } + } + } + return true; } -vk::PipelineLayout VKGraphicsPipeline::CreatePipelineLayout() const { - const VkPipelineLayoutCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .setLayoutCount = 1, - .pSetLayouts = descriptor_set_layout.address(), - .pushConstantRangeCount = 0, - .pPushConstantRanges = nullptr, - }; - return device.GetLogical().CreatePipelineLayout(ci); +using ConfigureFuncPtr = void (*)(GraphicsPipeline*, bool); + +template <typename Spec, typename... 
Specs> +ConfigureFuncPtr FindSpec(const std::array<vk::ShaderModule, NUM_STAGES>& modules, + const std::array<Shader::Info, NUM_STAGES>& stage_infos) { + if constexpr (sizeof...(Specs) > 0) { + if (!Passes<Spec>(modules, stage_infos)) { + return FindSpec<Specs...>(modules, stage_infos); + } + } + return GraphicsPipeline::MakeConfigureSpecFunc<Spec>(); } -vk::DescriptorUpdateTemplateKHR VKGraphicsPipeline::CreateDescriptorUpdateTemplate( - const SPIRVProgram& program) const { - std::vector<VkDescriptorUpdateTemplateEntry> template_entries; - u32 binding = 0; - u32 offset = 0; - for (const auto& stage : program) { - if (stage) { - FillDescriptorUpdateTemplateEntries(stage->entries, binding, offset, template_entries); +struct SimpleVertexFragmentSpec { + static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, true}; + static constexpr bool has_storage_buffers = false; + static constexpr bool has_texture_buffers = false; + static constexpr bool has_image_buffers = false; + static constexpr bool has_images = false; +}; + +struct SimpleVertexSpec { + static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, false}; + static constexpr bool has_storage_buffers = false; + static constexpr bool has_texture_buffers = false; + static constexpr bool has_image_buffers = false; + static constexpr bool has_images = false; +}; + +struct DefaultSpec { + static constexpr std::array<bool, 5> enabled_stages{true, true, true, true, true}; + static constexpr bool has_storage_buffers = true; + static constexpr bool has_texture_buffers = true; + static constexpr bool has_image_buffers = true; + static constexpr bool has_images = true; +}; + +ConfigureFuncPtr ConfigureFunc(const std::array<vk::ShaderModule, NUM_STAGES>& modules, + const std::array<Shader::Info, NUM_STAGES>& infos) { + return FindSpec<SimpleVertexSpec, SimpleVertexFragmentSpec, DefaultSpec>(modules, infos); +} +} // Anonymous namespace + +GraphicsPipeline::GraphicsPipeline( + Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, + VKScheduler& scheduler_, BufferCache& buffer_cache_, TextureCache& texture_cache_, + VideoCore::ShaderNotify* shader_notify, const Device& device_, DescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* worker_thread, + RenderPassCache& render_pass_cache, const GraphicsPipelineCacheKey& key_, + std::array<vk::ShaderModule, NUM_STAGES> stages, + const std::array<const Shader::Info*, NUM_STAGES>& infos) + : key{key_}, maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, device{device_}, + texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, scheduler{scheduler_}, + update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} { + if (shader_notify) { + shader_notify->MarkShaderBuilding(); + } + for (size_t stage = 0; stage < NUM_STAGES; ++stage) { + const Shader::Info* const info{infos[stage]}; + if (!info) { + continue; } + stage_infos[stage] = *info; + enabled_uniform_buffer_masks[stage] = info->constant_buffer_mask; + std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); } - if (template_entries.empty()) { - // If the shader doesn't use descriptor sets, skip template creation. 
- return {}; + auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool] { + DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; + uses_push_descriptor = builder.CanUsePushDescriptor(); + descriptor_set_layout = builder.CreateDescriptorSetLayout(uses_push_descriptor); + if (!uses_push_descriptor) { + descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, stage_infos); + } + const VkDescriptorSetLayout set_layout{*descriptor_set_layout}; + pipeline_layout = builder.CreatePipelineLayout(set_layout); + descriptor_update_template = + builder.CreateTemplate(set_layout, *pipeline_layout, uses_push_descriptor); + + const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(key.state))}; + Validate(); + MakePipeline(render_pass); + + std::lock_guard lock{build_mutex}; + is_built = true; + build_condvar.notify_one(); + if (shader_notify) { + shader_notify->MarkShaderComplete(); + } + }}; + if (worker_thread) { + worker_thread->QueueWork(std::move(func)); + } else { + func(); } + configure_func = ConfigureFunc(spv_modules, stage_infos); +} - const VkDescriptorUpdateTemplateCreateInfoKHR ci{ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, - .pNext = nullptr, - .flags = 0, - .descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()), - .pDescriptorUpdateEntries = template_entries.data(), - .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, - .descriptorSetLayout = *descriptor_set_layout, - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .pipelineLayout = *layout, - .set = DESCRIPTOR_SET, - }; - return device.GetLogical().CreateDescriptorUpdateTemplateKHR(ci); +void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) { + transition_keys.push_back(transition->key); + transitions.push_back(transition); } -std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules( - const SPIRVProgram& program) const { - VkShaderModuleCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .codeSize = 0, - .pCode = nullptr, - }; +template <typename Spec> +void GraphicsPipeline::ConfigureImpl(bool is_indexed) { + std::array<ImageId, MAX_IMAGE_ELEMENTS> image_view_ids; + std::array<u32, MAX_IMAGE_ELEMENTS> image_view_indices; + std::array<VkSampler, MAX_IMAGE_ELEMENTS> samplers; + size_t sampler_index{}; + size_t image_index{}; + + texture_cache.SynchronizeGraphicsDescriptors(); + + buffer_cache.SetUniformBuffersState(enabled_uniform_buffer_masks, &uniform_buffer_sizes); + + const auto& regs{maxwell3d.regs}; + const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; + const auto config_stage{[&](size_t stage) LAMBDA_FORCEINLINE { + const Shader::Info& info{stage_infos[stage]}; + buffer_cache.UnbindGraphicsStorageBuffers(stage); + if constexpr (Spec::has_storage_buffers) { + size_t ssbo_index{}; + for (const auto& desc : info.storage_buffers_descriptors) { + ASSERT(desc.count == 1); + buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index, + desc.cbuf_offset, desc.is_written); + ++ssbo_index; + } + } + const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; + const auto read_handle{[&](const auto& desc, u32 index) { + ASSERT(cbufs[desc.cbuf_index].enabled); + const u32 index_offset{index << desc.size_shift}; + const u32 offset{desc.cbuf_offset + index_offset}; + const GPUVAddr addr{cbufs[desc.cbuf_index].address + offset}; + if constexpr 
(std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> || + std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) { + if (desc.has_secondary) { + ASSERT(cbufs[desc.secondary_cbuf_index].enabled); + const u32 second_offset{desc.secondary_cbuf_offset + index_offset}; + const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].address + + second_offset}; + const u32 lhs_raw{gpu_memory.Read<u32>(addr)}; + const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)}; + const u32 raw{lhs_raw | rhs_raw}; + return TexturePair(raw, via_header_index); + } + } + return TexturePair(gpu_memory.Read<u32>(addr), via_header_index); + }}; + const auto add_image{[&](const auto& desc) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices[image_index++] = handle.first; + } + }}; + if constexpr (Spec::has_texture_buffers) { + for (const auto& desc : info.texture_buffer_descriptors) { + add_image(desc); + } + } + if constexpr (Spec::has_image_buffers) { + for (const auto& desc : info.image_buffer_descriptors) { + add_image(desc); + } + } + for (const auto& desc : info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices[image_index++] = handle.first; - std::vector<vk::ShaderModule> shader_modules; - shader_modules.reserve(Maxwell::MaxShaderStage); - for (std::size_t i = 0; i < Maxwell::MaxShaderStage; ++i) { - const auto& stage = program[i]; - if (!stage) { - continue; + Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)}; + samplers[sampler_index++] = sampler->Handle(); + } + } + if constexpr (Spec::has_images) { + for (const auto& desc : info.image_descriptors) { + add_image(desc); + } } + }}; + if constexpr (Spec::enabled_stages[0]) { + config_stage(0); + } + if constexpr (Spec::enabled_stages[1]) { + config_stage(1); + } + if constexpr (Spec::enabled_stages[2]) { + config_stage(2); + } + if constexpr (Spec::enabled_stages[3]) { + config_stage(3); + } + if constexpr (Spec::enabled_stages[4]) { + config_stage(4); + } + const std::span indices_span(image_view_indices.data(), image_index); + texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); + + ImageId* texture_buffer_index{image_view_ids.data()}; + const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE { + size_t index{}; + const auto add_buffer{[&](const auto& desc) { + constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>; + for (u32 i = 0; i < desc.count; ++i) { + bool is_written{false}; + if constexpr (is_image) { + is_written = desc.is_written; + } + ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; + buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), + image_view.BufferSize(), image_view.format, + is_written, is_image); + ++index; + ++texture_buffer_index; + } + }}; + buffer_cache.UnbindGraphicsTextureBuffers(stage); - device.SaveShader(stage->code); + const Shader::Info& info{stage_infos[stage]}; + if constexpr (Spec::has_texture_buffers) { + for (const auto& desc : info.texture_buffer_descriptors) { + add_buffer(desc); + } + } + if constexpr (Spec::has_image_buffers) { + for (const auto& desc : info.image_buffer_descriptors) { + add_buffer(desc); + } + } + for (const auto& desc : info.texture_descriptors) { + texture_buffer_index += desc.count; + } + if constexpr (Spec::has_images) { + for (const auto& desc : info.image_descriptors) { + 
texture_buffer_index += desc.count; + } + } + }}; + if constexpr (Spec::enabled_stages[0]) { + bind_stage_info(0); + } + if constexpr (Spec::enabled_stages[1]) { + bind_stage_info(1); + } + if constexpr (Spec::enabled_stages[2]) { + bind_stage_info(2); + } + if constexpr (Spec::enabled_stages[3]) { + bind_stage_info(3); + } + if constexpr (Spec::enabled_stages[4]) { + bind_stage_info(4); + } + + buffer_cache.UpdateGraphicsBuffers(is_indexed); + buffer_cache.BindHostGeometryBuffers(is_indexed); - ci.codeSize = stage->code.size() * sizeof(u32); - ci.pCode = stage->code.data(); - shader_modules.push_back(device.GetLogical().CreateShaderModule(ci)); + update_descriptor_queue.Acquire(); + + const VkSampler* samplers_it{samplers.data()}; + const ImageId* views_it{image_view_ids.data()}; + const auto prepare_stage{[&](size_t stage) LAMBDA_FORCEINLINE { + buffer_cache.BindHostStageBuffers(stage); + PushImageDescriptors(stage_infos[stage], samplers_it, views_it, texture_cache, + update_descriptor_queue); + }}; + if constexpr (Spec::enabled_stages[0]) { + prepare_stage(0); + } + if constexpr (Spec::enabled_stages[1]) { + prepare_stage(1); } - return shader_modules; + if constexpr (Spec::enabled_stages[2]) { + prepare_stage(2); + } + if constexpr (Spec::enabled_stages[3]) { + prepare_stage(3); + } + if constexpr (Spec::enabled_stages[4]) { + prepare_stage(4); + } + ConfigureDraw(); } -vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, - VkRenderPass renderpass, - u32 num_color_buffers) const { - const auto& state = cache_key.fixed_state; - const auto& viewport_swizzles = state.viewport_swizzles; - - FixedPipelineState::DynamicState dynamic; - if (device.IsExtExtendedDynamicStateSupported()) { - // Insert dummy values, as long as they are valid they don't matter as extended dynamic - // state is ignored - dynamic.raw1 = 0; - dynamic.raw2 = 0; - dynamic.vertex_strides.fill(0); - } else { - dynamic = state.dynamic_state; - } - - std::vector<VkVertexInputBindingDescription> vertex_bindings; - std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors; - for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { - const bool instanced = state.binding_divisors[index] != 0; - const auto rate = instanced ? 
VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; - vertex_bindings.push_back({ - .binding = static_cast<u32>(index), - .stride = dynamic.vertex_strides[index], - .inputRate = rate, +void GraphicsPipeline::ConfigureDraw() { + texture_cache.UpdateRenderTargets(false); + scheduler.RequestRenderpass(texture_cache.GetFramebuffer()); + + if (!is_built.load(std::memory_order::relaxed)) { + // Wait for the pipeline to be built + scheduler.Record([this](vk::CommandBuffer) { + std::unique_lock lock{build_mutex}; + build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); }); }); - if (instanced) { - vertex_binding_divisors.push_back({ - .binding = static_cast<u32>(index), - .divisor = state.binding_divisors[index], - }); - } } + const bool bind_pipeline{scheduler.UpdateGraphicsPipeline(this)}; + const void* const descriptor_data{update_descriptor_queue.UpdateData()}; + scheduler.Record([this, descriptor_data, bind_pipeline](vk::CommandBuffer cmdbuf) { + if (bind_pipeline) { + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); + } + if (!descriptor_set_layout) { + return; + } + if (uses_push_descriptor) { + cmdbuf.PushDescriptorSetWithTemplateKHR(*descriptor_update_template, *pipeline_layout, + 0, descriptor_data); + } else { + const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; + const vk::Device& dev{device.GetLogical()}; + dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0, + descriptor_set, nullptr); + } + }); +} - std::vector<VkVertexInputAttributeDescription> vertex_attributes; - const auto& input_attributes = program[0]->entries.attributes; - for (std::size_t index = 0; index < state.attributes.size(); ++index) { - const auto& attribute = state.attributes[index]; - if (!attribute.enabled) { - continue; +void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { + FixedPipelineState::DynamicState dynamic{}; + if (!key.state.extended_dynamic_state) { + dynamic = key.state.dynamic_state; + } + static_vector<VkVertexInputBindingDescription, 32> vertex_bindings; + static_vector<VkVertexInputBindingDivisorDescriptionEXT, 32> vertex_binding_divisors; + static_vector<VkVertexInputAttributeDescription, 32> vertex_attributes; + if (key.state.dynamic_vertex_input) { + for (size_t index = 0; index < key.state.attributes.size(); ++index) { + const u32 type = key.state.DynamicAttributeType(index); + if (!stage_infos[0].loads.Generic(index) || type == 0) { + continue; + } + vertex_attributes.push_back({ + .location = static_cast<u32>(index), + .binding = 0, + .format = type == 1 ? VK_FORMAT_R32_SFLOAT + : type == 2 ? VK_FORMAT_R32_SINT : VK_FORMAT_R32_UINT, + .offset = 0, + }); } - if (!input_attributes.contains(static_cast<u32>(index))) { - // Skip attributes not used by the vertex shaders. - continue; + if (!vertex_attributes.empty()) { + vertex_bindings.push_back({ + .binding = 0, + .stride = 4, + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX, + }); + } + } else { + for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { + const bool instanced = key.state.binding_divisors[index] != 0; + const auto rate = + instanced ? 
VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; + vertex_bindings.push_back({ + .binding = static_cast<u32>(index), + .stride = dynamic.vertex_strides[index], + .inputRate = rate, + }); + if (instanced) { + vertex_binding_divisors.push_back({ + .binding = static_cast<u32>(index), + .divisor = key.state.binding_divisors[index], + }); + } + } + for (size_t index = 0; index < key.state.attributes.size(); ++index) { + const auto& attribute = key.state.attributes[index]; + if (!attribute.enabled || !stage_infos[0].loads.Generic(index)) { + continue; + } + vertex_attributes.push_back({ + .location = static_cast<u32>(index), + .binding = attribute.buffer, + .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()), + .offset = attribute.offset, + }); } - vertex_attributes.push_back({ - .location = static_cast<u32>(index), - .binding = attribute.buffer, - .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()), - .offset = attribute.offset, - }); } - VkPipelineVertexInputStateCreateInfo vertex_input_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, .pNext = nullptr, @@ -264,7 +554,6 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, .vertexAttributeDescriptionCount = static_cast<u32>(vertex_attributes.size()), .pVertexAttributeDescriptions = vertex_attributes.data(), }; - const VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT, .pNext = nullptr, @@ -274,78 +563,113 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, if (!vertex_binding_divisors.empty()) { vertex_input_ci.pNext = &input_divisor_ci; } - - const auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, state.topology); + auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, key.state.topology); + if (input_assembly_topology == VK_PRIMITIVE_TOPOLOGY_PATCH_LIST) { + if (!spv_modules[1] && !spv_modules[2]) { + LOG_WARNING(Render_Vulkan, "Patch topology used without tessellation, using points"); + input_assembly_topology = VK_PRIMITIVE_TOPOLOGY_POINT_LIST; + } + } const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .topology = MaxwellToVK::PrimitiveTopology(device, state.topology), - .primitiveRestartEnable = state.primitive_restart_enable != 0 && + .topology = input_assembly_topology, + .primitiveRestartEnable = key.state.primitive_restart_enable != 0 && SupportsPrimitiveRestart(input_assembly_topology), }; - const VkPipelineTessellationStateCreateInfo tessellation_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .patchControlPoints = state.patch_control_points_minus_one.Value() + 1, - }; - - VkPipelineViewportStateCreateInfo viewport_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .viewportCount = Maxwell::NumViewports, - .pViewports = nullptr, - .scissorCount = Maxwell::NumViewports, - .pScissors = nullptr, + .patchControlPoints = key.state.patch_control_points_minus_one.Value() + 1, }; std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles; - std::ranges::transform(viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); - VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ + std::ranges::transform(key.state.viewport_swizzles, swizzles.begin(), 
UnpackViewportSwizzle); + const VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, .pNext = nullptr, .flags = 0, .viewportCount = Maxwell::NumViewports, .pViewportSwizzles = swizzles.data(), }; - if (device.IsNvViewportSwizzleSupported()) { - viewport_ci.pNext = &swizzle_ci; - } + const VkPipelineViewportStateCreateInfo viewport_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pNext = device.IsNvViewportSwizzleSupported() ? &swizzle_ci : nullptr, + .flags = 0, + .viewportCount = Maxwell::NumViewports, + .pViewports = nullptr, + .scissorCount = Maxwell::NumViewports, + .pScissors = nullptr, + }; - const VkPipelineRasterizationStateCreateInfo rasterization_ci{ + VkPipelineRasterizationStateCreateInfo rasterization_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, .pNext = nullptr, .flags = 0, .depthClampEnable = - static_cast<VkBool32>(state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), + static_cast<VkBool32>(key.state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), .rasterizerDiscardEnable = - static_cast<VkBool32>(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE), - .polygonMode = VK_POLYGON_MODE_FILL, + static_cast<VkBool32>(key.state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE), + .polygonMode = + MaxwellToVK::PolygonMode(FixedPipelineState::UnpackPolygonMode(key.state.polygon_mode)), .cullMode = static_cast<VkCullModeFlags>( dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE), .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()), - .depthBiasEnable = state.depth_bias_enable, + .depthBiasEnable = key.state.depth_bias_enable, .depthBiasConstantFactor = 0.0f, .depthBiasClamp = 0.0f, .depthBiasSlopeFactor = 0.0f, .lineWidth = 1.0f, }; + VkPipelineRasterizationLineStateCreateInfoEXT line_state{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT, + .pNext = nullptr, + .lineRasterizationMode = key.state.smooth_lines != 0 + ? VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT + : VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT, + .stippledLineEnable = VK_FALSE, // TODO + .lineStippleFactor = 0, + .lineStipplePattern = 0, + }; + VkPipelineRasterizationConservativeStateCreateInfoEXT conservative_raster{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_CONSERVATIVE_STATE_CREATE_INFO_EXT, + .pNext = nullptr, + .flags = 0, + .conservativeRasterizationMode = key.state.conservative_raster_enable != 0 + ? VK_CONSERVATIVE_RASTERIZATION_MODE_OVERESTIMATE_EXT + : VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT, + .extraPrimitiveOverestimationSize = 0.0f, + }; + VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provoking_vertex{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT, + .pNext = nullptr, + .provokingVertexMode = key.state.provoking_vertex_last != 0 + ? 
VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT + : VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT, + }; + if (IsLine(input_assembly_topology) && device.IsExtLineRasterizationSupported()) { + line_state.pNext = std::exchange(rasterization_ci.pNext, &line_state); + } + if (device.IsExtConservativeRasterizationSupported()) { + conservative_raster.pNext = std::exchange(rasterization_ci.pNext, &conservative_raster); + } + if (device.IsExtProvokingVertexSupported()) { + provoking_vertex.pNext = std::exchange(rasterization_ci.pNext, &provoking_vertex); + } const VkPipelineMultisampleStateCreateInfo multisample_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .rasterizationSamples = ConvertMsaaMode(state.msaa_mode), + .rasterizationSamples = MaxwellToVK::MsaaMode(key.state.msaa_mode), .sampleShadingEnable = VK_FALSE, .minSampleShading = 0.0f, .pSampleMask = nullptr, .alphaToCoverageEnable = VK_FALSE, .alphaToOneEnable = VK_FALSE, }; - const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, .pNext = nullptr, @@ -355,32 +679,32 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, .depthCompareOp = dynamic.depth_test_enable ? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc()) : VK_COMPARE_OP_ALWAYS, - .depthBoundsTestEnable = dynamic.depth_bounds_enable, + .depthBoundsTestEnable = dynamic.depth_bounds_enable && device.IsDepthBoundsSupported(), .stencilTestEnable = dynamic.stencil_enable, .front = GetStencilFaceState(dynamic.front), .back = GetStencilFaceState(dynamic.back), .minDepthBounds = 0.0f, .maxDepthBounds = 0.0f, }; - - std::array<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments; - for (std::size_t index = 0; index < num_color_buffers; ++index) { - static constexpr std::array COMPONENT_TABLE{ + if (dynamic.depth_bounds_enable && !device.IsDepthBoundsSupported()) { + LOG_WARNING(Render_Vulkan, "Depth bounds is enabled but not supported"); + } + static_vector<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments; + const size_t num_attachments{NumAttachments(key.state)}; + for (size_t index = 0; index < num_attachments; ++index) { + static constexpr std::array mask_table{ VK_COLOR_COMPONENT_R_BIT, VK_COLOR_COMPONENT_G_BIT, VK_COLOR_COMPONENT_B_BIT, VK_COLOR_COMPONENT_A_BIT, }; - const auto& blend = state.attachments[index]; - - VkColorComponentFlags color_components = 0; - for (std::size_t i = 0; i < COMPONENT_TABLE.size(); ++i) { - if (blend.Mask()[i]) { - color_components |= COMPONENT_TABLE[i]; - } + const auto& blend{key.state.attachments[index]}; + const std::array mask{blend.Mask()}; + VkColorComponentFlags write_mask{}; + for (size_t i = 0; i < mask_table.size(); ++i) { + write_mask |= mask[i] ? 
mask_table[i] : 0; } - - cb_attachments[index] = { + cb_attachments.push_back({ .blendEnable = blend.enable != 0, .srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()), .dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor()), @@ -388,28 +712,27 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, .srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()), .dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()), .alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()), - .colorWriteMask = color_components, - }; + .colorWriteMask = write_mask, + }); } - const VkPipelineColorBlendStateCreateInfo color_blend_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, .pNext = nullptr, .flags = 0, .logicOpEnable = VK_FALSE, .logicOp = VK_LOGIC_OP_COPY, - .attachmentCount = num_color_buffers, + .attachmentCount = static_cast<u32>(cb_attachments.size()), .pAttachments = cb_attachments.data(), .blendConstants = {}, }; - - std::vector dynamic_states{ + static_vector<VkDynamicState, 19> dynamic_states{ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE, + VK_DYNAMIC_STATE_LINE_WIDTH, }; - if (device.IsExtExtendedDynamicStateSupported()) { + if (key.state.extended_dynamic_state) { static constexpr std::array extended{ VK_DYNAMIC_STATE_CULL_MODE_EXT, VK_DYNAMIC_STATE_FRONT_FACE_EXT, @@ -421,9 +744,11 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT, VK_DYNAMIC_STATE_STENCIL_OP_EXT, }; + if (key.state.dynamic_vertex_input) { + dynamic_states.push_back(VK_DYNAMIC_STATE_VERTEX_INPUT_EXT); + } dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end()); } - const VkPipelineDynamicStateCreateInfo dynamic_state_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, .pNext = nullptr, @@ -431,34 +756,33 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, .dynamicStateCount = static_cast<u32>(dynamic_states.size()), .pDynamicStates = dynamic_states.data(), }; - - const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ + [[maybe_unused]] const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, .pNext = nullptr, .requiredSubgroupSize = GuestWarpSize, }; - - std::vector<VkPipelineShaderStageCreateInfo> shader_stages; - std::size_t module_index = 0; - for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { - if (!program[stage]) { + static_vector<VkPipelineShaderStageCreateInfo, 5> shader_stages; + for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { + if (!spv_modules[stage]) { continue; } - - VkPipelineShaderStageCreateInfo& stage_ci = shader_stages.emplace_back(); - stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; - stage_ci.pNext = nullptr; - stage_ci.flags = 0; - stage_ci.stage = MaxwellToVK::ShaderStage(static_cast<Tegra::Engines::ShaderType>(stage)); - stage_ci.module = *modules[module_index++]; - stage_ci.pName = "main"; - stage_ci.pSpecializationInfo = nullptr; - + [[maybe_unused]] auto& stage_ci = + shader_stages.emplace_back(VkPipelineShaderStageCreateInfo{ + .sType = 
VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = MaxwellToVK::ShaderStage(Shader::StageFromIndex(stage)), + .module = *spv_modules[stage], + .pName = "main", + .pSpecializationInfo = nullptr, + }); + /* if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) { stage_ci.pNext = &subgroup_size_ci; } + */ } - return device.GetLogical().CreateGraphicsPipeline(VkGraphicsPipelineCreateInfo{ + pipeline = device.GetLogical().CreateGraphicsPipeline({ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, .pNext = nullptr, .flags = 0, @@ -473,12 +797,31 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, .pDepthStencilState = &depth_stencil_ci, .pColorBlendState = &color_blend_ci, .pDynamicState = &dynamic_state_ci, - .layout = *layout, - .renderPass = renderpass, + .layout = *pipeline_layout, + .renderPass = render_pass, .subpass = 0, .basePipelineHandle = nullptr, .basePipelineIndex = 0, }); } +void GraphicsPipeline::Validate() { + size_t num_images{}; + for (const auto& info : stage_infos) { + for (const auto& desc : info.texture_buffer_descriptors) { + num_images += desc.count; + } + for (const auto& desc : info.image_buffer_descriptors) { + num_images += desc.count; + } + for (const auto& desc : info.texture_descriptors) { + num_images += desc.count; + } + for (const auto& desc : info.image_descriptors) { + num_images += desc.count; + } + } + ASSERT(num_images <= MAX_IMAGE_ELEMENTS); +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 8b6a98fe0..2bd48d697 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -1,30 +1,36 @@ -// Copyright 2019 yuzu Emulator Project +// Copyright 2021 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
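
The SimpleVertexSpec/SimpleVertexFragmentSpec/DefaultSpec structs together with Passes() and FindSpec() in vk_graphics_pipeline.cpp above select one template instantiation of ConfigureImpl per pipeline at creation time: every Spec flag is a compile-time constant, so the per-draw Configure path carries no branches or loops for descriptor kinds the shaders never use. A self-contained sketch of that dispatch pattern, with made-up feature flags and a trimmed-down Pipeline standing in for the yuzu types:

#include <array>
#include <cstdio>

struct Pipeline;
using ConfigureFuncPtr = void (*)(Pipeline*, bool is_indexed);

// Illustrative per-stage usage flags; the real Shader::Info carries descriptor lists instead.
struct StageInfo {
    bool uses_storage_buffers{};
    bool uses_images{};
};

struct Pipeline {
    std::array<StageInfo, 5> stage_infos{};
    ConfigureFuncPtr configure_func{};

    // The real ConfigureImpl binds buffers and textures; here each branch just reports itself.
    template <typename Spec>
    void ConfigureImpl(bool is_indexed) {
        if constexpr (Spec::has_storage_buffers) {
            std::puts("binding storage buffers");
        }
        if constexpr (Spec::has_images) {
            std::puts("binding images");
        }
        std::printf("drawing (indexed=%d)\n", is_indexed ? 1 : 0);
    }

    template <typename Spec>
    static ConfigureFuncPtr MakeConfigureSpecFunc() {
        return [](Pipeline* pl, bool is_indexed) { pl->ConfigureImpl<Spec>(is_indexed); };
    }
};

// Candidate specializations, cheapest first; the last one must accept everything.
struct SimpleSpec {
    static constexpr bool has_storage_buffers = false;
    static constexpr bool has_images = false;
};
struct DefaultSpec {
    static constexpr bool has_storage_buffers = true;
    static constexpr bool has_images = true;
};

// A Spec "passes" when it supports every feature the shader stages actually use.
template <typename Spec>
bool Passes(const std::array<StageInfo, 5>& infos) {
    for (const StageInfo& info : infos) {
        if (!Spec::has_storage_buffers && info.uses_storage_buffers) {
            return false;
        }
        if (!Spec::has_images && info.uses_images) {
            return false;
        }
    }
    return true;
}

// Walk the Spec list and return the first match; the last Spec is taken unconditionally,
// mirroring FindSpec in the diff above.
template <typename Spec, typename... Specs>
ConfigureFuncPtr FindSpec(const std::array<StageInfo, 5>& infos) {
    if constexpr (sizeof...(Specs) > 0) {
        if (!Passes<Spec>(infos)) {
            return FindSpec<Specs...>(infos);
        }
    }
    return Pipeline::MakeConfigureSpecFunc<Spec>();
}

int main() {
    Pipeline pipeline;
    pipeline.stage_infos[4].uses_images = true; // e.g. the fragment stage samples images
    pipeline.configure_func = FindSpec<SimpleSpec, DefaultSpec>(pipeline.stage_infos);
    pipeline.configure_func(&pipeline, true); // invokes the DefaultSpec instantiation
}

In the real code the chosen pointer is stored in configure_func and invoked through Configure(bool is_indexed), with DefaultSpec acting as the unconditional fallback at the end of the Spec list passed to ConfigureFunc().
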
#pragma once +#include <algorithm> #include <array> -#include <optional> -#include <vector> +#include <atomic> +#include <condition_variable> +#include <mutex> +#include <type_traits> -#include "common/common_types.h" +#include "common/thread_worker.h" +#include "shader_recompiler/shader_info.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" +#include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_shader_decompiler.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/vulkan_common/vulkan_wrapper.h" -namespace Vulkan { +namespace VideoCore { +class ShaderNotify; +} -using Maxwell = Tegra::Engines::Maxwell3D::Regs; +namespace Vulkan { struct GraphicsPipelineCacheKey { - VkRenderPass renderpass; - std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders; - FixedPipelineState fixed_state; + std::array<u64, 6> unique_hashes; + FixedPipelineState state; - std::size_t Hash() const noexcept; + size_t Hash() const noexcept; bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept; @@ -32,72 +38,115 @@ struct GraphicsPipelineCacheKey { return !operator==(rhs); } - std::size_t Size() const noexcept { - return sizeof(renderpass) + sizeof(shaders) + fixed_state.Size(); + size_t Size() const noexcept { + return sizeof(unique_hashes) + state.Size(); } }; static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>); static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>); static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>); +} // namespace Vulkan + +namespace std { +template <> +struct hash<Vulkan::GraphicsPipelineCacheKey> { + size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept { + return k.Hash(); + } +}; +} // namespace std + +namespace Vulkan { + class Device; -class VKDescriptorPool; +class RenderPassCache; class VKScheduler; class VKUpdateDescriptorQueue; -using SPIRVProgram = std::array<std::optional<SPIRVShader>, Maxwell::MaxShaderStage>; +class GraphicsPipeline { + static constexpr size_t NUM_STAGES = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage; -class VKGraphicsPipeline final { public: - explicit VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool, - VKUpdateDescriptorQueue& update_descriptor_queue_, - const GraphicsPipelineCacheKey& key, - vk::Span<VkDescriptorSetLayoutBinding> bindings, - const SPIRVProgram& program, u32 num_color_buffers); - ~VKGraphicsPipeline(); - - VkDescriptorSet CommitDescriptorSet(); - - VkPipeline GetHandle() const { - return *pipeline; + explicit GraphicsPipeline( + Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory, + VKScheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache, + VideoCore::ShaderNotify* shader_notify, const Device& device, + DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, + Common::ThreadWorker* worker_thread, RenderPassCache& render_pass_cache, + const GraphicsPipelineCacheKey& key, std::array<vk::ShaderModule, NUM_STAGES> stages, + const std::array<const Shader::Info*, NUM_STAGES>& infos); + + GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = delete; + GraphicsPipeline(GraphicsPipeline&&) noexcept = delete; + + GraphicsPipeline& operator=(const GraphicsPipeline&) = delete; + GraphicsPipeline(const GraphicsPipeline&) = delete; + + void 
AddTransition(GraphicsPipeline* transition); + + void Configure(bool is_indexed) { + configure_func(this, is_indexed); } - VkPipelineLayout GetLayout() const { - return *layout; + [[nodiscard]] GraphicsPipeline* Next(const GraphicsPipelineCacheKey& current_key) noexcept { + if (key == current_key) { + return this; + } + const auto it{std::find(transition_keys.begin(), transition_keys.end(), current_key)}; + return it != transition_keys.end() ? transitions[std::distance(transition_keys.begin(), it)] + : nullptr; } - GraphicsPipelineCacheKey GetCacheKey() const { - return cache_key; + [[nodiscard]] bool IsBuilt() const noexcept { + return is_built.load(std::memory_order::relaxed); } -private: - vk::DescriptorSetLayout CreateDescriptorSetLayout( - vk::Span<VkDescriptorSetLayoutBinding> bindings) const; + template <typename Spec> + static auto MakeConfigureSpecFunc() { + return [](GraphicsPipeline* pl, bool is_indexed) { pl->ConfigureImpl<Spec>(is_indexed); }; + } - vk::PipelineLayout CreatePipelineLayout() const; +private: + template <typename Spec> + void ConfigureImpl(bool is_indexed); - vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate( - const SPIRVProgram& program) const; + void ConfigureDraw(); - std::vector<vk::ShaderModule> CreateShaderModules(const SPIRVProgram& program) const; + void MakePipeline(VkRenderPass render_pass); - vk::Pipeline CreatePipeline(const SPIRVProgram& program, VkRenderPass renderpass, - u32 num_color_buffers) const; + void Validate(); + const GraphicsPipelineCacheKey key; + Tegra::Engines::Maxwell3D& maxwell3d; + Tegra::MemoryManager& gpu_memory; const Device& device; + TextureCache& texture_cache; + BufferCache& buffer_cache; VKScheduler& scheduler; - const GraphicsPipelineCacheKey cache_key; - const u64 hash; + VKUpdateDescriptorQueue& update_descriptor_queue; + + void (*configure_func)(GraphicsPipeline*, bool){}; + + std::vector<GraphicsPipelineCacheKey> transition_keys; + std::vector<GraphicsPipeline*> transitions; + + std::array<vk::ShaderModule, NUM_STAGES> spv_modules; + + std::array<Shader::Info, NUM_STAGES> stage_infos; + std::array<u32, 5> enabled_uniform_buffer_masks{}; + VideoCommon::UniformBufferSizes uniform_buffer_sizes{}; vk::DescriptorSetLayout descriptor_set_layout; DescriptorAllocator descriptor_allocator; - VKUpdateDescriptorQueue& update_descriptor_queue; - vk::PipelineLayout layout; - vk::DescriptorUpdateTemplateKHR descriptor_template; - std::vector<vk::ShaderModule> modules; - + vk::PipelineLayout pipeline_layout; + vk::DescriptorUpdateTemplateKHR descriptor_update_template; vk::Pipeline pipeline; + + std::condition_variable build_condvar; + std::mutex build_mutex; + std::atomic_bool is_built{false}; + bool uses_push_descriptor{false}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h index ee3cd35d0..4f8688118 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.h +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h @@ -39,9 +39,9 @@ public: return KnownGpuTick() >= tick; } - /// Advance to the logical tick. 
- void NextTick() noexcept { - ++current_tick; + /// Advance to the logical tick and return the old one + [[nodiscard]] u64 NextTick() noexcept { + return current_tick.fetch_add(1, std::memory_order::relaxed); } /// Refresh the known GPU tick diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 8991505ca..57b163247 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -4,444 +4,613 @@ #include <algorithm> #include <cstddef> +#include <fstream> #include <memory> +#include <thread> #include <vector> #include "common/bit_cast.h" #include "common/cityhash.h" +#include "common/fs/fs.h" +#include "common/fs/path_util.h" #include "common/microprofile.h" +#include "common/thread_worker.h" #include "core/core.h" #include "core/memory.h" +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/frontend/maxwell/translate_program.h" +#include "shader_recompiler/program_header.h" +#include "video_core/dirty_flags.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" +#include "video_core/renderer_vulkan/pipeline_helper.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" -#include "video_core/shader/compiler_settings.h" -#include "video_core/shader/memory_util.h" #include "video_core/shader_cache.h" +#include "video_core/shader_environment.h" #include "video_core/shader_notify.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { - MICROPROFILE_DECLARE(Vulkan_PipelineCache); -using Tegra::Engines::ShaderType; -using VideoCommon::Shader::GetShaderAddress; -using VideoCommon::Shader::GetShaderCode; -using VideoCommon::Shader::KERNEL_MAIN_OFFSET; -using VideoCommon::Shader::ProgramCode; -using VideoCommon::Shader::STAGE_MAIN_OFFSET; - namespace { +using Shader::Backend::SPIRV::EmitSPIRV; +using Shader::Maxwell::MergeDualVertexPrograms; +using Shader::Maxwell::TranslateProgram; +using VideoCommon::ComputeEnvironment; +using VideoCommon::FileEnvironment; +using VideoCommon::GenericEnvironment; +using VideoCommon::GraphicsEnvironment; + +constexpr u32 CACHE_VERSION = 5; + +template <typename Container> +auto MakeSpan(Container& container) { + return std::span(container.data(), container.size()); +} -constexpr VkDescriptorType UNIFORM_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; -constexpr VkDescriptorType STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; -constexpr VkDescriptorType UNIFORM_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; -constexpr VkDescriptorType COMBINED_IMAGE_SAMPLER = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; -constexpr VkDescriptorType STORAGE_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; -constexpr VkDescriptorType 
STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - -constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ - .depth = VideoCommon::Shader::CompileDepth::FullDecompile, - .disable_else_derivation = true, -}; - -constexpr std::size_t GetStageFromProgram(std::size_t program) { - return program == 0 ? 0 : program - 1; +Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp comparison) { + switch (comparison) { + case Maxwell::ComparisonOp::Never: + case Maxwell::ComparisonOp::NeverOld: + return Shader::CompareFunction::Never; + case Maxwell::ComparisonOp::Less: + case Maxwell::ComparisonOp::LessOld: + return Shader::CompareFunction::Less; + case Maxwell::ComparisonOp::Equal: + case Maxwell::ComparisonOp::EqualOld: + return Shader::CompareFunction::Equal; + case Maxwell::ComparisonOp::LessEqual: + case Maxwell::ComparisonOp::LessEqualOld: + return Shader::CompareFunction::LessThanEqual; + case Maxwell::ComparisonOp::Greater: + case Maxwell::ComparisonOp::GreaterOld: + return Shader::CompareFunction::Greater; + case Maxwell::ComparisonOp::NotEqual: + case Maxwell::ComparisonOp::NotEqualOld: + return Shader::CompareFunction::NotEqual; + case Maxwell::ComparisonOp::GreaterEqual: + case Maxwell::ComparisonOp::GreaterEqualOld: + return Shader::CompareFunction::GreaterThanEqual; + case Maxwell::ComparisonOp::Always: + case Maxwell::ComparisonOp::AlwaysOld: + return Shader::CompareFunction::Always; + } + UNIMPLEMENTED_MSG("Unimplemented comparison op={}", comparison); + return {}; } -constexpr ShaderType GetStageFromProgram(Maxwell::ShaderProgram program) { - return static_cast<ShaderType>(GetStageFromProgram(static_cast<std::size_t>(program))); +Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) { + if (attr.enabled == 0) { + return Shader::AttributeType::Disabled; + } + switch (attr.Type()) { + case Maxwell::VertexAttribute::Type::SignedNorm: + case Maxwell::VertexAttribute::Type::UnsignedNorm: + case Maxwell::VertexAttribute::Type::UnsignedScaled: + case Maxwell::VertexAttribute::Type::SignedScaled: + case Maxwell::VertexAttribute::Type::Float: + return Shader::AttributeType::Float; + case Maxwell::VertexAttribute::Type::SignedInt: + return Shader::AttributeType::SignedInt; + case Maxwell::VertexAttribute::Type::UnsignedInt: + return Shader::AttributeType::UnsignedInt; + } + return Shader::AttributeType::Float; } -ShaderType GetShaderType(Maxwell::ShaderProgram program) { - switch (program) { - case Maxwell::ShaderProgram::VertexB: - return ShaderType::Vertex; - case Maxwell::ShaderProgram::TesselationControl: - return ShaderType::TesselationControl; - case Maxwell::ShaderProgram::TesselationEval: - return ShaderType::TesselationEval; - case Maxwell::ShaderProgram::Geometry: - return ShaderType::Geometry; - case Maxwell::ShaderProgram::Fragment: - return ShaderType::Fragment; - default: - UNIMPLEMENTED_MSG("program={}", program); - return ShaderType::Vertex; +Shader::AttributeType AttributeType(const FixedPipelineState& state, size_t index) { + switch (state.DynamicAttributeType(index)) { + case 0: + return Shader::AttributeType::Disabled; + case 1: + return Shader::AttributeType::Float; + case 2: + return Shader::AttributeType::SignedInt; + case 3: + return Shader::AttributeType::UnsignedInt; } + return Shader::AttributeType::Disabled; } -template <VkDescriptorType descriptor_type, class Container> -void AddBindings(std::vector<VkDescriptorSetLayoutBinding>& bindings, u32& binding, - VkShaderStageFlags stage_flags, const Container& 
container) { - const u32 num_entries = static_cast<u32>(std::size(container)); - for (std::size_t i = 0; i < num_entries; ++i) { - u32 count = 1; - if constexpr (descriptor_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) { - // Combined image samplers can be arrayed. - count = container[i].size; +Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> programs, + const GraphicsPipelineCacheKey& key, + const Shader::IR::Program& program, + const Shader::IR::Program* previous_program) { + Shader::RuntimeInfo info; + if (previous_program) { + info.previous_stage_stores = previous_program->info.stores; + if (previous_program->is_geometry_passthrough) { + info.previous_stage_stores.mask |= previous_program->info.passthrough.mask; } - bindings.push_back({ - .binding = binding++, - .descriptorType = descriptor_type, - .descriptorCount = count, - .stageFlags = stage_flags, - .pImmutableSamplers = nullptr, - }); + } else { + info.previous_stage_stores.mask.set(); + } + const Shader::Stage stage{program.stage}; + const bool has_geometry{key.unique_hashes[4] != 0 && !programs[4].is_geometry_passthrough}; + const bool gl_ndc{key.state.ndc_minus_one_to_one != 0}; + const float point_size{Common::BitCast<float>(key.state.point_size)}; + switch (stage) { + case Shader::Stage::VertexB: + if (!has_geometry) { + if (key.state.topology == Maxwell::PrimitiveTopology::Points) { + info.fixed_state_point_size = point_size; + } + if (key.state.xfb_enabled) { + info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); + } + info.convert_depth_mode = gl_ndc; + } + if (key.state.dynamic_vertex_input) { + for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { + info.generic_input_types[index] = AttributeType(key.state, index); + } + } else { + std::ranges::transform(key.state.attributes, info.generic_input_types.begin(), + &CastAttributeType); + } + break; + case Shader::Stage::TessellationEval: + // We have to flip tessellation clockwise for some reason... 
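// The tessellation fields packed into FixedPipelineState are decoded right below: the
// winding bit is inverted (tess_clockwise is set when the packed bit reads 0), and the
// primitive/spacing lambdas map raw register values onto the recompiler's enums. A
// standalone sketch of that decode, with register encodings assumed (0/1/2 in
// declaration order) and illustrative enum/struct names, not the project's types:
enum class ExampleTessPrimitive { Isolines, Triangles, Quads };
enum class ExampleTessSpacing { Equal, FractionalOdd, FractionalEven };

struct ExampleTessState {
    bool clockwise;
    ExampleTessPrimitive primitive;
    ExampleTessSpacing spacing;
};

constexpr ExampleTessState DecodeTessState(unsigned winding_bit, unsigned primitive_raw,
                                           unsigned spacing_raw) {
    return ExampleTessState{
        .clockwise = winding_bit == 0, // flipped relative to the packed register value
        .primitive = primitive_raw <= 2 ? static_cast<ExampleTessPrimitive>(primitive_raw)
                                        : ExampleTessPrimitive::Triangles,
        .spacing = spacing_raw <= 2 ? static_cast<ExampleTessSpacing>(spacing_raw)
                                    : ExampleTessSpacing::Equal,
    };
}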
+ info.tess_clockwise = key.state.tessellation_clockwise == 0; + info.tess_primitive = [&key] { + const u32 raw{key.state.tessellation_primitive.Value()}; + switch (static_cast<Maxwell::TessellationPrimitive>(raw)) { + case Maxwell::TessellationPrimitive::Isolines: + return Shader::TessPrimitive::Isolines; + case Maxwell::TessellationPrimitive::Triangles: + return Shader::TessPrimitive::Triangles; + case Maxwell::TessellationPrimitive::Quads: + return Shader::TessPrimitive::Quads; + } + UNREACHABLE(); + return Shader::TessPrimitive::Triangles; + }(); + info.tess_spacing = [&] { + const u32 raw{key.state.tessellation_spacing}; + switch (static_cast<Maxwell::TessellationSpacing>(raw)) { + case Maxwell::TessellationSpacing::Equal: + return Shader::TessSpacing::Equal; + case Maxwell::TessellationSpacing::FractionalOdd: + return Shader::TessSpacing::FractionalOdd; + case Maxwell::TessellationSpacing::FractionalEven: + return Shader::TessSpacing::FractionalEven; + } + UNREACHABLE(); + return Shader::TessSpacing::Equal; + }(); + break; + case Shader::Stage::Geometry: + if (program.output_topology == Shader::OutputTopology::PointList) { + info.fixed_state_point_size = point_size; + } + if (key.state.xfb_enabled != 0) { + info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); + } + info.convert_depth_mode = gl_ndc; + break; + case Shader::Stage::Fragment: + info.alpha_test_func = MaxwellToCompareFunction( + key.state.UnpackComparisonOp(key.state.alpha_test_func.Value())); + info.alpha_test_reference = Common::BitCast<float>(key.state.alpha_test_ref); + break; + default: + break; + } + switch (key.state.topology) { + case Maxwell::PrimitiveTopology::Points: + info.input_topology = Shader::InputTopology::Points; + break; + case Maxwell::PrimitiveTopology::Lines: + case Maxwell::PrimitiveTopology::LineLoop: + case Maxwell::PrimitiveTopology::LineStrip: + info.input_topology = Shader::InputTopology::Lines; + break; + case Maxwell::PrimitiveTopology::Triangles: + case Maxwell::PrimitiveTopology::TriangleStrip: + case Maxwell::PrimitiveTopology::TriangleFan: + case Maxwell::PrimitiveTopology::Quads: + case Maxwell::PrimitiveTopology::QuadStrip: + case Maxwell::PrimitiveTopology::Polygon: + case Maxwell::PrimitiveTopology::Patches: + info.input_topology = Shader::InputTopology::Triangles; + break; + case Maxwell::PrimitiveTopology::LinesAdjacency: + case Maxwell::PrimitiveTopology::LineStripAdjacency: + info.input_topology = Shader::InputTopology::LinesAdjacency; + break; + case Maxwell::PrimitiveTopology::TrianglesAdjacency: + case Maxwell::PrimitiveTopology::TriangleStripAdjacency: + info.input_topology = Shader::InputTopology::TrianglesAdjacency; + break; } + info.force_early_z = key.state.early_z != 0; + info.y_negate = key.state.y_negate != 0; + return info; } +} // Anonymous namespace -u32 FillDescriptorLayout(const ShaderEntries& entries, - std::vector<VkDescriptorSetLayoutBinding>& bindings, - Maxwell::ShaderProgram program_type, u32 base_binding) { - const ShaderType stage = GetStageFromProgram(program_type); - const VkShaderStageFlags flags = MaxwellToVK::ShaderStage(stage); - - u32 binding = base_binding; - AddBindings<UNIFORM_BUFFER>(bindings, binding, flags, entries.const_buffers); - AddBindings<STORAGE_BUFFER>(bindings, binding, flags, entries.global_buffers); - AddBindings<UNIFORM_TEXEL_BUFFER>(bindings, binding, flags, entries.uniform_texels); - AddBindings<COMBINED_IMAGE_SAMPLER>(bindings, binding, flags, entries.samplers); - 
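// The pipeline cache keys defined just below hash and compare their raw bytes
// (Common::CityHash64 plus std::memcmp), which is only safe because the keys are
// trivially copyable and fully initialized, padding included. A minimal sketch of the
// same pattern using FNV-1a in place of CityHash; ExampleKey is illustrative only and
// mirrors the ComputePipelineCacheKey layout:
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <type_traits>

struct ExampleKey {
    std::uint64_t unique_hash;
    std::uint32_t shared_memory_size;
    std::uint32_t workgroup_size[3];

    std::size_t Hash() const noexcept {
        const auto* bytes = reinterpret_cast<const unsigned char*>(this);
        std::uint64_t hash = 0xcbf29ce484222325ULL; // FNV-1a offset basis
        for (std::size_t i = 0; i < sizeof *this; ++i) {
            hash = (hash ^ bytes[i]) * 0x100000001b3ULL; // FNV-1a prime
        }
        return static_cast<std::size_t>(hash);
    }

    bool operator==(const ExampleKey& rhs) const noexcept {
        return std::memcmp(this, &rhs, sizeof *this) == 0;
    }
};
static_assert(std::is_trivially_copyable_v<ExampleKey>);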
AddBindings<STORAGE_TEXEL_BUFFER>(bindings, binding, flags, entries.storage_texels); - AddBindings<STORAGE_IMAGE>(bindings, binding, flags, entries.images); - return binding; +size_t ComputePipelineCacheKey::Hash() const noexcept { + const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this); + return static_cast<size_t>(hash); } -} // Anonymous namespace +bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) const noexcept { + return std::memcmp(&rhs, this, sizeof *this) == 0; +} -std::size_t GraphicsPipelineCacheKey::Hash() const noexcept { +size_t GraphicsPipelineCacheKey::Hash() const noexcept { const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size()); - return static_cast<std::size_t>(hash); + return static_cast<size_t>(hash); } bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept { return std::memcmp(&rhs, this, Size()) == 0; } -std::size_t ComputePipelineCacheKey::Hash() const noexcept { - const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this); - return static_cast<std::size_t>(hash); -} - -bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) const noexcept { - return std::memcmp(&rhs, this, sizeof *this) == 0; +PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, const Device& device_, + VKScheduler& scheduler_, DescriptorPool& descriptor_pool_, + VKUpdateDescriptorQueue& update_descriptor_queue_, + RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_, + TextureCache& texture_cache_, VideoCore::ShaderNotify& shader_notify_) + : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, + device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, + update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, + buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, shader_notify{shader_notify_}, + use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()}, + workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"), + serialization_thread(1, "yuzu:PipelineSerialization") { + const auto& float_control{device.FloatControlProperties()}; + const VkDriverIdKHR driver_id{device.GetDriverID()}; + profile = Shader::Profile{ + .supported_spirv = device.IsKhrSpirv1_4Supported() ? 
0x00010400U : 0x00010000U, + .unified_descriptor_binding = true, + .support_descriptor_aliasing = true, + .support_int8 = true, + .support_int16 = device.IsShaderInt16Supported(), + .support_int64 = device.IsShaderInt64Supported(), + .support_vertex_instance_id = false, + .support_float_controls = true, + .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == + VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, + .support_separate_rounding_mode = + float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, + .support_fp16_denorm_preserve = float_control.shaderDenormPreserveFloat16 != VK_FALSE, + .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE, + .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE, + .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE, + .support_fp16_signed_zero_nan_preserve = + float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE, + .support_fp32_signed_zero_nan_preserve = + float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE, + .support_fp64_signed_zero_nan_preserve = + float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE, + .support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(), + .support_vote = true, + .support_viewport_index_layer_non_geometry = + device.IsExtShaderViewportIndexLayerSupported(), + .support_viewport_mask = device.IsNvViewportArray2Supported(), + .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(), + .support_demote_to_helper_invocation = true, + .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(), + .support_derivative_control = true, + .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(), + + .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), + + .lower_left_origin_mode = false, + .need_declared_frag_colors = false, + + .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, + .has_broken_unsigned_image_offsets = false, + .has_broken_signed_operations = false, + .has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR, + .ignore_nan_fp_comparisons = false, + }; + host_info = Shader::HostTranslateInfo{ + .support_float16 = device.IsFloat16Supported(), + .support_int64 = device.IsShaderInt64Supported(), + }; } -Shader::Shader(Tegra::Engines::ConstBufferEngineInterface& engine_, ShaderType stage_, - GPUVAddr gpu_addr_, VAddr cpu_addr_, ProgramCode program_code_, u32 main_offset_) - : gpu_addr(gpu_addr_), program_code(std::move(program_code_)), registry(stage_, engine_), - shader_ir(program_code, main_offset_, compiler_settings, registry), - entries(GenerateShaderEntries(shader_ir)) {} - -Shader::~Shader() = default; - -VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, - Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, const Device& device_, - VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, - VKUpdateDescriptorQueue& update_descriptor_queue_) - : VideoCommon::ShaderCache<Shader>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, - kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, - scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{ - update_descriptor_queue_} {} - -VKPipelineCache::~VKPipelineCache() = 
default; +PipelineCache::~PipelineCache() = default; -std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { - std::array<Shader*, Maxwell::MaxShaderProgram> shaders{}; - - for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - const auto program{static_cast<Maxwell::ShaderProgram>(index)}; - - // Skip stages that are not enabled - if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { - continue; - } - - const GPUVAddr gpu_addr{GetShaderAddress(maxwell3d, program)}; - const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - ASSERT(cpu_addr); - - Shader* result = cpu_addr ? TryGet(*cpu_addr) : null_shader.get(); - if (!result) { - const u8* const host_ptr{gpu_memory.GetPointer(gpu_addr)}; - - // No shader found - create a new one - static constexpr u32 stage_offset = STAGE_MAIN_OFFSET; - const auto stage = static_cast<ShaderType>(index == 0 ? 0 : index - 1); - ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, false); - const std::size_t size_in_bytes = code.size() * sizeof(u64); - - auto shader = std::make_unique<Shader>(maxwell3d, stage, gpu_addr, *cpu_addr, - std::move(code), stage_offset); - result = shader.get(); +GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { + MICROPROFILE_SCOPE(Vulkan_PipelineCache); - if (cpu_addr) { - Register(std::move(shader), *cpu_addr, size_in_bytes); - } else { - null_shader = std::move(shader); - } + if (!RefreshStages(graphics_key.unique_hashes)) { + current_pipeline = nullptr; + return nullptr; + } + graphics_key.state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported(), + device.IsExtVertexInputDynamicStateSupported()); + + if (current_pipeline) { + GraphicsPipeline* const next{current_pipeline->Next(graphics_key)}; + if (next) { + current_pipeline = next; + return BuiltPipeline(current_pipeline); } - shaders[index] = result; } - return last_shaders = shaders; + return CurrentGraphicsPipelineSlowPath(); } -VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline( - const GraphicsPipelineCacheKey& key, u32 num_color_buffers, - VideoCommon::Shader::AsyncShaders& async_shaders) { +ComputePipeline* PipelineCache::CurrentComputePipeline() { MICROPROFILE_SCOPE(Vulkan_PipelineCache); - if (last_graphics_pipeline && last_graphics_key == key) { - return last_graphics_pipeline; - } - last_graphics_key = key; - - if (device.UseAsynchronousShaders() && async_shaders.IsShaderAsync(gpu)) { - std::unique_lock lock{pipeline_cache}; - const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); - if (is_cache_miss) { - gpu.ShaderNotify().MarkSharderBuilding(); - LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); - const auto [program, bindings] = DecompileShaders(key.fixed_state); - async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool, - update_descriptor_queue, bindings, program, key, - num_color_buffers); - } - last_graphics_pipeline = pair->second.get(); - return last_graphics_pipeline; + const ShaderInfo* const shader{ComputeShader()}; + if (!shader) { + return nullptr; } - - const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); - auto& entry = pair->second; - if (is_cache_miss) { - gpu.ShaderNotify().MarkSharderBuilding(); - LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); - const auto [program, bindings] = DecompileShaders(key.fixed_state); - entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool, - update_descriptor_queue, key, bindings, - program, num_color_buffers); - 
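// The Shader::Profile populated in the constructor above is driven by
// VkPhysicalDeviceFloatControlsProperties (denorm/rounding independence, denorm
// preserve/flush, signed-zero/inf/nan preserve). A minimal sketch of querying those
// limits with the raw Vulkan API, assuming a device exposing Vulkan 1.2 or
// VK_KHR_shader_float_controls; the project wraps this in its Device class:
#include <vulkan/vulkan.h>

VkPhysicalDeviceFloatControlsProperties QueryFloatControls(VkPhysicalDevice physical_device) {
    VkPhysicalDeviceFloatControlsProperties float_controls{};
    float_controls.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES;

    VkPhysicalDeviceProperties2 properties{};
    properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
    properties.pNext = &float_controls;

    vkGetPhysicalDeviceProperties2(physical_device, &properties);
    // e.g. denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL
    // corresponds to support_separate_denorm_behavior in the profile above.
    return float_controls;
}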
gpu.ShaderNotify().MarkShaderComplete(); + const auto& qmd{kepler_compute.launch_description}; + const ComputePipelineCacheKey key{ + .unique_hash = shader->unique_hash, + .shared_memory_size = qmd.shared_alloc, + .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, + }; + const auto [pair, is_new]{compute_cache.try_emplace(key)}; + auto& pipeline{pair->second}; + if (!is_new) { + return pipeline.get(); } - last_graphics_pipeline = entry.get(); - return last_graphics_pipeline; + pipeline = CreateComputePipeline(key, shader); + return pipeline.get(); } -VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) { - MICROPROFILE_SCOPE(Vulkan_PipelineCache); - - const auto [pair, is_cache_miss] = compute_cache.try_emplace(key); - auto& entry = pair->second; - if (!is_cache_miss) { - return *entry; +void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, + const VideoCore::DiskResourceLoadCallback& callback) { + if (title_id == 0) { + return; } - LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); - - const GPUVAddr gpu_addr = key.shader; - - const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - ASSERT(cpu_addr); + const auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)}; + const auto base_dir{shader_dir / fmt::format("{:016x}", title_id)}; + if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir)) { + LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories"); + return; + } + pipeline_cache_filename = base_dir / "vulkan.bin"; + + struct { + std::mutex mutex; + size_t total{}; + size_t built{}; + bool has_loaded{}; + } state; + + const auto load_compute{[&](std::ifstream& file, FileEnvironment env) { + ComputePipelineCacheKey key; + file.read(reinterpret_cast<char*>(&key), sizeof(key)); + + workers.QueueWork([this, key, env = std::move(env), &state, &callback]() mutable { + ShaderPools pools; + auto pipeline{CreateComputePipeline(pools, key, env, false)}; + std::lock_guard lock{state.mutex}; + if (pipeline) { + compute_cache.emplace(key, std::move(pipeline)); + } + ++state.built; + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); + } + }); + ++state.total; + }}; + const bool extended_dynamic_state = device.IsExtExtendedDynamicStateSupported(); + const bool dynamic_vertex_input = device.IsExtVertexInputDynamicStateSupported(); + const auto load_graphics{[&](std::ifstream& file, std::vector<FileEnvironment> envs) { + GraphicsPipelineCacheKey key; + file.read(reinterpret_cast<char*>(&key), sizeof(key)); + + if ((key.state.extended_dynamic_state != 0) != extended_dynamic_state || + (key.state.dynamic_vertex_input != 0) != dynamic_vertex_input) { + return; + } + workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable { + ShaderPools pools; + boost::container::static_vector<Shader::Environment*, 5> env_ptrs; + for (auto& env : envs) { + env_ptrs.push_back(&env); + } + auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)}; - Shader* shader = cpu_addr ? 
TryGet(*cpu_addr) : null_kernel.get(); - if (!shader) { - // No shader found - create a new one - const auto host_ptr = gpu_memory.GetPointer(gpu_addr); + std::lock_guard lock{state.mutex}; + graphics_cache.emplace(key, std::move(pipeline)); + ++state.built; + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); + } + }); + ++state.total; + }}; + VideoCommon::LoadPipelines(stop_loading, pipeline_cache_filename, CACHE_VERSION, load_compute, + load_graphics); - ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, true); - const std::size_t size_in_bytes = code.size() * sizeof(u64); + std::unique_lock lock{state.mutex}; + callback(VideoCore::LoadCallbackStage::Build, 0, state.total); + state.has_loaded = true; + lock.unlock(); - auto shader_info = std::make_unique<Shader>(kepler_compute, ShaderType::Compute, gpu_addr, - *cpu_addr, std::move(code), KERNEL_MAIN_OFFSET); - shader = shader_info.get(); + workers.WaitForRequests(); +} - if (cpu_addr) { - Register(std::move(shader_info), *cpu_addr, size_in_bytes); - } else { - null_kernel = std::move(shader_info); - } +GraphicsPipeline* PipelineCache::CurrentGraphicsPipelineSlowPath() { + const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; + auto& pipeline{pair->second}; + if (is_new) { + pipeline = CreateGraphicsPipeline(); } - - const Specialization specialization{ - .base_binding = 0, - .workgroup_size = key.workgroup_size, - .shared_memory_size = key.shared_memory_size, - .point_size = std::nullopt, - .enabled_attributes = {}, - .attribute_types = {}, - .ndc_minus_one_to_one = false, - }; - const SPIRVShader spirv_shader{Decompile(device, shader->GetIR(), ShaderType::Compute, - shader->GetRegistry(), specialization), - shader->GetEntries()}; - entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool, - update_descriptor_queue, spirv_shader); - return *entry; + if (!pipeline) { + return nullptr; + } + if (current_pipeline) { + current_pipeline->AddTransition(pipeline.get()); + } + current_pipeline = pipeline.get(); + return BuiltPipeline(current_pipeline); } -void VKPipelineCache::EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline) { - gpu.ShaderNotify().MarkShaderComplete(); - std::unique_lock lock{pipeline_cache}; - graphics_cache.at(pipeline->GetCacheKey()) = std::move(pipeline); +GraphicsPipeline* PipelineCache::BuiltPipeline(GraphicsPipeline* pipeline) const noexcept { + if (pipeline->IsBuilt()) { + return pipeline; + } + if (!use_asynchronous_shaders) { + return pipeline; + } + // If something is using depth, we can assume that games are not rendering anything which + // will be used one time. + if (maxwell3d.regs.zeta_enable) { + return nullptr; + } + // If games are using a small index count, we can assume these are full screen quads. + // Usually these shaders are only used once for building textures so we can assume they + // can't be built async + if (maxwell3d.regs.index_array.count <= 6 || maxwell3d.regs.vertex_buffer.count <= 6) { + return pipeline; + } + return nullptr; } -void VKPipelineCache::OnShaderRemoval(Shader* shader) { - bool finished = false; - const auto Finish = [&] { - // TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and - // flush. 
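// LoadDiskResources above queues one build job per cached entry on the worker pool and
// reports progress through a mutex-protected counter pair; graphics entries recorded
// with a different extended-dynamic-state or dynamic-vertex-input configuration than
// the current device are skipped rather than rebuilt. A minimal sketch of the progress
// bookkeeping, with illustrative names rather than the project's API:
#include <cstddef>
#include <functional>
#include <mutex>

struct LoadProgress {
    std::mutex mutex;
    std::size_t total{};
    std::size_t built{};
    bool totals_known{}; // set once every cached entry has been queued
};

void OnPipelineBuilt(LoadProgress& progress,
                     const std::function<void(std::size_t, std::size_t)>& callback) {
    std::lock_guard lock{progress.mutex};
    ++progress.built;
    if (progress.totals_known) {
        callback(progress.built, progress.total); // only report once the total is known
    }
}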
- if (finished) { - return; - } - finished = true; - scheduler.Finish(); - }; - - const GPUVAddr invalidated_addr = shader->GetGpuAddr(); - for (auto it = graphics_cache.begin(); it != graphics_cache.end();) { - auto& entry = it->first; - if (std::find(entry.shaders.begin(), entry.shaders.end(), invalidated_addr) == - entry.shaders.end()) { - ++it; +std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( + ShaderPools& pools, const GraphicsPipelineCacheKey& key, + std::span<Shader::Environment* const> envs, bool build_in_parallel) try { + LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); + size_t env_index{0}; + std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs; + const bool uses_vertex_a{key.unique_hashes[0] != 0}; + const bool uses_vertex_b{key.unique_hashes[1] != 0}; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (key.unique_hashes[index] == 0) { continue; } - Finish(); - it = graphics_cache.erase(it); + Shader::Environment& env{*envs[env_index]}; + ++env_index; + + const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))}; + Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); + if (!uses_vertex_a || index != 1) { + // Normal path + programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info); + } else { + // VertexB path when VertexA is present. + auto& program_va{programs[0]}; + auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; + programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); + } } - for (auto it = compute_cache.begin(); it != compute_cache.end();) { - auto& entry = it->first; - if (entry.shader != invalidated_addr) { - ++it; + std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{}; + std::array<vk::ShaderModule, Maxwell::MaxShaderStage> modules; + + const Shader::IR::Program* previous_stage{}; + Shader::Backend::Bindings binding; + for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram; + ++index) { + if (key.unique_hashes[index] == 0) { continue; } - Finish(); - it = compute_cache.erase(it); + UNIMPLEMENTED_IF(index == 0); + + Shader::IR::Program& program{programs[index]}; + const size_t stage_index{index - 1}; + infos[stage_index] = &program.info; + + const auto runtime_info{MakeRuntimeInfo(programs, key, program, previous_stage)}; + const std::vector<u32> code{EmitSPIRV(profile, runtime_info, program, binding)}; + device.SaveShader(code); + modules[stage_index] = BuildShader(device, code); + if (device.HasDebuggingToolAttached()) { + const std::string name{fmt::format("Shader {:016x}", key.unique_hashes[index])}; + modules[stage_index].SetObjectNameEXT(name.c_str()); + } + previous_stage = &program; } + Common::ThreadWorker* const thread_worker{build_in_parallel ? 
&workers : nullptr}; + return std::make_unique<GraphicsPipeline>( + maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, &shader_notify, device, + descriptor_pool, update_descriptor_queue, thread_worker, render_pass_cache, key, + std::move(modules), infos); + +} catch (const Shader::Exception& exception) { + LOG_ERROR(Render_Vulkan, "{}", exception.what()); + return nullptr; } -std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> -VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) { - Specialization specialization; - if (fixed_state.topology == Maxwell::PrimitiveTopology::Points) { - float point_size; - std::memcpy(&point_size, &fixed_state.point_size, sizeof(float)); - specialization.point_size = point_size; - ASSERT(point_size != 0.0f); - } - for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { - const auto& attribute = fixed_state.attributes[i]; - specialization.enabled_attributes[i] = attribute.enabled.Value() != 0; - specialization.attribute_types[i] = attribute.Type(); - } - specialization.ndc_minus_one_to_one = fixed_state.ndc_minus_one_to_one; - specialization.early_fragment_tests = fixed_state.early_z; - - // Alpha test - specialization.alpha_test_func = - FixedPipelineState::UnpackComparisonOp(fixed_state.alpha_test_func.Value()); - specialization.alpha_test_ref = Common::BitCast<float>(fixed_state.alpha_test_ref); - - SPIRVProgram program; - std::vector<VkDescriptorSetLayoutBinding> bindings; +std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() { + GraphicsEnvironments environments; + GetGraphicsEnvironments(environments, graphics_key.unique_hashes); - for (std::size_t index = 1; index < Maxwell::MaxShaderProgram; ++index) { - const auto program_enum = static_cast<Maxwell::ShaderProgram>(index); - // Skip stages that are not enabled - if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { - continue; - } - const GPUVAddr gpu_addr = GetShaderAddress(maxwell3d, program_enum); - const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get(); - - const std::size_t stage = index == 0 ? 
0 : index - 1; // Stage indices are 0 - 5 - const ShaderType program_type = GetShaderType(program_enum); - const auto& entries = shader->GetEntries(); - program[stage] = { - Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization), - entries, - }; - - const u32 old_binding = specialization.base_binding; - specialization.base_binding = - FillDescriptorLayout(entries, bindings, program_enum, specialization.base_binding); - ASSERT(old_binding + entries.NumBindings() == specialization.base_binding); + main_pools.ReleaseContents(); + auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), true)}; + if (!pipeline || pipeline_cache_filename.empty()) { + return pipeline; } - return {std::move(program), std::move(bindings)}; -} - -template <VkDescriptorType descriptor_type, class Container> -void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u32& binding, - u32& offset, const Container& container) { - static constexpr u32 entry_size = static_cast<u32>(sizeof(DescriptorUpdateEntry)); - const u32 count = static_cast<u32>(std::size(container)); - - if constexpr (descriptor_type == COMBINED_IMAGE_SAMPLER) { - for (u32 i = 0; i < count; ++i) { - const u32 num_samplers = container[i].size; - template_entries.push_back({ - .dstBinding = binding, - .dstArrayElement = 0, - .descriptorCount = num_samplers, - .descriptorType = descriptor_type, - .offset = offset, - .stride = entry_size, - }); - - ++binding; - offset += num_samplers * entry_size; + serialization_thread.QueueWork([this, key = graphics_key, envs = std::move(environments.envs)] { + boost::container::static_vector<const GenericEnvironment*, Maxwell::MaxShaderProgram> + env_ptrs; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (key.unique_hashes[index] != 0) { + env_ptrs.push_back(&envs[index]); + } } - return; - } + SerializePipeline(key, env_ptrs, pipeline_cache_filename, CACHE_VERSION); + }); + return pipeline; +} - if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER || - descriptor_type == STORAGE_TEXEL_BUFFER) { - // Nvidia has a bug where updating multiple texels at once causes the driver to crash. 
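// Both CreateGraphicsPipeline and CreateComputePipeline hand the words produced by
// EmitSPIRV to BuildShader. A minimal sketch of that step using the raw Vulkan API;
// the project presumably wraps this in its vk::ShaderModule type, so treat this as an
// illustration rather than the actual helper:
#include <cstdint>
#include <vector>
#include <vulkan/vulkan.h>

VkShaderModule CreateShaderModule(VkDevice device, const std::vector<std::uint32_t>& code) {
    const VkShaderModuleCreateInfo ci{
        .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .codeSize = code.size() * sizeof(std::uint32_t), // byte size, not word count
        .pCode = code.data(),
    };
    VkShaderModule shader_module = VK_NULL_HANDLE;
    vkCreateShaderModule(device, &ci, nullptr, &shader_module);
    return shader_module;
}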
- // Note: Fixed in driver Windows 443.24, Linux 440.66.15 - for (u32 i = 0; i < count; ++i) { - template_entries.push_back({ - .dstBinding = binding + i, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = descriptor_type, - .offset = static_cast<std::size_t>(offset + i * entry_size), - .stride = entry_size, - }); - } - } else if (count > 0) { - template_entries.push_back({ - .dstBinding = binding, - .dstArrayElement = 0, - .descriptorCount = count, - .descriptorType = descriptor_type, - .offset = offset, - .stride = entry_size, - }); +std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( + const ComputePipelineCacheKey& key, const ShaderInfo* shader) { + const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; + const auto& qmd{kepler_compute.launch_description}; + ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; + env.SetCachedSize(shader->size_bytes); + + main_pools.ReleaseContents(); + auto pipeline{CreateComputePipeline(main_pools, key, env, true)}; + if (!pipeline || pipeline_cache_filename.empty()) { + return pipeline; } - offset += count * entry_size; - binding += count; + serialization_thread.QueueWork([this, key, env = std::move(env)] { + SerializePipeline(key, std::array<const GenericEnvironment*, 1>{&env}, + pipeline_cache_filename, CACHE_VERSION); + }); + return pipeline; } -void FillDescriptorUpdateTemplateEntries( - const ShaderEntries& entries, u32& binding, u32& offset, - std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries) { - AddEntry<UNIFORM_BUFFER>(template_entries, offset, binding, entries.const_buffers); - AddEntry<STORAGE_BUFFER>(template_entries, offset, binding, entries.global_buffers); - AddEntry<UNIFORM_TEXEL_BUFFER>(template_entries, offset, binding, entries.uniform_texels); - AddEntry<COMBINED_IMAGE_SAMPLER>(template_entries, offset, binding, entries.samplers); - AddEntry<STORAGE_TEXEL_BUFFER>(template_entries, offset, binding, entries.storage_texels); - AddEntry<STORAGE_IMAGE>(template_entries, offset, binding, entries.images); +std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( + ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env, + bool build_in_parallel) try { + LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); + + Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; + auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; + const std::vector<u32> code{EmitSPIRV(profile, program)}; + device.SaveShader(code); + vk::ShaderModule spv_module{BuildShader(device, code)}; + if (device.HasDebuggingToolAttached()) { + const auto name{fmt::format("Shader {:016x}", key.unique_hash)}; + spv_module.SetObjectNameEXT(name.c_str()); + } + Common::ThreadWorker* const thread_worker{build_in_parallel ? 
&workers : nullptr}; + return std::make_unique<ComputePipeline>(device, descriptor_pool, update_descriptor_queue, + thread_worker, &shader_notify, program.info, + std::move(spv_module)); + +} catch (const Shader::Exception& exception) { + LOG_ERROR(Render_Vulkan, "{}", exception.what()); + return nullptr; } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 89d635a3d..efe5a7ed8 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -6,24 +6,28 @@ #include <array> #include <cstddef> +#include <filesystem> +#include <iosfwd> #include <memory> #include <type_traits> #include <unordered_map> #include <utility> #include <vector> -#include <boost/functional/hash.hpp> - #include "common/common_types.h" -#include "video_core/engines/const_buffer_engine_interface.h" +#include "common/thread_worker.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/host_translate_info.h" +#include "shader_recompiler/object_pool.h" +#include "shader_recompiler/profile.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" +#include "video_core/renderer_vulkan/vk_buffer_cache.h" +#include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" -#include "video_core/renderer_vulkan/vk_shader_decompiler.h" -#include "video_core/shader/async_shaders.h" -#include "video_core/shader/memory_util.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/shader_cache.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -31,23 +35,24 @@ namespace Core { class System; } -namespace Vulkan { +namespace Shader::IR { +struct Program; +} -class Device; -class RasterizerVulkan; -class VKComputePipeline; -class VKDescriptorPool; -class VKScheduler; -class VKUpdateDescriptorQueue; +namespace VideoCore { +class ShaderNotify; +} + +namespace Vulkan { using Maxwell = Tegra::Engines::Maxwell3D::Regs; struct ComputePipelineCacheKey { - GPUVAddr shader; + u64 unique_hash; u32 shared_memory_size; std::array<u32, 3> workgroup_size; - std::size_t Hash() const noexcept; + size_t Hash() const noexcept; bool operator==(const ComputePipelineCacheKey& rhs) const noexcept; @@ -64,15 +69,8 @@ static_assert(std::is_trivially_constructible_v<ComputePipelineCacheKey>); namespace std { template <> -struct hash<Vulkan::GraphicsPipelineCacheKey> { - std::size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept { - return k.Hash(); - } -}; - -template <> struct hash<Vulkan::ComputePipelineCacheKey> { - std::size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept { + size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept { return k.Hash(); } }; @@ -81,94 +79,90 @@ struct hash<Vulkan::ComputePipelineCacheKey> { namespace Vulkan { -class Shader { -public: - explicit Shader(Tegra::Engines::ConstBufferEngineInterface& engine_, - Tegra::Engines::ShaderType stage_, GPUVAddr gpu_addr, VAddr cpu_addr_, - VideoCommon::Shader::ProgramCode program_code, u32 main_offset_); - ~Shader(); - - GPUVAddr GetGpuAddr() const { - return gpu_addr; - } - - VideoCommon::Shader::ShaderIR& GetIR() { - 
return shader_ir; - } - - const VideoCommon::Shader::ShaderIR& GetIR() const { - return shader_ir; - } +class ComputePipeline; +class Device; +class DescriptorPool; +class RasterizerVulkan; +class RenderPassCache; +class VKScheduler; +class VKUpdateDescriptorQueue; - const VideoCommon::Shader::Registry& GetRegistry() const { - return registry; - } +using VideoCommon::ShaderInfo; - const ShaderEntries& GetEntries() const { - return entries; +struct ShaderPools { + void ReleaseContents() { + flow_block.ReleaseContents(); + block.ReleaseContents(); + inst.ReleaseContents(); } -private: - GPUVAddr gpu_addr{}; - VideoCommon::Shader::ProgramCode program_code; - VideoCommon::Shader::Registry registry; - VideoCommon::Shader::ShaderIR shader_ir; - ShaderEntries entries; + Shader::ObjectPool<Shader::IR::Inst> inst; + Shader::ObjectPool<Shader::IR::Block> block; + Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block; }; -class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> { +class PipelineCache : public VideoCommon::ShaderCache { public: - explicit VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, - Tegra::Engines::Maxwell3D& maxwell3d, - Tegra::Engines::KeplerCompute& kepler_compute, - Tegra::MemoryManager& gpu_memory, const Device& device, - VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, - VKUpdateDescriptorQueue& update_descriptor_queue); - ~VKPipelineCache() override; + explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d, + Tegra::Engines::KeplerCompute& kepler_compute, + Tegra::MemoryManager& gpu_memory, const Device& device, + VKScheduler& scheduler, DescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue, + RenderPassCache& render_pass_cache, BufferCache& buffer_cache, + TextureCache& texture_cache, VideoCore::ShaderNotify& shader_notify_); + ~PipelineCache(); + + [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline(); - std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders(); + [[nodiscard]] ComputePipeline* CurrentComputePipeline(); - VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key, - u32 num_color_buffers, - VideoCommon::Shader::AsyncShaders& async_shaders); + void LoadDiskResources(u64 title_id, std::stop_token stop_loading, + const VideoCore::DiskResourceLoadCallback& callback); - VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); +private: + [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipelineSlowPath(); - void EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline); + [[nodiscard]] GraphicsPipeline* BuiltPipeline(GraphicsPipeline* pipeline) const noexcept; -protected: - void OnShaderRemoval(Shader* shader) final; + std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline(); -private: - std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders( - const FixedPipelineState& fixed_state); + std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline( + ShaderPools& pools, const GraphicsPipelineCacheKey& key, + std::span<Shader::Environment* const> envs, bool build_in_parallel); - Tegra::GPU& gpu; - Tegra::Engines::Maxwell3D& maxwell3d; - Tegra::Engines::KeplerCompute& kepler_compute; - Tegra::MemoryManager& gpu_memory; + std::unique_ptr<ComputePipeline> CreateComputePipeline(const ComputePipelineCacheKey& key, + const ShaderInfo* shader); + + std::unique_ptr<ComputePipeline> CreateComputePipeline(ShaderPools& pools, + const ComputePipelineCacheKey& key, + Shader::Environment& env, + bool 
build_in_parallel); const Device& device; VKScheduler& scheduler; - VKDescriptorPool& descriptor_pool; + DescriptorPool& descriptor_pool; VKUpdateDescriptorQueue& update_descriptor_queue; + RenderPassCache& render_pass_cache; + BufferCache& buffer_cache; + TextureCache& texture_cache; + VideoCore::ShaderNotify& shader_notify; + bool use_asynchronous_shaders{}; - std::unique_ptr<Shader> null_shader; - std::unique_ptr<Shader> null_kernel; + GraphicsPipelineCacheKey graphics_key{}; + GraphicsPipeline* current_pipeline{}; - std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{}; + std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<ComputePipeline>> compute_cache; + std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<GraphicsPipeline>> graphics_cache; - GraphicsPipelineCacheKey last_graphics_key; - VKGraphicsPipeline* last_graphics_pipeline = nullptr; + ShaderPools main_pools; - std::mutex pipeline_cache; - std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<VKGraphicsPipeline>> - graphics_cache; - std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<VKComputePipeline>> compute_cache; -}; + Shader::Profile profile; + Shader::HostTranslateInfo host_info; -void FillDescriptorUpdateTemplateEntries( - const ShaderEntries& entries, u32& binding, u32& offset, - std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries); + std::filesystem::path pipeline_cache_filename; + + Common::ThreadWorker workers; + Common::ThreadWorker serialization_thread; +}; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index 7cadd5147..c9cb32d71 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -114,14 +114,10 @@ void HostCounter::EndQuery() { } u64 HostCounter::BlockingQuery() const { - if (tick >= cache.GetScheduler().CurrentTick()) { - cache.GetScheduler().Flush(); - } - + cache.GetScheduler().Wait(tick); u64 data; const VkResult query_result = cache.GetDevice().GetLogical().GetQueryResults( - query.first, query.second, 1, sizeof(data), &data, sizeof(data), - VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); + query.first, query.second, 1, sizeof(data), &data, sizeof(data), VK_QUERY_RESULT_64_BIT); switch (query_result) { case VK_SUCCESS: diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f57c15b37..c7a07fdd8 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -24,7 +24,6 @@ #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -55,11 +54,10 @@ struct DrawParams { u32 num_instances; u32 base_vertex; u32 num_vertices; + u32 first_index; bool is_indexed; }; -constexpr auto COMPUTE_SHADER_INDEX = static_cast<size_t>(Tegra::Engines::ShaderType::Compute); - VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index) { const auto& src = regs.viewport_transform[index]; const float width = src.scale_x * 2.0f; @@ -97,118 +95,6 @@ VkRect2D GetScissorState(const Maxwell& regs, size_t index) { return scissor; } 
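// The HostCounter::BlockingQuery change above waits on the scheduler tick first, so the
// query result is already available and VK_QUERY_RESULT_WAIT_BIT can be dropped. A
// minimal sketch of reading a single 64-bit result once the commands that wrote the
// query are known to have finished:
#include <cstdint>
#include <vulkan/vulkan.h>

VkResult ReadQuery64(VkDevice device, VkQueryPool pool, std::uint32_t query,
                     std::uint64_t& out_value) {
    // One query, 64-bit result, no wait flag: without VK_QUERY_RESULT_WAIT_BIT this
    // returns VK_NOT_READY unless the GPU work has completed, which the wait on the
    // scheduler tick guarantees in the code above.
    return vkGetQueryPoolResults(device, pool, query, 1, sizeof(out_value), &out_value,
                                 sizeof(out_value), VK_QUERY_RESULT_64_BIT);
}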
-std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses( - const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) { - std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses; - for (size_t i = 0; i < std::size(addresses); ++i) { - addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0; - } - return addresses; -} - -struct TextureHandle { - constexpr TextureHandle(u32 data, bool via_header_index) { - const Tegra::Texture::TextureHandle handle{data}; - image = handle.tic_id; - sampler = via_header_index ? image : handle.tsc_id.Value(); - } - - u32 image; - u32 sampler; -}; - -template <typename Engine, typename Entry> -TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry, - size_t stage, size_t index = 0) { - const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage); - if constexpr (std::is_same_v<Entry, SamplerEntry>) { - if (entry.is_separated) { - const u32 buffer_1 = entry.buffer; - const u32 buffer_2 = entry.secondary_buffer; - const u32 offset_1 = entry.offset; - const u32 offset_2 = entry.secondary_offset; - const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1); - const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2); - return TextureHandle(handle_1 | handle_2, via_header_index); - } - } - if (entry.is_bindless) { - const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); - return TextureHandle(raw, via_header_index); - } - const u32 buffer = engine.GetBoundBuffer(); - const u64 offset = (entry.offset + index) * sizeof(u32); - return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); -} - -ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { - if (entry.is_buffer) { - return ImageViewType::e2D; - } - switch (entry.type) { - case Tegra::Shader::TextureType::Texture1D: - return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D; - case Tegra::Shader::TextureType::Texture2D: - return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D; - case Tegra::Shader::TextureType::Texture3D: - return ImageViewType::e3D; - case Tegra::Shader::TextureType::TextureCube: - return entry.is_array ? 
ImageViewType::CubeArray : ImageViewType::Cube; - } - UNREACHABLE(); - return ImageViewType::e2D; -} - -ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) { - switch (entry.type) { - case Tegra::Shader::ImageType::Texture1D: - return ImageViewType::e1D; - case Tegra::Shader::ImageType::Texture1DArray: - return ImageViewType::e1DArray; - case Tegra::Shader::ImageType::Texture2D: - return ImageViewType::e2D; - case Tegra::Shader::ImageType::Texture2DArray: - return ImageViewType::e2DArray; - case Tegra::Shader::ImageType::Texture3D: - return ImageViewType::e3D; - case Tegra::Shader::ImageType::TextureBuffer: - return ImageViewType::Buffer; - } - UNREACHABLE(); - return ImageViewType::e2D; -} - -void PushImageDescriptors(const ShaderEntries& entries, TextureCache& texture_cache, - VKUpdateDescriptorQueue& update_descriptor_queue, - ImageViewId*& image_view_id_ptr, VkSampler*& sampler_ptr) { - for ([[maybe_unused]] const auto& entry : entries.uniform_texels) { - const ImageViewId image_view_id = *image_view_id_ptr++; - const ImageView& image_view = texture_cache.GetImageView(image_view_id); - update_descriptor_queue.AddTexelBuffer(image_view.BufferView()); - } - for (const auto& entry : entries.samplers) { - for (size_t i = 0; i < entry.size; ++i) { - const VkSampler sampler = *sampler_ptr++; - const ImageViewId image_view_id = *image_view_id_ptr++; - const ImageView& image_view = texture_cache.GetImageView(image_view_id); - const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry)); - update_descriptor_queue.AddSampledImage(handle, sampler); - } - } - for ([[maybe_unused]] const auto& entry : entries.storage_texels) { - const ImageViewId image_view_id = *image_view_id_ptr++; - const ImageView& image_view = texture_cache.GetImageView(image_view_id); - update_descriptor_queue.AddTexelBuffer(image_view.BufferView()); - } - for (const auto& entry : entries.images) { - // TODO: Mark as modified - const ImageViewId image_view_id = *image_view_id_ptr++; - const ImageView& image_view = texture_cache.GetImageView(image_view_id); - const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry)); - update_descriptor_queue.AddImage(handle); - } -} - DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced, bool is_indexed) { DrawParams params{ @@ -216,6 +102,7 @@ DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instan .num_instances = is_instanced ? num_instances : 1, .base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first, .num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count, + .first_index = is_indexed ? 
regs.index_array.first : 0, .is_indexed = is_indexed, }; if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) { @@ -243,21 +130,21 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra blit_image(device, scheduler, state_tracker, descriptor_pool), astc_decoder_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue, memory_allocator), - texture_cache_runtime{device, scheduler, memory_allocator, - staging_pool, blit_image, astc_decoder_pass}, + render_pass_cache(device), texture_cache_runtime{device, scheduler, + memory_allocator, staging_pool, + blit_image, astc_decoder_pass, + render_pass_cache}, texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, update_descriptor_queue, descriptor_pool), buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), - pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, - descriptor_pool, update_descriptor_queue), + pipeline_cache(*this, maxwell3d, kepler_compute, gpu_memory, device, scheduler, + descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache, + texture_cache, gpu.ShaderNotify()), query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache}, fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), - wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) { + wfi_event(device.GetLogical().CreateEvent()) { scheduler.SetQueryCache(query_cache); - if (device.UseAsynchronousShaders()) { - async_shaders.AllocateWorkers(); - } } RasterizerVulkan::~RasterizerVulkan() = default; @@ -270,53 +157,30 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { query_cache.UpdateCounters(); - graphics_key.fixed_state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported()); - - std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; - - texture_cache.SynchronizeGraphicsDescriptors(); - texture_cache.UpdateRenderTargets(false); - - const auto shaders = pipeline_cache.GetShaders(); - graphics_key.shaders = GetShaderAddresses(shaders); - - SetupShaderDescriptors(shaders, is_indexed); - - const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); - graphics_key.renderpass = framebuffer->RenderPass(); - - VKGraphicsPipeline* const pipeline = pipeline_cache.GetGraphicsPipeline( - graphics_key, framebuffer->NumColorBuffers(), async_shaders); - if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) { - // Async graphics pipeline was not ready. 
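// DrawParams now carries first_index (regs.index_array.first) so indexed draws start at
// the correct element instead of always 0. A minimal sketch of how those fields map onto
// the Vulkan draw calls recorded in Draw(); the raw API is shown for illustration, while
// the code below records through vk::CommandBuffer:
#include <cstdint>
#include <vulkan/vulkan.h>

struct ExampleDrawParams {
    std::uint32_t base_instance;
    std::uint32_t num_instances;
    std::uint32_t base_vertex;
    std::uint32_t num_vertices;
    std::uint32_t first_index;
    bool is_indexed;
};

void RecordDraw(VkCommandBuffer cmdbuf, const ExampleDrawParams& p) {
    if (p.is_indexed) {
        vkCmdDrawIndexed(cmdbuf, p.num_vertices, p.num_instances, p.first_index,
                         static_cast<std::int32_t>(p.base_vertex), p.base_instance);
    } else {
        vkCmdDraw(cmdbuf, p.num_vertices, p.num_instances, p.base_vertex, p.base_instance);
    }
}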
+ GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()}; + if (!pipeline) { return; } + std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; + pipeline->Configure(is_indexed); BeginTransformFeedback(); - scheduler.RequestRenderpass(framebuffer); - scheduler.BindGraphicsPipeline(pipeline->GetHandle()); UpdateDynamicStates(); - const auto& regs = maxwell3d.regs; - const u32 num_instances = maxwell3d.mme_draw.instance_count; - const DrawParams draw_params = MakeDrawParams(regs, num_instances, is_instanced, is_indexed); - const VkPipelineLayout pipeline_layout = pipeline->GetLayout(); - const VkDescriptorSet descriptor_set = pipeline->CommitDescriptorSet(); - scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) { - if (descriptor_set) { - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, - DESCRIPTOR_SET, descriptor_set, nullptr); - } + const auto& regs{maxwell3d.regs}; + const u32 num_instances{maxwell3d.mme_draw.instance_count}; + const DrawParams draw_params{MakeDrawParams(regs, num_instances, is_instanced, is_indexed)}; + scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) { if (draw_params.is_indexed) { - cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, 0, - draw_params.base_vertex, draw_params.base_instance); + cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, + draw_params.first_index, draw_params.base_vertex, + draw_params.base_instance); } else { cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances, draw_params.base_vertex, draw_params.base_instance); } }); - EndTransformFeedback(); } @@ -326,6 +190,7 @@ void RasterizerVulkan::Clear() { if (!maxwell3d.ShouldExecute()) { return; } + FlushWork(); query_cache.UpdateCounters(); @@ -395,73 +260,20 @@ void RasterizerVulkan::Clear() { }); } -void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { - MICROPROFILE_SCOPE(Vulkan_Compute); - - query_cache.UpdateCounters(); +void RasterizerVulkan::DispatchCompute() { + FlushWork(); - const auto& launch_desc = kepler_compute.launch_description; - auto& pipeline = pipeline_cache.GetComputePipeline({ - .shader = code_addr, - .shared_memory_size = launch_desc.shared_alloc, - .workgroup_size{ - launch_desc.block_dim_x, - launch_desc.block_dim_y, - launch_desc.block_dim_z, - }, - }); + ComputePipeline* const pipeline{pipeline_cache.CurrentComputePipeline()}; + if (!pipeline) { + return; + } + std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex}; + pipeline->Configure(kepler_compute, gpu_memory, scheduler, buffer_cache, texture_cache); - // Compute dispatches can't be executed inside a renderpass + const auto& qmd{kepler_compute.launch_description}; + const std::array<u32, 3> dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z}; scheduler.RequestOutsideRenderPassOperationContext(); - - image_view_indices.clear(); - sampler_handles.clear(); - - std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; - - const auto& entries = pipeline.GetEntries(); - buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers); - buffer_cache.UnbindComputeStorageBuffers(); - u32 ssbo_index = 0; - for (const auto& buffer : entries.global_buffers) { - buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset, - buffer.is_written); - ++ssbo_index; - } - buffer_cache.UpdateComputeBuffers(); - - texture_cache.SynchronizeComputeDescriptors(); - - SetupComputeUniformTexels(entries); - 
SetupComputeTextures(entries); - SetupComputeStorageTexels(entries); - SetupComputeImages(entries); - - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); - texture_cache.FillComputeImageViews(indices_span, image_view_ids); - - update_descriptor_queue.Acquire(); - - buffer_cache.BindHostComputeBuffers(); - - ImageViewId* image_view_id_ptr = image_view_ids.data(); - VkSampler* sampler_ptr = sampler_handles.data(); - PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr, - sampler_ptr); - - const VkPipeline pipeline_handle = pipeline.GetHandle(); - const VkPipelineLayout pipeline_layout = pipeline.GetLayout(); - const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet(); - scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y, - grid_z = launch_desc.grid_dim_z, pipeline_handle, pipeline_layout, - descriptor_set](vk::CommandBuffer cmdbuf) { - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle); - if (descriptor_set) { - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, - DESCRIPTOR_SET, descriptor_set, nullptr); - } - cmdbuf.Dispatch(grid_x, grid_y, grid_z); - }); + scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); }); } void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { @@ -626,6 +438,7 @@ void RasterizerVulkan::WaitForIdle() { void RasterizerVulkan::FragmentBarrier() { // We already put barriers when a render pass finishes + scheduler.RequestOutsideRenderPassOperationContext(); } void RasterizerVulkan::TiledCacheBarrier() { @@ -633,10 +446,11 @@ void RasterizerVulkan::TiledCacheBarrier() { } void RasterizerVulkan::FlushCommands() { - if (draw_counter > 0) { - draw_counter = 0; - scheduler.Flush(); + if (draw_counter == 0) { + return; } + draw_counter = 0; + scheduler.Flush(); } void RasterizerVulkan::TickFrame() { @@ -676,13 +490,18 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, if (!image_view) { return false; } - screen_info.image_view = image_view->Handle(VideoCommon::ImageViewType::e2D); + screen_info.image_view = image_view->Handle(Shader::TextureType::Color2D); screen_info.width = image_view->size.width; screen_info.height = image_view->size.height; screen_info.is_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format); return true; } +void RasterizerVulkan::LoadDiskResources(u64 title_id, std::stop_token stop_loading, + const VideoCore::DiskResourceLoadCallback& callback) { + pipeline_cache.LoadDiskResources(title_id, stop_loading, callback); +} + void RasterizerVulkan::FlushWork() { static constexpr u32 DRAWS_TO_DISPATCH = 4096; @@ -691,13 +510,11 @@ void RasterizerVulkan::FlushWork() { if ((++draw_counter & 7) != 7) { return; } - if (draw_counter < DRAWS_TO_DISPATCH) { // Send recorded tasks to the worker thread scheduler.DispatchWork(); return; } - // Otherwise (every certain number of draws) flush execution. // This submits commands to the Vulkan driver. 
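// FlushWork above batches submission: only every 8th draw does anything, and of those,
// draws below the 4096 threshold just hand recorded work to the worker thread while the
// rest flush the command buffer to the driver. A standalone sketch of that policy; the
// constants and the reset-on-flush behaviour are assumed from the surrounding code:
#include <cstdint>

enum class WorkAction { None, DispatchToWorker, FlushToDriver };

WorkAction NextWorkAction(std::uint32_t& draw_counter) {
    constexpr std::uint32_t draws_to_dispatch = 4096;
    if ((++draw_counter & 7) != 7) {
        return WorkAction::None;
    }
    if (draw_counter < draws_to_dispatch) {
        return WorkAction::DispatchToWorker;
    }
    draw_counter = 0;
    return WorkAction::FlushToDriver;
}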
scheduler.Flush(); @@ -716,52 +533,6 @@ bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 return buffer_cache.DMACopy(src_address, dest_address, amount); } -void RasterizerVulkan::SetupShaderDescriptors( - const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders, bool is_indexed) { - image_view_indices.clear(); - sampler_handles.clear(); - for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { - Shader* const shader = shaders[stage + 1]; - if (!shader) { - continue; - } - const ShaderEntries& entries = shader->GetEntries(); - SetupGraphicsUniformTexels(entries, stage); - SetupGraphicsTextures(entries, stage); - SetupGraphicsStorageTexels(entries, stage); - SetupGraphicsImages(entries, stage); - - buffer_cache.SetEnabledUniformBuffers(stage, entries.enabled_uniform_buffers); - buffer_cache.UnbindGraphicsStorageBuffers(stage); - u32 ssbo_index = 0; - for (const auto& buffer : entries.global_buffers) { - buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index, - buffer.cbuf_offset, buffer.is_written); - ++ssbo_index; - } - } - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); - buffer_cache.UpdateGraphicsBuffers(is_indexed); - texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); - - buffer_cache.BindHostGeometryBuffers(is_indexed); - - update_descriptor_queue.Acquire(); - - ImageViewId* image_view_id_ptr = image_view_ids.data(); - VkSampler* sampler_ptr = sampler_handles.data(); - for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { - // Skip VertexA stage - Shader* const shader = shaders[stage + 1]; - if (!shader) { - continue; - } - buffer_cache.BindHostStageBuffers(stage); - PushImageDescriptors(shader->GetEntries(), texture_cache, update_descriptor_queue, - image_view_id_ptr, sampler_ptr); - } -} - void RasterizerVulkan::UpdateDynamicStates() { auto& regs = maxwell3d.regs; UpdateViewportsState(regs); @@ -770,6 +541,7 @@ void RasterizerVulkan::UpdateDynamicStates() { UpdateBlendConstants(regs); UpdateDepthBounds(regs); UpdateStencilFaces(regs); + UpdateLineWidth(regs); if (device.IsExtExtendedDynamicStateSupported()) { UpdateCullMode(regs); UpdateDepthBoundsTestEnable(regs); @@ -779,6 +551,9 @@ void RasterizerVulkan::UpdateDynamicStates() { UpdateFrontFace(regs); UpdateStencilOp(regs); UpdateStencilTestEnable(regs); + if (device.IsExtVertexInputDynamicStateSupported()) { + UpdateVertexInput(regs); + } } } @@ -810,89 +585,6 @@ void RasterizerVulkan::EndTransformFeedback() { [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); } -void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) { - const auto& regs = maxwell3d.regs; - const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; - for (const auto& entry : entries.uniform_texels) { - const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); - image_view_indices.push_back(handle.image); - } -} - -void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) { - const auto& regs = maxwell3d.regs; - const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; - for (const auto& entry : entries.samplers) { - for (size_t index = 0; index < entry.size; ++index) { - const TextureHandle handle = - GetTextureInfo(maxwell3d, via_header_index, entry, stage, index); - image_view_indices.push_back(handle.image); - - Sampler* const 
sampler = texture_cache.GetGraphicsSampler(handle.sampler); - sampler_handles.push_back(sampler->Handle()); - } - } -} - -void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) { - const auto& regs = maxwell3d.regs; - const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; - for (const auto& entry : entries.storage_texels) { - const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); - image_view_indices.push_back(handle.image); - } -} - -void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) { - const auto& regs = maxwell3d.regs; - const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; - for (const auto& entry : entries.images) { - const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); - image_view_indices.push_back(handle.image); - } -} - -void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) { - const bool via_header_index = kepler_compute.launch_description.linked_tsc; - for (const auto& entry : entries.uniform_texels) { - const TextureHandle handle = - GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); - image_view_indices.push_back(handle.image); - } -} - -void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { - const bool via_header_index = kepler_compute.launch_description.linked_tsc; - for (const auto& entry : entries.samplers) { - for (size_t index = 0; index < entry.size; ++index) { - const TextureHandle handle = GetTextureInfo(kepler_compute, via_header_index, entry, - COMPUTE_SHADER_INDEX, index); - image_view_indices.push_back(handle.image); - - Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); - sampler_handles.push_back(sampler->Handle()); - } - } -} - -void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) { - const bool via_header_index = kepler_compute.launch_description.linked_tsc; - for (const auto& entry : entries.storage_texels) { - const TextureHandle handle = - GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); - image_view_indices.push_back(handle.image); - } -} - -void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { - const bool via_header_index = kepler_compute.launch_description.linked_tsc; - for (const auto& entry : entries.images) { - const TextureHandle handle = - GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); - image_view_indices.push_back(handle.image); - } -} - void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { if (!state_tracker.TouchViewports()) { return; @@ -985,6 +677,14 @@ void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs) } } +void RasterizerVulkan::UpdateLineWidth(Tegra::Engines::Maxwell3D::Regs& regs) { + if (!state_tracker.TouchLineWidth()) { + return; + } + const float width = regs.line_smooth_enable ? 
regs.line_width_smooth : regs.line_width_aliased; + scheduler.Record([width](vk::CommandBuffer cmdbuf) { cmdbuf.SetLineWidth(width); }); +} + void RasterizerVulkan::UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs) { if (!state_tracker.TouchCullMode()) { return; @@ -999,6 +699,11 @@ void RasterizerVulkan::UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Re if (!state_tracker.TouchDepthBoundsTestEnable()) { return; } + bool enabled = regs.depth_bounds_enable; + if (enabled && !device.IsDepthBoundsSupported()) { + LOG_WARNING(Render_Vulkan, "Depth bounds is enabled but not supported"); + enabled = false; + } scheduler.Record([enable = regs.depth_bounds_enable](vk::CommandBuffer cmdbuf) { cmdbuf.SetDepthBoundsTestEnableEXT(enable); }); @@ -1086,4 +791,62 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& }); } +void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs) { + auto& dirty{maxwell3d.dirty.flags}; + if (!dirty[Dirty::VertexInput]) { + return; + } + dirty[Dirty::VertexInput] = false; + + boost::container::static_vector<VkVertexInputBindingDescription2EXT, 32> bindings; + boost::container::static_vector<VkVertexInputAttributeDescription2EXT, 32> attributes; + + // There seems to be a bug on Nvidia's driver where updating only higher attributes ends up + // generating dirty state. Track the highest dirty attribute and update all attributes until + // that one. + size_t highest_dirty_attr{}; + for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { + if (dirty[Dirty::VertexAttribute0 + index]) { + highest_dirty_attr = index; + } + } + for (size_t index = 0; index < highest_dirty_attr; ++index) { + const Maxwell::VertexAttribute attribute{regs.vertex_attrib_format[index]}; + const u32 binding{attribute.buffer}; + dirty[Dirty::VertexAttribute0 + index] = false; + dirty[Dirty::VertexBinding0 + static_cast<size_t>(binding)] = true; + if (!attribute.constant) { + attributes.push_back({ + .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_ATTRIBUTE_DESCRIPTION_2_EXT, + .pNext = nullptr, + .location = static_cast<u32>(index), + .binding = binding, + .format = MaxwellToVK::VertexFormat(attribute.type, attribute.size), + .offset = attribute.offset, + }); + } + } + for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { + if (!dirty[Dirty::VertexBinding0 + index]) { + continue; + } + dirty[Dirty::VertexBinding0 + index] = false; + + const u32 binding{static_cast<u32>(index)}; + const auto& input_binding{regs.vertex_array[binding]}; + const bool is_instanced{regs.instanced_arrays.IsInstancingEnabled(binding)}; + bindings.push_back({ + .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_BINDING_DESCRIPTION_2_EXT, + .pNext = nullptr, + .binding = binding, + .stride = input_binding.stride, + .inputRate = is_instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX, + .divisor = is_instanced ? 
input_binding.divisor : 1, + }); + } + scheduler.Record([bindings, attributes](vk::CommandBuffer cmdbuf) { + cmdbuf.SetVertexInputEXT(bindings, attributes); + }); +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 2065209be..866827247 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -21,14 +21,13 @@ #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_fence_manager.h" -#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_query_cache.h" +#include "video_core/renderer_vulkan/vk_render_pass_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" -#include "video_core/shader/async_shaders.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -73,7 +72,7 @@ public: void Draw(bool is_indexed, bool is_instanced) override; void Clear() override; - void DispatchCompute(GPUVAddr code_addr) override; + void DispatchCompute() override; void ResetCounter(VideoCore::QueryType type) override; void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; @@ -102,19 +101,8 @@ public: Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) override; - - VideoCommon::Shader::AsyncShaders& GetAsyncShaders() { - return async_shaders; - } - - const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const { - return async_shaders; - } - - /// Maximum supported size that a constbuffer can have in bytes. - static constexpr size_t MaxConstbufferSize = 0x10000; - static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0, - "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); + void LoadDiskResources(u64 title_id, std::stop_token stop_loading, + const VideoCore::DiskResourceLoadCallback& callback) override; private: static constexpr size_t MAX_TEXTURES = 192; @@ -125,46 +113,19 @@ private: void FlushWork(); - /// Setup descriptors in the graphics pipeline. - void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders, - bool is_indexed); - void UpdateDynamicStates(); void BeginTransformFeedback(); void EndTransformFeedback(); - /// Setup uniform texels in the graphics pipeline. - void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage); - - /// Setup textures in the graphics pipeline. - void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage); - - /// Setup storage texels in the graphics pipeline. - void SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage); - - /// Setup images in the graphics pipeline. - void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage); - - /// Setup texel buffers in the compute pipeline. 
- void SetupComputeUniformTexels(const ShaderEntries& entries); - - /// Setup textures in the compute pipeline. - void SetupComputeTextures(const ShaderEntries& entries); - - /// Setup storage texels in the compute pipeline. - void SetupComputeStorageTexels(const ShaderEntries& entries); - - /// Setup images in the compute pipeline. - void SetupComputeImages(const ShaderEntries& entries); - void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs); + void UpdateLineWidth(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); @@ -175,6 +136,8 @@ private: void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); + void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs); + Tegra::GPU& gpu; Tegra::MemoryManager& gpu_memory; Tegra::Engines::Maxwell3D& maxwell3d; @@ -187,24 +150,22 @@ private: VKScheduler& scheduler; StagingBufferPool staging_pool; - VKDescriptorPool descriptor_pool; + DescriptorPool descriptor_pool; VKUpdateDescriptorQueue update_descriptor_queue; BlitImageHelper blit_image; ASTCDecoderPass astc_decoder_pass; - - GraphicsPipelineCacheKey graphics_key; + RenderPassCache render_pass_cache; TextureCacheRuntime texture_cache_runtime; TextureCache texture_cache; BufferCacheRuntime buffer_cache_runtime; BufferCache buffer_cache; - VKPipelineCache pipeline_cache; + PipelineCache pipeline_cache; VKQueryCache query_cache; AccelerateDMA accelerate_dma; VKFenceManager fence_manager; vk::Event wfi_event; - VideoCommon::Shader::AsyncShaders async_shaders; boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; std::array<VideoCommon::ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp new file mode 100644 index 000000000..451ffe019 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp @@ -0,0 +1,96 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
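// RenderPassCache::Get builds a single-subpass VkRenderPass (LOAD/STORE attachment ops, GENERAL
// image layouts) and memoizes it by RenderPassKey, so later lookups with the same formats and
// sample count return the cached handle. A minimal usage sketch follows; color_format and
// depth_format are placeholder values, not names taken from this patch:
//
//     RenderPassKey key{};
//     key.color_formats.fill(PixelFormat::Invalid); // unused color targets stay Invalid
//     key.color_formats[0] = color_format;          // PixelFormat of render target 0
//     key.depth_format = depth_format;              // PixelFormat::Invalid when there is no depth buffer
//     key.samples = VK_SAMPLE_COUNT_1_BIT;
//     const VkRenderPass render_pass = render_pass_cache.Get(key);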
+ +#include <unordered_map> + +#include <boost/container/static_vector.hpp> + +#include "video_core/renderer_vulkan/maxwell_to_vk.h" +#include "video_core/renderer_vulkan/vk_render_pass_cache.h" +#include "video_core/surface.h" +#include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan { +namespace { +using VideoCore::Surface::PixelFormat; + +VkAttachmentDescription AttachmentDescription(const Device& device, PixelFormat format, + VkSampleCountFlagBits samples) { + using MaxwellToVK::SurfaceFormat; + return { + .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT, + .format = SurfaceFormat(device, FormatType::Optimal, true, format).format, + .samples = samples, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = VK_IMAGE_LAYOUT_GENERAL, + .finalLayout = VK_IMAGE_LAYOUT_GENERAL, + }; +} +} // Anonymous namespace + +RenderPassCache::RenderPassCache(const Device& device_) : device{&device_} {} + +VkRenderPass RenderPassCache::Get(const RenderPassKey& key) { + std::lock_guard lock{mutex}; + const auto [pair, is_new] = cache.try_emplace(key); + if (!is_new) { + return *pair->second; + } + boost::container::static_vector<VkAttachmentDescription, 9> descriptions; + std::array<VkAttachmentReference, 8> references{}; + u32 num_attachments{}; + u32 num_colors{}; + for (size_t index = 0; index < key.color_formats.size(); ++index) { + const PixelFormat format{key.color_formats[index]}; + const bool is_valid{format != PixelFormat::Invalid}; + references[index] = VkAttachmentReference{ + .attachment = is_valid ? num_colors : VK_ATTACHMENT_UNUSED, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }; + if (is_valid) { + descriptions.push_back(AttachmentDescription(*device, format, key.samples)); + num_attachments = static_cast<u32>(index + 1); + ++num_colors; + } + } + const bool has_depth{key.depth_format != PixelFormat::Invalid}; + VkAttachmentReference depth_reference{}; + if (key.depth_format != PixelFormat::Invalid) { + depth_reference = VkAttachmentReference{ + .attachment = num_colors, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }; + descriptions.push_back(AttachmentDescription(*device, key.depth_format, key.samples)); + } + const VkSubpassDescription subpass{ + .flags = 0, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputAttachmentCount = 0, + .pInputAttachments = nullptr, + .colorAttachmentCount = num_attachments, + .pColorAttachments = references.data(), + .pResolveAttachments = nullptr, + .pDepthStencilAttachment = has_depth ? &depth_reference : nullptr, + .preserveAttachmentCount = 0, + .pPreserveAttachments = nullptr, + }; + pair->second = device->GetLogical().CreateRenderPass({ + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .attachmentCount = static_cast<u32>(descriptions.size()), + .pAttachments = descriptions.empty() ? 
nullptr : descriptions.data(), + .subpassCount = 1, + .pSubpasses = &subpass, + .dependencyCount = 0, + .pDependencies = nullptr, + }); + return *pair->second; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.h b/src/video_core/renderer_vulkan/vk_render_pass_cache.h new file mode 100644 index 000000000..eaa0ed775 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.h @@ -0,0 +1,55 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <mutex> +#include <unordered_map> + +#include "video_core/surface.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan { + +struct RenderPassKey { + auto operator<=>(const RenderPassKey&) const noexcept = default; + + std::array<VideoCore::Surface::PixelFormat, 8> color_formats; + VideoCore::Surface::PixelFormat depth_format; + VkSampleCountFlagBits samples; +}; + +} // namespace Vulkan + +namespace std { +template <> +struct hash<Vulkan::RenderPassKey> { + [[nodiscard]] size_t operator()(const Vulkan::RenderPassKey& key) const noexcept { + size_t value = static_cast<size_t>(key.depth_format) << 48; + value ^= static_cast<size_t>(key.samples) << 52; + for (size_t i = 0; i < key.color_formats.size(); ++i) { + value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6); + } + return value; + } +}; +} // namespace std + +namespace Vulkan { + +class Device; + +class RenderPassCache { +public: + explicit RenderPassCache(const Device& device_); + + VkRenderPass Get(const RenderPassKey& key); + +private: + const Device* device{}; + std::unordered_map<RenderPassKey, vk::RenderPass> cache; + std::mutex mutex; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.cpp b/src/video_core/renderer_vulkan/vk_resource_pool.cpp index a8bf7bda8..2dd514968 100644 --- a/src/video_core/renderer_vulkan/vk_resource_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_resource_pool.cpp @@ -10,18 +10,16 @@ namespace Vulkan { ResourcePool::ResourcePool(MasterSemaphore& master_semaphore_, size_t grow_step_) - : master_semaphore{master_semaphore_}, grow_step{grow_step_} {} - -ResourcePool::~ResourcePool() = default; + : master_semaphore{&master_semaphore_}, grow_step{grow_step_} {} size_t ResourcePool::CommitResource() { // Refresh semaphore to query updated results - master_semaphore.Refresh(); - const u64 gpu_tick = master_semaphore.KnownGpuTick(); + master_semaphore->Refresh(); + const u64 gpu_tick = master_semaphore->KnownGpuTick(); const auto search = [this, gpu_tick](size_t begin, size_t end) -> std::optional<size_t> { for (size_t iterator = begin; iterator < end; ++iterator) { if (gpu_tick >= ticks[iterator]) { - ticks[iterator] = master_semaphore.CurrentTick(); + ticks[iterator] = master_semaphore->CurrentTick(); return iterator; } } @@ -36,7 +34,7 @@ size_t ResourcePool::CommitResource() { // Both searches failed, the pool is full; handle it. 
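// Per the ///< note in vk_resource_pool.h, an overflow grows the pool by grow_step resources;
// ManageOverflow() hands back one of the free slots, which is stamped with the current tick below.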
const size_t free_resource = ManageOverflow(); - ticks[free_resource] = master_semaphore.CurrentTick(); + ticks[free_resource] = master_semaphore->CurrentTick(); found = free_resource; } } diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.h b/src/video_core/renderer_vulkan/vk_resource_pool.h index 9d0bb3b4d..f0b80ad59 100644 --- a/src/video_core/renderer_vulkan/vk_resource_pool.h +++ b/src/video_core/renderer_vulkan/vk_resource_pool.h @@ -18,8 +18,16 @@ class MasterSemaphore; */ class ResourcePool { public: + explicit ResourcePool() = default; explicit ResourcePool(MasterSemaphore& master_semaphore, size_t grow_step); - virtual ~ResourcePool(); + + virtual ~ResourcePool() = default; + + ResourcePool& operator=(ResourcePool&&) noexcept = default; + ResourcePool(ResourcePool&&) noexcept = default; + + ResourcePool& operator=(const ResourcePool&) = default; + ResourcePool(const ResourcePool&) = default; protected: size_t CommitResource(); @@ -34,7 +42,7 @@ private: /// Allocates a new page of resources. void Grow(); - MasterSemaphore& master_semaphore; + MasterSemaphore* master_semaphore{}; size_t grow_step = 0; ///< Number of new resources created after an overflow size_t hint_iterator = 0; ///< Hint to where the next free resources is likely to be found std::vector<u64> ticks; ///< Ticks for each resource diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index f35c120b0..4840962de 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -31,7 +31,7 @@ void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) { command->~Command(); command = next; } - + submit = false; command_offset = 0; first = nullptr; last = nullptr; @@ -42,13 +42,16 @@ VKScheduler::VKScheduler(const Device& device_, StateTracker& state_tracker_) master_semaphore{std::make_unique<MasterSemaphore>(device)}, command_pool{std::make_unique<CommandPool>(*master_semaphore, device)} { AcquireNewChunk(); - AllocateNewContext(); + AllocateWorkerCommandBuffer(); worker_thread = std::thread(&VKScheduler::WorkerThread, this); } VKScheduler::~VKScheduler() { - quit = true; - cv.notify_all(); + { + std::lock_guard lock{work_mutex}; + quit = true; + } + work_cv.notify_all(); worker_thread.join(); } @@ -60,6 +63,7 @@ void VKScheduler::Flush(VkSemaphore semaphore) { void VKScheduler::Finish(VkSemaphore semaphore) { const u64 presubmit_tick = CurrentTick(); SubmitExecution(semaphore); + WaitWorker(); Wait(presubmit_tick); AllocateNewContext(); } @@ -68,20 +72,19 @@ void VKScheduler::WaitWorker() { MICROPROFILE_SCOPE(Vulkan_WaitForWorker); DispatchWork(); - bool finished = false; - do { - cv.notify_all(); - std::unique_lock lock{mutex}; - finished = chunk_queue.Empty(); - } while (!finished); + std::unique_lock lock{work_mutex}; + wait_cv.wait(lock, [this] { return work_queue.empty(); }); } void VKScheduler::DispatchWork() { if (chunk->Empty()) { return; } - chunk_queue.Push(std::move(chunk)); - cv.notify_all(); + { + std::lock_guard lock{work_mutex}; + work_queue.push(std::move(chunk)); + } + work_cv.notify_one(); AcquireNewChunk(); } @@ -124,93 +127,101 @@ void VKScheduler::RequestOutsideRenderPassOperationContext() { EndRenderPass(); } -void VKScheduler::BindGraphicsPipeline(VkPipeline pipeline) { +bool VKScheduler::UpdateGraphicsPipeline(GraphicsPipeline* pipeline) { if (state.graphics_pipeline == pipeline) { - return; + return false; } state.graphics_pipeline = pipeline; - 
Record([pipeline](vk::CommandBuffer cmdbuf) { - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); - }); + return true; } void VKScheduler::WorkerThread() { - Common::SetCurrentThreadPriority(Common::ThreadPriority::High); - std::unique_lock lock{mutex}; + Common::SetCurrentThreadName("yuzu:VulkanWorker"); do { - cv.wait(lock, [this] { return !chunk_queue.Empty() || quit; }); - if (quit) { - continue; + if (work_queue.empty()) { + wait_cv.notify_all(); + } + std::unique_ptr<CommandChunk> work; + { + std::unique_lock lock{work_mutex}; + work_cv.wait(lock, [this] { return !work_queue.empty() || quit; }); + if (quit) { + continue; + } + work = std::move(work_queue.front()); + work_queue.pop(); + } + const bool has_submit = work->HasSubmit(); + work->ExecuteAll(current_cmdbuf); + if (has_submit) { + AllocateWorkerCommandBuffer(); } - auto extracted_chunk = std::move(chunk_queue.Front()); - chunk_queue.Pop(); - extracted_chunk->ExecuteAll(current_cmdbuf); - chunk_reserve.Push(std::move(extracted_chunk)); + std::lock_guard reserve_lock{reserve_mutex}; + chunk_reserve.push_back(std::move(work)); } while (!quit); } +void VKScheduler::AllocateWorkerCommandBuffer() { + current_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader()); + current_cmdbuf.Begin({ + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + .pNext = nullptr, + .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, + .pInheritanceInfo = nullptr, + }); +} + void VKScheduler::SubmitExecution(VkSemaphore semaphore) { EndPendingOperations(); InvalidateState(); - WaitWorker(); - std::unique_lock lock{mutex}; + const u64 signal_value = master_semaphore->NextTick(); + Record([semaphore, signal_value, this](vk::CommandBuffer cmdbuf) { + cmdbuf.End(); - current_cmdbuf.End(); + const u32 num_signal_semaphores = semaphore ? 2U : 1U; - const VkSemaphore timeline_semaphore = master_semaphore->Handle(); - const u32 num_signal_semaphores = semaphore ? 
2U : 1U; + const u64 wait_value = signal_value - 1; + const VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; - const u64 signal_value = master_semaphore->CurrentTick(); - const u64 wait_value = signal_value - 1; - const VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + const VkSemaphore timeline_semaphore = master_semaphore->Handle(); + const std::array signal_values{signal_value, u64(0)}; + const std::array signal_semaphores{timeline_semaphore, semaphore}; - master_semaphore->NextTick(); - - const std::array signal_values{signal_value, u64(0)}; - const std::array signal_semaphores{timeline_semaphore, semaphore}; - - const VkTimelineSemaphoreSubmitInfoKHR timeline_si{ - .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR, - .pNext = nullptr, - .waitSemaphoreValueCount = 1, - .pWaitSemaphoreValues = &wait_value, - .signalSemaphoreValueCount = num_signal_semaphores, - .pSignalSemaphoreValues = signal_values.data(), - }; - const VkSubmitInfo submit_info{ - .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, - .pNext = &timeline_si, - .waitSemaphoreCount = 1, - .pWaitSemaphores = &timeline_semaphore, - .pWaitDstStageMask = &wait_stage_mask, - .commandBufferCount = 1, - .pCommandBuffers = current_cmdbuf.address(), - .signalSemaphoreCount = num_signal_semaphores, - .pSignalSemaphores = signal_semaphores.data(), - }; - switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) { - case VK_SUCCESS: - break; - case VK_ERROR_DEVICE_LOST: - device.ReportLoss(); - [[fallthrough]]; - default: - vk::Check(result); - } + const VkTimelineSemaphoreSubmitInfoKHR timeline_si{ + .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR, + .pNext = nullptr, + .waitSemaphoreValueCount = 1, + .pWaitSemaphoreValues = &wait_value, + .signalSemaphoreValueCount = num_signal_semaphores, + .pSignalSemaphoreValues = signal_values.data(), + }; + const VkSubmitInfo submit_info{ + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .pNext = &timeline_si, + .waitSemaphoreCount = 1, + .pWaitSemaphores = &timeline_semaphore, + .pWaitDstStageMask = &wait_stage_mask, + .commandBufferCount = 1, + .pCommandBuffers = cmdbuf.address(), + .signalSemaphoreCount = num_signal_semaphores, + .pSignalSemaphores = signal_semaphores.data(), + }; + switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) { + case VK_SUCCESS: + break; + case VK_ERROR_DEVICE_LOST: + device.ReportLoss(); + [[fallthrough]]; + default: + vk::Check(result); + } + }); + chunk->MarkSubmit(); + DispatchWork(); } void VKScheduler::AllocateNewContext() { - std::unique_lock lock{mutex}; - - current_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader()); - current_cmdbuf.Begin({ - .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, - .pNext = nullptr, - .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, - .pInheritanceInfo = nullptr, - }); - // Enable counters once again. These are disabled when a command buffer is finished. 
if (query_cache) { query_cache->UpdateCounters(); @@ -265,12 +276,13 @@ void VKScheduler::EndRenderPass() { } void VKScheduler::AcquireNewChunk() { - if (chunk_reserve.Empty()) { + std::lock_guard lock{reserve_mutex}; + if (chunk_reserve.empty()) { chunk = std::make_unique<CommandChunk>(); return; } - chunk = std::move(chunk_reserve.Front()); - chunk_reserve.Pop(); + chunk = std::move(chunk_reserve.back()); + chunk_reserve.pop_back(); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 3ce48e9d2..cf39a2363 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -8,12 +8,12 @@ #include <condition_variable> #include <cstddef> #include <memory> -#include <stack> #include <thread> #include <utility> +#include <queue> + #include "common/alignment.h" #include "common/common_types.h" -#include "common/threadsafe_queue.h" #include "video_core/renderer_vulkan/vk_master_semaphore.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -22,6 +22,7 @@ namespace Vulkan { class CommandPool; class Device; class Framebuffer; +class GraphicsPipeline; class StateTracker; class VKQueryCache; @@ -52,8 +53,8 @@ public: /// of a renderpass. void RequestOutsideRenderPassOperationContext(); - /// Binds a pipeline to the current execution context. - void BindGraphicsPipeline(VkPipeline pipeline); + /// Update the pipeline to the current execution context. + bool UpdateGraphicsPipeline(GraphicsPipeline* pipeline); /// Invalidates current command buffer state except for render passes void InvalidateState(); @@ -85,6 +86,10 @@ public: /// Waits for the given tick to trigger on the GPU. void Wait(u64 tick) { + if (tick >= master_semaphore->CurrentTick()) { + // Make sure we are not waiting for the current tick without signalling + Flush(); + } master_semaphore->Wait(tick); } @@ -154,15 +159,24 @@ private: return true; } + void MarkSubmit() { + submit = true; + } + bool Empty() const { return command_offset == 0; } + bool HasSubmit() const { + return submit; + } + private: Command* first = nullptr; Command* last = nullptr; size_t command_offset = 0; + bool submit = false; alignas(std::max_align_t) std::array<u8, 0x8000> data{}; }; @@ -170,11 +184,13 @@ private: VkRenderPass renderpass = nullptr; VkFramebuffer framebuffer = nullptr; VkExtent2D render_area = {0, 0}; - VkPipeline graphics_pipeline = nullptr; + GraphicsPipeline* graphics_pipeline = nullptr; }; void WorkerThread(); + void AllocateWorkerCommandBuffer(); + void SubmitExecution(VkSemaphore semaphore); void AllocateNewContext(); @@ -204,11 +220,13 @@ private: std::array<VkImage, 9> renderpass_images{}; std::array<VkImageSubresourceRange, 9> renderpass_image_ranges{}; - Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_queue; - Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve; - std::mutex mutex; - std::condition_variable cv; - bool quit = false; + std::queue<std::unique_ptr<CommandChunk>> work_queue; + std::vector<std::unique_ptr<CommandChunk>> chunk_reserve; + std::mutex reserve_mutex; + std::mutex work_mutex; + std::condition_variable work_cv; + std::condition_variable wait_cv; + std::atomic_bool quit{}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp deleted file mode 100644 index c6846d886..000000000 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ /dev/null @@ -1,3166 +0,0 @@ 
-// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <functional> -#include <limits> -#include <map> -#include <optional> -#include <type_traits> -#include <unordered_map> -#include <utility> - -#include <fmt/format.h> - -#include <sirit/sirit.h> - -#include "common/alignment.h" -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/engines/shader_header.h" -#include "video_core/engines/shader_type.h" -#include "video_core/renderer_vulkan/vk_shader_decompiler.h" -#include "video_core/shader/node.h" -#include "video_core/shader/shader_ir.h" -#include "video_core/shader/transform_feedback.h" -#include "video_core/vulkan_common/vulkan_device.h" - -namespace Vulkan { - -namespace { - -using Sirit::Id; -using Tegra::Engines::ShaderType; -using Tegra::Shader::Attribute; -using Tegra::Shader::PixelImap; -using Tegra::Shader::Register; -using namespace VideoCommon::Shader; - -using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using Operation = const OperationNode&; - -class ASTDecompiler; -class ExprDecompiler; - -// TODO(Rodrigo): Use rasterizer's value -constexpr u32 MaxConstBufferFloats = 0x4000; -constexpr u32 MaxConstBufferElements = MaxConstBufferFloats / 4; - -constexpr u32 NumInputPatches = 32; // This value seems to be the standard - -enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat }; - -class Expression final { -public: - Expression(Id id_, Type type_) : id{id_}, type{type_} { - ASSERT(type_ != Type::Void); - } - Expression() : type{Type::Void} {} - - Id id{}; - Type type{}; -}; -static_assert(std::is_standard_layout_v<Expression>); - -struct TexelBuffer { - Id image_type{}; - Id image{}; -}; - -struct SampledImage { - Id image_type{}; - Id sampler_type{}; - Id sampler_pointer_type{}; - Id variable{}; -}; - -struct StorageImage { - Id image_type{}; - Id image{}; -}; - -struct AttributeType { - Type type; - Id scalar; - Id vector; -}; - -struct VertexIndices { - std::optional<u32> position; - std::optional<u32> layer; - std::optional<u32> viewport; - std::optional<u32> point_size; - std::optional<u32> clip_distances; -}; - -struct GenericVaryingDescription { - Id id = nullptr; - u32 first_element = 0; - bool is_scalar = false; -}; - -spv::Dim GetSamplerDim(const SamplerEntry& sampler) { - ASSERT(!sampler.is_buffer); - switch (sampler.type) { - case Tegra::Shader::TextureType::Texture1D: - return spv::Dim::Dim1D; - case Tegra::Shader::TextureType::Texture2D: - return spv::Dim::Dim2D; - case Tegra::Shader::TextureType::Texture3D: - return spv::Dim::Dim3D; - case Tegra::Shader::TextureType::TextureCube: - return spv::Dim::Cube; - default: - UNIMPLEMENTED_MSG("Unimplemented sampler type={}", sampler.type); - return spv::Dim::Dim2D; - } -} - -std::pair<spv::Dim, bool> GetImageDim(const ImageEntry& image) { - switch (image.type) { - case Tegra::Shader::ImageType::Texture1D: - return {spv::Dim::Dim1D, false}; - case Tegra::Shader::ImageType::TextureBuffer: - return {spv::Dim::Buffer, false}; - case Tegra::Shader::ImageType::Texture1DArray: - return {spv::Dim::Dim1D, true}; - case Tegra::Shader::ImageType::Texture2D: - return {spv::Dim::Dim2D, false}; - case Tegra::Shader::ImageType::Texture2DArray: - return {spv::Dim::Dim2D, true}; - case Tegra::Shader::ImageType::Texture3D: - return {spv::Dim::Dim3D, false}; - default: - 
UNIMPLEMENTED_MSG("Unimplemented image type={}", image.type); - return {spv::Dim::Dim2D, false}; - } -} - -/// Returns the number of vertices present in a primitive topology. -u32 GetNumPrimitiveTopologyVertices(Maxwell::PrimitiveTopology primitive_topology) { - switch (primitive_topology) { - case Maxwell::PrimitiveTopology::Points: - return 1; - case Maxwell::PrimitiveTopology::Lines: - case Maxwell::PrimitiveTopology::LineLoop: - case Maxwell::PrimitiveTopology::LineStrip: - return 2; - case Maxwell::PrimitiveTopology::Triangles: - case Maxwell::PrimitiveTopology::TriangleStrip: - case Maxwell::PrimitiveTopology::TriangleFan: - return 3; - case Maxwell::PrimitiveTopology::LinesAdjacency: - case Maxwell::PrimitiveTopology::LineStripAdjacency: - return 4; - case Maxwell::PrimitiveTopology::TrianglesAdjacency: - case Maxwell::PrimitiveTopology::TriangleStripAdjacency: - return 6; - case Maxwell::PrimitiveTopology::Quads: - UNIMPLEMENTED_MSG("Quads"); - return 3; - case Maxwell::PrimitiveTopology::QuadStrip: - UNIMPLEMENTED_MSG("QuadStrip"); - return 3; - case Maxwell::PrimitiveTopology::Polygon: - UNIMPLEMENTED_MSG("Polygon"); - return 3; - case Maxwell::PrimitiveTopology::Patches: - UNIMPLEMENTED_MSG("Patches"); - return 3; - default: - UNREACHABLE(); - return 3; - } -} - -spv::ExecutionMode GetExecutionMode(Maxwell::TessellationPrimitive primitive) { - switch (primitive) { - case Maxwell::TessellationPrimitive::Isolines: - return spv::ExecutionMode::Isolines; - case Maxwell::TessellationPrimitive::Triangles: - return spv::ExecutionMode::Triangles; - case Maxwell::TessellationPrimitive::Quads: - return spv::ExecutionMode::Quads; - } - UNREACHABLE(); - return spv::ExecutionMode::Triangles; -} - -spv::ExecutionMode GetExecutionMode(Maxwell::TessellationSpacing spacing) { - switch (spacing) { - case Maxwell::TessellationSpacing::Equal: - return spv::ExecutionMode::SpacingEqual; - case Maxwell::TessellationSpacing::FractionalOdd: - return spv::ExecutionMode::SpacingFractionalOdd; - case Maxwell::TessellationSpacing::FractionalEven: - return spv::ExecutionMode::SpacingFractionalEven; - } - UNREACHABLE(); - return spv::ExecutionMode::SpacingEqual; -} - -spv::ExecutionMode GetExecutionMode(Maxwell::PrimitiveTopology input_topology) { - switch (input_topology) { - case Maxwell::PrimitiveTopology::Points: - return spv::ExecutionMode::InputPoints; - case Maxwell::PrimitiveTopology::Lines: - case Maxwell::PrimitiveTopology::LineLoop: - case Maxwell::PrimitiveTopology::LineStrip: - return spv::ExecutionMode::InputLines; - case Maxwell::PrimitiveTopology::Triangles: - case Maxwell::PrimitiveTopology::TriangleStrip: - case Maxwell::PrimitiveTopology::TriangleFan: - return spv::ExecutionMode::Triangles; - case Maxwell::PrimitiveTopology::LinesAdjacency: - case Maxwell::PrimitiveTopology::LineStripAdjacency: - return spv::ExecutionMode::InputLinesAdjacency; - case Maxwell::PrimitiveTopology::TrianglesAdjacency: - case Maxwell::PrimitiveTopology::TriangleStripAdjacency: - return spv::ExecutionMode::InputTrianglesAdjacency; - case Maxwell::PrimitiveTopology::Quads: - UNIMPLEMENTED_MSG("Quads"); - return spv::ExecutionMode::Triangles; - case Maxwell::PrimitiveTopology::QuadStrip: - UNIMPLEMENTED_MSG("QuadStrip"); - return spv::ExecutionMode::Triangles; - case Maxwell::PrimitiveTopology::Polygon: - UNIMPLEMENTED_MSG("Polygon"); - return spv::ExecutionMode::Triangles; - case Maxwell::PrimitiveTopology::Patches: - UNIMPLEMENTED_MSG("Patches"); - return spv::ExecutionMode::Triangles; - } - UNREACHABLE(); - 
return spv::ExecutionMode::Triangles; -} - -spv::ExecutionMode GetExecutionMode(Tegra::Shader::OutputTopology output_topology) { - switch (output_topology) { - case Tegra::Shader::OutputTopology::PointList: - return spv::ExecutionMode::OutputPoints; - case Tegra::Shader::OutputTopology::LineStrip: - return spv::ExecutionMode::OutputLineStrip; - case Tegra::Shader::OutputTopology::TriangleStrip: - return spv::ExecutionMode::OutputTriangleStrip; - default: - UNREACHABLE(); - return spv::ExecutionMode::OutputPoints; - } -} - -/// Returns true if an attribute index is one of the 32 generic attributes -constexpr bool IsGenericAttribute(Attribute::Index attribute) { - return attribute >= Attribute::Index::Attribute_0 && - attribute <= Attribute::Index::Attribute_31; -} - -/// Returns the location of a generic attribute -u32 GetGenericAttributeLocation(Attribute::Index attribute) { - ASSERT(IsGenericAttribute(attribute)); - return static_cast<u32>(attribute) - static_cast<u32>(Attribute::Index::Attribute_0); -} - -/// Returns true if an object has to be treated as precise -bool IsPrecise(Operation operand) { - const auto& meta{operand.GetMeta()}; - if (std::holds_alternative<MetaArithmetic>(meta)) { - return std::get<MetaArithmetic>(meta).precise; - } - return false; -} - -class SPIRVDecompiler final : public Sirit::Module { -public: - explicit SPIRVDecompiler(const Device& device_, const ShaderIR& ir_, ShaderType stage_, - const Registry& registry_, const Specialization& specialization_) - : Module(0x00010300), device{device_}, ir{ir_}, stage{stage_}, header{ir_.GetHeader()}, - registry{registry_}, specialization{specialization_} { - if (stage_ != ShaderType::Compute) { - transform_feedback = BuildTransformFeedback(registry_.GetGraphicsInfo()); - } - - AddCapability(spv::Capability::Shader); - AddCapability(spv::Capability::UniformAndStorageBuffer16BitAccess); - AddCapability(spv::Capability::ImageQuery); - AddCapability(spv::Capability::Image1D); - AddCapability(spv::Capability::ImageBuffer); - AddCapability(spv::Capability::ImageGatherExtended); - AddCapability(spv::Capability::SampledBuffer); - AddCapability(spv::Capability::StorageImageWriteWithoutFormat); - AddCapability(spv::Capability::DrawParameters); - AddCapability(spv::Capability::SubgroupBallotKHR); - AddCapability(spv::Capability::SubgroupVoteKHR); - AddExtension("SPV_KHR_16bit_storage"); - AddExtension("SPV_KHR_shader_ballot"); - AddExtension("SPV_KHR_subgroup_vote"); - AddExtension("SPV_KHR_storage_buffer_storage_class"); - AddExtension("SPV_KHR_variable_pointers"); - AddExtension("SPV_KHR_shader_draw_parameters"); - - if (!transform_feedback.empty()) { - if (device.IsExtTransformFeedbackSupported()) { - AddCapability(spv::Capability::TransformFeedback); - } else { - LOG_ERROR(Render_Vulkan, "Shader requires transform feedbacks but these are not " - "supported on this device"); - } - } - if (ir.UsesLayer() || ir.UsesViewportIndex()) { - if (ir.UsesViewportIndex()) { - AddCapability(spv::Capability::MultiViewport); - } - if (stage != ShaderType::Geometry && device.IsExtShaderViewportIndexLayerSupported()) { - AddExtension("SPV_EXT_shader_viewport_index_layer"); - AddCapability(spv::Capability::ShaderViewportIndexLayerEXT); - } - } - if (device.IsFormatlessImageLoadSupported()) { - AddCapability(spv::Capability::StorageImageReadWithoutFormat); - } - if (device.IsFloat16Supported()) { - AddCapability(spv::Capability::Float16); - } - t_scalar_half = Name(TypeFloat(device_.IsFloat16Supported() ? 
16 : 32), "scalar_half"); - t_half = Name(TypeVector(t_scalar_half, 2), "half"); - - const Id main = Decompile(); - - switch (stage) { - case ShaderType::Vertex: - AddEntryPoint(spv::ExecutionModel::Vertex, main, "main", interfaces); - break; - case ShaderType::TesselationControl: - AddCapability(spv::Capability::Tessellation); - AddEntryPoint(spv::ExecutionModel::TessellationControl, main, "main", interfaces); - AddExecutionMode(main, spv::ExecutionMode::OutputVertices, - header.common2.threads_per_input_primitive); - break; - case ShaderType::TesselationEval: { - const auto& info = registry.GetGraphicsInfo(); - AddCapability(spv::Capability::Tessellation); - AddEntryPoint(spv::ExecutionModel::TessellationEvaluation, main, "main", interfaces); - AddExecutionMode(main, GetExecutionMode(info.tessellation_primitive)); - AddExecutionMode(main, GetExecutionMode(info.tessellation_spacing)); - AddExecutionMode(main, info.tessellation_clockwise - ? spv::ExecutionMode::VertexOrderCw - : spv::ExecutionMode::VertexOrderCcw); - break; - } - case ShaderType::Geometry: { - const auto& info = registry.GetGraphicsInfo(); - AddCapability(spv::Capability::Geometry); - AddEntryPoint(spv::ExecutionModel::Geometry, main, "main", interfaces); - AddExecutionMode(main, GetExecutionMode(info.primitive_topology)); - AddExecutionMode(main, GetExecutionMode(header.common3.output_topology)); - AddExecutionMode(main, spv::ExecutionMode::OutputVertices, - header.common4.max_output_vertices); - // TODO(Rodrigo): Where can we get this info from? - AddExecutionMode(main, spv::ExecutionMode::Invocations, 1U); - break; - } - case ShaderType::Fragment: - AddEntryPoint(spv::ExecutionModel::Fragment, main, "main", interfaces); - AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft); - if (header.ps.omap.depth) { - AddExecutionMode(main, spv::ExecutionMode::DepthReplacing); - } - if (specialization.early_fragment_tests) { - AddExecutionMode(main, spv::ExecutionMode::EarlyFragmentTests); - } - break; - case ShaderType::Compute: - const auto workgroup_size = specialization.workgroup_size; - AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0], - workgroup_size[1], workgroup_size[2]); - AddEntryPoint(spv::ExecutionModel::GLCompute, main, "main", interfaces); - break; - } - } - -private: - Id Decompile() { - DeclareCommon(); - DeclareVertex(); - DeclareTessControl(); - DeclareTessEval(); - DeclareGeometry(); - DeclareFragment(); - DeclareCompute(); - DeclareRegisters(); - DeclareCustomVariables(); - DeclarePredicates(); - DeclareLocalMemory(); - DeclareSharedMemory(); - DeclareInternalFlags(); - DeclareInputAttributes(); - DeclareOutputAttributes(); - - u32 binding = specialization.base_binding; - binding = DeclareConstantBuffers(binding); - binding = DeclareGlobalBuffers(binding); - binding = DeclareUniformTexels(binding); - binding = DeclareSamplers(binding); - binding = DeclareStorageTexels(binding); - binding = DeclareImages(binding); - - const Id main = OpFunction(t_void, {}, TypeFunction(t_void)); - AddLabel(); - - if (ir.IsDecompiled()) { - DeclareFlowVariables(); - DecompileAST(); - } else { - AllocateLabels(); - DecompileBranchMode(); - } - - OpReturn(); - OpFunctionEnd(); - - return main; - } - - void DefinePrologue() { - if (stage == ShaderType::Vertex) { - // Clear Position to avoid reading trash on the Z conversion. 
- const auto position_index = out_indices.position.value(); - const Id position = AccessElement(t_out_float4, out_vertex, position_index); - OpStore(position, v_varying_default); - - if (specialization.point_size) { - const u32 point_size_index = out_indices.point_size.value(); - const Id out_point_size = AccessElement(t_out_float, out_vertex, point_size_index); - OpStore(out_point_size, Constant(t_float, *specialization.point_size)); - } - } - } - - void DecompileAST(); - - void DecompileBranchMode() { - const u32 first_address = ir.GetBasicBlocks().begin()->first; - const Id loop_label = OpLabel("loop"); - const Id merge_label = OpLabel("merge"); - const Id dummy_label = OpLabel(); - const Id jump_label = OpLabel(); - continue_label = OpLabel("continue"); - - std::vector<Sirit::Literal> literals; - std::vector<Id> branch_labels; - for (const auto& [literal, label] : labels) { - literals.push_back(literal); - branch_labels.push_back(label); - } - - jmp_to = OpVariable(TypePointer(spv::StorageClass::Function, t_uint), - spv::StorageClass::Function, Constant(t_uint, first_address)); - AddLocalVariable(jmp_to); - - std::tie(ssy_flow_stack, ssy_flow_stack_top) = CreateFlowStack(); - std::tie(pbk_flow_stack, pbk_flow_stack_top) = CreateFlowStack(); - - Name(jmp_to, "jmp_to"); - Name(ssy_flow_stack, "ssy_flow_stack"); - Name(ssy_flow_stack_top, "ssy_flow_stack_top"); - Name(pbk_flow_stack, "pbk_flow_stack"); - Name(pbk_flow_stack_top, "pbk_flow_stack_top"); - - DefinePrologue(); - - OpBranch(loop_label); - AddLabel(loop_label); - OpLoopMerge(merge_label, continue_label, spv::LoopControlMask::MaskNone); - OpBranch(dummy_label); - - AddLabel(dummy_label); - const Id default_branch = OpLabel(); - const Id jmp_to_load = OpLoad(t_uint, jmp_to); - OpSelectionMerge(jump_label, spv::SelectionControlMask::MaskNone); - OpSwitch(jmp_to_load, default_branch, literals, branch_labels); - - AddLabel(default_branch); - OpReturn(); - - for (const auto& [address, bb] : ir.GetBasicBlocks()) { - AddLabel(labels.at(address)); - - VisitBasicBlock(bb); - - const auto next_it = labels.lower_bound(address + 1); - const Id next_label = next_it != labels.end() ? 
next_it->second : default_branch; - OpBranch(next_label); - } - - AddLabel(jump_label); - OpBranch(continue_label); - AddLabel(continue_label); - OpBranch(loop_label); - AddLabel(merge_label); - } - -private: - friend class ASTDecompiler; - friend class ExprDecompiler; - - static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount); - - void AllocateLabels() { - for (const auto& pair : ir.GetBasicBlocks()) { - const u32 address = pair.first; - labels.emplace(address, OpLabel(fmt::format("label_0x{:x}", address))); - } - } - - void DeclareCommon() { - thread_id = - DeclareInputBuiltIn(spv::BuiltIn::SubgroupLocalInvocationId, t_in_uint, "thread_id"); - thread_masks[0] = - DeclareInputBuiltIn(spv::BuiltIn::SubgroupEqMask, t_in_uint4, "thread_eq_mask"); - thread_masks[1] = - DeclareInputBuiltIn(spv::BuiltIn::SubgroupGeMask, t_in_uint4, "thread_ge_mask"); - thread_masks[2] = - DeclareInputBuiltIn(spv::BuiltIn::SubgroupGtMask, t_in_uint4, "thread_gt_mask"); - thread_masks[3] = - DeclareInputBuiltIn(spv::BuiltIn::SubgroupLeMask, t_in_uint4, "thread_le_mask"); - thread_masks[4] = - DeclareInputBuiltIn(spv::BuiltIn::SubgroupLtMask, t_in_uint4, "thread_lt_mask"); - } - - void DeclareVertex() { - if (stage != ShaderType::Vertex) { - return; - } - Id out_vertex_struct; - std::tie(out_vertex_struct, out_indices) = DeclareVertexStruct(); - const Id vertex_ptr = TypePointer(spv::StorageClass::Output, out_vertex_struct); - out_vertex = OpVariable(vertex_ptr, spv::StorageClass::Output); - interfaces.push_back(AddGlobalVariable(Name(out_vertex, "out_vertex"))); - - // Declare input attributes - vertex_index = DeclareInputBuiltIn(spv::BuiltIn::VertexIndex, t_in_int, "vertex_index"); - instance_index = - DeclareInputBuiltIn(spv::BuiltIn::InstanceIndex, t_in_int, "instance_index"); - base_vertex = DeclareInputBuiltIn(spv::BuiltIn::BaseVertex, t_in_int, "base_vertex"); - base_instance = DeclareInputBuiltIn(spv::BuiltIn::BaseInstance, t_in_int, "base_instance"); - } - - void DeclareTessControl() { - if (stage != ShaderType::TesselationControl) { - return; - } - DeclareInputVertexArray(NumInputPatches); - DeclareOutputVertexArray(header.common2.threads_per_input_primitive); - - tess_level_outer = DeclareBuiltIn( - spv::BuiltIn::TessLevelOuter, spv::StorageClass::Output, - TypePointer(spv::StorageClass::Output, TypeArray(t_float, Constant(t_uint, 4U))), - "tess_level_outer"); - Decorate(tess_level_outer, spv::Decoration::Patch); - - tess_level_inner = DeclareBuiltIn( - spv::BuiltIn::TessLevelInner, spv::StorageClass::Output, - TypePointer(spv::StorageClass::Output, TypeArray(t_float, Constant(t_uint, 2U))), - "tess_level_inner"); - Decorate(tess_level_inner, spv::Decoration::Patch); - - invocation_id = DeclareInputBuiltIn(spv::BuiltIn::InvocationId, t_in_int, "invocation_id"); - } - - void DeclareTessEval() { - if (stage != ShaderType::TesselationEval) { - return; - } - DeclareInputVertexArray(NumInputPatches); - DeclareOutputVertex(); - - tess_coord = DeclareInputBuiltIn(spv::BuiltIn::TessCoord, t_in_float3, "tess_coord"); - } - - void DeclareGeometry() { - if (stage != ShaderType::Geometry) { - return; - } - const auto& info = registry.GetGraphicsInfo(); - const u32 num_input = GetNumPrimitiveTopologyVertices(info.primitive_topology); - DeclareInputVertexArray(num_input); - DeclareOutputVertex(); - } - - void DeclareFragment() { - if (stage != ShaderType::Fragment) { - return; - } - - for (u32 rt = 0; rt < static_cast<u32>(std::size(frag_colors)); ++rt) { - if 
(!IsRenderTargetEnabled(rt)) { - continue; - } - const Id id = AddGlobalVariable(OpVariable(t_out_float4, spv::StorageClass::Output)); - Name(id, fmt::format("frag_color{}", rt)); - Decorate(id, spv::Decoration::Location, rt); - - frag_colors[rt] = id; - interfaces.push_back(id); - } - - if (header.ps.omap.depth) { - frag_depth = AddGlobalVariable(OpVariable(t_out_float, spv::StorageClass::Output)); - Name(frag_depth, "frag_depth"); - Decorate(frag_depth, spv::Decoration::BuiltIn, - static_cast<u32>(spv::BuiltIn::FragDepth)); - - interfaces.push_back(frag_depth); - } - - frag_coord = DeclareInputBuiltIn(spv::BuiltIn::FragCoord, t_in_float4, "frag_coord"); - front_facing = DeclareInputBuiltIn(spv::BuiltIn::FrontFacing, t_in_bool, "front_facing"); - point_coord = DeclareInputBuiltIn(spv::BuiltIn::PointCoord, t_in_float2, "point_coord"); - } - - void DeclareCompute() { - if (stage != ShaderType::Compute) { - return; - } - - workgroup_id = DeclareInputBuiltIn(spv::BuiltIn::WorkgroupId, t_in_uint3, "workgroup_id"); - local_invocation_id = - DeclareInputBuiltIn(spv::BuiltIn::LocalInvocationId, t_in_uint3, "local_invocation_id"); - } - - void DeclareRegisters() { - for (const u32 gpr : ir.GetRegisters()) { - const Id id = OpVariable(t_prv_float, spv::StorageClass::Private, v_float_zero); - Name(id, fmt::format("gpr_{}", gpr)); - registers.emplace(gpr, AddGlobalVariable(id)); - } - } - - void DeclareCustomVariables() { - const u32 num_custom_variables = ir.GetNumCustomVariables(); - for (u32 i = 0; i < num_custom_variables; ++i) { - const Id id = OpVariable(t_prv_float, spv::StorageClass::Private, v_float_zero); - Name(id, fmt::format("custom_var_{}", i)); - custom_variables.emplace(i, AddGlobalVariable(id)); - } - } - - void DeclarePredicates() { - for (const auto pred : ir.GetPredicates()) { - const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); - Name(id, fmt::format("pred_{}", static_cast<u32>(pred))); - predicates.emplace(pred, AddGlobalVariable(id)); - } - } - - void DeclareFlowVariables() { - for (u32 i = 0; i < ir.GetASTNumVariables(); i++) { - const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); - Name(id, fmt::format("flow_var_{}", static_cast<u32>(i))); - flow_variables.emplace(i, AddGlobalVariable(id)); - } - } - - void DeclareLocalMemory() { - // TODO(Rodrigo): Unstub kernel local memory size and pass it from a register at - // specialization time. - const u64 lmem_size = stage == ShaderType::Compute ? 0x400 : header.GetLocalMemorySize(); - if (lmem_size == 0) { - return; - } - const auto element_count = static_cast<u32>(Common::AlignUp(lmem_size, 4) / 4); - const Id type_array = TypeArray(t_float, Constant(t_uint, element_count)); - const Id type_pointer = TypePointer(spv::StorageClass::Private, type_array); - Name(type_pointer, "LocalMemory"); - - local_memory = - OpVariable(type_pointer, spv::StorageClass::Private, ConstantNull(type_array)); - AddGlobalVariable(Name(local_memory, "local_memory")); - } - - void DeclareSharedMemory() { - if (stage != ShaderType::Compute) { - return; - } - t_smem_uint = TypePointer(spv::StorageClass::Workgroup, t_uint); - - u32 smem_size = specialization.shared_memory_size * 4; - if (smem_size == 0) { - // Avoid declaring an empty array. 
- return; - } - const u32 limit = device.GetMaxComputeSharedMemorySize(); - if (smem_size > limit) { - LOG_ERROR(Render_Vulkan, "Shared memory size {} is clamped to host's limit {}", - smem_size, limit); - smem_size = limit; - } - - const Id type_array = TypeArray(t_uint, Constant(t_uint, smem_size / 4)); - const Id type_pointer = TypePointer(spv::StorageClass::Workgroup, type_array); - Name(type_pointer, "SharedMemory"); - - shared_memory = OpVariable(type_pointer, spv::StorageClass::Workgroup); - AddGlobalVariable(Name(shared_memory, "shared_memory")); - } - - void DeclareInternalFlags() { - static constexpr std::array names{"zero", "sign", "carry", "overflow"}; - - for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) { - const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); - internal_flags[flag] = AddGlobalVariable(Name(id, names[flag])); - } - } - - void DeclareInputVertexArray(u32 length) { - constexpr auto storage = spv::StorageClass::Input; - std::tie(in_indices, in_vertex) = DeclareVertexArray(storage, "in_indices", length); - } - - void DeclareOutputVertexArray(u32 length) { - constexpr auto storage = spv::StorageClass::Output; - std::tie(out_indices, out_vertex) = DeclareVertexArray(storage, "out_indices", length); - } - - std::tuple<VertexIndices, Id> DeclareVertexArray(spv::StorageClass storage_class, - std::string name, u32 length) { - const auto [struct_id, indices] = DeclareVertexStruct(); - const Id vertex_array = TypeArray(struct_id, Constant(t_uint, length)); - const Id vertex_ptr = TypePointer(storage_class, vertex_array); - const Id vertex = OpVariable(vertex_ptr, storage_class); - AddGlobalVariable(Name(vertex, std::move(name))); - interfaces.push_back(vertex); - return {indices, vertex}; - } - - void DeclareOutputVertex() { - Id out_vertex_struct; - std::tie(out_vertex_struct, out_indices) = DeclareVertexStruct(); - const Id out_vertex_ptr = TypePointer(spv::StorageClass::Output, out_vertex_struct); - out_vertex = OpVariable(out_vertex_ptr, spv::StorageClass::Output); - interfaces.push_back(AddGlobalVariable(Name(out_vertex, "out_vertex"))); - } - - void DeclareInputAttributes() { - for (const auto index : ir.GetInputAttributes()) { - if (!IsGenericAttribute(index)) { - continue; - } - const u32 location = GetGenericAttributeLocation(index); - if (!IsAttributeEnabled(location)) { - continue; - } - const auto type_descriptor = GetAttributeType(location); - Id type; - if (IsInputAttributeArray()) { - type = GetTypeVectorDefinitionLut(type_descriptor.type).at(3); - type = TypeArray(type, Constant(t_uint, GetNumInputVertices())); - type = TypePointer(spv::StorageClass::Input, type); - } else { - type = type_descriptor.vector; - } - const Id id = OpVariable(type, spv::StorageClass::Input); - AddGlobalVariable(Name(id, fmt::format("in_attr{}", location))); - input_attributes.emplace(index, id); - interfaces.push_back(id); - - Decorate(id, spv::Decoration::Location, location); - - if (stage != ShaderType::Fragment) { - continue; - } - switch (header.ps.GetPixelImap(location)) { - case PixelImap::Constant: - Decorate(id, spv::Decoration::Flat); - break; - case PixelImap::Perspective: - // Default - break; - case PixelImap::ScreenLinear: - Decorate(id, spv::Decoration::NoPerspective); - break; - default: - UNREACHABLE_MSG("Unused attribute being fetched"); - } - } - } - - void DeclareOutputAttributes() { - if (stage == ShaderType::Compute || stage == ShaderType::Fragment) { - return; - } - - 
UNIMPLEMENTED_IF(registry.GetGraphicsInfo().tfb_enabled && stage != ShaderType::Vertex); - for (const auto index : ir.GetOutputAttributes()) { - if (!IsGenericAttribute(index)) { - continue; - } - DeclareOutputAttribute(index); - } - } - - void DeclareOutputAttribute(Attribute::Index index) { - static constexpr std::string_view swizzle = "xyzw"; - - const u32 location = GetGenericAttributeLocation(index); - u8 element = 0; - while (element < 4) { - const std::size_t remainder = 4 - element; - - std::size_t num_components = remainder; - const std::optional tfb = GetTransformFeedbackInfo(index, element); - if (tfb) { - num_components = tfb->components; - } - - Id type = GetTypeVectorDefinitionLut(Type::Float).at(num_components - 1); - Id varying_default = v_varying_default; - if (IsOutputAttributeArray()) { - const u32 num = GetNumOutputVertices(); - type = TypeArray(type, Constant(t_uint, num)); - if (device.GetDriverID() != VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR) { - // Intel's proprietary driver fails to setup defaults for arrayed output - // attributes. - varying_default = ConstantComposite(type, std::vector(num, varying_default)); - } - } - type = TypePointer(spv::StorageClass::Output, type); - - std::string name = fmt::format("out_attr{}", location); - if (num_components < 4 || element > 0) { - name = fmt::format("{}_{}", name, swizzle.substr(element, num_components)); - } - - const Id id = OpVariable(type, spv::StorageClass::Output, varying_default); - Name(AddGlobalVariable(id), name); - - GenericVaryingDescription description; - description.id = id; - description.first_element = element; - description.is_scalar = num_components == 1; - for (u32 i = 0; i < num_components; ++i) { - const u8 offset = static_cast<u8>(static_cast<u32>(index) * 4 + element + i); - output_attributes.emplace(offset, description); - } - interfaces.push_back(id); - - Decorate(id, spv::Decoration::Location, location); - if (element > 0) { - Decorate(id, spv::Decoration::Component, static_cast<u32>(element)); - } - if (tfb && device.IsExtTransformFeedbackSupported()) { - Decorate(id, spv::Decoration::XfbBuffer, static_cast<u32>(tfb->buffer)); - Decorate(id, spv::Decoration::XfbStride, static_cast<u32>(tfb->stride)); - Decorate(id, spv::Decoration::Offset, static_cast<u32>(tfb->offset)); - } - - element = static_cast<u8>(static_cast<std::size_t>(element) + num_components); - } - } - - std::optional<VaryingTFB> GetTransformFeedbackInfo(Attribute::Index index, u8 element = 0) { - const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element); - const auto it = transform_feedback.find(location); - if (it == transform_feedback.end()) { - return {}; - } - return it->second; - } - - u32 DeclareConstantBuffers(u32 binding) { - for (const auto& [index, size] : ir.GetConstantBuffers()) { - const Id type = device.IsKhrUniformBufferStandardLayoutSupported() ? 
t_cbuf_scalar_ubo - : t_cbuf_std140_ubo; - const Id id = OpVariable(type, spv::StorageClass::Uniform); - AddGlobalVariable(Name(id, fmt::format("cbuf_{}", index))); - - Decorate(id, spv::Decoration::Binding, binding++); - Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); - constant_buffers.emplace(index, id); - } - return binding; - } - - u32 DeclareGlobalBuffers(u32 binding) { - for (const auto& [base, usage] : ir.GetGlobalMemory()) { - const Id id = OpVariable(t_gmem_ssbo, spv::StorageClass::StorageBuffer); - AddGlobalVariable( - Name(id, fmt::format("gmem_{}_{}", base.cbuf_index, base.cbuf_offset))); - - Decorate(id, spv::Decoration::Binding, binding++); - Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); - global_buffers.emplace(base, id); - } - return binding; - } - - u32 DeclareUniformTexels(u32 binding) { - for (const auto& sampler : ir.GetSamplers()) { - if (!sampler.is_buffer) { - continue; - } - ASSERT(!sampler.is_array); - ASSERT(!sampler.is_shadow); - - constexpr auto dim = spv::Dim::Buffer; - constexpr int depth = 0; - constexpr int arrayed = 0; - constexpr bool ms = false; - constexpr int sampled = 1; - constexpr auto format = spv::ImageFormat::Unknown; - const Id image_type = TypeImage(t_float, dim, depth, arrayed, ms, sampled, format); - const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type); - const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant); - AddGlobalVariable(Name(id, fmt::format("sampler_{}", sampler.index))); - Decorate(id, spv::Decoration::Binding, binding++); - Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); - - uniform_texels.emplace(sampler.index, TexelBuffer{image_type, id}); - } - return binding; - } - - u32 DeclareSamplers(u32 binding) { - for (const auto& sampler : ir.GetSamplers()) { - if (sampler.is_buffer) { - continue; - } - const auto dim = GetSamplerDim(sampler); - const int depth = sampler.is_shadow ? 1 : 0; - const int arrayed = sampler.is_array ? 1 : 0; - constexpr bool ms = false; - constexpr int sampled = 1; - constexpr auto format = spv::ImageFormat::Unknown; - const Id image_type = TypeImage(t_float, dim, depth, arrayed, ms, sampled, format); - const Id sampler_type = TypeSampledImage(image_type); - const Id sampler_pointer_type = - TypePointer(spv::StorageClass::UniformConstant, sampler_type); - const Id type = sampler.is_indexed - ? 
TypeArray(sampler_type, Constant(t_uint, sampler.size)) - : sampler_type; - const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, type); - const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant); - AddGlobalVariable(Name(id, fmt::format("sampler_{}", sampler.index))); - Decorate(id, spv::Decoration::Binding, binding++); - Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); - - sampled_images.emplace( - sampler.index, SampledImage{image_type, sampler_type, sampler_pointer_type, id}); - } - return binding; - } - - u32 DeclareStorageTexels(u32 binding) { - for (const auto& image : ir.GetImages()) { - if (image.type != Tegra::Shader::ImageType::TextureBuffer) { - continue; - } - DeclareImage(image, binding); - } - return binding; - } - - u32 DeclareImages(u32 binding) { - for (const auto& image : ir.GetImages()) { - if (image.type == Tegra::Shader::ImageType::TextureBuffer) { - continue; - } - DeclareImage(image, binding); - } - return binding; - } - - void DeclareImage(const ImageEntry& image, u32& binding) { - const auto [dim, arrayed] = GetImageDim(image); - constexpr int depth = 0; - constexpr bool ms = false; - constexpr int sampled = 2; // This won't be accessed with a sampler - const auto format = image.is_atomic ? spv::ImageFormat::R32ui : spv::ImageFormat::Unknown; - const Id image_type = TypeImage(t_uint, dim, depth, arrayed, ms, sampled, format, {}); - const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type); - const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant); - AddGlobalVariable(Name(id, fmt::format("image_{}", image.index))); - - Decorate(id, spv::Decoration::Binding, binding++); - Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); - if (image.is_read && !image.is_written) { - Decorate(id, spv::Decoration::NonWritable); - } else if (image.is_written && !image.is_read) { - Decorate(id, spv::Decoration::NonReadable); - } - - images.emplace(image.index, StorageImage{image_type, id}); - } - - bool IsRenderTargetEnabled(u32 rt) const { - for (u32 component = 0; component < 4; ++component) { - if (header.ps.IsColorComponentOutputEnabled(rt, component)) { - return true; - } - } - return false; - } - - bool IsInputAttributeArray() const { - return stage == ShaderType::TesselationControl || stage == ShaderType::TesselationEval || - stage == ShaderType::Geometry; - } - - bool IsOutputAttributeArray() const { - return stage == ShaderType::TesselationControl; - } - - bool IsAttributeEnabled(u32 location) const { - return stage != ShaderType::Vertex || specialization.enabled_attributes[location]; - } - - u32 GetNumInputVertices() const { - switch (stage) { - case ShaderType::Geometry: - return GetNumPrimitiveTopologyVertices(registry.GetGraphicsInfo().primitive_topology); - case ShaderType::TesselationControl: - case ShaderType::TesselationEval: - return NumInputPatches; - default: - UNREACHABLE(); - return 1; - } - } - - u32 GetNumOutputVertices() const { - switch (stage) { - case ShaderType::TesselationControl: - return header.common2.threads_per_input_primitive; - default: - UNREACHABLE(); - return 1; - } - } - - std::tuple<Id, VertexIndices> DeclareVertexStruct() { - struct BuiltIn { - Id type; - spv::BuiltIn builtin; - const char* name; - }; - std::vector<BuiltIn> members; - members.reserve(4); - - const auto AddBuiltIn = [&](Id type, spv::BuiltIn builtin, const char* name) { - const auto index = static_cast<u32>(members.size()); - members.push_back(BuiltIn{type, builtin, name}); 
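// The returned member index is what in_indices/out_indices store; later code uses it with
// AccessElement(..., out_vertex, index, ...) to address the corresponding PerVertex member.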
- return index; - }; - - VertexIndices indices; - indices.position = AddBuiltIn(t_float4, spv::BuiltIn::Position, "position"); - - if (ir.UsesLayer()) { - if (stage != ShaderType::Vertex || device.IsExtShaderViewportIndexLayerSupported()) { - indices.layer = AddBuiltIn(t_int, spv::BuiltIn::Layer, "layer"); - } else { - LOG_ERROR( - Render_Vulkan, - "Shader requires Layer but it's not supported on this stage with this device."); - } - } - - if (ir.UsesViewportIndex()) { - if (stage != ShaderType::Vertex || device.IsExtShaderViewportIndexLayerSupported()) { - indices.viewport = AddBuiltIn(t_int, spv::BuiltIn::ViewportIndex, "viewport_index"); - } else { - LOG_ERROR(Render_Vulkan, "Shader requires ViewportIndex but it's not supported on " - "this stage with this device."); - } - } - - if (ir.UsesPointSize() || specialization.point_size) { - indices.point_size = AddBuiltIn(t_float, spv::BuiltIn::PointSize, "point_size"); - } - - const auto& ir_output_attributes = ir.GetOutputAttributes(); - const bool declare_clip_distances = std::any_of( - ir_output_attributes.begin(), ir_output_attributes.end(), [](const auto& index) { - return index == Attribute::Index::ClipDistances0123 || - index == Attribute::Index::ClipDistances4567; - }); - if (declare_clip_distances) { - indices.clip_distances = AddBuiltIn(TypeArray(t_float, Constant(t_uint, 8)), - spv::BuiltIn::ClipDistance, "clip_distances"); - } - - std::vector<Id> member_types; - member_types.reserve(members.size()); - for (std::size_t i = 0; i < members.size(); ++i) { - member_types.push_back(members[i].type); - } - const Id per_vertex_struct = Name(TypeStruct(member_types), "PerVertex"); - Decorate(per_vertex_struct, spv::Decoration::Block); - - for (std::size_t index = 0; index < members.size(); ++index) { - const auto& member = members[index]; - MemberName(per_vertex_struct, static_cast<u32>(index), member.name); - MemberDecorate(per_vertex_struct, static_cast<u32>(index), spv::Decoration::BuiltIn, - static_cast<u32>(member.builtin)); - } - - return {per_vertex_struct, indices}; - } - - void VisitBasicBlock(const NodeBlock& bb) { - for (const auto& node : bb) { - Visit(node); - } - } - - Expression Visit(const Node& node) { - if (const auto operation = std::get_if<OperationNode>(&*node)) { - if (const auto amend_index = operation->GetAmendIndex()) { - [[maybe_unused]] const Type type = Visit(ir.GetAmendNode(*amend_index)).type; - ASSERT(type == Type::Void); - } - const auto operation_index = static_cast<std::size_t>(operation->GetCode()); - const auto decompiler = operation_decompilers[operation_index]; - if (decompiler == nullptr) { - UNREACHABLE_MSG("Operation decompiler {} not defined", operation_index); - } - return (this->*decompiler)(*operation); - } - - if (const auto gpr = std::get_if<GprNode>(&*node)) { - const u32 index = gpr->GetIndex(); - if (index == Register::ZeroIndex) { - return {v_float_zero, Type::Float}; - } - return {OpLoad(t_float, registers.at(index)), Type::Float}; - } - - if (const auto cv = std::get_if<CustomVarNode>(&*node)) { - const u32 index = cv->GetIndex(); - return {OpLoad(t_float, custom_variables.at(index)), Type::Float}; - } - - if (const auto immediate = std::get_if<ImmediateNode>(&*node)) { - return {Constant(t_uint, immediate->GetValue()), Type::Uint}; - } - - if (const auto predicate = std::get_if<PredicateNode>(&*node)) { - const auto value = [&]() -> Id { - switch (const auto index = predicate->GetIndex(); index) { - case Tegra::Shader::Pred::UnusedIndex: - return v_true; - case 
Tegra::Shader::Pred::NeverExecute: - return v_false; - default: - return OpLoad(t_bool, predicates.at(index)); - } - }(); - if (predicate->IsNegated()) { - return {OpLogicalNot(t_bool, value), Type::Bool}; - } - return {value, Type::Bool}; - } - - if (const auto abuf = std::get_if<AbufNode>(&*node)) { - const auto attribute = abuf->GetIndex(); - const u32 element = abuf->GetElement(); - const auto& buffer = abuf->GetBuffer(); - - const auto ArrayPass = [&](Id pointer_type, Id composite, std::vector<u32> indices) { - std::vector<Id> members; - members.reserve(std::size(indices) + 1); - - if (buffer && IsInputAttributeArray()) { - members.push_back(AsUint(Visit(buffer))); - } - for (const u32 index : indices) { - members.push_back(Constant(t_uint, index)); - } - return OpAccessChain(pointer_type, composite, members); - }; - - switch (attribute) { - case Attribute::Index::Position: { - if (stage == ShaderType::Fragment) { - return {OpLoad(t_float, AccessElement(t_in_float, frag_coord, element)), - Type::Float}; - } - const std::vector elements = {in_indices.position.value(), element}; - return {OpLoad(t_float, ArrayPass(t_in_float, in_vertex, elements)), Type::Float}; - } - case Attribute::Index::PointCoord: { - switch (element) { - case 0: - case 1: - return {OpCompositeExtract(t_float, OpLoad(t_float2, point_coord), element), - Type::Float}; - } - UNIMPLEMENTED_MSG("Unimplemented point coord element={}", element); - return {v_float_zero, Type::Float}; - } - case Attribute::Index::TessCoordInstanceIDVertexID: - // TODO(Subv): Find out what the values are for the first two elements when inside a - // vertex shader, and what's the value of the fourth element when inside a Tess Eval - // shader. - switch (element) { - case 0: - case 1: - return {OpLoad(t_float, AccessElement(t_in_float, tess_coord, element)), - Type::Float}; - case 2: - return { - OpISub(t_int, OpLoad(t_int, instance_index), OpLoad(t_int, base_instance)), - Type::Int}; - case 3: - return {OpISub(t_int, OpLoad(t_int, vertex_index), OpLoad(t_int, base_vertex)), - Type::Int}; - } - UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); - return {Constant(t_uint, 0U), Type::Uint}; - case Attribute::Index::FrontFacing: - // TODO(Subv): Find out what the values are for the other elements. - ASSERT(stage == ShaderType::Fragment); - if (element == 3) { - const Id is_front_facing = OpLoad(t_bool, front_facing); - const Id true_value = Constant(t_int, static_cast<s32>(-1)); - const Id false_value = Constant(t_int, 0); - return {OpSelect(t_int, is_front_facing, true_value, false_value), Type::Int}; - } - UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element); - return {v_float_zero, Type::Float}; - default: - if (!IsGenericAttribute(attribute)) { - break; - } - const u32 location = GetGenericAttributeLocation(attribute); - if (!IsAttributeEnabled(location)) { - // Disabled attributes (also known as constant attributes) always return zero. 
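// This mirrors the IsAttributeEnabled check in DeclareInputAttributes: no OpVariable is ever
// created for a disabled vertex attribute, so a read of it has to be materialized as a
// constant zero here instead of an OpLoad.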
- return {v_float_zero, Type::Float}; - } - const auto type_descriptor = GetAttributeType(location); - const Type type = type_descriptor.type; - const Id attribute_id = input_attributes.at(attribute); - const std::vector elements = {element}; - const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements); - return {OpLoad(GetTypeDefinition(type), pointer), type}; - } - UNIMPLEMENTED_MSG("Unhandled input attribute: {}", attribute); - return {v_float_zero, Type::Float}; - } - - if (const auto cbuf = std::get_if<CbufNode>(&*node)) { - const Node& offset = cbuf->GetOffset(); - const Id buffer_id = constant_buffers.at(cbuf->GetIndex()); - - Id pointer{}; - if (device.IsKhrUniformBufferStandardLayoutSupported()) { - const Id buffer_offset = - OpShiftRightLogical(t_uint, AsUint(Visit(offset)), Constant(t_uint, 2U)); - pointer = - OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0U), buffer_offset); - } else { - Id buffer_index{}; - Id buffer_element{}; - if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { - // Direct access - const u32 offset_imm = immediate->GetValue(); - ASSERT(offset_imm % 4 == 0); - buffer_index = Constant(t_uint, offset_imm / 16); - buffer_element = Constant(t_uint, (offset_imm / 4) % 4); - } else if (std::holds_alternative<OperationNode>(*offset)) { - // Indirect access - const Id offset_id = AsUint(Visit(offset)); - const Id unsafe_offset = OpUDiv(t_uint, offset_id, Constant(t_uint, 4)); - const Id final_offset = - OpUMod(t_uint, unsafe_offset, Constant(t_uint, MaxConstBufferElements - 1)); - buffer_index = OpUDiv(t_uint, final_offset, Constant(t_uint, 4)); - buffer_element = OpUMod(t_uint, final_offset, Constant(t_uint, 4)); - } else { - UNREACHABLE_MSG("Unmanaged offset node type"); - } - pointer = OpAccessChain(t_cbuf_float, buffer_id, v_uint_zero, buffer_index, - buffer_element); - } - return {OpLoad(t_float, pointer), Type::Float}; - } - - if (const auto gmem = std::get_if<GmemNode>(&*node)) { - return {OpLoad(t_uint, GetGlobalMemoryPointer(*gmem)), Type::Uint}; - } - - if (const auto lmem = std::get_if<LmemNode>(&*node)) { - Id address = AsUint(Visit(lmem->GetAddress())); - address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); - const Id pointer = OpAccessChain(t_prv_float, local_memory, address); - return {OpLoad(t_float, pointer), Type::Float}; - } - - if (const auto smem = std::get_if<SmemNode>(&*node)) { - return {OpLoad(t_uint, GetSharedMemoryPointer(*smem)), Type::Uint}; - } - - if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { - const Id flag = internal_flags.at(static_cast<std::size_t>(internal_flag->GetFlag())); - return {OpLoad(t_bool, flag), Type::Bool}; - } - - if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { - if (const auto amend_index = conditional->GetAmendIndex()) { - [[maybe_unused]] const Type type = Visit(ir.GetAmendNode(*amend_index)).type; - ASSERT(type == Type::Void); - } - // It's invalid to call conditional on nested nodes, use an operation instead - const Id true_label = OpLabel(); - const Id skip_label = OpLabel(); - const Id condition = AsBool(Visit(conditional->GetCondition())); - OpSelectionMerge(skip_label, spv::SelectionControlMask::MaskNone); - OpBranchConditional(condition, true_label, skip_label); - AddLabel(true_label); - - conditional_branch_set = true; - inside_branch = false; - VisitBasicBlock(conditional->GetCode()); - conditional_branch_set = false; - if (!inside_branch) { - OpBranch(skip_label); - } else { - inside_branch = false; 
- } - AddLabel(skip_label); - return {}; - } - - if (const auto comment = std::get_if<CommentNode>(&*node)) { - if (device.HasDebuggingToolAttached()) { - // We should insert comments with OpString instead of using named variables - Name(OpUndef(t_int), comment->GetText()); - } - return {}; - } - - UNREACHABLE(); - return {}; - } - - template <Id (Module::*func)(Id, Id), Type result_type, Type type_a = result_type> - Expression Unary(Operation operation) { - const Id type_def = GetTypeDefinition(result_type); - const Id op_a = As(Visit(operation[0]), type_a); - - const Id value = (this->*func)(type_def, op_a); - if (IsPrecise(operation)) { - Decorate(value, spv::Decoration::NoContraction); - } - return {value, result_type}; - } - - template <Id (Module::*func)(Id, Id, Id), Type result_type, Type type_a = result_type, - Type type_b = type_a> - Expression Binary(Operation operation) { - const Id type_def = GetTypeDefinition(result_type); - const Id op_a = As(Visit(operation[0]), type_a); - const Id op_b = As(Visit(operation[1]), type_b); - - const Id value = (this->*func)(type_def, op_a, op_b); - if (IsPrecise(operation)) { - Decorate(value, spv::Decoration::NoContraction); - } - return {value, result_type}; - } - - template <Id (Module::*func)(Id, Id, Id, Id), Type result_type, Type type_a = result_type, - Type type_b = type_a, Type type_c = type_b> - Expression Ternary(Operation operation) { - const Id type_def = GetTypeDefinition(result_type); - const Id op_a = As(Visit(operation[0]), type_a); - const Id op_b = As(Visit(operation[1]), type_b); - const Id op_c = As(Visit(operation[2]), type_c); - - const Id value = (this->*func)(type_def, op_a, op_b, op_c); - if (IsPrecise(operation)) { - Decorate(value, spv::Decoration::NoContraction); - } - return {value, result_type}; - } - - template <Id (Module::*func)(Id, Id, Id, Id, Id), Type result_type, Type type_a = result_type, - Type type_b = type_a, Type type_c = type_b, Type type_d = type_c> - Expression Quaternary(Operation operation) { - const Id type_def = GetTypeDefinition(result_type); - const Id op_a = As(Visit(operation[0]), type_a); - const Id op_b = As(Visit(operation[1]), type_b); - const Id op_c = As(Visit(operation[2]), type_c); - const Id op_d = As(Visit(operation[3]), type_d); - - const Id value = (this->*func)(type_def, op_a, op_b, op_c, op_d); - if (IsPrecise(operation)) { - Decorate(value, spv::Decoration::NoContraction); - } - return {value, result_type}; - } - - Expression Assign(Operation operation) { - const Node& dest = operation[0]; - const Node& src = operation[1]; - - Expression target{}; - if (const auto gpr = std::get_if<GprNode>(&*dest)) { - if (gpr->GetIndex() == Register::ZeroIndex) { - // Writing to Register::ZeroIndex is a no op but we still have to visit its source - // because it might have side effects. 
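// Visiting the source still emits its SPIR-V (for example an operation with memory side
// effects, such as an atomic, presumably ends up here too); only the final OpStore is skipped.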
- Visit(src); - return {}; - } - target = {registers.at(gpr->GetIndex()), Type::Float}; - - } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) { - const auto& buffer = abuf->GetBuffer(); - const auto ArrayPass = [&](Id pointer_type, Id composite, std::vector<u32> indices) { - std::vector<Id> members; - members.reserve(std::size(indices) + 1); - - if (buffer && IsOutputAttributeArray()) { - members.push_back(AsUint(Visit(buffer))); - } - for (const u32 index : indices) { - members.push_back(Constant(t_uint, index)); - } - return OpAccessChain(pointer_type, composite, members); - }; - - target = [&]() -> Expression { - const u32 element = abuf->GetElement(); - switch (const auto attribute = abuf->GetIndex(); attribute) { - case Attribute::Index::Position: { - const u32 index = out_indices.position.value(); - return {ArrayPass(t_out_float, out_vertex, {index, element}), Type::Float}; - } - case Attribute::Index::LayerViewportPointSize: - switch (element) { - case 1: { - if (!out_indices.layer) { - return {}; - } - const u32 index = out_indices.layer.value(); - return {AccessElement(t_out_int, out_vertex, index), Type::Int}; - } - case 2: { - if (!out_indices.viewport) { - return {}; - } - const u32 index = out_indices.viewport.value(); - return {AccessElement(t_out_int, out_vertex, index), Type::Int}; - } - case 3: { - const auto index = out_indices.point_size.value(); - return {AccessElement(t_out_float, out_vertex, index), Type::Float}; - } - default: - UNIMPLEMENTED_MSG("LayerViewportPoint element={}", abuf->GetElement()); - return {}; - } - case Attribute::Index::ClipDistances0123: { - const u32 index = out_indices.clip_distances.value(); - return {AccessElement(t_out_float, out_vertex, index, element), Type::Float}; - } - case Attribute::Index::ClipDistances4567: { - const u32 index = out_indices.clip_distances.value(); - return {AccessElement(t_out_float, out_vertex, index, element + 4), - Type::Float}; - } - default: - if (IsGenericAttribute(attribute)) { - const u8 offset = static_cast<u8>(static_cast<u8>(attribute) * 4 + element); - const GenericVaryingDescription description = output_attributes.at(offset); - const Id composite = description.id; - std::vector<u32> indices; - if (!description.is_scalar) { - indices.push_back(element - description.first_element); - } - return {ArrayPass(t_out_float, composite, indices), Type::Float}; - } - UNIMPLEMENTED_MSG("Unhandled output attribute: {}", - static_cast<u32>(attribute)); - return {}; - } - }(); - - } else if (const auto patch = std::get_if<PatchNode>(&*dest)) { - target = [&]() -> Expression { - const u32 offset = patch->GetOffset(); - switch (offset) { - case 0: - case 1: - case 2: - case 3: - return {AccessElement(t_out_float, tess_level_outer, offset % 4), Type::Float}; - case 4: - case 5: - return {AccessElement(t_out_float, tess_level_inner, offset % 4), Type::Float}; - } - UNIMPLEMENTED_MSG("Unhandled patch output offset: {}", offset); - return {}; - }(); - - } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { - Id address = AsUint(Visit(lmem->GetAddress())); - address = OpUDiv(t_uint, address, Constant(t_uint, 4)); - target = {OpAccessChain(t_prv_float, local_memory, address), Type::Float}; - - } else if (const auto smem = std::get_if<SmemNode>(&*dest)) { - target = {GetSharedMemoryPointer(*smem), Type::Uint}; - - } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { - target = {GetGlobalMemoryPointer(*gmem), Type::Uint}; - - } else if (const auto cv = std::get_if<CustomVarNode>(&*dest)) { - target = 
{custom_variables.at(cv->GetIndex()), Type::Float}; - - } else { - UNIMPLEMENTED(); - } - - if (!target.id) { - // On failure we return a nullptr target.id, skip these stores. - return {}; - } - - OpStore(target.id, As(Visit(src), target.type)); - return {}; - } - - template <u32 offset> - Expression FCastHalf(Operation operation) { - const Id value = AsHalfFloat(Visit(operation[0])); - return {GetFloatFromHalfScalar(OpCompositeExtract(t_scalar_half, value, offset)), - Type::Float}; - } - - Expression FSwizzleAdd(Operation operation) { - const Id minus = Constant(t_float, -1.0f); - const Id plus = v_float_one; - const Id zero = v_float_zero; - const Id lut_a = ConstantComposite(t_float4, minus, plus, minus, zero); - const Id lut_b = ConstantComposite(t_float4, minus, minus, plus, minus); - - Id mask = OpLoad(t_uint, thread_id); - mask = OpBitwiseAnd(t_uint, mask, Constant(t_uint, 3)); - mask = OpShiftLeftLogical(t_uint, mask, Constant(t_uint, 1)); - mask = OpShiftRightLogical(t_uint, AsUint(Visit(operation[2])), mask); - mask = OpBitwiseAnd(t_uint, mask, Constant(t_uint, 3)); - - const Id modifier_a = OpVectorExtractDynamic(t_float, lut_a, mask); - const Id modifier_b = OpVectorExtractDynamic(t_float, lut_b, mask); - - const Id op_a = OpFMul(t_float, AsFloat(Visit(operation[0])), modifier_a); - const Id op_b = OpFMul(t_float, AsFloat(Visit(operation[1])), modifier_b); - return {OpFAdd(t_float, op_a, op_b), Type::Float}; - } - - Expression HNegate(Operation operation) { - const bool is_f16 = device.IsFloat16Supported(); - const Id minus_one = Constant(t_scalar_half, is_f16 ? 0xbc00 : 0xbf800000); - const Id one = Constant(t_scalar_half, is_f16 ? 0x3c00 : 0x3f800000); - const auto GetNegate = [&](std::size_t index) { - return OpSelect(t_scalar_half, AsBool(Visit(operation[index])), minus_one, one); - }; - const Id negation = OpCompositeConstruct(t_half, GetNegate(1), GetNegate(2)); - return {OpFMul(t_half, AsHalfFloat(Visit(operation[0])), negation), Type::HalfFloat}; - } - - Expression HClamp(Operation operation) { - const auto Pack = [&](std::size_t index) { - const Id scalar = GetHalfScalarFromFloat(AsFloat(Visit(operation[index]))); - return OpCompositeConstruct(t_half, scalar, scalar); - }; - const Id value = AsHalfFloat(Visit(operation[0])); - const Id min = Pack(1); - const Id max = Pack(2); - - const Id clamped = OpFClamp(t_half, value, min, max); - if (IsPrecise(operation)) { - Decorate(clamped, spv::Decoration::NoContraction); - } - return {clamped, Type::HalfFloat}; - } - - Expression HCastFloat(Operation operation) { - const Id value = GetHalfScalarFromFloat(AsFloat(Visit(operation[0]))); - return {OpCompositeConstruct(t_half, value, Constant(t_scalar_half, 0)), Type::HalfFloat}; - } - - Expression HUnpack(Operation operation) { - Expression operand = Visit(operation[0]); - const auto type = std::get<Tegra::Shader::HalfType>(operation.GetMeta()); - if (type == Tegra::Shader::HalfType::H0_H1) { - return operand; - } - const auto value = [&] { - switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) { - case Tegra::Shader::HalfType::F32: - return GetHalfScalarFromFloat(AsFloat(operand)); - case Tegra::Shader::HalfType::H0_H0: - return OpCompositeExtract(t_scalar_half, AsHalfFloat(operand), 0); - case Tegra::Shader::HalfType::H1_H1: - return OpCompositeExtract(t_scalar_half, AsHalfFloat(operand), 1); - default: - UNREACHABLE(); - return ConstantNull(t_half); - } - }(); - return {OpCompositeConstruct(t_half, value, value), Type::HalfFloat}; - } - - Expression 
HMergeF32(Operation operation) { - const Id value = AsHalfFloat(Visit(operation[0])); - return {GetFloatFromHalfScalar(OpCompositeExtract(t_scalar_half, value, 0)), Type::Float}; - } - - template <u32 offset> - Expression HMergeHN(Operation operation) { - const Id target = AsHalfFloat(Visit(operation[0])); - const Id source = AsHalfFloat(Visit(operation[1])); - const Id object = OpCompositeExtract(t_scalar_half, source, offset); - return {OpCompositeInsert(t_half, object, target, offset), Type::HalfFloat}; - } - - Expression HPack2(Operation operation) { - const Id low = GetHalfScalarFromFloat(AsFloat(Visit(operation[0]))); - const Id high = GetHalfScalarFromFloat(AsFloat(Visit(operation[1]))); - return {OpCompositeConstruct(t_half, low, high), Type::HalfFloat}; - } - - Expression LogicalAddCarry(Operation operation) { - const Id op_a = AsUint(Visit(operation[0])); - const Id op_b = AsUint(Visit(operation[1])); - - const Id result = OpIAddCarry(TypeStruct({t_uint, t_uint}), op_a, op_b); - const Id carry = OpCompositeExtract(t_uint, result, 1); - return {OpINotEqual(t_bool, carry, v_uint_zero), Type::Bool}; - } - - Expression LogicalAssign(Operation operation) { - const Node& dest = operation[0]; - const Node& src = operation[1]; - - Id target{}; - if (const auto pred = std::get_if<PredicateNode>(&*dest)) { - ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment"); - - const auto index = pred->GetIndex(); - switch (index) { - case Tegra::Shader::Pred::NeverExecute: - case Tegra::Shader::Pred::UnusedIndex: - // Writing to these predicates is a no-op - return {}; - } - target = predicates.at(index); - - } else if (const auto flag = std::get_if<InternalFlagNode>(&*dest)) { - target = internal_flags.at(static_cast<u32>(flag->GetFlag())); - } - - OpStore(target, AsBool(Visit(src))); - return {}; - } - - Expression LogicalFOrdered(Operation operation) { - // Emulate SPIR-V's OpOrdered - const Id op_a = AsFloat(Visit(operation[0])); - const Id op_b = AsFloat(Visit(operation[1])); - const Id is_num_a = OpFOrdEqual(t_bool, op_a, op_a); - const Id is_num_b = OpFOrdEqual(t_bool, op_b, op_b); - return {OpLogicalAnd(t_bool, is_num_a, is_num_b), Type::Bool}; - } - - Expression LogicalFUnordered(Operation operation) { - // Emulate SPIR-V's OpUnordered - const Id op_a = AsFloat(Visit(operation[0])); - const Id op_b = AsFloat(Visit(operation[1])); - const Id is_nan_a = OpIsNan(t_bool, op_a); - const Id is_nan_b = OpIsNan(t_bool, op_b); - return {OpLogicalOr(t_bool, is_nan_a, is_nan_b), Type::Bool}; - } - - Id GetTextureSampler(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - ASSERT(!meta.sampler.is_buffer); - - const auto& entry = sampled_images.at(meta.sampler.index); - Id sampler = entry.variable; - if (meta.sampler.is_indexed) { - const Id index = AsInt(Visit(meta.index)); - sampler = OpAccessChain(entry.sampler_pointer_type, sampler, index); - } - return OpLoad(entry.sampler_type, sampler); - } - - Id GetTextureImage(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - const u32 index = meta.sampler.index; - if (meta.sampler.is_buffer) { - const auto& entry = uniform_texels.at(index); - return OpLoad(entry.image_type, entry.image); - } else { - const auto& entry = sampled_images.at(index); - return OpImage(entry.image_type, GetTextureSampler(operation)); - } - } - - Id GetImage(Operation operation) { - const auto& meta = std::get<MetaImage>(operation.GetMeta()); - const auto entry = images.at(meta.image.index); - return 
OpLoad(entry.image_type, entry.image); - } - - Id AssembleVector(const std::vector<Id>& coords, Type type) { - const Id coords_type = GetTypeVectorDefinitionLut(type).at(coords.size() - 1); - return coords.size() == 1 ? coords[0] : OpCompositeConstruct(coords_type, coords); - } - - Id GetCoordinates(Operation operation, Type type) { - std::vector<Id> coords; - for (std::size_t i = 0; i < operation.GetOperandsCount(); ++i) { - coords.push_back(As(Visit(operation[i]), type)); - } - if (const auto meta = std::get_if<MetaTexture>(&operation.GetMeta())) { - // Add array coordinate for textures - if (meta->sampler.is_array) { - Id array = AsInt(Visit(meta->array)); - if (type == Type::Float) { - array = OpConvertSToF(t_float, array); - } - coords.push_back(array); - } - } - return AssembleVector(coords, type); - } - - Id GetOffsetCoordinates(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - std::vector<Id> coords; - coords.reserve(meta.aoffi.size()); - for (const auto& coord : meta.aoffi) { - coords.push_back(AsInt(Visit(coord))); - } - return AssembleVector(coords, Type::Int); - } - - std::pair<Id, Id> GetDerivatives(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - const auto& derivatives = meta.derivates; - ASSERT(derivatives.size() % 2 == 0); - - const std::size_t components = derivatives.size() / 2; - std::vector<Id> dx, dy; - dx.reserve(components); - dy.reserve(components); - for (std::size_t index = 0; index < components; ++index) { - dx.push_back(AsFloat(Visit(derivatives.at(index * 2 + 0)))); - dy.push_back(AsFloat(Visit(derivatives.at(index * 2 + 1)))); - } - return {AssembleVector(dx, Type::Float), AssembleVector(dy, Type::Float)}; - } - - Expression GetTextureElement(Operation operation, Id sample_value, Type type) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - const auto type_def = GetTypeDefinition(type); - return {OpCompositeExtract(type_def, sample_value, meta.element), type}; - } - - Expression Texture(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - - const bool can_implicit = stage == ShaderType::Fragment; - const Id sampler = GetTextureSampler(operation); - const Id coords = GetCoordinates(operation, Type::Float); - - std::vector<Id> operands; - spv::ImageOperandsMask mask{}; - if (meta.bias) { - mask = mask | spv::ImageOperandsMask::Bias; - operands.push_back(AsFloat(Visit(meta.bias))); - } - - if (!can_implicit) { - mask = mask | spv::ImageOperandsMask::Lod; - operands.push_back(v_float_zero); - } - - if (!meta.aoffi.empty()) { - mask = mask | spv::ImageOperandsMask::Offset; - operands.push_back(GetOffsetCoordinates(operation)); - } - - if (meta.depth_compare) { - // Depth sampling - UNIMPLEMENTED_IF(meta.bias); - const Id dref = AsFloat(Visit(meta.depth_compare)); - if (can_implicit) { - return { - OpImageSampleDrefImplicitLod(t_float, sampler, coords, dref, mask, operands), - Type::Float}; - } else { - return { - OpImageSampleDrefExplicitLod(t_float, sampler, coords, dref, mask, operands), - Type::Float}; - } - } - - Id texture; - if (can_implicit) { - texture = OpImageSampleImplicitLod(t_float4, sampler, coords, mask, operands); - } else { - texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, operands); - } - return GetTextureElement(operation, texture, Type::Float); - } - - Expression TextureLod(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - - const Id sampler = 
GetTextureSampler(operation); - const Id coords = GetCoordinates(operation, Type::Float); - const Id lod = AsFloat(Visit(meta.lod)); - - spv::ImageOperandsMask mask = spv::ImageOperandsMask::Lod; - std::vector<Id> operands{lod}; - - if (!meta.aoffi.empty()) { - mask = mask | spv::ImageOperandsMask::Offset; - operands.push_back(GetOffsetCoordinates(operation)); - } - - if (meta.sampler.is_shadow) { - const Id dref = AsFloat(Visit(meta.depth_compare)); - return {OpImageSampleDrefExplicitLod(t_float, sampler, coords, dref, mask, operands), - Type::Float}; - } - const Id texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, operands); - return GetTextureElement(operation, texture, Type::Float); - } - - Expression TextureGather(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - - const Id coords = GetCoordinates(operation, Type::Float); - - spv::ImageOperandsMask mask = spv::ImageOperandsMask::MaskNone; - std::vector<Id> operands; - Id texture{}; - - if (!meta.aoffi.empty()) { - mask = mask | spv::ImageOperandsMask::Offset; - operands.push_back(GetOffsetCoordinates(operation)); - } - - if (meta.sampler.is_shadow) { - texture = OpImageDrefGather(t_float4, GetTextureSampler(operation), coords, - AsFloat(Visit(meta.depth_compare)), mask, operands); - } else { - u32 component_value = 0; - if (meta.component) { - const auto component = std::get_if<ImmediateNode>(&*meta.component); - ASSERT_MSG(component, "Component is not an immediate value"); - component_value = component->GetValue(); - } - texture = OpImageGather(t_float4, GetTextureSampler(operation), coords, - Constant(t_uint, component_value), mask, operands); - } - return GetTextureElement(operation, texture, Type::Float); - } - - Expression TextureQueryDimensions(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - UNIMPLEMENTED_IF(!meta.aoffi.empty()); - UNIMPLEMENTED_IF(meta.depth_compare); - - const auto image_id = GetTextureImage(operation); - if (meta.element == 3) { - return {OpImageQueryLevels(t_int, image_id), Type::Int}; - } - - const Id lod = AsUint(Visit(operation[0])); - const std::size_t coords_count = [&meta] { - switch (const auto type = meta.sampler.type) { - case Tegra::Shader::TextureType::Texture1D: - return 1; - case Tegra::Shader::TextureType::Texture2D: - case Tegra::Shader::TextureType::TextureCube: - return 2; - case Tegra::Shader::TextureType::Texture3D: - return 3; - default: - UNREACHABLE_MSG("Invalid texture type={}", type); - return 2; - } - }(); - - if (meta.element >= coords_count) { - return {v_float_zero, Type::Float}; - } - - const std::array<Id, 3> types = {t_int, t_int2, t_int3}; - const Id sizes = OpImageQuerySizeLod(types.at(coords_count - 1), image_id, lod); - const Id size = OpCompositeExtract(t_int, sizes, meta.element); - return {size, Type::Int}; - } - - Expression TextureQueryLod(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - UNIMPLEMENTED_IF(!meta.aoffi.empty()); - UNIMPLEMENTED_IF(meta.depth_compare); - - if (meta.element >= 2) { - UNREACHABLE_MSG("Invalid element"); - return {v_float_zero, Type::Float}; - } - const auto sampler_id = GetTextureSampler(operation); - - const Id multiplier = Constant(t_float, 256.0f); - const Id multipliers = ConstantComposite(t_float2, multiplier, multiplier); - - const Id coords = GetCoordinates(operation, Type::Float); - Id size = OpImageQueryLod(t_float2, sampler_id, coords); - size = OpFMul(t_float2, size, multipliers); - size = 
OpConvertFToS(t_int2, size); - return GetTextureElement(operation, size, Type::Int); - } - - Expression TexelFetch(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - UNIMPLEMENTED_IF(meta.depth_compare); - - const Id image = GetTextureImage(operation); - const Id coords = GetCoordinates(operation, Type::Int); - - spv::ImageOperandsMask mask = spv::ImageOperandsMask::MaskNone; - std::vector<Id> operands; - Id fetch; - - if (meta.lod && !meta.sampler.is_buffer) { - mask = mask | spv::ImageOperandsMask::Lod; - operands.push_back(AsInt(Visit(meta.lod))); - } - - if (!meta.aoffi.empty()) { - mask = mask | spv::ImageOperandsMask::Offset; - operands.push_back(GetOffsetCoordinates(operation)); - } - - fetch = OpImageFetch(t_float4, image, coords, mask, operands); - return GetTextureElement(operation, fetch, Type::Float); - } - - Expression TextureGradient(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - UNIMPLEMENTED_IF(!meta.aoffi.empty()); - - const Id sampler = GetTextureSampler(operation); - const Id coords = GetCoordinates(operation, Type::Float); - const auto [dx, dy] = GetDerivatives(operation); - const std::vector grad = {dx, dy}; - - static constexpr auto mask = spv::ImageOperandsMask::Grad; - const Id texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, grad); - return GetTextureElement(operation, texture, Type::Float); - } - - Expression ImageLoad(Operation operation) { - if (!device.IsFormatlessImageLoadSupported()) { - return {v_float_zero, Type::Float}; - } - - const auto& meta{std::get<MetaImage>(operation.GetMeta())}; - - const Id coords = GetCoordinates(operation, Type::Int); - const Id texel = OpImageRead(t_uint4, GetImage(operation), coords); - - return {OpCompositeExtract(t_uint, texel, meta.element), Type::Uint}; - } - - Expression ImageStore(Operation operation) { - const auto meta{std::get<MetaImage>(operation.GetMeta())}; - std::vector<Id> colors; - for (const auto& value : meta.values) { - colors.push_back(AsUint(Visit(value))); - } - - const Id coords = GetCoordinates(operation, Type::Int); - const Id texel = OpCompositeConstruct(t_uint4, colors); - - OpImageWrite(GetImage(operation), coords, texel, {}); - return {}; - } - - template <Id (Module::*func)(Id, Id, Id, Id, Id)> - Expression AtomicImage(Operation operation) { - const auto& meta{std::get<MetaImage>(operation.GetMeta())}; - ASSERT(meta.values.size() == 1); - - const Id coordinate = GetCoordinates(operation, Type::Int); - const Id image = images.at(meta.image.index).image; - const Id sample = v_uint_zero; - const Id pointer = OpImageTexelPointer(t_image_uint, image, coordinate, sample); - - const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device)); - const Id semantics = v_uint_zero; - const Id value = AsUint(Visit(meta.values[0])); - return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint}; - } - - template <Id (Module::*func)(Id, Id, Id, Id, Id)> - Expression Atomic(Operation operation) { - Id pointer; - if (const auto smem = std::get_if<SmemNode>(&*operation[0])) { - pointer = GetSharedMemoryPointer(*smem); - } else if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) { - pointer = GetGlobalMemoryPointer(*gmem); - } else { - UNREACHABLE(); - return {v_float_zero, Type::Float}; - } - const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device)); - const Id semantics = v_uint_zero; - const Id value = AsUint(Visit(operation[1])); - - return {(this->*func)(t_uint, 
pointer, scope, semantics, value), Type::Uint}; - } - - template <Id (Module::*func)(Id, Id, Id, Id, Id)> - Expression Reduce(Operation operation) { - Atomic<func>(operation); - return {}; - } - - Expression Branch(Operation operation) { - const auto& target = std::get<ImmediateNode>(*operation[0]); - OpStore(jmp_to, Constant(t_uint, target.GetValue())); - OpBranch(continue_label); - inside_branch = true; - if (!conditional_branch_set) { - AddLabel(); - } - return {}; - } - - Expression BranchIndirect(Operation operation) { - const Id op_a = AsUint(Visit(operation[0])); - - OpStore(jmp_to, op_a); - OpBranch(continue_label); - inside_branch = true; - if (!conditional_branch_set) { - AddLabel(); - } - return {}; - } - - Expression PushFlowStack(Operation operation) { - const auto& target = std::get<ImmediateNode>(*operation[0]); - const auto [flow_stack, flow_stack_top] = GetFlowStack(operation); - const Id current = OpLoad(t_uint, flow_stack_top); - const Id next = OpIAdd(t_uint, current, Constant(t_uint, 1)); - const Id access = OpAccessChain(t_func_uint, flow_stack, current); - - OpStore(access, Constant(t_uint, target.GetValue())); - OpStore(flow_stack_top, next); - return {}; - } - - Expression PopFlowStack(Operation operation) { - const auto [flow_stack, flow_stack_top] = GetFlowStack(operation); - const Id current = OpLoad(t_uint, flow_stack_top); - const Id previous = OpISub(t_uint, current, Constant(t_uint, 1)); - const Id access = OpAccessChain(t_func_uint, flow_stack, previous); - const Id target = OpLoad(t_uint, access); - - OpStore(flow_stack_top, previous); - OpStore(jmp_to, target); - OpBranch(continue_label); - inside_branch = true; - if (!conditional_branch_set) { - AddLabel(); - } - return {}; - } - - Id MaxwellToSpirvComparison(Maxwell::ComparisonOp compare_op, Id operand_1, Id operand_2) { - using Compare = Maxwell::ComparisonOp; - switch (compare_op) { - case Compare::NeverOld: - return v_false; // Never let the test pass - case Compare::LessOld: - return OpFOrdLessThan(t_bool, operand_1, operand_2); - case Compare::EqualOld: - return OpFOrdEqual(t_bool, operand_1, operand_2); - case Compare::LessEqualOld: - return OpFOrdLessThanEqual(t_bool, operand_1, operand_2); - case Compare::GreaterOld: - return OpFOrdGreaterThan(t_bool, operand_1, operand_2); - case Compare::NotEqualOld: - return OpFOrdNotEqual(t_bool, operand_1, operand_2); - case Compare::GreaterEqualOld: - return OpFOrdGreaterThanEqual(t_bool, operand_1, operand_2); - default: - UNREACHABLE(); - return v_true; - } - } - - void AlphaTest(Id pointer) { - if (specialization.alpha_test_func == Maxwell::ComparisonOp::AlwaysOld) { - return; - } - const Id true_label = OpLabel(); - const Id discard_label = OpLabel(); - const Id alpha_reference = Constant(t_float, specialization.alpha_test_ref); - const Id alpha_value = OpLoad(t_float, pointer); - const Id condition = - MaxwellToSpirvComparison(specialization.alpha_test_func, alpha_value, alpha_reference); - - OpBranchConditional(condition, true_label, discard_label); - AddLabel(discard_label); - OpKill(); - AddLabel(true_label); - } - - void PreExit() { - if (stage == ShaderType::Vertex && specialization.ndc_minus_one_to_one) { - const u32 position_index = out_indices.position.value(); - const Id z_pointer = AccessElement(t_out_float, out_vertex, position_index, 2U); - const Id w_pointer = AccessElement(t_out_float, out_vertex, position_index, 3U); - Id depth = OpLoad(t_float, z_pointer); - depth = OpFAdd(t_float, depth, OpLoad(t_float, w_pointer)); - depth = 
OpFMul(t_float, depth, Constant(t_float, 0.5f)); - OpStore(z_pointer, depth); - } - if (stage == ShaderType::Fragment) { - const auto SafeGetRegister = [this](u32 reg) { - if (const auto it = registers.find(reg); it != registers.end()) { - return OpLoad(t_float, it->second); - } - return v_float_zero; - }; - - UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, - "Sample mask write is unimplemented"); - - // Write the color outputs using the data in the shader registers, disabled - // rendertargets/components are skipped in the register assignment. - u32 current_reg = 0; - for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { - // TODO(Subv): Figure out how dual-source blending is configured in the Switch. - for (u32 component = 0; component < 4; ++component) { - if (!header.ps.IsColorComponentOutputEnabled(rt, component)) { - continue; - } - const Id pointer = AccessElement(t_out_float, frag_colors[rt], component); - OpStore(pointer, SafeGetRegister(current_reg)); - if (rt == 0 && component == 3) { - AlphaTest(pointer); - } - ++current_reg; - } - } - if (header.ps.omap.depth) { - // The depth output is always 2 registers after the last color output, and - // current_reg already contains one past the last color register. - OpStore(frag_depth, SafeGetRegister(current_reg + 1)); - } - } - } - - Expression Exit(Operation operation) { - PreExit(); - inside_branch = true; - if (conditional_branch_set) { - OpReturn(); - } else { - const Id dummy = OpLabel(); - OpBranch(dummy); - AddLabel(dummy); - OpReturn(); - AddLabel(); - } - return {}; - } - - Expression Discard(Operation operation) { - inside_branch = true; - if (conditional_branch_set) { - OpKill(); - } else { - const Id dummy = OpLabel(); - OpBranch(dummy); - AddLabel(dummy); - OpKill(); - AddLabel(); - } - return {}; - } - - Expression EmitVertex(Operation) { - OpEmitVertex(); - return {}; - } - - Expression EndPrimitive(Operation operation) { - OpEndPrimitive(); - return {}; - } - - Expression InvocationId(Operation) { - return {OpLoad(t_int, invocation_id), Type::Int}; - } - - Expression YNegate(Operation) { - LOG_WARNING(Render_Vulkan, "(STUBBED)"); - return {Constant(t_float, 1.0f), Type::Float}; - } - - template <u32 element> - Expression LocalInvocationId(Operation) { - const Id id = OpLoad(t_uint3, local_invocation_id); - return {OpCompositeExtract(t_uint, id, element), Type::Uint}; - } - - template <u32 element> - Expression WorkGroupId(Operation operation) { - const Id id = OpLoad(t_uint3, workgroup_id); - return {OpCompositeExtract(t_uint, id, element), Type::Uint}; - } - - Expression BallotThread(Operation operation) { - const Id predicate = AsBool(Visit(operation[0])); - const Id ballot = OpSubgroupBallotKHR(t_uint4, predicate); - - if (!device.IsWarpSizePotentiallyBiggerThanGuest()) { - // Guest-like devices can just return the first index. - return {OpCompositeExtract(t_uint, ballot, 0U), Type::Uint}; - } - - // The others will have to return what is local to the current thread. - // For instance a device with a warp size of 64 will return the upper uint when the current - // thread is 38. 
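// In other words, the ballot comes back as a uvec4 of 32-bit words and the shift below picks
// the word that contains the current invocation, e.g.:
//     thread_index = tid >> 5;                             // 38 >> 5 == 1
//     OpVectorExtractDynamic(ballot, thread_index)         // second (upper) 32-bit word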
- const Id tid = OpLoad(t_uint, thread_id); - const Id thread_index = OpShiftRightLogical(t_uint, tid, Constant(t_uint, 5)); - return {OpVectorExtractDynamic(t_uint, ballot, thread_index), Type::Uint}; - } - - template <Id (Module::*func)(Id, Id)> - Expression Vote(Operation operation) { - // TODO(Rodrigo): Handle devices with different warp sizes - const Id predicate = AsBool(Visit(operation[0])); - return {(this->*func)(t_bool, predicate), Type::Bool}; - } - - Expression ThreadId(Operation) { - return {OpLoad(t_uint, thread_id), Type::Uint}; - } - - template <std::size_t index> - Expression ThreadMask(Operation) { - // TODO(Rodrigo): Handle devices with different warp sizes - const Id mask = thread_masks[index]; - return {OpLoad(t_uint, AccessElement(t_in_uint, mask, 0)), Type::Uint}; - } - - Expression ShuffleIndexed(Operation operation) { - const Id value = AsFloat(Visit(operation[0])); - const Id index = AsUint(Visit(operation[1])); - return {OpSubgroupReadInvocationKHR(t_float, value, index), Type::Float}; - } - - Expression Barrier(Operation) { - if (!ir.IsDecompiled()) { - LOG_ERROR(Render_Vulkan, "OpBarrier used by shader is not decompiled"); - return {}; - } - - const auto scope = spv::Scope::Workgroup; - const auto memory = spv::Scope::Workgroup; - const auto semantics = - spv::MemorySemanticsMask::WorkgroupMemory | spv::MemorySemanticsMask::AcquireRelease; - OpControlBarrier(Constant(t_uint, static_cast<u32>(scope)), - Constant(t_uint, static_cast<u32>(memory)), - Constant(t_uint, static_cast<u32>(semantics))); - return {}; - } - - template <spv::Scope scope> - Expression MemoryBarrier(Operation) { - const auto semantics = - spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory | - spv::MemorySemanticsMask::WorkgroupMemory | - spv::MemorySemanticsMask::AtomicCounterMemory | spv::MemorySemanticsMask::ImageMemory; - - OpMemoryBarrier(Constant(t_uint, static_cast<u32>(scope)), - Constant(t_uint, static_cast<u32>(semantics))); - return {}; - } - - Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type, std::string name) { - const Id id = OpVariable(type, storage); - Decorate(id, spv::Decoration::BuiltIn, static_cast<u32>(builtin)); - AddGlobalVariable(Name(id, std::move(name))); - interfaces.push_back(id); - return id; - } - - Id DeclareInputBuiltIn(spv::BuiltIn builtin, Id type, std::string name) { - return DeclareBuiltIn(builtin, spv::StorageClass::Input, type, std::move(name)); - } - - template <typename... Args> - Id AccessElement(Id pointer_type, Id composite, Args... 
elements_) { - std::vector<Id> members; - auto elements = {elements_...}; - for (const auto element : elements) { - members.push_back(Constant(t_uint, element)); - } - - return OpAccessChain(pointer_type, composite, members); - } - - Id As(Expression expr, Type wanted_type) { - switch (wanted_type) { - case Type::Bool: - return AsBool(expr); - case Type::Bool2: - return AsBool2(expr); - case Type::Float: - return AsFloat(expr); - case Type::Int: - return AsInt(expr); - case Type::Uint: - return AsUint(expr); - case Type::HalfFloat: - return AsHalfFloat(expr); - default: - UNREACHABLE(); - return expr.id; - } - } - - Id AsBool(Expression expr) { - ASSERT(expr.type == Type::Bool); - return expr.id; - } - - Id AsBool2(Expression expr) { - ASSERT(expr.type == Type::Bool2); - return expr.id; - } - - Id AsFloat(Expression expr) { - switch (expr.type) { - case Type::Float: - return expr.id; - case Type::Int: - case Type::Uint: - return OpBitcast(t_float, expr.id); - case Type::HalfFloat: - if (device.IsFloat16Supported()) { - return OpBitcast(t_float, expr.id); - } - return OpBitcast(t_float, OpPackHalf2x16(t_uint, expr.id)); - default: - UNREACHABLE(); - return expr.id; - } - } - - Id AsInt(Expression expr) { - switch (expr.type) { - case Type::Int: - return expr.id; - case Type::Float: - case Type::Uint: - return OpBitcast(t_int, expr.id); - case Type::HalfFloat: - if (device.IsFloat16Supported()) { - return OpBitcast(t_int, expr.id); - } - return OpPackHalf2x16(t_int, expr.id); - default: - UNREACHABLE(); - return expr.id; - } - } - - Id AsUint(Expression expr) { - switch (expr.type) { - case Type::Uint: - return expr.id; - case Type::Float: - case Type::Int: - return OpBitcast(t_uint, expr.id); - case Type::HalfFloat: - if (device.IsFloat16Supported()) { - return OpBitcast(t_uint, expr.id); - } - return OpPackHalf2x16(t_uint, expr.id); - default: - UNREACHABLE(); - return expr.id; - } - } - - Id AsHalfFloat(Expression expr) { - switch (expr.type) { - case Type::HalfFloat: - return expr.id; - case Type::Float: - case Type::Int: - case Type::Uint: - if (device.IsFloat16Supported()) { - return OpBitcast(t_half, expr.id); - } - return OpUnpackHalf2x16(t_half, AsUint(expr)); - default: - UNREACHABLE(); - return expr.id; - } - } - - Id GetHalfScalarFromFloat(Id value) { - if (device.IsFloat16Supported()) { - return OpFConvert(t_scalar_half, value); - } - return value; - } - - Id GetFloatFromHalfScalar(Id value) { - if (device.IsFloat16Supported()) { - return OpFConvert(t_float, value); - } - return value; - } - - AttributeType GetAttributeType(u32 location) const { - if (stage != ShaderType::Vertex) { - return {Type::Float, t_in_float, t_in_float4}; - } - switch (specialization.attribute_types.at(location)) { - case Maxwell::VertexAttribute::Type::SignedNorm: - case Maxwell::VertexAttribute::Type::UnsignedNorm: - case Maxwell::VertexAttribute::Type::UnsignedScaled: - case Maxwell::VertexAttribute::Type::SignedScaled: - case Maxwell::VertexAttribute::Type::Float: - return {Type::Float, t_in_float, t_in_float4}; - case Maxwell::VertexAttribute::Type::SignedInt: - return {Type::Int, t_in_int, t_in_int4}; - case Maxwell::VertexAttribute::Type::UnsignedInt: - return {Type::Uint, t_in_uint, t_in_uint4}; - default: - UNREACHABLE(); - return {Type::Float, t_in_float, t_in_float4}; - } - } - - Id GetTypeDefinition(Type type) const { - switch (type) { - case Type::Bool: - return t_bool; - case Type::Bool2: - return t_bool2; - case Type::Float: - return t_float; - case Type::Int: - return t_int; - case 
Type::Uint: - return t_uint; - case Type::HalfFloat: - return t_half; - default: - UNREACHABLE(); - return {}; - } - } - - std::array<Id, 4> GetTypeVectorDefinitionLut(Type type) const { - switch (type) { - case Type::Float: - return {t_float, t_float2, t_float3, t_float4}; - case Type::Int: - return {t_int, t_int2, t_int3, t_int4}; - case Type::Uint: - return {t_uint, t_uint2, t_uint3, t_uint4}; - default: - UNIMPLEMENTED(); - return {}; - } - } - - std::tuple<Id, Id> CreateFlowStack() { - // TODO(Rodrigo): Figure out the actual depth of the flow stack, for now it seems unlikely - // that shaders will use 20 nested SSYs and PBKs. - constexpr u32 FLOW_STACK_SIZE = 20; - constexpr auto storage_class = spv::StorageClass::Function; - - const Id flow_stack_type = TypeArray(t_uint, Constant(t_uint, FLOW_STACK_SIZE)); - const Id stack = OpVariable(TypePointer(storage_class, flow_stack_type), storage_class, - ConstantNull(flow_stack_type)); - const Id top = OpVariable(t_func_uint, storage_class, Constant(t_uint, 0)); - AddLocalVariable(stack); - AddLocalVariable(top); - return std::tie(stack, top); - } - - std::pair<Id, Id> GetFlowStack(Operation operation) { - const auto stack_class = std::get<MetaStackClass>(operation.GetMeta()); - switch (stack_class) { - case MetaStackClass::Ssy: - return {ssy_flow_stack, ssy_flow_stack_top}; - case MetaStackClass::Pbk: - return {pbk_flow_stack, pbk_flow_stack_top}; - } - UNREACHABLE(); - return {}; - } - - Id GetGlobalMemoryPointer(const GmemNode& gmem) { - const Id real = AsUint(Visit(gmem.GetRealAddress())); - const Id base = AsUint(Visit(gmem.GetBaseAddress())); - const Id diff = OpISub(t_uint, real, base); - const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2)); - const Id buffer = global_buffers.at(gmem.GetDescriptor()); - return OpAccessChain(t_gmem_uint, buffer, Constant(t_uint, 0), offset); - } - - Id GetSharedMemoryPointer(const SmemNode& smem) { - ASSERT(stage == ShaderType::Compute); - Id address = AsUint(Visit(smem.GetAddress())); - address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); - return OpAccessChain(t_smem_uint, shared_memory, address); - } - - static constexpr std::array operation_decompilers = { - &SPIRVDecompiler::Assign, - - &SPIRVDecompiler::Ternary<&Module::OpSelect, Type::Float, Type::Bool, Type::Float, - Type::Float>, - - &SPIRVDecompiler::Binary<&Module::OpFAdd, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFMul, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFDiv, Type::Float>, - &SPIRVDecompiler::Ternary<&Module::OpFma, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpFNegate, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::Float>, - &SPIRVDecompiler::Ternary<&Module::OpFClamp, Type::Float>, - &SPIRVDecompiler::FCastHalf<0>, - &SPIRVDecompiler::FCastHalf<1>, - &SPIRVDecompiler::Binary<&Module::OpFMin, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFMax, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpCos, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpSin, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpExp2, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpLog2, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpInverseSqrt, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpSqrt, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpRoundEven, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpFloor, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpCeil, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpTrunc, Type::Float>, - 
&SPIRVDecompiler::Unary<&Module::OpConvertSToF, Type::Float, Type::Int>, - &SPIRVDecompiler::Unary<&Module::OpConvertUToF, Type::Float, Type::Uint>, - &SPIRVDecompiler::FSwizzleAdd, - - &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpSDiv, Type::Int>, - &SPIRVDecompiler::Unary<&Module::OpSNegate, Type::Int>, - &SPIRVDecompiler::Unary<&Module::OpSAbs, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpSMin, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpSMax, Type::Int>, - - &SPIRVDecompiler::Unary<&Module::OpConvertFToS, Type::Int, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpBitcast, Type::Int, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpShiftLeftLogical, Type::Int, Type::Int, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Int, Type::Int, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpShiftRightArithmetic, Type::Int, Type::Int, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpBitwiseAnd, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpBitwiseOr, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpBitwiseXor, Type::Int>, - &SPIRVDecompiler::Unary<&Module::OpNot, Type::Int>, - &SPIRVDecompiler::Quaternary<&Module::OpBitFieldInsert, Type::Int>, - &SPIRVDecompiler::Ternary<&Module::OpBitFieldSExtract, Type::Int>, - &SPIRVDecompiler::Unary<&Module::OpBitCount, Type::Int>, - &SPIRVDecompiler::Unary<&Module::OpFindSMsb, Type::Int>, - - &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpUDiv, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpUMin, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpUMax, Type::Uint>, - &SPIRVDecompiler::Unary<&Module::OpConvertFToU, Type::Uint, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpBitcast, Type::Uint, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpShiftLeftLogical, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpBitwiseAnd, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpBitwiseOr, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpBitwiseXor, Type::Uint>, - &SPIRVDecompiler::Unary<&Module::OpNot, Type::Uint>, - &SPIRVDecompiler::Quaternary<&Module::OpBitFieldInsert, Type::Uint>, - &SPIRVDecompiler::Ternary<&Module::OpBitFieldUExtract, Type::Uint>, - &SPIRVDecompiler::Unary<&Module::OpBitCount, Type::Uint>, - &SPIRVDecompiler::Unary<&Module::OpFindUMsb, Type::Uint>, - - &SPIRVDecompiler::Binary<&Module::OpFAdd, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFMul, Type::HalfFloat>, - &SPIRVDecompiler::Ternary<&Module::OpFma, Type::HalfFloat>, - &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::HalfFloat>, - &SPIRVDecompiler::HNegate, - &SPIRVDecompiler::HClamp, - &SPIRVDecompiler::HCastFloat, - &SPIRVDecompiler::HUnpack, - &SPIRVDecompiler::HMergeF32, - &SPIRVDecompiler::HMergeHN<0>, - &SPIRVDecompiler::HMergeHN<1>, - &SPIRVDecompiler::HPack2, - - &SPIRVDecompiler::LogicalAssign, - &SPIRVDecompiler::Binary<&Module::OpLogicalAnd, Type::Bool>, - &SPIRVDecompiler::Binary<&Module::OpLogicalOr, Type::Bool>, - &SPIRVDecompiler::Binary<&Module::OpLogicalNotEqual, Type::Bool>, - &SPIRVDecompiler::Unary<&Module::OpLogicalNot, Type::Bool>, - &SPIRVDecompiler::Binary<&Module::OpVectorExtractDynamic, Type::Bool, Type::Bool2, - Type::Uint>, - 
&SPIRVDecompiler::Unary<&Module::OpAll, Type::Bool, Type::Bool2>, - - &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool, Type::Float>, - &SPIRVDecompiler::LogicalFOrdered, - &SPIRVDecompiler::LogicalFUnordered, - &SPIRVDecompiler::Binary<&Module::OpFUnordLessThan, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFUnordEqual, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFUnordLessThanEqual, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFUnordGreaterThan, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFUnordNotEqual, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFUnordGreaterThanEqual, Type::Bool, Type::Float>, - - &SPIRVDecompiler::Binary<&Module::OpSLessThan, Type::Bool, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpIEqual, Type::Bool, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpSLessThanEqual, Type::Bool, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpSGreaterThan, Type::Bool, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpINotEqual, Type::Bool, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpSGreaterThanEqual, Type::Bool, Type::Int>, - - &SPIRVDecompiler::Binary<&Module::OpULessThan, Type::Bool, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpIEqual, Type::Bool, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpULessThanEqual, Type::Bool, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpUGreaterThan, Type::Bool, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpINotEqual, Type::Bool, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpUGreaterThanEqual, Type::Bool, Type::Uint>, - - &SPIRVDecompiler::LogicalAddCarry, - - &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool2, Type::HalfFloat>, - // TODO(Rodrigo): Should these use the OpFUnord* variants? 
- &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool2, Type::HalfFloat>, - - &SPIRVDecompiler::Texture, - &SPIRVDecompiler::TextureLod, - &SPIRVDecompiler::TextureGather, - &SPIRVDecompiler::TextureQueryDimensions, - &SPIRVDecompiler::TextureQueryLod, - &SPIRVDecompiler::TexelFetch, - &SPIRVDecompiler::TextureGradient, - - &SPIRVDecompiler::ImageLoad, - &SPIRVDecompiler::ImageStore, - &SPIRVDecompiler::AtomicImage<&Module::OpAtomicIAdd>, - &SPIRVDecompiler::AtomicImage<&Module::OpAtomicAnd>, - &SPIRVDecompiler::AtomicImage<&Module::OpAtomicOr>, - &SPIRVDecompiler::AtomicImage<&Module::OpAtomicXor>, - &SPIRVDecompiler::AtomicImage<&Module::OpAtomicExchange>, - - &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicUMin>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicUMax>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicOr>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicXor>, - - &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicSMin>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicSMax>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicOr>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicXor>, - - &SPIRVDecompiler::Reduce<&Module::OpAtomicIAdd>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicUMin>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicUMax>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicAnd>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicOr>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicXor>, - - &SPIRVDecompiler::Reduce<&Module::OpAtomicIAdd>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicSMin>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicSMax>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicAnd>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicOr>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicXor>, - - &SPIRVDecompiler::Branch, - &SPIRVDecompiler::BranchIndirect, - &SPIRVDecompiler::PushFlowStack, - &SPIRVDecompiler::PopFlowStack, - &SPIRVDecompiler::Exit, - &SPIRVDecompiler::Discard, - - &SPIRVDecompiler::EmitVertex, - &SPIRVDecompiler::EndPrimitive, - - &SPIRVDecompiler::InvocationId, - &SPIRVDecompiler::YNegate, - &SPIRVDecompiler::LocalInvocationId<0>, - &SPIRVDecompiler::LocalInvocationId<1>, - &SPIRVDecompiler::LocalInvocationId<2>, - &SPIRVDecompiler::WorkGroupId<0>, - &SPIRVDecompiler::WorkGroupId<1>, - &SPIRVDecompiler::WorkGroupId<2>, - - &SPIRVDecompiler::BallotThread, - &SPIRVDecompiler::Vote<&Module::OpSubgroupAllKHR>, - &SPIRVDecompiler::Vote<&Module::OpSubgroupAnyKHR>, - &SPIRVDecompiler::Vote<&Module::OpSubgroupAllEqualKHR>, - - &SPIRVDecompiler::ThreadId, - &SPIRVDecompiler::ThreadMask<0>, // Eq - &SPIRVDecompiler::ThreadMask<1>, // Ge - &SPIRVDecompiler::ThreadMask<2>, // Gt - &SPIRVDecompiler::ThreadMask<3>, // Le - &SPIRVDecompiler::ThreadMask<4>, // Lt - &SPIRVDecompiler::ShuffleIndexed, - - &SPIRVDecompiler::Barrier, - &SPIRVDecompiler::MemoryBarrier<spv::Scope::Workgroup>, - 
&SPIRVDecompiler::MemoryBarrier<spv::Scope::Device>, - }; - static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); - - const Device& device; - const ShaderIR& ir; - const ShaderType stage; - const Tegra::Shader::Header header; - const Registry& registry; - const Specialization& specialization; - std::unordered_map<u8, VaryingTFB> transform_feedback; - - const Id t_void = Name(TypeVoid(), "void"); - - const Id t_bool = Name(TypeBool(), "bool"); - const Id t_bool2 = Name(TypeVector(t_bool, 2), "bool2"); - - const Id t_int = Name(TypeInt(32, true), "int"); - const Id t_int2 = Name(TypeVector(t_int, 2), "int2"); - const Id t_int3 = Name(TypeVector(t_int, 3), "int3"); - const Id t_int4 = Name(TypeVector(t_int, 4), "int4"); - - const Id t_uint = Name(TypeInt(32, false), "uint"); - const Id t_uint2 = Name(TypeVector(t_uint, 2), "uint2"); - const Id t_uint3 = Name(TypeVector(t_uint, 3), "uint3"); - const Id t_uint4 = Name(TypeVector(t_uint, 4), "uint4"); - - const Id t_float = Name(TypeFloat(32), "float"); - const Id t_float2 = Name(TypeVector(t_float, 2), "float2"); - const Id t_float3 = Name(TypeVector(t_float, 3), "float3"); - const Id t_float4 = Name(TypeVector(t_float, 4), "float4"); - - const Id t_prv_bool = Name(TypePointer(spv::StorageClass::Private, t_bool), "prv_bool"); - const Id t_prv_float = Name(TypePointer(spv::StorageClass::Private, t_float), "prv_float"); - - const Id t_func_uint = Name(TypePointer(spv::StorageClass::Function, t_uint), "func_uint"); - - const Id t_in_bool = Name(TypePointer(spv::StorageClass::Input, t_bool), "in_bool"); - const Id t_in_int = Name(TypePointer(spv::StorageClass::Input, t_int), "in_int"); - const Id t_in_int4 = Name(TypePointer(spv::StorageClass::Input, t_int4), "in_int4"); - const Id t_in_uint = Name(TypePointer(spv::StorageClass::Input, t_uint), "in_uint"); - const Id t_in_uint3 = Name(TypePointer(spv::StorageClass::Input, t_uint3), "in_uint3"); - const Id t_in_uint4 = Name(TypePointer(spv::StorageClass::Input, t_uint4), "in_uint4"); - const Id t_in_float = Name(TypePointer(spv::StorageClass::Input, t_float), "in_float"); - const Id t_in_float2 = Name(TypePointer(spv::StorageClass::Input, t_float2), "in_float2"); - const Id t_in_float3 = Name(TypePointer(spv::StorageClass::Input, t_float3), "in_float3"); - const Id t_in_float4 = Name(TypePointer(spv::StorageClass::Input, t_float4), "in_float4"); - - const Id t_out_int = Name(TypePointer(spv::StorageClass::Output, t_int), "out_int"); - - const Id t_out_float = Name(TypePointer(spv::StorageClass::Output, t_float), "out_float"); - const Id t_out_float4 = Name(TypePointer(spv::StorageClass::Output, t_float4), "out_float4"); - - const Id t_cbuf_float = TypePointer(spv::StorageClass::Uniform, t_float); - const Id t_cbuf_std140 = Decorate( - Name(TypeArray(t_float4, Constant(t_uint, MaxConstBufferElements)), "CbufStd140Array"), - spv::Decoration::ArrayStride, 16U); - const Id t_cbuf_scalar = Decorate( - Name(TypeArray(t_float, Constant(t_uint, MaxConstBufferFloats)), "CbufScalarArray"), - spv::Decoration::ArrayStride, 4U); - const Id t_cbuf_std140_struct = MemberDecorate( - Decorate(TypeStruct(t_cbuf_std140), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); - const Id t_cbuf_scalar_struct = MemberDecorate( - Decorate(TypeStruct(t_cbuf_scalar), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); - const Id t_cbuf_std140_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_std140_struct); - const Id t_cbuf_scalar_ubo = 
TypePointer(spv::StorageClass::Uniform, t_cbuf_scalar_struct); - - Id t_smem_uint{}; - - const Id t_gmem_uint = TypePointer(spv::StorageClass::StorageBuffer, t_uint); - const Id t_gmem_array = - Name(Decorate(TypeRuntimeArray(t_uint), spv::Decoration::ArrayStride, 4U), "GmemArray"); - const Id t_gmem_struct = MemberDecorate( - Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); - const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct); - - const Id t_image_uint = TypePointer(spv::StorageClass::Image, t_uint); - - const Id v_float_zero = Constant(t_float, 0.0f); - const Id v_float_one = Constant(t_float, 1.0f); - const Id v_uint_zero = Constant(t_uint, 0); - - // Nvidia uses these defaults for varyings (e.g. position and generic attributes) - const Id v_varying_default = - ConstantComposite(t_float4, v_float_zero, v_float_zero, v_float_zero, v_float_one); - - const Id v_true = ConstantTrue(t_bool); - const Id v_false = ConstantFalse(t_bool); - - Id t_scalar_half{}; - Id t_half{}; - - Id out_vertex{}; - Id in_vertex{}; - std::map<u32, Id> registers; - std::map<u32, Id> custom_variables; - std::map<Tegra::Shader::Pred, Id> predicates; - std::map<u32, Id> flow_variables; - Id local_memory{}; - Id shared_memory{}; - std::array<Id, INTERNAL_FLAGS_COUNT> internal_flags{}; - std::map<Attribute::Index, Id> input_attributes; - std::unordered_map<u8, GenericVaryingDescription> output_attributes; - std::map<u32, Id> constant_buffers; - std::map<GlobalMemoryBase, Id> global_buffers; - std::map<u32, TexelBuffer> uniform_texels; - std::map<u32, SampledImage> sampled_images; - std::map<u32, StorageImage> images; - - std::array<Id, Maxwell::NumRenderTargets> frag_colors{}; - Id instance_index{}; - Id vertex_index{}; - Id base_instance{}; - Id base_vertex{}; - Id frag_depth{}; - Id frag_coord{}; - Id front_facing{}; - Id point_coord{}; - Id tess_level_outer{}; - Id tess_level_inner{}; - Id tess_coord{}; - Id invocation_id{}; - Id workgroup_id{}; - Id local_invocation_id{}; - Id thread_id{}; - std::array<Id, 5> thread_masks{}; // eq, ge, gt, le, lt - - VertexIndices in_indices; - VertexIndices out_indices; - - std::vector<Id> interfaces; - - Id jmp_to{}; - Id ssy_flow_stack_top{}; - Id pbk_flow_stack_top{}; - Id ssy_flow_stack{}; - Id pbk_flow_stack{}; - Id continue_label{}; - std::map<u32, Id> labels; - - bool conditional_branch_set{}; - bool inside_branch{}; -}; - -class ExprDecompiler { -public: - explicit ExprDecompiler(SPIRVDecompiler& decomp_) : decomp{decomp_} {} - - Id operator()(const ExprAnd& expr) { - const Id type_def = decomp.GetTypeDefinition(Type::Bool); - const Id op1 = Visit(expr.operand1); - const Id op2 = Visit(expr.operand2); - return decomp.OpLogicalAnd(type_def, op1, op2); - } - - Id operator()(const ExprOr& expr) { - const Id type_def = decomp.GetTypeDefinition(Type::Bool); - const Id op1 = Visit(expr.operand1); - const Id op2 = Visit(expr.operand2); - return decomp.OpLogicalOr(type_def, op1, op2); - } - - Id operator()(const ExprNot& expr) { - const Id type_def = decomp.GetTypeDefinition(Type::Bool); - const Id op1 = Visit(expr.operand1); - return decomp.OpLogicalNot(type_def, op1); - } - - Id operator()(const ExprPredicate& expr) { - const auto pred = static_cast<Tegra::Shader::Pred>(expr.predicate); - return decomp.OpLoad(decomp.t_bool, decomp.predicates.at(pred)); - } - - Id operator()(const ExprCondCode& expr) { - return decomp.AsBool(decomp.Visit(decomp.ir.GetConditionCode(expr.cc))); - } - - Id operator()(const 
ExprVar& expr) { - return decomp.OpLoad(decomp.t_bool, decomp.flow_variables.at(expr.var_index)); - } - - Id operator()(const ExprBoolean& expr) { - return expr.value ? decomp.v_true : decomp.v_false; - } - - Id operator()(const ExprGprEqual& expr) { - const Id target = decomp.Constant(decomp.t_uint, expr.value); - Id gpr = decomp.OpLoad(decomp.t_float, decomp.registers.at(expr.gpr)); - gpr = decomp.OpBitcast(decomp.t_uint, gpr); - return decomp.OpIEqual(decomp.t_bool, gpr, target); - } - - Id Visit(const Expr& node) { - return std::visit(*this, *node); - } - -private: - SPIRVDecompiler& decomp; -}; - -class ASTDecompiler { -public: - explicit ASTDecompiler(SPIRVDecompiler& decomp_) : decomp{decomp_} {} - - void operator()(const ASTProgram& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(const ASTIfThen& ast) { - ExprDecompiler expr_parser{decomp}; - const Id condition = expr_parser.Visit(ast.condition); - const Id then_label = decomp.OpLabel(); - const Id endif_label = decomp.OpLabel(); - decomp.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone); - decomp.OpBranchConditional(condition, then_label, endif_label); - decomp.AddLabel(then_label); - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - decomp.OpBranch(endif_label); - decomp.AddLabel(endif_label); - } - - void operator()([[maybe_unused]] const ASTIfElse& ast) { - UNREACHABLE(); - } - - void operator()([[maybe_unused]] const ASTBlockEncoded& ast) { - UNREACHABLE(); - } - - void operator()(const ASTBlockDecoded& ast) { - decomp.VisitBasicBlock(ast.nodes); - } - - void operator()(const ASTVarSet& ast) { - ExprDecompiler expr_parser{decomp}; - const Id condition = expr_parser.Visit(ast.condition); - decomp.OpStore(decomp.flow_variables.at(ast.index), condition); - } - - void operator()([[maybe_unused]] const ASTLabel& ast) { - // Do nothing - } - - void operator()([[maybe_unused]] const ASTGoto& ast) { - UNREACHABLE(); - } - - void operator()(const ASTDoWhile& ast) { - const Id loop_label = decomp.OpLabel(); - const Id endloop_label = decomp.OpLabel(); - const Id loop_start_block = decomp.OpLabel(); - const Id loop_continue_block = decomp.OpLabel(); - current_loop_exit = endloop_label; - decomp.OpBranch(loop_label); - decomp.AddLabel(loop_label); - decomp.OpLoopMerge(endloop_label, loop_continue_block, spv::LoopControlMask::MaskNone); - decomp.OpBranch(loop_start_block); - decomp.AddLabel(loop_start_block); - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - decomp.OpBranch(loop_continue_block); - decomp.AddLabel(loop_continue_block); - ExprDecompiler expr_parser{decomp}; - const Id condition = expr_parser.Visit(ast.condition); - decomp.OpBranchConditional(condition, loop_label, endloop_label); - decomp.AddLabel(endloop_label); - } - - void operator()(const ASTReturn& ast) { - if (!VideoCommon::Shader::ExprIsTrue(ast.condition)) { - ExprDecompiler expr_parser{decomp}; - const Id condition = expr_parser.Visit(ast.condition); - const Id then_label = decomp.OpLabel(); - const Id endif_label = decomp.OpLabel(); - decomp.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone); - decomp.OpBranchConditional(condition, then_label, endif_label); - decomp.AddLabel(then_label); - if (ast.kills) { - decomp.OpKill(); - } else { - decomp.PreExit(); - decomp.OpReturn(); - } - 
decomp.AddLabel(endif_label); - } else { - const Id next_block = decomp.OpLabel(); - decomp.OpBranch(next_block); - decomp.AddLabel(next_block); - if (ast.kills) { - decomp.OpKill(); - } else { - decomp.PreExit(); - decomp.OpReturn(); - } - decomp.AddLabel(decomp.OpLabel()); - } - } - - void operator()(const ASTBreak& ast) { - if (!VideoCommon::Shader::ExprIsTrue(ast.condition)) { - ExprDecompiler expr_parser{decomp}; - const Id condition = expr_parser.Visit(ast.condition); - const Id then_label = decomp.OpLabel(); - const Id endif_label = decomp.OpLabel(); - decomp.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone); - decomp.OpBranchConditional(condition, then_label, endif_label); - decomp.AddLabel(then_label); - decomp.OpBranch(current_loop_exit); - decomp.AddLabel(endif_label); - } else { - const Id next_block = decomp.OpLabel(); - decomp.OpBranch(next_block); - decomp.AddLabel(next_block); - decomp.OpBranch(current_loop_exit); - decomp.AddLabel(decomp.OpLabel()); - } - } - - void Visit(const ASTNode& node) { - std::visit(*this, *node->GetInnerData()); - } - -private: - SPIRVDecompiler& decomp; - Id current_loop_exit{}; -}; - -void SPIRVDecompiler::DecompileAST() { - const u32 num_flow_variables = ir.GetASTNumVariables(); - for (u32 i = 0; i < num_flow_variables; i++) { - const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); - Name(id, fmt::format("flow_var_{}", i)); - flow_variables.emplace(i, AddGlobalVariable(id)); - } - - DefinePrologue(); - - const ASTNode program = ir.GetASTProgram(); - ASTDecompiler decompiler{*this}; - decompiler.Visit(program); - - const Id next_block = OpLabel(); - OpBranch(next_block); - AddLabel(next_block); -} - -} // Anonymous namespace - -ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) { - ShaderEntries entries; - for (const auto& cbuf : ir.GetConstantBuffers()) { - entries.const_buffers.emplace_back(cbuf.second, cbuf.first); - } - for (const auto& [base, usage] : ir.GetGlobalMemory()) { - entries.global_buffers.emplace_back(GlobalBufferEntry{ - .cbuf_index = base.cbuf_index, - .cbuf_offset = base.cbuf_offset, - .is_written = usage.is_written, - }); - } - for (const auto& sampler : ir.GetSamplers()) { - if (sampler.is_buffer) { - entries.uniform_texels.emplace_back(sampler); - } else { - entries.samplers.emplace_back(sampler); - } - } - for (const auto& image : ir.GetImages()) { - if (image.type == Tegra::Shader::ImageType::TextureBuffer) { - entries.storage_texels.emplace_back(image); - } else { - entries.images.emplace_back(image); - } - } - for (const auto& attribute : ir.GetInputAttributes()) { - if (IsGenericAttribute(attribute)) { - entries.attributes.insert(GetGenericAttributeLocation(attribute)); - } - } - for (const auto& buffer : entries.const_buffers) { - entries.enabled_uniform_buffers |= 1U << buffer.GetIndex(); - } - entries.clip_distances = ir.GetClipDistances(); - entries.shader_length = ir.GetLength(); - entries.uses_warps = ir.UsesWarps(); - return entries; -} - -std::vector<u32> Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, - ShaderType stage, const VideoCommon::Shader::Registry& registry, - const Specialization& specialization) { - return SPIRVDecompiler(device, ir, stage, registry, specialization).Assemble(); -} - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h deleted file mode 100644 index 5d94132a5..000000000 --- 
a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ /dev/null @@ -1,99 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <array> -#include <set> -#include <vector> - -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -namespace Vulkan { - -class Device; - -using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using UniformTexelEntry = VideoCommon::Shader::SamplerEntry; -using SamplerEntry = VideoCommon::Shader::SamplerEntry; -using StorageTexelEntry = VideoCommon::Shader::ImageEntry; -using ImageEntry = VideoCommon::Shader::ImageEntry; - -constexpr u32 DESCRIPTOR_SET = 0; - -class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer { -public: - explicit constexpr ConstBufferEntry(const ConstBuffer& entry_, u32 index_) - : ConstBuffer{entry_}, index{index_} {} - - constexpr u32 GetIndex() const { - return index; - } - -private: - u32 index{}; -}; - -struct GlobalBufferEntry { - u32 cbuf_index{}; - u32 cbuf_offset{}; - bool is_written{}; -}; - -struct ShaderEntries { - u32 NumBindings() const { - return static_cast<u32>(const_buffers.size() + global_buffers.size() + - uniform_texels.size() + samplers.size() + storage_texels.size() + - images.size()); - } - - std::vector<ConstBufferEntry> const_buffers; - std::vector<GlobalBufferEntry> global_buffers; - std::vector<UniformTexelEntry> uniform_texels; - std::vector<SamplerEntry> samplers; - std::vector<StorageTexelEntry> storage_texels; - std::vector<ImageEntry> images; - std::set<u32> attributes; - std::array<bool, Maxwell::NumClipDistances> clip_distances{}; - std::size_t shader_length{}; - u32 enabled_uniform_buffers{}; - bool uses_warps{}; -}; - -struct Specialization final { - u32 base_binding{}; - - // Compute specific - std::array<u32, 3> workgroup_size{}; - u32 shared_memory_size{}; - - // Graphics specific - std::optional<float> point_size; - std::bitset<Maxwell::NumVertexAttributes> enabled_attributes; - std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{}; - bool ndc_minus_one_to_one{}; - bool early_fragment_tests{}; - float alpha_test_ref{}; - Maxwell::ComparisonOp alpha_test_func{}; -}; -// Old gcc versions don't consider this trivially copyable. 
-// static_assert(std::is_trivially_copyable_v<Specialization>); - -struct SPIRVShader { - std::vector<u32> code; - ShaderEntries entries; -}; - -ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir); - -std::vector<u32> Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, - Tegra::Engines::ShaderType stage, - const VideoCommon::Shader::Registry& registry, - const Specialization& specialization); - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index 0412b5234..555b12ed7 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp @@ -91,7 +91,7 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem .flags = 0, .size = STREAM_BUFFER_SIZE, .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | - VK_BUFFER_USAGE_INDEX_BUFFER_BIT, + VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index 956f86845..e3b7dd61c 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp @@ -29,9 +29,10 @@ using Flags = Maxwell3D::DirtyState::Flags; Flags MakeInvalidationFlags() { static constexpr int INVALIDATION_FLAGS[]{ - Viewports, Scissors, DepthBias, BlendConstants, DepthBounds, - StencilProperties, CullMode, DepthBoundsEnable, DepthTestEnable, DepthWriteEnable, - DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, VertexBuffers, + Viewports, Scissors, DepthBias, BlendConstants, DepthBounds, + StencilProperties, LineWidth, CullMode, DepthBoundsEnable, DepthTestEnable, + DepthWriteEnable, DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, + VertexBuffers, VertexInput, }; Flags flags{}; for (const int flag : INVALIDATION_FLAGS) { @@ -40,6 +41,12 @@ Flags MakeInvalidationFlags() { for (int index = VertexBuffer0; index <= VertexBuffer31; ++index) { flags[index] = true; } + for (int index = VertexAttribute0; index <= VertexAttribute31; ++index) { + flags[index] = true; + } + for (int index = VertexBinding0; index <= VertexBinding31; ++index) { + flags[index] = true; + } return flags; } @@ -79,6 +86,11 @@ void SetupDirtyStencilProperties(Tables& tables) { table[OFF(stencil_back_func_mask)] = StencilProperties; } +void SetupDirtyLineWidth(Tables& tables) { + tables[0][OFF(line_width_smooth)] = LineWidth; + tables[0][OFF(line_width_aliased)] = LineWidth; +} + void SetupDirtyCullMode(Tables& tables) { auto& table = tables[0]; table[OFF(cull_face)] = CullMode; @@ -134,31 +146,38 @@ void SetupDirtyBlending(Tables& tables) { FillBlock(tables[0], OFF(independent_blend), NUM(independent_blend), Blending); } -void SetupDirtyInstanceDivisors(Tables& tables) { - static constexpr size_t divisor_offset = 3; - for (size_t index = 0; index < Regs::NumVertexArrays; ++index) { - tables[0][OFF(instanced_arrays) + index] = InstanceDivisors; - tables[0][OFF(vertex_array) + index * NUM(vertex_array[0]) + divisor_offset] = - InstanceDivisors; +void SetupDirtyViewportSwizzles(Tables& tables) { + static constexpr size_t swizzle_offset = 6; + for (size_t index = 0; index < Regs::NumViewports; ++index) { + tables[0][OFF(viewport_transform) + index * 
NUM(viewport_transform[0]) + swizzle_offset] = + ViewportSwizzles; } } void SetupDirtyVertexAttributes(Tables& tables) { - FillBlock(tables[0], OFF(vertex_attrib_format), NUM(vertex_attrib_format), VertexAttributes); + for (size_t i = 0; i < Regs::NumVertexAttributes; ++i) { + const size_t offset = OFF(vertex_attrib_format) + i * NUM(vertex_attrib_format[0]); + FillBlock(tables[0], offset, NUM(vertex_attrib_format[0]), VertexAttribute0 + i); + } + FillBlock(tables[1], OFF(vertex_attrib_format), Regs::NumVertexAttributes, VertexInput); } -void SetupDirtyViewportSwizzles(Tables& tables) { - static constexpr size_t swizzle_offset = 6; - for (size_t index = 0; index < Regs::NumViewports; ++index) { - tables[0][OFF(viewport_transform) + index * NUM(viewport_transform[0]) + swizzle_offset] = - ViewportSwizzles; +void SetupDirtyVertexBindings(Tables& tables) { + // Do NOT include stride here, it's implicit in VertexBuffer + static constexpr size_t divisor_offset = 3; + for (size_t i = 0; i < Regs::NumVertexArrays; ++i) { + const u8 flag = static_cast<u8>(VertexBinding0 + i); + tables[0][OFF(instanced_arrays) + i] = VertexInput; + tables[1][OFF(instanced_arrays) + i] = flag; + tables[0][OFF(vertex_array) + i * NUM(vertex_array[0]) + divisor_offset] = VertexInput; + tables[1][OFF(vertex_array) + i * NUM(vertex_array[0]) + divisor_offset] = flag; } } } // Anonymous namespace StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} { - auto& tables = gpu.Maxwell3D().dirty.tables; + auto& tables{gpu.Maxwell3D().dirty.tables}; SetupDirtyFlags(tables); SetupDirtyViewports(tables); SetupDirtyScissors(tables); @@ -166,6 +185,7 @@ StateTracker::StateTracker(Tegra::GPU& gpu) SetupDirtyBlendConstants(tables); SetupDirtyDepthBounds(tables); SetupDirtyStencilProperties(tables); + SetupDirtyLineWidth(tables); SetupDirtyCullMode(tables); SetupDirtyDepthBoundsEnable(tables); SetupDirtyDepthTestEnable(tables); @@ -175,9 +195,9 @@ StateTracker::StateTracker(Tegra::GPU& gpu) SetupDirtyStencilOp(tables); SetupDirtyStencilTestEnable(tables); SetupDirtyBlending(tables); - SetupDirtyInstanceDivisors(tables); - SetupDirtyVertexAttributes(tables); SetupDirtyViewportSwizzles(tables); + SetupDirtyVertexAttributes(tables); + SetupDirtyVertexBindings(tables); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h index 84e918a71..5f78f6950 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.h +++ b/src/video_core/renderer_vulkan/vk_state_tracker.h @@ -19,12 +19,19 @@ namespace Dirty { enum : u8 { First = VideoCommon::Dirty::LastCommonEntry, + VertexInput, + VertexAttribute0, + VertexAttribute31 = VertexAttribute0 + 31, + VertexBinding0, + VertexBinding31 = VertexBinding0 + 31, + Viewports, Scissors, DepthBias, BlendConstants, DepthBounds, StencilProperties, + LineWidth, CullMode, DepthBoundsEnable, @@ -36,11 +43,9 @@ enum : u8 { StencilTestEnable, Blending, - InstanceDivisors, - VertexAttributes, ViewportSwizzles, - Last + Last, }; static_assert(Last <= std::numeric_limits<u8>::max()); @@ -89,6 +94,10 @@ public: return Exchange(Dirty::StencilProperties, false); } + bool TouchLineWidth() const { + return Exchange(Dirty::LineWidth, false); + } + bool TouchCullMode() { return Exchange(Dirty::CullMode, false); } diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index dfd5c65ba..d990eefba 100644 --- 
a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -65,6 +65,9 @@ VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKSchedul VKSwapchain::~VKSwapchain() = default; void VKSwapchain::Create(u32 width, u32 height, bool srgb) { + is_outdated = false; + is_suboptimal = false; + const auto physical_device = device.GetPhysical(); const auto capabilities{physical_device.GetSurfaceCapabilitiesKHR(surface)}; if (capabilities.maxImageExtent.width == 0 || capabilities.maxImageExtent.height == 0) { @@ -82,21 +85,31 @@ void VKSwapchain::Create(u32 width, u32 height, bool srgb) { resource_ticks.resize(image_count); } -bool VKSwapchain::AcquireNextImage() { - const VkResult result = - device.GetLogical().AcquireNextImageKHR(*swapchain, std::numeric_limits<u64>::max(), - *present_semaphores[frame_index], {}, &image_index); - +void VKSwapchain::AcquireNextImage() { + const VkResult result = device.GetLogical().AcquireNextImageKHR( + *swapchain, std::numeric_limits<u64>::max(), *present_semaphores[frame_index], + VK_NULL_HANDLE, &image_index); + switch (result) { + case VK_SUCCESS: + break; + case VK_SUBOPTIMAL_KHR: + is_suboptimal = true; + break; + case VK_ERROR_OUT_OF_DATE_KHR: + is_outdated = true; + break; + default: + LOG_ERROR(Render_Vulkan, "vkAcquireNextImageKHR returned {}", vk::ToString(result)); + break; + } scheduler.Wait(resource_ticks[image_index]); - return result == VK_SUCCESS || result == VK_SUBOPTIMAL_KHR; + resource_ticks[image_index] = scheduler.CurrentTick(); } -bool VKSwapchain::Present(VkSemaphore render_semaphore) { +void VKSwapchain::Present(VkSemaphore render_semaphore) { const VkSemaphore present_semaphore{*present_semaphores[frame_index]}; const std::array<VkSemaphore, 2> semaphores{present_semaphore, render_semaphore}; const auto present_queue{device.GetPresentQueue()}; - bool recreated = false; - const VkPresentInfoKHR present_info{ .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, .pNext = nullptr, @@ -107,7 +120,6 @@ bool VKSwapchain::Present(VkSemaphore render_semaphore) { .pImageIndices = &image_index, .pResults = nullptr, }; - switch (const VkResult result = present_queue.Present(present_info)) { case VK_SUCCESS: break; @@ -115,24 +127,16 @@ bool VKSwapchain::Present(VkSemaphore render_semaphore) { LOG_DEBUG(Render_Vulkan, "Suboptimal swapchain"); break; case VK_ERROR_OUT_OF_DATE_KHR: - if (current_width > 0 && current_height > 0) { - Create(current_width, current_height, current_srgb); - recreated = true; - } + is_outdated = true; break; default: LOG_CRITICAL(Render_Vulkan, "Failed to present with error {}", vk::ToString(result)); break; } - - resource_ticks[image_index] = scheduler.CurrentTick(); - frame_index = (frame_index + 1) % static_cast<u32>(image_count); - return recreated; -} - -bool VKSwapchain::HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const { - // TODO(Rodrigo): Handle framebuffer pixel format changes - return framebuffer.width != current_width || framebuffer.height != current_height; + ++frame_index; + if (frame_index >= image_count) { + frame_index = 0; + } } void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, @@ -148,7 +152,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, if (capabilities.maxImageCount > 0 && requested_image_count > capabilities.maxImageCount) { requested_image_count = capabilities.maxImageCount; } - VkSwapchainCreateInfoKHR swapchain_ci{ .sType = 
VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR, .pNext = nullptr, @@ -169,7 +172,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, .clipped = VK_FALSE, .oldSwapchain = nullptr, }; - const u32 graphics_family{device.GetGraphicsFamily()}; const u32 present_family{device.GetPresentFamily()}; const std::array<u32, 2> queue_indices{graphics_family, present_family}; @@ -178,7 +180,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size()); swapchain_ci.pQueueFamilyIndices = queue_indices.data(); } - // Request the size again to reduce the possibility of a TOCTOU race condition. const auto updated_capabilities = physical_device.GetSurfaceCapabilitiesKHR(surface); swapchain_ci.imageExtent = ChooseSwapExtent(updated_capabilities, width, height); @@ -186,8 +187,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, swapchain = device.GetLogical().CreateSwapchainKHR(swapchain_ci); extent = swapchain_ci.imageExtent; - current_width = extent.width; - current_height = extent.height; current_srgb = srgb; images = swapchain.GetImages(); @@ -197,8 +196,8 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, void VKSwapchain::CreateSemaphores() { present_semaphores.resize(image_count); - std::generate(present_semaphores.begin(), present_semaphores.end(), - [this] { return device.GetLogical().CreateSemaphore(); }); + std::ranges::generate(present_semaphores, + [this] { return device.GetLogical().CreateSemaphore(); }); } void VKSwapchain::CreateImageViews() { diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index adc8d27cf..35c2cdc14 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h @@ -28,14 +28,25 @@ public: void Create(u32 width, u32 height, bool srgb); /// Acquires the next image in the swapchain, waits as needed. - bool AcquireNextImage(); + void AcquireNextImage(); - /// Presents the rendered image to the swapchain. Returns true when the swapchains had to be - /// recreated. Takes responsability for the ownership of fence. - bool Present(VkSemaphore render_semaphore); + /// Presents the rendered image to the swapchain. + void Present(VkSemaphore render_semaphore); - /// Returns true when the framebuffer layout has changed. - bool HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const; + /// Returns true when the color space has changed. + bool HasColorSpaceChanged(bool is_srgb) const { + return current_srgb != is_srgb; + } + + /// Returns true when the swapchain is outdated. + bool IsOutDated() const { + return is_outdated; + } + + /// Returns true when the swapchain is suboptimal. 
+ bool IsSubOptimal() const { + return is_suboptimal; + } VkExtent2D GetSize() const { return extent; @@ -61,10 +72,6 @@ public: return image_format; } - bool GetSrgbState() const { - return current_srgb; - } - private: void CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height, bool srgb); @@ -92,9 +99,9 @@ private: VkFormat image_format{}; VkExtent2D extent{}; - u32 current_width{}; - u32 current_height{}; bool current_srgb{}; + bool is_outdated{}; + bool is_suboptimal{}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 88ccf96f5..8e029bcb3 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -15,6 +15,7 @@ #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_compute_pass.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" +#include "video_core/renderer_vulkan/vk_render_pass_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" @@ -34,19 +35,6 @@ using VideoCommon::SubresourceRange; using VideoCore::Surface::IsPixelFormatASTC; namespace { - -constexpr std::array ATTACHMENT_REFERENCES{ - VkAttachmentReference{0, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{1, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{2, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{3, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{4, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{5, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{6, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{7, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{8, VK_IMAGE_LAYOUT_GENERAL}, -}; - constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { if (color == std::array<float, 4>{0, 0, 0, 0}) { return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; @@ -174,25 +162,6 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { return device.GetLogical().CreateImage(MakeImageCreateInfo(device, info)); } -[[nodiscard]] vk::Buffer MakeBuffer(const Device& device, const ImageInfo& info) { - if (info.type != ImageType::Buffer) { - return vk::Buffer{}; - } - const size_t bytes_per_block = VideoCore::Surface::BytesPerBlock(info.format); - return device.GetLogical().CreateBuffer(VkBufferCreateInfo{ - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .size = info.size.width * bytes_per_block, - .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | - VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | - VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = 0, - .pQueueFamilyIndices = nullptr, - }); -} - [[nodiscard]] VkImageAspectFlags ImageAspectMask(PixelFormat format) { switch (VideoCore::Surface::GetFormatType(format)) { case VideoCore::Surface::SurfaceType::ColorTexture: @@ -226,23 +195,6 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { } } -[[nodiscard]] VkAttachmentDescription AttachmentDescription(const Device& device, - const ImageView* image_view) { - using MaxwellToVK::SurfaceFormat; - const PixelFormat pixel_format = image_view->format; - return VkAttachmentDescription{ - .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT, - .format = SurfaceFormat(device, 
FormatType::Optimal, true, pixel_format).format, - .samples = image_view->Samples(), - .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, - .storeOp = VK_ATTACHMENT_STORE_OP_STORE, - .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, - .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, - .initialLayout = VK_IMAGE_LAYOUT_GENERAL, - .finalLayout = VK_IMAGE_LAYOUT_GENERAL, - }; -} - [[nodiscard]] VkComponentSwizzle ComponentSwizzle(SwizzleSource swizzle) { switch (swizzle) { case SwizzleSource::Zero: @@ -263,6 +215,30 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { return VK_COMPONENT_SWIZZLE_ZERO; } +[[nodiscard]] VkImageViewType ImageViewType(Shader::TextureType type) { + switch (type) { + case Shader::TextureType::Color1D: + return VK_IMAGE_VIEW_TYPE_1D; + case Shader::TextureType::Color2D: + return VK_IMAGE_VIEW_TYPE_2D; + case Shader::TextureType::ColorCube: + return VK_IMAGE_VIEW_TYPE_CUBE; + case Shader::TextureType::Color3D: + return VK_IMAGE_VIEW_TYPE_3D; + case Shader::TextureType::ColorArray1D: + return VK_IMAGE_VIEW_TYPE_1D_ARRAY; + case Shader::TextureType::ColorArray2D: + return VK_IMAGE_VIEW_TYPE_2D_ARRAY; + case Shader::TextureType::ColorArrayCube: + return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY; + case Shader::TextureType::Buffer: + UNREACHABLE_MSG("Texture buffers can't be image views"); + return VK_IMAGE_VIEW_TYPE_1D; + } + UNREACHABLE_MSG("Invalid image view type={}", type); + return VK_IMAGE_VIEW_TYPE_2D; +} + [[nodiscard]] VkImageViewType ImageViewType(VideoCommon::ImageViewType type) { switch (type) { case VideoCommon::ImageViewType::e1D: @@ -280,7 +256,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { case VideoCommon::ImageViewType::CubeArray: return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY; case VideoCommon::ImageViewType::Rect: - LOG_WARNING(Render_Vulkan, "Unnormalized image view type not supported"); + UNIMPLEMENTED_MSG("Rect image view"); return VK_IMAGE_VIEW_TYPE_2D; case VideoCommon::ImageViewType::Buffer: UNREACHABLE_MSG("Texture buffers can't be image views"); @@ -327,7 +303,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { }; } -[[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies( +[[maybe_unused]] [[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies( std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) { std::vector<VkBufferCopy> result(copies.size()); std::ranges::transform( @@ -587,6 +563,28 @@ struct RangedBarrierRange { } }; +[[nodiscard]] VkFormat Format(Shader::ImageFormat format) { + switch (format) { + case Shader::ImageFormat::Typeless: + break; + case Shader::ImageFormat::R8_SINT: + return VK_FORMAT_R8_SINT; + case Shader::ImageFormat::R8_UINT: + return VK_FORMAT_R8_UINT; + case Shader::ImageFormat::R16_UINT: + return VK_FORMAT_R16_UINT; + case Shader::ImageFormat::R16_SINT: + return VK_FORMAT_R16_SINT; + case Shader::ImageFormat::R32_UINT: + return VK_FORMAT_R32_UINT; + case Shader::ImageFormat::R32G32_UINT: + return VK_FORMAT_R32G32_UINT; + case Shader::ImageFormat::R32G32B32A32_UINT: + return VK_FORMAT_R32G32B32A32_UINT; + } + UNREACHABLE_MSG("Invalid image format={}", format); + return VK_FORMAT_R32_UINT; +} } // Anonymous namespace void TextureCacheRuntime::Finish() { @@ -625,7 +623,7 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst return; } } - ASSERT(src.ImageFormat() == dst.ImageFormat()); + ASSERT(src.format == dst.format); ASSERT(!(is_dst_msaa && !is_src_msaa)); ASSERT(operation == 
Fermi2D::Operation::SrcCopy); @@ -842,13 +840,9 @@ u64 TextureCacheRuntime::GetDeviceLocalMemory() const { Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_) : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime.scheduler}, - image(MakeImage(runtime.device, info)), buffer(MakeBuffer(runtime.device, info)), + image(MakeImage(runtime.device, info)), + commit(runtime.memory_allocator.Commit(image, MemoryUsage::DeviceLocal)), aspect_mask(ImageAspectMask(info.format)) { - if (image) { - commit = runtime.memory_allocator.Commit(image, MemoryUsage::DeviceLocal); - } else { - commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); - } if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) { if (Settings::values.accelerate_astc.GetValue()) { flags |= VideoCommon::ImageFlagBits::AcceleratedUpload; @@ -857,11 +851,7 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_ } } if (runtime.device.HasDebuggingToolAttached()) { - if (image) { - image.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); - } else { - buffer.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); - } + image.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); } static constexpr VkImageViewUsageCreateInfo storage_image_view_usage_create_info{ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO, @@ -913,19 +903,6 @@ void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImag }); } -void Image::UploadMemory(const StagingBufferRef& map, - std::span<const VideoCommon::BufferCopy> copies) { - // TODO: Move this to another API - scheduler->RequestOutsideRenderPassOperationContext(); - std::vector vk_copies = TransformBufferCopies(copies, map.offset); - const VkBuffer src_buffer = map.buffer; - const VkBuffer dst_buffer = *buffer; - scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) { - // TODO: Barriers - cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies); - }); -} - void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) { std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask); scheduler->RequestOutsideRenderPassOperationContext(); @@ -984,8 +961,9 @@ void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferIm ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, ImageId image_id_, Image& image) : VideoCommon::ImageViewBase{info, image.info, image_id_}, device{&runtime.device}, - image_handle{image.Handle()}, image_format{image.info.format}, samples{ConvertSampleCount( - image.info.num_samples)} { + image_handle{image.Handle()}, samples{ConvertSampleCount(image.info.num_samples)} { + using Shader::TextureType; + const VkImageAspectFlags aspect_mask = ImageViewAspectMask(info); std::array<SwizzleSource, 4> swizzle{ SwizzleSource::R, @@ -1023,57 +1001,54 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI }, .subresourceRange = MakeSubresourceRange(aspect_mask, info.range), }; - const auto create = [&](VideoCommon::ImageViewType view_type, std::optional<u32> num_layers) { + const auto create = [&](TextureType tex_type, std::optional<u32> num_layers) { VkImageViewCreateInfo ci{create_info}; - ci.viewType = ImageViewType(view_type); + ci.viewType = ImageViewType(tex_type); if (num_layers) { ci.subresourceRange.layerCount = *num_layers; } vk::ImageView handle = 
device->GetLogical().CreateImageView(ci); if (device->HasDebuggingToolAttached()) { - handle.SetObjectNameEXT(VideoCommon::Name(*this, view_type).c_str()); + handle.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); } - image_views[static_cast<size_t>(view_type)] = std::move(handle); + image_views[static_cast<size_t>(tex_type)] = std::move(handle); }; switch (info.type) { case VideoCommon::ImageViewType::e1D: case VideoCommon::ImageViewType::e1DArray: - create(VideoCommon::ImageViewType::e1D, 1); - create(VideoCommon::ImageViewType::e1DArray, std::nullopt); - render_target = Handle(VideoCommon::ImageViewType::e1DArray); + create(TextureType::Color1D, 1); + create(TextureType::ColorArray1D, std::nullopt); + render_target = Handle(TextureType::ColorArray1D); break; case VideoCommon::ImageViewType::e2D: case VideoCommon::ImageViewType::e2DArray: - create(VideoCommon::ImageViewType::e2D, 1); - create(VideoCommon::ImageViewType::e2DArray, std::nullopt); - render_target = Handle(VideoCommon::ImageViewType::e2DArray); + create(TextureType::Color2D, 1); + create(TextureType::ColorArray2D, std::nullopt); + render_target = Handle(Shader::TextureType::ColorArray2D); break; case VideoCommon::ImageViewType::e3D: - create(VideoCommon::ImageViewType::e3D, std::nullopt); - render_target = Handle(VideoCommon::ImageViewType::e3D); + create(TextureType::Color3D, std::nullopt); + render_target = Handle(Shader::TextureType::Color3D); break; case VideoCommon::ImageViewType::Cube: case VideoCommon::ImageViewType::CubeArray: - create(VideoCommon::ImageViewType::Cube, 6); - create(VideoCommon::ImageViewType::CubeArray, std::nullopt); + create(TextureType::ColorCube, 6); + create(TextureType::ColorArrayCube, std::nullopt); break; case VideoCommon::ImageViewType::Rect: UNIMPLEMENTED(); break; case VideoCommon::ImageViewType::Buffer: - buffer_view = device->GetLogical().CreateBufferView(VkBufferViewCreateInfo{ - .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .buffer = image.Buffer(), - .format = format_info.format, - .offset = 0, // TODO: Redesign buffer cache to support this - .range = image.guest_size_bytes, - }); + UNREACHABLE(); break; } } +ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, + const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_) + : VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_}, + buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {} + ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams& params) : VideoCommon::ImageViewBase{params} {} @@ -1081,7 +1056,8 @@ VkImageView ImageView::DepthView() { if (depth_view) { return *depth_view; } - depth_view = MakeDepthStencilView(VK_IMAGE_ASPECT_DEPTH_BIT); + const auto& info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format); + depth_view = MakeView(info.format, VK_IMAGE_ASPECT_DEPTH_BIT); return *depth_view; } @@ -1089,18 +1065,38 @@ VkImageView ImageView::StencilView() { if (stencil_view) { return *stencil_view; } - stencil_view = MakeDepthStencilView(VK_IMAGE_ASPECT_STENCIL_BIT); + const auto& info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format); + stencil_view = MakeView(info.format, VK_IMAGE_ASPECT_STENCIL_BIT); return *stencil_view; } -vk::ImageView ImageView::MakeDepthStencilView(VkImageAspectFlags aspect_mask) { +VkImageView ImageView::StorageView(Shader::TextureType texture_type, + Shader::ImageFormat image_format) { + if (image_format == Shader::ImageFormat::Typeless) { + 
return Handle(texture_type); + } + const bool is_signed{image_format == Shader::ImageFormat::R8_SINT || + image_format == Shader::ImageFormat::R16_SINT}; + if (!storage_views) { + storage_views = std::make_unique<StorageViews>(); + } + auto& views{is_signed ? storage_views->signeds : storage_views->unsigneds}; + auto& view{views[static_cast<size_t>(texture_type)]}; + if (view) { + return *view; + } + view = MakeView(Format(image_format), VK_IMAGE_ASPECT_COLOR_BIT); + return *view; +} + +vk::ImageView ImageView::MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask) { return device->GetLogical().CreateImageView({ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .pNext = nullptr, .flags = 0, .image = image_handle, .viewType = ImageViewType(type), - .format = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format).format, + .format = vk_format, .components{ .r = VK_COMPONENT_SWIZZLE_IDENTITY, .g = VK_COMPONENT_SWIZZLE_IDENTITY, @@ -1164,7 +1160,6 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers, ImageView* depth_buffer, const VideoCommon::RenderTargets& key) { - std::vector<VkAttachmentDescription> descriptions; std::vector<VkImageView> attachments; RenderPassKey renderpass_key{}; s32 num_layers = 1; @@ -1175,7 +1170,6 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM renderpass_key.color_formats[index] = PixelFormat::Invalid; continue; } - descriptions.push_back(AttachmentDescription(runtime.device, color_buffer)); attachments.push_back(color_buffer->RenderTarget()); renderpass_key.color_formats[index] = color_buffer->format; num_layers = std::max(num_layers, color_buffer->range.extent.layers); @@ -1185,10 +1179,7 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM ++num_images; } const size_t num_colors = attachments.size(); - const VkAttachmentReference* depth_attachment = - depth_buffer ? &ATTACHMENT_REFERENCES[num_colors] : nullptr; if (depth_buffer) { - descriptions.push_back(AttachmentDescription(runtime.device, depth_buffer)); attachments.push_back(depth_buffer->RenderTarget()); renderpass_key.depth_format = depth_buffer->format; num_layers = std::max(num_layers, depth_buffer->range.extent.layers); @@ -1201,40 +1192,14 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM } renderpass_key.samples = samples; - const auto& device = runtime.device.GetLogical(); - const auto [cache_pair, is_new] = runtime.renderpass_cache.try_emplace(renderpass_key); - if (is_new) { - const VkSubpassDescription subpass{ - .flags = 0, - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .inputAttachmentCount = 0, - .pInputAttachments = nullptr, - .colorAttachmentCount = static_cast<u32>(num_colors), - .pColorAttachments = num_colors != 0 ? 
ATTACHMENT_REFERENCES.data() : nullptr, - .pResolveAttachments = nullptr, - .pDepthStencilAttachment = depth_attachment, - .preserveAttachmentCount = 0, - .pPreserveAttachments = nullptr, - }; - cache_pair->second = device.CreateRenderPass(VkRenderPassCreateInfo{ - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .attachmentCount = static_cast<u32>(descriptions.size()), - .pAttachments = descriptions.data(), - .subpassCount = 1, - .pSubpasses = &subpass, - .dependencyCount = 0, - .pDependencies = nullptr, - }); - } - renderpass = *cache_pair->second; + renderpass = runtime.render_pass_cache.Get(renderpass_key); + render_area = VkExtent2D{ .width = key.size.width, .height = key.size.height, }; num_color_buffers = static_cast<u32>(num_colors); - framebuffer = device.CreateFramebuffer(VkFramebufferCreateInfo{ + framebuffer = runtime.device.GetLogical().CreateFramebuffer({ .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, .pNext = nullptr, .flags = 0, diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 172bcdf98..0b73d55f8 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -7,6 +7,7 @@ #include <compare> #include <span> +#include "shader_recompiler/shader_info.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/texture_cache/texture_cache.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" @@ -26,35 +27,10 @@ class Device; class Image; class ImageView; class Framebuffer; +class RenderPassCache; class StagingBufferPool; class VKScheduler; -struct RenderPassKey { - constexpr auto operator<=>(const RenderPassKey&) const noexcept = default; - - std::array<PixelFormat, NUM_RT> color_formats; - PixelFormat depth_format; - VkSampleCountFlagBits samples; -}; - -} // namespace Vulkan - -namespace std { -template <> -struct hash<Vulkan::RenderPassKey> { - [[nodiscard]] constexpr size_t operator()(const Vulkan::RenderPassKey& key) const noexcept { - size_t value = static_cast<size_t>(key.depth_format) << 48; - value ^= static_cast<size_t>(key.samples) << 52; - for (size_t i = 0; i < key.color_formats.size(); ++i) { - value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6); - } - return value; - } -}; -} // namespace std - -namespace Vulkan { - struct TextureCacheRuntime { const Device& device; VKScheduler& scheduler; @@ -62,13 +38,13 @@ struct TextureCacheRuntime { StagingBufferPool& staging_buffer_pool; BlitImageHelper& blit_image_helper; ASTCDecoderPass& astc_decoder_pass; - std::unordered_map<RenderPassKey, vk::RenderPass> renderpass_cache{}; + RenderPassCache& render_pass_cache; void Finish(); - [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size); + StagingBufferRef UploadStagingBuffer(size_t size); - [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size); + StagingBufferRef DownloadStagingBuffer(size_t size); void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, const Region2D& dst_region, const Region2D& src_region, @@ -79,7 +55,7 @@ struct TextureCacheRuntime { void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view); - [[nodiscard]] bool CanAccelerateImageUpload(Image&) const noexcept { + bool CanAccelerateImageUpload(Image&) const noexcept { return false; } @@ -117,8 +93,6 @@ public: void UploadMemory(const StagingBufferRef& map, std::span<const VideoCommon::BufferImageCopy> copies); - void 
UploadMemory(const StagingBufferRef& map, std::span<const VideoCommon::BufferCopy> copies); - void DownloadMemory(const StagingBufferRef& map, std::span<const VideoCommon::BufferImageCopy> copies); @@ -126,10 +100,6 @@ public: return *image; } - [[nodiscard]] VkBuffer Buffer() const noexcept { - return *buffer; - } - [[nodiscard]] VkImageAspectFlags AspectMask() const noexcept { return aspect_mask; } @@ -146,7 +116,6 @@ public: private: VKScheduler* scheduler; vk::Image image; - vk::Buffer buffer; MemoryCommit commit; vk::ImageView image_view; std::vector<vk::ImageView> storage_image_views; @@ -157,18 +126,19 @@ private: class ImageView : public VideoCommon::ImageViewBase { public: explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); + explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&, + const VideoCommon::ImageViewInfo&, GPUVAddr); explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); [[nodiscard]] VkImageView DepthView(); [[nodiscard]] VkImageView StencilView(); - [[nodiscard]] VkImageView Handle(VideoCommon::ImageViewType query_type) const noexcept { - return *image_views[static_cast<size_t>(query_type)]; - } + [[nodiscard]] VkImageView StorageView(Shader::TextureType texture_type, + Shader::ImageFormat image_format); - [[nodiscard]] VkBufferView BufferView() const noexcept { - return *buffer_view; + [[nodiscard]] VkImageView Handle(Shader::TextureType texture_type) const noexcept { + return *image_views[static_cast<size_t>(texture_type)]; } [[nodiscard]] VkImage ImageHandle() const noexcept { @@ -179,26 +149,36 @@ public: return render_target; } - [[nodiscard]] PixelFormat ImageFormat() const noexcept { - return image_format; - } - [[nodiscard]] VkSampleCountFlagBits Samples() const noexcept { return samples; } + [[nodiscard]] GPUVAddr GpuAddr() const noexcept { + return gpu_addr; + } + + [[nodiscard]] u32 BufferSize() const noexcept { + return buffer_size; + } + private: - [[nodiscard]] vk::ImageView MakeDepthStencilView(VkImageAspectFlags aspect_mask); + struct StorageViews { + std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> signeds; + std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> unsigneds; + }; + + [[nodiscard]] vk::ImageView MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask); const Device* device = nullptr; - std::array<vk::ImageView, VideoCommon::NUM_IMAGE_VIEW_TYPES> image_views; + std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> image_views; + std::unique_ptr<StorageViews> storage_views; vk::ImageView depth_view; vk::ImageView stencil_view; - vk::BufferView buffer_view; VkImage image_handle = VK_NULL_HANDLE; VkImageView render_target = VK_NULL_HANDLE; - PixelFormat image_format = PixelFormat::Invalid; VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; + GPUVAddr gpu_addr = 0; + u32 buffer_size = 0; }; class ImageAlloc : public VideoCommon::ImageAllocBase {}; diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp index dc45fdcb1..0df3a7fe9 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp @@ -15,7 +15,9 @@ namespace Vulkan { VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const Device& device_, VKScheduler& scheduler_) - : device{device_}, scheduler{scheduler_} {} + : device{device_}, scheduler{scheduler_} { + payload_cursor = payload.data(); +} VKUpdateDescriptorQueue::~VKUpdateDescriptorQueue() = default; @@ 
-36,13 +38,4 @@ void VKUpdateDescriptorQueue::Acquire() { upload_start = payload_cursor; } -void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template, - VkDescriptorSet set) { - const void* const data = upload_start; - const vk::Device* const logical = &device.GetLogical(); - scheduler.Record([data, logical, set, update_template](vk::CommandBuffer) { - logical->UpdateDescriptorSet(set, update_template, data); - }); -} - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h index d35e77c44..d7de4c490 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.h +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h @@ -39,7 +39,9 @@ public: void Acquire(); - void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set); + const DescriptorUpdateEntry* UpdateData() const noexcept { + return upload_start; + } void AddSampledImage(VkImageView image_view, VkSampler sampler) { *(payload_cursor++) = VkDescriptorImageInfo{ diff --git a/src/video_core/shader/ast.cpp b/src/video_core/shader/ast.cpp deleted file mode 100644 index db11144c7..000000000 --- a/src/video_core/shader/ast.cpp +++ /dev/null @@ -1,752 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <string> -#include <string_view> - -#include <fmt/format.h> - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/shader/ast.h" -#include "video_core/shader/expr.h" - -namespace VideoCommon::Shader { - -ASTZipper::ASTZipper() = default; - -void ASTZipper::Init(const ASTNode new_first, const ASTNode parent) { - ASSERT(new_first->manager == nullptr); - first = new_first; - last = new_first; - - ASTNode current = first; - while (current) { - current->manager = this; - current->parent = parent; - last = current; - current = current->next; - } -} - -void ASTZipper::PushBack(const ASTNode new_node) { - ASSERT(new_node->manager == nullptr); - new_node->previous = last; - if (last) { - last->next = new_node; - } - new_node->next.reset(); - last = new_node; - if (!first) { - first = new_node; - } - new_node->manager = this; -} - -void ASTZipper::PushFront(const ASTNode new_node) { - ASSERT(new_node->manager == nullptr); - new_node->previous.reset(); - new_node->next = first; - if (first) { - first->previous = new_node; - } - if (last == first) { - last = new_node; - } - first = new_node; - new_node->manager = this; -} - -void ASTZipper::InsertAfter(const ASTNode new_node, const ASTNode at_node) { - ASSERT(new_node->manager == nullptr); - if (!at_node) { - PushFront(new_node); - return; - } - const ASTNode next = at_node->next; - if (next) { - next->previous = new_node; - } - new_node->previous = at_node; - if (at_node == last) { - last = new_node; - } - new_node->next = next; - at_node->next = new_node; - new_node->manager = this; -} - -void ASTZipper::InsertBefore(const ASTNode new_node, const ASTNode at_node) { - ASSERT(new_node->manager == nullptr); - if (!at_node) { - PushBack(new_node); - return; - } - const ASTNode previous = at_node->previous; - if (previous) { - previous->next = new_node; - } - new_node->next = at_node; - if (at_node == first) { - first = new_node; - } - new_node->previous = previous; - at_node->previous = new_node; - new_node->manager = this; -} - -void ASTZipper::DetachTail(ASTNode node) { - ASSERT(node->manager == this); - if (node == first) { - first.reset(); 
- last.reset(); - return; - } - - last = node->previous; - last->next.reset(); - node->previous.reset(); - - ASTNode current = std::move(node); - while (current) { - current->manager = nullptr; - current->parent.reset(); - current = current->next; - } -} - -void ASTZipper::DetachSegment(const ASTNode start, const ASTNode end) { - ASSERT(start->manager == this && end->manager == this); - if (start == end) { - DetachSingle(start); - return; - } - const ASTNode prev = start->previous; - const ASTNode post = end->next; - if (!prev) { - first = post; - } else { - prev->next = post; - } - if (!post) { - last = prev; - } else { - post->previous = prev; - } - start->previous.reset(); - end->next.reset(); - ASTNode current = start; - bool found = false; - while (current) { - current->manager = nullptr; - current->parent.reset(); - found |= current == end; - current = current->next; - } - ASSERT(found); -} - -void ASTZipper::DetachSingle(const ASTNode node) { - ASSERT(node->manager == this); - const ASTNode prev = node->previous; - const ASTNode post = node->next; - node->previous.reset(); - node->next.reset(); - if (!prev) { - first = post; - } else { - prev->next = post; - } - if (!post) { - last = prev; - } else { - post->previous = prev; - } - - node->manager = nullptr; - node->parent.reset(); -} - -void ASTZipper::Remove(const ASTNode node) { - ASSERT(node->manager == this); - const ASTNode next = node->next; - const ASTNode previous = node->previous; - if (previous) { - previous->next = next; - } - if (next) { - next->previous = previous; - } - node->parent.reset(); - node->manager = nullptr; - if (node == last) { - last = previous; - } - if (node == first) { - first = next; - } -} - -class ExprPrinter final { -public: - void operator()(const ExprAnd& expr) { - inner += "( "; - std::visit(*this, *expr.operand1); - inner += " && "; - std::visit(*this, *expr.operand2); - inner += ')'; - } - - void operator()(const ExprOr& expr) { - inner += "( "; - std::visit(*this, *expr.operand1); - inner += " || "; - std::visit(*this, *expr.operand2); - inner += ')'; - } - - void operator()(const ExprNot& expr) { - inner += "!"; - std::visit(*this, *expr.operand1); - } - - void operator()(const ExprPredicate& expr) { - inner += fmt::format("P{}", expr.predicate); - } - - void operator()(const ExprCondCode& expr) { - inner += fmt::format("CC{}", expr.cc); - } - - void operator()(const ExprVar& expr) { - inner += fmt::format("V{}", expr.var_index); - } - - void operator()(const ExprBoolean& expr) { - inner += expr.value ? 
"true" : "false"; - } - - void operator()(const ExprGprEqual& expr) { - inner += fmt::format("(gpr_{} == {})", expr.gpr, expr.value); - } - - const std::string& GetResult() const { - return inner; - } - -private: - std::string inner; -}; - -class ASTPrinter { -public: - void operator()(const ASTProgram& ast) { - scope++; - inner += "program {\n"; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - inner += "}\n"; - scope--; - } - - void operator()(const ASTIfThen& ast) { - ExprPrinter expr_parser{}; - std::visit(expr_parser, *ast.condition); - inner += fmt::format("{}if ({}) {{\n", Indent(), expr_parser.GetResult()); - scope++; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - scope--; - inner += fmt::format("{}}}\n", Indent()); - } - - void operator()(const ASTIfElse& ast) { - inner += Indent(); - inner += "else {\n"; - - scope++; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - scope--; - - inner += Indent(); - inner += "}\n"; - } - - void operator()(const ASTBlockEncoded& ast) { - inner += fmt::format("{}Block({}, {});\n", Indent(), ast.start, ast.end); - } - - void operator()([[maybe_unused]] const ASTBlockDecoded& ast) { - inner += Indent(); - inner += "Block;\n"; - } - - void operator()(const ASTVarSet& ast) { - ExprPrinter expr_parser{}; - std::visit(expr_parser, *ast.condition); - inner += fmt::format("{}V{} := {};\n", Indent(), ast.index, expr_parser.GetResult()); - } - - void operator()(const ASTLabel& ast) { - inner += fmt::format("Label_{}:\n", ast.index); - } - - void operator()(const ASTGoto& ast) { - ExprPrinter expr_parser{}; - std::visit(expr_parser, *ast.condition); - inner += - fmt::format("{}({}) -> goto Label_{};\n", Indent(), expr_parser.GetResult(), ast.label); - } - - void operator()(const ASTDoWhile& ast) { - ExprPrinter expr_parser{}; - std::visit(expr_parser, *ast.condition); - inner += fmt::format("{}do {{\n", Indent()); - scope++; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - scope--; - inner += fmt::format("{}}} while ({});\n", Indent(), expr_parser.GetResult()); - } - - void operator()(const ASTReturn& ast) { - ExprPrinter expr_parser{}; - std::visit(expr_parser, *ast.condition); - inner += fmt::format("{}({}) -> {};\n", Indent(), expr_parser.GetResult(), - ast.kills ? "discard" : "exit"); - } - - void operator()(const ASTBreak& ast) { - ExprPrinter expr_parser{}; - std::visit(expr_parser, *ast.condition); - inner += fmt::format("{}({}) -> break;\n", Indent(), expr_parser.GetResult()); - } - - void Visit(const ASTNode& node) { - std::visit(*this, *node->GetInnerData()); - } - - const std::string& GetResult() const { - return inner; - } - -private: - std::string_view Indent() { - if (space_segment_scope == scope) { - return space_segment; - } - - // Ensure that we don't exceed our view. 
- ASSERT(scope * 2 < spaces.size()); - - space_segment = spaces.substr(0, scope * 2); - space_segment_scope = scope; - return space_segment; - } - - std::string inner{}; - std::string_view space_segment; - - u32 scope{}; - u32 space_segment_scope{}; - - static constexpr std::string_view spaces{" "}; -}; - -std::string ASTManager::Print() const { - ASTPrinter printer{}; - printer.Visit(main_node); - return printer.GetResult(); -} - -ASTManager::ASTManager(bool do_full_decompile, bool disable_else_derivation_) - : full_decompile{do_full_decompile}, disable_else_derivation{disable_else_derivation_} {} - -ASTManager::~ASTManager() { - Clear(); -} - -void ASTManager::Init() { - main_node = ASTBase::Make<ASTProgram>(ASTNode{}); - program = std::get_if<ASTProgram>(main_node->GetInnerData()); - false_condition = MakeExpr<ExprBoolean>(false); -} - -void ASTManager::DeclareLabel(u32 address) { - const auto pair = labels_map.emplace(address, labels_count); - if (pair.second) { - labels_count++; - labels.resize(labels_count); - } -} - -void ASTManager::InsertLabel(u32 address) { - const u32 index = labels_map[address]; - const ASTNode label = ASTBase::Make<ASTLabel>(main_node, index); - labels[index] = label; - program->nodes.PushBack(label); -} - -void ASTManager::InsertGoto(Expr condition, u32 address) { - const u32 index = labels_map[address]; - const ASTNode goto_node = ASTBase::Make<ASTGoto>(main_node, std::move(condition), index); - gotos.push_back(goto_node); - program->nodes.PushBack(goto_node); -} - -void ASTManager::InsertBlock(u32 start_address, u32 end_address) { - ASTNode block = ASTBase::Make<ASTBlockEncoded>(main_node, start_address, end_address); - program->nodes.PushBack(std::move(block)); -} - -void ASTManager::InsertReturn(Expr condition, bool kills) { - ASTNode node = ASTBase::Make<ASTReturn>(main_node, std::move(condition), kills); - program->nodes.PushBack(std::move(node)); -} - -// The decompile algorithm is based on -// "Taming control flow: A structured approach to eliminating goto statements" -// by AM Erosa, LJ Hendren 1994. In general, the idea is to get gotos to be -// on the same structured level as the label which they jump to. This is done, -// through outward/inward movements and lifting. Once they are at the same -// level, you can enclose them in an "if" structure or a "do-while" structure. 
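The comment above is the only prose description of the control-flow restructuring pass being deleted, so a small stand-alone sketch of the enclosure step may help. Everything in it (the Stmt struct, EncloseBackwardGoto) is invented for illustration and only mimics the loop case: once a backward goto and its label sit at the same nesting level, the span between them becomes the body of a do-while whose condition is the goto's condition, and the goto itself disappears.

#include <cstddef>
#include <cstdio>
#include <string>
#include <vector>

struct Stmt {
    std::string text;            // block, label or goto in the flat node list
    std::string goto_condition;  // set only on goto statements
    std::vector<Stmt> body;      // filled once this node becomes a structured loop
};

// Wrap everything between a label (at label_pos) and a backward goto (at goto_pos)
// into one do-while node whose condition is the goto's condition, then drop the goto.
std::vector<Stmt> EncloseBackwardGoto(std::vector<Stmt> nodes, std::size_t label_pos,
                                      std::size_t goto_pos) {
    Stmt loop;
    loop.text = "do-while";
    loop.goto_condition = nodes[goto_pos].goto_condition;
    loop.body.assign(nodes.begin() + label_pos + 1, nodes.begin() + goto_pos);
    nodes.erase(nodes.begin() + label_pos + 1, nodes.begin() + goto_pos + 1);
    nodes.insert(nodes.begin() + label_pos + 1, std::move(loop));
    return nodes;
}

int main() {
    // Label_0: Block(0, 16); (P0) -> goto Label_0;  becomes  Label_0: do { Block(0, 16); } while (P0);
    std::vector<Stmt> program{{"Label_0", "", {}}, {"Block(0, 16)", "", {}}, {"goto Label_0", "P0", {}}};
    const std::vector<Stmt> structured = EncloseBackwardGoto(std::move(program), 0, 2);
    std::printf("%s (%s) with %zu inner node(s)\n", structured[1].text.c_str(),
                structured[1].goto_condition.c_str(), structured[1].body.size());
}

Running it prints "do-while (P0) with 1 inner node(s)", i.e. the loop body holds the single block that used to sit between the label and the goto.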
-void ASTManager::Decompile() { - auto it = gotos.begin(); - while (it != gotos.end()) { - const ASTNode goto_node = *it; - const auto label_index = goto_node->GetGotoLabel(); - if (!label_index) { - return; - } - const ASTNode label = labels[*label_index]; - if (!full_decompile) { - // We only decompile backward jumps - if (!IsBackwardsJump(goto_node, label)) { - it++; - continue; - } - } - if (IndirectlyRelated(goto_node, label)) { - while (!DirectlyRelated(goto_node, label)) { - MoveOutward(goto_node); - } - } - if (DirectlyRelated(goto_node, label)) { - u32 goto_level = goto_node->GetLevel(); - const u32 label_level = label->GetLevel(); - while (label_level < goto_level) { - MoveOutward(goto_node); - goto_level--; - } - // TODO(Blinkhawk): Implement Lifting and Inward Movements - } - if (label->GetParent() == goto_node->GetParent()) { - bool is_loop = false; - ASTNode current = goto_node->GetPrevious(); - while (current) { - if (current == label) { - is_loop = true; - break; - } - current = current->GetPrevious(); - } - - if (is_loop) { - EncloseDoWhile(goto_node, label); - } else { - EncloseIfThen(goto_node, label); - } - it = gotos.erase(it); - continue; - } - it++; - } - if (full_decompile) { - for (const ASTNode& label : labels) { - auto& manager = label->GetManager(); - manager.Remove(label); - } - labels.clear(); - } else { - auto label_it = labels.begin(); - while (label_it != labels.end()) { - bool can_remove = true; - ASTNode label = *label_it; - for (const ASTNode& goto_node : gotos) { - const auto label_index = goto_node->GetGotoLabel(); - if (!label_index) { - return; - } - ASTNode& glabel = labels[*label_index]; - if (glabel == label) { - can_remove = false; - break; - } - } - if (can_remove) { - label->MarkLabelUnused(); - } - } - } -} - -bool ASTManager::IsBackwardsJump(ASTNode goto_node, ASTNode label_node) const { - u32 goto_level = goto_node->GetLevel(); - u32 label_level = label_node->GetLevel(); - while (goto_level > label_level) { - goto_level--; - goto_node = goto_node->GetParent(); - } - while (label_level > goto_level) { - label_level--; - label_node = label_node->GetParent(); - } - while (goto_node->GetParent() != label_node->GetParent()) { - goto_node = goto_node->GetParent(); - label_node = label_node->GetParent(); - } - ASTNode current = goto_node->GetPrevious(); - while (current) { - if (current == label_node) { - return true; - } - current = current->GetPrevious(); - } - return false; -} - -bool ASTManager::IndirectlyRelated(const ASTNode& first, const ASTNode& second) const { - return !(first->GetParent() == second->GetParent() || DirectlyRelated(first, second)); -} - -bool ASTManager::DirectlyRelated(const ASTNode& first, const ASTNode& second) const { - if (first->GetParent() == second->GetParent()) { - return false; - } - const u32 first_level = first->GetLevel(); - const u32 second_level = second->GetLevel(); - u32 min_level; - u32 max_level; - ASTNode max; - ASTNode min; - if (first_level > second_level) { - min_level = second_level; - min = second; - max_level = first_level; - max = first; - } else { - min_level = first_level; - min = first; - max_level = second_level; - max = second; - } - - while (max_level > min_level) { - max_level--; - max = max->GetParent(); - } - - return min->GetParent() == max->GetParent(); -} - -void ASTManager::ShowCurrentState(std::string_view state) const { - LOG_CRITICAL(HW_GPU, "\nState {}:\n\n{}\n", state, Print()); - SanityCheck(); -} - -void ASTManager::SanityCheck() const { - for (const auto& label : labels) { - 
if (!label->GetParent()) { - LOG_CRITICAL(HW_GPU, "Sanity Check Failed"); - } - } -} - -void ASTManager::EncloseDoWhile(ASTNode goto_node, ASTNode label) { - ASTZipper& zipper = goto_node->GetManager(); - const ASTNode loop_start = label->GetNext(); - if (loop_start == goto_node) { - zipper.Remove(goto_node); - return; - } - const ASTNode parent = label->GetParent(); - const Expr condition = goto_node->GetGotoCondition(); - zipper.DetachSegment(loop_start, goto_node); - const ASTNode do_while_node = ASTBase::Make<ASTDoWhile>(parent, condition); - ASTZipper* sub_zipper = do_while_node->GetSubNodes(); - sub_zipper->Init(loop_start, do_while_node); - zipper.InsertAfter(do_while_node, label); - sub_zipper->Remove(goto_node); -} - -void ASTManager::EncloseIfThen(ASTNode goto_node, ASTNode label) { - ASTZipper& zipper = goto_node->GetManager(); - const ASTNode if_end = label->GetPrevious(); - if (if_end == goto_node) { - zipper.Remove(goto_node); - return; - } - const ASTNode prev = goto_node->GetPrevious(); - const Expr condition = goto_node->GetGotoCondition(); - bool do_else = false; - if (!disable_else_derivation && prev->IsIfThen()) { - const Expr if_condition = prev->GetIfCondition(); - do_else = ExprAreEqual(if_condition, condition); - } - const ASTNode parent = label->GetParent(); - zipper.DetachSegment(goto_node, if_end); - ASTNode if_node; - if (do_else) { - if_node = ASTBase::Make<ASTIfElse>(parent); - } else { - Expr neg_condition = MakeExprNot(condition); - if_node = ASTBase::Make<ASTIfThen>(parent, neg_condition); - } - ASTZipper* sub_zipper = if_node->GetSubNodes(); - sub_zipper->Init(goto_node, if_node); - zipper.InsertAfter(if_node, prev); - sub_zipper->Remove(goto_node); -} - -void ASTManager::MoveOutward(ASTNode goto_node) { - ASTZipper& zipper = goto_node->GetManager(); - const ASTNode parent = goto_node->GetParent(); - ASTZipper& zipper2 = parent->GetManager(); - const ASTNode grandpa = parent->GetParent(); - const bool is_loop = parent->IsLoop(); - const bool is_else = parent->IsIfElse(); - const bool is_if = parent->IsIfThen(); - - const ASTNode prev = goto_node->GetPrevious(); - const ASTNode post = goto_node->GetNext(); - - const Expr condition = goto_node->GetGotoCondition(); - zipper.DetachSingle(goto_node); - if (is_loop) { - const u32 var_index = NewVariable(); - const Expr var_condition = MakeExpr<ExprVar>(var_index); - const ASTNode var_node = ASTBase::Make<ASTVarSet>(parent, var_index, condition); - const ASTNode var_node_init = ASTBase::Make<ASTVarSet>(parent, var_index, false_condition); - zipper2.InsertBefore(var_node_init, parent); - zipper.InsertAfter(var_node, prev); - goto_node->SetGotoCondition(var_condition); - const ASTNode break_node = ASTBase::Make<ASTBreak>(parent, var_condition); - zipper.InsertAfter(break_node, var_node); - } else if (is_if || is_else) { - const u32 var_index = NewVariable(); - const Expr var_condition = MakeExpr<ExprVar>(var_index); - const ASTNode var_node = ASTBase::Make<ASTVarSet>(parent, var_index, condition); - const ASTNode var_node_init = ASTBase::Make<ASTVarSet>(parent, var_index, false_condition); - if (is_if) { - zipper2.InsertBefore(var_node_init, parent); - } else { - zipper2.InsertBefore(var_node_init, parent->GetPrevious()); - } - zipper.InsertAfter(var_node, prev); - goto_node->SetGotoCondition(var_condition); - if (post) { - zipper.DetachTail(post); - const ASTNode if_node = ASTBase::Make<ASTIfThen>(parent, MakeExprNot(var_condition)); - ASTZipper* sub_zipper = if_node->GetSubNodes(); - sub_zipper->Init(post, 
if_node); - zipper.InsertAfter(if_node, var_node); - } - } else { - UNREACHABLE(); - } - const ASTNode next = parent->GetNext(); - if (is_if && next && next->IsIfElse()) { - zipper2.InsertAfter(goto_node, next); - goto_node->SetParent(grandpa); - return; - } - zipper2.InsertAfter(goto_node, parent); - goto_node->SetParent(grandpa); -} - -class ASTClearer { -public: - ASTClearer() = default; - - void operator()(const ASTProgram& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(const ASTIfThen& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(const ASTIfElse& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()([[maybe_unused]] const ASTBlockEncoded& ast) {} - - void operator()(ASTBlockDecoded& ast) { - ast.nodes.clear(); - } - - void operator()([[maybe_unused]] const ASTVarSet& ast) {} - - void operator()([[maybe_unused]] const ASTLabel& ast) {} - - void operator()([[maybe_unused]] const ASTGoto& ast) {} - - void operator()(const ASTDoWhile& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()([[maybe_unused]] const ASTReturn& ast) {} - - void operator()([[maybe_unused]] const ASTBreak& ast) {} - - void Visit(const ASTNode& node) { - std::visit(*this, *node->GetInnerData()); - node->Clear(); - } -}; - -void ASTManager::Clear() { - if (!main_node) { - return; - } - ASTClearer clearer{}; - clearer.Visit(main_node); - main_node.reset(); - program = nullptr; - labels_map.clear(); - labels.clear(); - gotos.clear(); -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/ast.h b/src/video_core/shader/ast.h deleted file mode 100644 index dc49b369e..000000000 --- a/src/video_core/shader/ast.h +++ /dev/null @@ -1,398 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
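MoveOutward above handles the case where a goto is nested deeper than its label by hoisting it one level at a time: inside a loop it records the goto condition in a fresh variable, breaks out, and re-emits the goto (now guarded by that variable) after the loop. A hypothetical, hand-written equivalence check of that loop case follows; the two function names are invented, and the re-emitted goto is omitted because nothing follows the loop in this toy example.

#include <cassert>

// A conditional goto that escapes a loop...
int WithGoto(int iterations, int escape_at) {
    int executed = 0;
    for (int i = 0; i < iterations; ++i) {
        if (i == escape_at) {
            goto after_loop;  // unstructured jump out of the loop
        }
        ++executed;
    }
after_loop:
    return executed;
}

// ...behaves the same as the structured form MoveOutward produces for loops.
int WithVarAndBreak(int iterations, int escape_at) {
    int executed = 0;
    bool v = false;  // the fresh variable, initialised to false before the loop (ASTVarSet)
    for (int i = 0; i < iterations; ++i) {
        v = (i == escape_at);  // record the goto condition inside the loop (ASTVarSet)
        if (v) {
            break;             // the break that replaces the goto (ASTBreak)
        }
        ++executed;
    }
    // Here the pass would re-emit the goto guarded by v; nothing follows the loop,
    // so this sketch simply returns.
    return executed;
}

int main() {
    for (int escape = -1; escape < 6; ++escape) {
        assert(WithGoto(4, escape) == WithVarAndBreak(4, escape));
    }
    return 0;
}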
- -#pragma once - -#include <functional> -#include <list> -#include <memory> -#include <optional> -#include <string> -#include <unordered_map> -#include <vector> - -#include "video_core/shader/expr.h" -#include "video_core/shader/node.h" - -namespace VideoCommon::Shader { - -class ASTBase; -class ASTBlockDecoded; -class ASTBlockEncoded; -class ASTBreak; -class ASTDoWhile; -class ASTGoto; -class ASTIfElse; -class ASTIfThen; -class ASTLabel; -class ASTProgram; -class ASTReturn; -class ASTVarSet; - -using ASTData = std::variant<ASTProgram, ASTIfThen, ASTIfElse, ASTBlockEncoded, ASTBlockDecoded, - ASTVarSet, ASTGoto, ASTLabel, ASTDoWhile, ASTReturn, ASTBreak>; - -using ASTNode = std::shared_ptr<ASTBase>; - -enum class ASTZipperType : u32 { - Program, - IfThen, - IfElse, - Loop, -}; - -class ASTZipper final { -public: - explicit ASTZipper(); - - void Init(ASTNode first, ASTNode parent); - - ASTNode GetFirst() const { - return first; - } - - ASTNode GetLast() const { - return last; - } - - void PushBack(ASTNode new_node); - void PushFront(ASTNode new_node); - void InsertAfter(ASTNode new_node, ASTNode at_node); - void InsertBefore(ASTNode new_node, ASTNode at_node); - void DetachTail(ASTNode node); - void DetachSingle(ASTNode node); - void DetachSegment(ASTNode start, ASTNode end); - void Remove(ASTNode node); - - ASTNode first; - ASTNode last; -}; - -class ASTProgram { -public: - ASTZipper nodes{}; -}; - -class ASTIfThen { -public: - explicit ASTIfThen(Expr condition_) : condition{std::move(condition_)} {} - Expr condition; - ASTZipper nodes{}; -}; - -class ASTIfElse { -public: - ASTZipper nodes{}; -}; - -class ASTBlockEncoded { -public: - explicit ASTBlockEncoded(u32 start_, u32 _) : start{start_}, end{_} {} - u32 start; - u32 end; -}; - -class ASTBlockDecoded { -public: - explicit ASTBlockDecoded(NodeBlock&& new_nodes_) : nodes(std::move(new_nodes_)) {} - NodeBlock nodes; -}; - -class ASTVarSet { -public: - explicit ASTVarSet(u32 index_, Expr condition_) - : index{index_}, condition{std::move(condition_)} {} - - u32 index; - Expr condition; -}; - -class ASTLabel { -public: - explicit ASTLabel(u32 index_) : index{index_} {} - u32 index; - bool unused{}; -}; - -class ASTGoto { -public: - explicit ASTGoto(Expr condition_, u32 label_) - : condition{std::move(condition_)}, label{label_} {} - - Expr condition; - u32 label; -}; - -class ASTDoWhile { -public: - explicit ASTDoWhile(Expr condition_) : condition{std::move(condition_)} {} - Expr condition; - ASTZipper nodes{}; -}; - -class ASTReturn { -public: - explicit ASTReturn(Expr condition_, bool kills_) - : condition{std::move(condition_)}, kills{kills_} {} - - Expr condition; - bool kills; -}; - -class ASTBreak { -public: - explicit ASTBreak(Expr condition_) : condition{std::move(condition_)} {} - Expr condition; -}; - -class ASTBase { -public: - explicit ASTBase(ASTNode parent_, ASTData data_) - : data{std::move(data_)}, parent{std::move(parent_)} {} - - template <class U, class... Args> - static ASTNode Make(ASTNode parent, Args&&... 
args) { - return std::make_shared<ASTBase>(std::move(parent), - ASTData(U(std::forward<Args>(args)...))); - } - - void SetParent(ASTNode new_parent) { - parent = std::move(new_parent); - } - - ASTNode& GetParent() { - return parent; - } - - const ASTNode& GetParent() const { - return parent; - } - - u32 GetLevel() const { - u32 level = 0; - auto next_parent = parent; - while (next_parent) { - next_parent = next_parent->GetParent(); - level++; - } - return level; - } - - ASTData* GetInnerData() { - return &data; - } - - const ASTData* GetInnerData() const { - return &data; - } - - ASTNode GetNext() const { - return next; - } - - ASTNode GetPrevious() const { - return previous; - } - - ASTZipper& GetManager() { - return *manager; - } - - const ASTZipper& GetManager() const { - return *manager; - } - - std::optional<u32> GetGotoLabel() const { - if (const auto* inner = std::get_if<ASTGoto>(&data)) { - return {inner->label}; - } - return std::nullopt; - } - - Expr GetGotoCondition() const { - if (const auto* inner = std::get_if<ASTGoto>(&data)) { - return inner->condition; - } - return nullptr; - } - - void MarkLabelUnused() { - if (auto* inner = std::get_if<ASTLabel>(&data)) { - inner->unused = true; - } - } - - bool IsLabelUnused() const { - if (const auto* inner = std::get_if<ASTLabel>(&data)) { - return inner->unused; - } - return true; - } - - std::optional<u32> GetLabelIndex() const { - if (const auto* inner = std::get_if<ASTLabel>(&data)) { - return {inner->index}; - } - return std::nullopt; - } - - Expr GetIfCondition() const { - if (const auto* inner = std::get_if<ASTIfThen>(&data)) { - return inner->condition; - } - return nullptr; - } - - void SetGotoCondition(Expr new_condition) { - if (auto* inner = std::get_if<ASTGoto>(&data)) { - inner->condition = std::move(new_condition); - } - } - - bool IsIfThen() const { - return std::holds_alternative<ASTIfThen>(data); - } - - bool IsIfElse() const { - return std::holds_alternative<ASTIfElse>(data); - } - - bool IsBlockEncoded() const { - return std::holds_alternative<ASTBlockEncoded>(data); - } - - void TransformBlockEncoded(NodeBlock&& nodes) { - data = ASTBlockDecoded(std::move(nodes)); - } - - bool IsLoop() const { - return std::holds_alternative<ASTDoWhile>(data); - } - - ASTZipper* GetSubNodes() { - if (std::holds_alternative<ASTProgram>(data)) { - return &std::get_if<ASTProgram>(&data)->nodes; - } - if (std::holds_alternative<ASTIfThen>(data)) { - return &std::get_if<ASTIfThen>(&data)->nodes; - } - if (std::holds_alternative<ASTIfElse>(data)) { - return &std::get_if<ASTIfElse>(&data)->nodes; - } - if (std::holds_alternative<ASTDoWhile>(data)) { - return &std::get_if<ASTDoWhile>(&data)->nodes; - } - return nullptr; - } - - void Clear() { - next.reset(); - previous.reset(); - parent.reset(); - manager = nullptr; - } - -private: - friend class ASTZipper; - - ASTData data; - ASTNode parent; - ASTNode next; - ASTNode previous; - ASTZipper* manager{}; -}; - -class ASTManager final { -public: - explicit ASTManager(bool do_full_decompile, bool disable_else_derivation_); - ~ASTManager(); - - ASTManager(const ASTManager& o) = delete; - ASTManager& operator=(const ASTManager& other) = delete; - - ASTManager(ASTManager&& other) noexcept = default; - ASTManager& operator=(ASTManager&& other) noexcept = default; - - void Init(); - - void DeclareLabel(u32 address); - - void InsertLabel(u32 address); - - void InsertGoto(Expr condition, u32 address); - - void InsertBlock(u32 start_address, u32 end_address); - - void InsertReturn(Expr condition, bool 
kills); - - std::string Print() const; - - void Decompile(); - - void ShowCurrentState(std::string_view state) const; - - void SanityCheck() const; - - void Clear(); - - bool IsFullyDecompiled() const { - if (full_decompile) { - return gotos.empty(); - } - - for (ASTNode goto_node : gotos) { - auto label_index = goto_node->GetGotoLabel(); - if (!label_index) { - return false; - } - ASTNode glabel = labels[*label_index]; - if (IsBackwardsJump(goto_node, glabel)) { - return false; - } - } - return true; - } - - ASTNode GetProgram() const { - return main_node; - } - - u32 GetVariables() const { - return variables; - } - - const std::vector<ASTNode>& GetLabels() const { - return labels; - } - -private: - bool IsBackwardsJump(ASTNode goto_node, ASTNode label_node) const; - - bool IndirectlyRelated(const ASTNode& first, const ASTNode& second) const; - - bool DirectlyRelated(const ASTNode& first, const ASTNode& second) const; - - void EncloseDoWhile(ASTNode goto_node, ASTNode label); - - void EncloseIfThen(ASTNode goto_node, ASTNode label); - - void MoveOutward(ASTNode goto_node); - - u32 NewVariable() { - return variables++; - } - - bool full_decompile{}; - bool disable_else_derivation{}; - std::unordered_map<u32, u32> labels_map{}; - u32 labels_count{}; - std::vector<ASTNode> labels{}; - std::list<ASTNode> gotos{}; - u32 variables{}; - ASTProgram* program{}; - ASTNode main_node{}; - Expr false_condition{}; -}; - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp deleted file mode 100644 index 02adcf9c7..000000000 --- a/src/video_core/shader/async_shaders.cpp +++ /dev/null @@ -1,234 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
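One detail worth noting about the AST being deleted above: ASTBase nodes keep shared_ptr links to next, previous and parent, so siblings form reference cycles. That appears to be why ASTManager::Clear and the ASTClearer visitor walk the whole tree and reset every link before dropping main_node. A tiny stand-alone sketch of the leak those resets avoid (the Node struct is invented for this illustration):

#include <cstdio>
#include <memory>

// Two sibling nodes that point at each other, like ASTBase::next / ASTBase::previous.
struct Node {
    std::shared_ptr<Node> next;
    std::shared_ptr<Node> previous;
    ~Node() { std::puts("node destroyed"); }
};

int main() {
    auto a = std::make_shared<Node>();
    auto b = std::make_shared<Node>();
    a->next = b;
    b->previous = a;  // reference cycle: a <-> b

    // Without these two resets (the equivalent of ASTBase::Clear), dropping the
    // external handles below would leave both nodes alive forever.
    a->next.reset();
    b->previous.reset();

    a.reset();  // prints "node destroyed"
    b.reset();  // prints "node destroyed"
}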
- -#include <condition_variable> -#include <mutex> -#include <thread> -#include <vector> -#include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_base.h" -#include "video_core/renderer_opengl/gl_shader_cache.h" -#include "video_core/shader/async_shaders.h" - -namespace VideoCommon::Shader { - -AsyncShaders::AsyncShaders(Core::Frontend::EmuWindow& emu_window_) : emu_window(emu_window_) {} - -AsyncShaders::~AsyncShaders() { - KillWorkers(); -} - -void AsyncShaders::AllocateWorkers() { - // Use at least one thread - u32 num_workers = 1; - - // Deduce how many more threads we can use - const u32 thread_count = std::thread::hardware_concurrency(); - if (thread_count >= 8) { - // Increase async workers by 1 for every 2 threads >= 8 - num_workers += 1 + (thread_count - 8) / 2; - } - - // If we already have workers queued, ignore - if (num_workers == worker_threads.size()) { - return; - } - - // If workers already exist, clear them - if (!worker_threads.empty()) { - FreeWorkers(); - } - - // Create workers - for (std::size_t i = 0; i < num_workers; i++) { - context_list.push_back(emu_window.CreateSharedContext()); - worker_threads.emplace_back(&AsyncShaders::ShaderCompilerThread, this, - context_list[i].get()); - } -} - -void AsyncShaders::FreeWorkers() { - // Mark all threads to quit - is_thread_exiting.store(true); - cv.notify_all(); - for (auto& thread : worker_threads) { - thread.join(); - } - // Clear our shared contexts - context_list.clear(); - - // Clear our worker threads - worker_threads.clear(); -} - -void AsyncShaders::KillWorkers() { - is_thread_exiting.store(true); - cv.notify_all(); - for (auto& thread : worker_threads) { - thread.detach(); - } - // Clear our shared contexts - context_list.clear(); - - // Clear our worker threads - worker_threads.clear(); -} - -bool AsyncShaders::HasWorkQueued() const { - return !pending_queue.empty(); -} - -bool AsyncShaders::HasCompletedWork() const { - std::shared_lock lock{completed_mutex}; - return !finished_work.empty(); -} - -bool AsyncShaders::IsShaderAsync(const Tegra::GPU& gpu) const { - const auto& regs = gpu.Maxwell3D().regs; - - // If something is using depth, we can assume that games are not rendering anything which will - // be used one time. - if (regs.zeta_enable) { - return true; - } - - // If games are using a small index count, we can assume these are full screen quads. Usually - // these shaders are only used once for building textures so we can assume they can't be built - // async - if (regs.index_array.count <= 6 || regs.vertex_buffer.count <= 6) { - return false; - } - - return true; -} - -std::vector<AsyncShaders::Result> AsyncShaders::GetCompletedWork() { - std::vector<Result> results; - { - std::unique_lock lock{completed_mutex}; - results = std::move(finished_work); - finished_work.clear(); - } - return results; -} - -void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device, - Tegra::Engines::ShaderType shader_type, u64 uid, - std::vector<u64> code, std::vector<u64> code_b, - u32 main_offset, CompilerSettings compiler_settings, - const Registry& registry, VAddr cpu_addr) { - std::unique_lock lock(queue_mutex); - pending_queue.push({ - .backend = device.UseAssemblyShaders() ? 
Backend::GLASM : Backend::OpenGL, - .device = &device, - .shader_type = shader_type, - .uid = uid, - .code = std::move(code), - .code_b = std::move(code_b), - .main_offset = main_offset, - .compiler_settings = compiler_settings, - .registry = registry, - .cpu_address = cpu_addr, - .pp_cache = nullptr, - .vk_device = nullptr, - .scheduler = nullptr, - .descriptor_pool = nullptr, - .update_descriptor_queue = nullptr, - .bindings{}, - .program{}, - .key{}, - .num_color_buffers = 0, - }); - cv.notify_one(); -} - -void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, - const Vulkan::Device& device, Vulkan::VKScheduler& scheduler, - Vulkan::VKDescriptorPool& descriptor_pool, - Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, - std::vector<VkDescriptorSetLayoutBinding> bindings, - Vulkan::SPIRVProgram program, - Vulkan::GraphicsPipelineCacheKey key, u32 num_color_buffers) { - std::unique_lock lock(queue_mutex); - pending_queue.push({ - .backend = Backend::Vulkan, - .device = nullptr, - .shader_type{}, - .uid = 0, - .code{}, - .code_b{}, - .main_offset = 0, - .compiler_settings{}, - .registry{}, - .cpu_address = 0, - .pp_cache = pp_cache, - .vk_device = &device, - .scheduler = &scheduler, - .descriptor_pool = &descriptor_pool, - .update_descriptor_queue = &update_descriptor_queue, - .bindings = std::move(bindings), - .program = std::move(program), - .key = key, - .num_color_buffers = num_color_buffers, - }); - cv.notify_one(); -} - -void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) { - while (!is_thread_exiting.load(std::memory_order_relaxed)) { - std::unique_lock lock{queue_mutex}; - cv.wait(lock, [this] { return HasWorkQueued() || is_thread_exiting; }); - if (is_thread_exiting) { - return; - } - - // Partial lock to allow all threads to read at the same time - if (!HasWorkQueued()) { - continue; - } - // Another thread beat us, just unlock and wait for the next load - if (pending_queue.empty()) { - continue; - } - - // Pull work from queue - WorkerParams work = std::move(pending_queue.front()); - pending_queue.pop(); - lock.unlock(); - - if (work.backend == Backend::OpenGL || work.backend == Backend::GLASM) { - const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, *work.registry); - const auto scope = context->Acquire(); - auto program = - OpenGL::BuildShader(*work.device, work.shader_type, work.uid, ir, *work.registry); - Result result{}; - result.backend = work.backend; - result.cpu_address = work.cpu_address; - result.uid = work.uid; - result.code = std::move(work.code); - result.code_b = std::move(work.code_b); - result.shader_type = work.shader_type; - - if (work.backend == Backend::OpenGL) { - result.program.opengl = std::move(program->source_program); - } else if (work.backend == Backend::GLASM) { - result.program.glasm = std::move(program->assembly_program); - } - - { - std::unique_lock complete_lock(completed_mutex); - finished_work.push_back(std::move(result)); - } - } else if (work.backend == Backend::Vulkan) { - auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>( - *work.vk_device, *work.scheduler, *work.descriptor_pool, - *work.update_descriptor_queue, work.key, work.bindings, work.program, - work.num_color_buffers); - - work.pp_cache->EmplacePipeline(std::move(pipeline)); - } - } -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h deleted file mode 100644 index 7fdff6e56..000000000 --- 
a/src/video_core/shader/async_shaders.h +++ /dev/null @@ -1,138 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <condition_variable> -#include <memory> -#include <shared_mutex> -#include <thread> - -#include <glad/glad.h> - -#include "common/common_types.h" -#include "video_core/renderer_opengl/gl_device.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_shader_decompiler.h" -#include "video_core/renderer_vulkan/vk_pipeline_cache.h" -#include "video_core/renderer_vulkan/vk_scheduler.h" -#include "video_core/vulkan_common/vulkan_device.h" - -namespace Core::Frontend { -class EmuWindow; -class GraphicsContext; -} // namespace Core::Frontend - -namespace Tegra { -class GPU; -} - -namespace Vulkan { -class VKPipelineCache; -} - -namespace VideoCommon::Shader { - -class AsyncShaders { -public: - enum class Backend { - OpenGL, - GLASM, - Vulkan, - }; - - struct ResultPrograms { - OpenGL::OGLProgram opengl; - OpenGL::OGLAssemblyProgram glasm; - }; - - struct Result { - u64 uid; - VAddr cpu_address; - Backend backend; - ResultPrograms program; - std::vector<u64> code; - std::vector<u64> code_b; - Tegra::Engines::ShaderType shader_type; - }; - - explicit AsyncShaders(Core::Frontend::EmuWindow& emu_window_); - ~AsyncShaders(); - - /// Start up shader worker threads - void AllocateWorkers(); - - /// Clear the shader queue and kill all worker threads - void FreeWorkers(); - - // Force end all threads - void KillWorkers(); - - /// Check to see if any shaders have actually been compiled - [[nodiscard]] bool HasCompletedWork() const; - - /// Deduce if a shader can be build on another thread of MUST be built in sync. We cannot build - /// every shader async as some shaders are only built and executed once. 
We try to "guess" which - /// shader would be used only once - [[nodiscard]] bool IsShaderAsync(const Tegra::GPU& gpu) const; - - /// Pulls completed compiled shaders - [[nodiscard]] std::vector<Result> GetCompletedWork(); - - void QueueOpenGLShader(const OpenGL::Device& device, Tegra::Engines::ShaderType shader_type, - u64 uid, std::vector<u64> code, std::vector<u64> code_b, u32 main_offset, - CompilerSettings compiler_settings, const Registry& registry, - VAddr cpu_addr); - - void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::Device& device, - Vulkan::VKScheduler& scheduler, - Vulkan::VKDescriptorPool& descriptor_pool, - Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, - std::vector<VkDescriptorSetLayoutBinding> bindings, - Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key, - u32 num_color_buffers); - -private: - void ShaderCompilerThread(Core::Frontend::GraphicsContext* context); - - /// Check our worker queue to see if we have any work queued already - [[nodiscard]] bool HasWorkQueued() const; - - struct WorkerParams { - Backend backend; - // For OGL - const OpenGL::Device* device; - Tegra::Engines::ShaderType shader_type; - u64 uid; - std::vector<u64> code; - std::vector<u64> code_b; - u32 main_offset; - CompilerSettings compiler_settings; - std::optional<Registry> registry; - VAddr cpu_address; - - // For Vulkan - Vulkan::VKPipelineCache* pp_cache; - const Vulkan::Device* vk_device; - Vulkan::VKScheduler* scheduler; - Vulkan::VKDescriptorPool* descriptor_pool; - Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue; - std::vector<VkDescriptorSetLayoutBinding> bindings; - Vulkan::SPIRVProgram program; - Vulkan::GraphicsPipelineCacheKey key; - u32 num_color_buffers; - }; - - std::condition_variable cv; - mutable std::mutex queue_mutex; - mutable std::shared_mutex completed_mutex; - std::atomic<bool> is_thread_exiting{}; - std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list; - std::vector<std::thread> worker_threads; - std::queue<WorkerParams> pending_queue; - std::vector<Result> finished_work; - Core::Frontend::EmuWindow& emu_window; -}; - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/compiler_settings.cpp b/src/video_core/shader/compiler_settings.cpp deleted file mode 100644 index cddcbd4f0..000000000 --- a/src/video_core/shader/compiler_settings.cpp +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "video_core/shader/compiler_settings.h" - -namespace VideoCommon::Shader { - -std::string CompileDepthAsString(const CompileDepth cd) { - switch (cd) { - case CompileDepth::BruteForce: - return "Brute Force Compile"; - case CompileDepth::FlowStack: - return "Simple Flow Stack Mode"; - case CompileDepth::NoFlowStack: - return "Remove Flow Stack"; - case CompileDepth::DecompileBackwards: - return "Decompile Backward Jumps"; - case CompileDepth::FullDecompile: - return "Full Decompilation"; - default: - return "Unknown Compiler Process"; - } -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/compiler_settings.h b/src/video_core/shader/compiler_settings.h deleted file mode 100644 index 916018c01..000000000 --- a/src/video_core/shader/compiler_settings.h +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include "video_core/engines/shader_bytecode.h" - -namespace VideoCommon::Shader { - -enum class CompileDepth : u32 { - BruteForce = 0, - FlowStack = 1, - NoFlowStack = 2, - DecompileBackwards = 3, - FullDecompile = 4, -}; - -std::string CompileDepthAsString(CompileDepth cd); - -struct CompilerSettings { - CompileDepth depth{CompileDepth::NoFlowStack}; - bool disable_else_derivation{true}; -}; - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp deleted file mode 100644 index 43d965f2f..000000000 --- a/src/video_core/shader/control_flow.cpp +++ /dev/null @@ -1,751 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <list> -#include <map> -#include <set> -#include <stack> -#include <unordered_map> -#include <vector> - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/shader/ast.h" -#include "video_core/shader/control_flow.h" -#include "video_core/shader/memory_util.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -namespace { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -constexpr s32 unassigned_branch = -2; - -struct Query { - u32 address{}; - std::stack<u32> ssy_stack{}; - std::stack<u32> pbk_stack{}; -}; - -struct BlockStack { - BlockStack() = default; - explicit BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {} - std::stack<u32> ssy_stack{}; - std::stack<u32> pbk_stack{}; -}; - -template <typename T, typename... Args> -BlockBranchInfo MakeBranchInfo(Args&&... args) { - static_assert(std::is_convertible_v<T, BranchData>); - return std::make_shared<BranchData>(T(std::forward<Args>(args)...)); -} - -bool BlockBranchIsIgnored(BlockBranchInfo first) { - bool ignore = false; - if (std::holds_alternative<SingleBranch>(*first)) { - const auto branch = std::get_if<SingleBranch>(first.get()); - ignore = branch->ignore; - } - return ignore; -} - -struct BlockInfo { - u32 start{}; - u32 end{}; - bool visited{}; - BlockBranchInfo branch{}; - - bool IsInside(const u32 address) const { - return start <= address && address <= end; - } -}; - -struct CFGRebuildState { - explicit CFGRebuildState(const ProgramCode& program_code_, u32 start_, Registry& registry_) - : program_code{program_code_}, registry{registry_}, start{start_} {} - - const ProgramCode& program_code; - Registry& registry; - u32 start{}; - std::vector<BlockInfo> block_info; - std::list<u32> inspect_queries; - std::list<Query> queries; - std::unordered_map<u32, u32> registered; - std::set<u32> labels; - std::map<u32, u32> ssy_labels; - std::map<u32, u32> pbk_labels; - std::unordered_map<u32, BlockStack> stacks; - ASTManager* manager{}; -}; - -enum class BlockCollision : u32 { None, Found, Inside }; - -std::pair<BlockCollision, u32> TryGetBlock(CFGRebuildState& state, u32 address) { - const auto& blocks = state.block_info; - for (u32 index = 0; index < blocks.size(); index++) { - if (blocks[index].start == address) { - return {BlockCollision::Found, index}; - } - if (blocks[index].IsInside(address)) { - return {BlockCollision::Inside, index}; - } - } - return {BlockCollision::None, 0xFFFFFFFF}; -} - -struct ParseInfo { - BlockBranchInfo branch_info{}; - u32 end_address{}; -}; - -BlockInfo& CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) { - auto& it = 
state.block_info.emplace_back(); - it.start = start; - it.end = end; - const u32 index = static_cast<u32>(state.block_info.size() - 1); - state.registered.insert({start, index}); - return it; -} - -Pred GetPredicate(u32 index, bool negated) { - return static_cast<Pred>(static_cast<u64>(index) + (negated ? 8ULL : 0ULL)); -} - -enum class ParseResult : u32 { - ControlCaught, - BlockEnd, - AbnormalFlow, -}; - -struct BranchIndirectInfo { - u32 buffer{}; - u32 offset{}; - u32 entries{}; - s32 relative_position{}; -}; - -struct BufferInfo { - u32 index; - u32 offset; -}; - -std::optional<std::pair<s32, u64>> GetBRXInfo(const CFGRebuildState& state, u32& pos) { - const Instruction instr = state.program_code[pos]; - const auto opcode = OpCode::Decode(instr); - if (opcode->get().GetId() != OpCode::Id::BRX) { - return std::nullopt; - } - if (instr.brx.constant_buffer != 0) { - return std::nullopt; - } - --pos; - return std::make_pair(instr.brx.GetBranchExtend(), instr.gpr8.Value()); -} - -template <typename Result, typename TestCallable, typename PackCallable> -// requires std::predicate<TestCallable, Instruction, const OpCode::Matcher&> -// requires std::invocable<PackCallable, Instruction, const OpCode::Matcher&> -std::optional<Result> TrackInstruction(const CFGRebuildState& state, u32& pos, TestCallable test, - PackCallable pack) { - for (; pos >= state.start; --pos) { - if (IsSchedInstruction(pos, state.start)) { - continue; - } - const Instruction instr = state.program_code[pos]; - const auto opcode = OpCode::Decode(instr); - if (!opcode) { - continue; - } - if (test(instr, opcode->get())) { - --pos; - return std::make_optional(pack(instr, opcode->get())); - } - } - return std::nullopt; -} - -std::optional<std::pair<BufferInfo, u64>> TrackLDC(const CFGRebuildState& state, u32& pos, - u64 brx_tracked_register) { - return TrackInstruction<std::pair<BufferInfo, u64>>( - state, pos, - [brx_tracked_register](auto instr, const auto& opcode) { - return opcode.GetId() == OpCode::Id::LD_C && - instr.gpr0.Value() == brx_tracked_register && - instr.ld_c.type.Value() == Tegra::Shader::UniformType::Single; - }, - [](auto instr, const auto& opcode) { - const BufferInfo info = {static_cast<u32>(instr.cbuf36.index.Value()), - static_cast<u32>(instr.cbuf36.GetOffset())}; - return std::make_pair(info, instr.gpr8.Value()); - }); -} - -std::optional<u64> TrackSHLRegister(const CFGRebuildState& state, u32& pos, - u64 ldc_tracked_register) { - return TrackInstruction<u64>( - state, pos, - [ldc_tracked_register](auto instr, const auto& opcode) { - return opcode.GetId() == OpCode::Id::SHL_IMM && - instr.gpr0.Value() == ldc_tracked_register; - }, - [](auto instr, const auto&) { return instr.gpr8.Value(); }); -} - -std::optional<u32> TrackIMNMXValue(const CFGRebuildState& state, u32& pos, - u64 shl_tracked_register) { - return TrackInstruction<u32>( - state, pos, - [shl_tracked_register](auto instr, const auto& opcode) { - return opcode.GetId() == OpCode::Id::IMNMX_IMM && - instr.gpr0.Value() == shl_tracked_register; - }, - [](auto instr, const auto&) { - return static_cast<u32>(instr.alu.GetSignedImm20_20() + 1); - }); -} - -std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& state, u32 pos) { - const auto brx_info = GetBRXInfo(state, pos); - if (!brx_info) { - return std::nullopt; - } - const auto [relative_position, brx_tracked_register] = *brx_info; - - const auto ldc_info = TrackLDC(state, pos, brx_tracked_register); - if (!ldc_info) { - return std::nullopt; - } - const auto 
[buffer_info, ldc_tracked_register] = *ldc_info; - - const auto shl_tracked_register = TrackSHLRegister(state, pos, ldc_tracked_register); - if (!shl_tracked_register) { - return std::nullopt; - } - - const auto entries = TrackIMNMXValue(state, pos, *shl_tracked_register); - if (!entries) { - return std::nullopt; - } - - return BranchIndirectInfo{buffer_info.index, buffer_info.offset, *entries, relative_position}; -} - -std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) { - u32 offset = static_cast<u32>(address); - const u32 end_address = static_cast<u32>(state.program_code.size()); - ParseInfo parse_info{}; - SingleBranch single_branch{}; - - const auto insert_label = [](CFGRebuildState& rebuild_state, u32 label_address) { - const auto pair = rebuild_state.labels.emplace(label_address); - if (pair.second) { - rebuild_state.inspect_queries.push_back(label_address); - } - }; - - while (true) { - if (offset >= end_address) { - // ASSERT_OR_EXECUTE can't be used, as it ignores the break - ASSERT_MSG(false, "Shader passed the current limit!"); - - single_branch.address = exit_branch; - single_branch.ignore = false; - break; - } - if (state.registered.contains(offset)) { - single_branch.address = offset; - single_branch.ignore = true; - break; - } - if (IsSchedInstruction(offset, state.start)) { - offset++; - continue; - } - const Instruction instr = {state.program_code[offset]}; - const auto opcode = OpCode::Decode(instr); - if (!opcode || opcode->get().GetType() != OpCode::Type::Flow) { - offset++; - continue; - } - - switch (opcode->get().GetId()) { - case OpCode::Id::EXIT: { - const auto pred_index = static_cast<u32>(instr.pred.pred_index); - single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); - if (single_branch.condition.predicate == Pred::NeverExecute) { - offset++; - continue; - } - const ConditionCode cc = instr.flow_condition_code; - single_branch.condition.cc = cc; - if (cc == ConditionCode::F) { - offset++; - continue; - } - single_branch.address = exit_branch; - single_branch.kill = false; - single_branch.is_sync = false; - single_branch.is_brk = false; - single_branch.ignore = false; - parse_info.end_address = offset; - parse_info.branch_info = MakeBranchInfo<SingleBranch>( - single_branch.condition, single_branch.address, single_branch.kill, - single_branch.is_sync, single_branch.is_brk, single_branch.ignore); - - return {ParseResult::ControlCaught, parse_info}; - } - case OpCode::Id::BRA: { - if (instr.bra.constant_buffer != 0) { - return {ParseResult::AbnormalFlow, parse_info}; - } - const auto pred_index = static_cast<u32>(instr.pred.pred_index); - single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); - if (single_branch.condition.predicate == Pred::NeverExecute) { - offset++; - continue; - } - const ConditionCode cc = instr.flow_condition_code; - single_branch.condition.cc = cc; - if (cc == ConditionCode::F) { - offset++; - continue; - } - const u32 branch_offset = offset + instr.bra.GetBranchTarget(); - if (branch_offset == 0) { - single_branch.address = exit_branch; - } else { - single_branch.address = branch_offset; - } - insert_label(state, branch_offset); - single_branch.kill = false; - single_branch.is_sync = false; - single_branch.is_brk = false; - single_branch.ignore = false; - parse_info.end_address = offset; - parse_info.branch_info = MakeBranchInfo<SingleBranch>( - single_branch.condition, single_branch.address, single_branch.kill, - single_branch.is_sync, 
single_branch.is_brk, single_branch.ignore); - - return {ParseResult::ControlCaught, parse_info}; - } - case OpCode::Id::SYNC: { - const auto pred_index = static_cast<u32>(instr.pred.pred_index); - single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); - if (single_branch.condition.predicate == Pred::NeverExecute) { - offset++; - continue; - } - const ConditionCode cc = instr.flow_condition_code; - single_branch.condition.cc = cc; - if (cc == ConditionCode::F) { - offset++; - continue; - } - single_branch.address = unassigned_branch; - single_branch.kill = false; - single_branch.is_sync = true; - single_branch.is_brk = false; - single_branch.ignore = false; - parse_info.end_address = offset; - parse_info.branch_info = MakeBranchInfo<SingleBranch>( - single_branch.condition, single_branch.address, single_branch.kill, - single_branch.is_sync, single_branch.is_brk, single_branch.ignore); - - return {ParseResult::ControlCaught, parse_info}; - } - case OpCode::Id::BRK: { - const auto pred_index = static_cast<u32>(instr.pred.pred_index); - single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); - if (single_branch.condition.predicate == Pred::NeverExecute) { - offset++; - continue; - } - const ConditionCode cc = instr.flow_condition_code; - single_branch.condition.cc = cc; - if (cc == ConditionCode::F) { - offset++; - continue; - } - single_branch.address = unassigned_branch; - single_branch.kill = false; - single_branch.is_sync = false; - single_branch.is_brk = true; - single_branch.ignore = false; - parse_info.end_address = offset; - parse_info.branch_info = MakeBranchInfo<SingleBranch>( - single_branch.condition, single_branch.address, single_branch.kill, - single_branch.is_sync, single_branch.is_brk, single_branch.ignore); - - return {ParseResult::ControlCaught, parse_info}; - } - case OpCode::Id::KIL: { - const auto pred_index = static_cast<u32>(instr.pred.pred_index); - single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); - if (single_branch.condition.predicate == Pred::NeverExecute) { - offset++; - continue; - } - const ConditionCode cc = instr.flow_condition_code; - single_branch.condition.cc = cc; - if (cc == ConditionCode::F) { - offset++; - continue; - } - single_branch.address = exit_branch; - single_branch.kill = true; - single_branch.is_sync = false; - single_branch.is_brk = false; - single_branch.ignore = false; - parse_info.end_address = offset; - parse_info.branch_info = MakeBranchInfo<SingleBranch>( - single_branch.condition, single_branch.address, single_branch.kill, - single_branch.is_sync, single_branch.is_brk, single_branch.ignore); - - return {ParseResult::ControlCaught, parse_info}; - } - case OpCode::Id::SSY: { - const u32 target = offset + instr.bra.GetBranchTarget(); - insert_label(state, target); - state.ssy_labels.emplace(offset, target); - break; - } - case OpCode::Id::PBK: { - const u32 target = offset + instr.bra.GetBranchTarget(); - insert_label(state, target); - state.pbk_labels.emplace(offset, target); - break; - } - case OpCode::Id::BRX: { - const auto tmp = TrackBranchIndirectInfo(state, offset); - if (!tmp) { - LOG_WARNING(HW_GPU, "BRX Track Unsuccesful"); - return {ParseResult::AbnormalFlow, parse_info}; - } - - const auto result = *tmp; - const s32 pc_target = offset + result.relative_position; - std::vector<CaseBranch> branches; - for (u32 i = 0; i < result.entries; i++) { - auto key = state.registry.ObtainKey(result.buffer, result.offset + i * 4); - if (!key) { 
- return {ParseResult::AbnormalFlow, parse_info}; - } - u32 value = *key; - u32 target = static_cast<u32>((value >> 3) + pc_target); - insert_label(state, target); - branches.emplace_back(value, target); - } - parse_info.end_address = offset; - parse_info.branch_info = MakeBranchInfo<MultiBranch>( - static_cast<u32>(instr.gpr8.Value()), std::move(branches)); - - return {ParseResult::ControlCaught, parse_info}; - } - default: - break; - } - - offset++; - } - single_branch.kill = false; - single_branch.is_sync = false; - single_branch.is_brk = false; - parse_info.end_address = offset - 1; - parse_info.branch_info = MakeBranchInfo<SingleBranch>( - single_branch.condition, single_branch.address, single_branch.kill, single_branch.is_sync, - single_branch.is_brk, single_branch.ignore); - return {ParseResult::BlockEnd, parse_info}; -} - -bool TryInspectAddress(CFGRebuildState& state) { - if (state.inspect_queries.empty()) { - return false; - } - - const u32 address = state.inspect_queries.front(); - state.inspect_queries.pop_front(); - const auto [result, block_index] = TryGetBlock(state, address); - switch (result) { - case BlockCollision::Found: { - return true; - } - case BlockCollision::Inside: { - // This case is the tricky one: - // We need to split the block into 2 separate blocks - const u32 end = state.block_info[block_index].end; - BlockInfo& new_block = CreateBlockInfo(state, address, end); - BlockInfo& current_block = state.block_info[block_index]; - current_block.end = address - 1; - new_block.branch = std::move(current_block.branch); - BlockBranchInfo forward_branch = MakeBranchInfo<SingleBranch>(); - const auto branch = std::get_if<SingleBranch>(forward_branch.get()); - branch->address = address; - branch->ignore = true; - current_block.branch = std::move(forward_branch); - return true; - } - default: - break; - } - const auto [parse_result, parse_info] = ParseCode(state, address); - if (parse_result == ParseResult::AbnormalFlow) { - // if it's AbnormalFlow, we end it as false, ending the CFG reconstruction - return false; - } - - BlockInfo& block_info = CreateBlockInfo(state, address, parse_info.end_address); - block_info.branch = parse_info.branch_info; - if (std::holds_alternative<SingleBranch>(*block_info.branch)) { - const auto branch = std::get_if<SingleBranch>(block_info.branch.get()); - if (branch->condition.IsUnconditional()) { - return true; - } - const u32 fallthrough_address = parse_info.end_address + 1; - state.inspect_queries.push_front(fallthrough_address); - return true; - } - return true; -} - -bool TryQuery(CFGRebuildState& state) { - const auto gather_labels = [](std::stack<u32>& cc, std::map<u32, u32>& labels, - BlockInfo& block) { - auto gather_start = labels.lower_bound(block.start); - const auto gather_end = labels.upper_bound(block.end); - while (gather_start != gather_end) { - cc.push(gather_start->second); - ++gather_start; - } - }; - if (state.queries.empty()) { - return false; - } - - Query& q = state.queries.front(); - const u32 block_index = state.registered[q.address]; - BlockInfo& block = state.block_info[block_index]; - // If the block is visited, check if the stacks match, else gather the ssy/pbk - // labels into the current stack and look if the branch at the end of the block - // consumes a label. 
Schedule new queries accordingly - if (block.visited) { - BlockStack& stack = state.stacks[q.address]; - const bool all_okay = (stack.ssy_stack.empty() || q.ssy_stack == stack.ssy_stack) && - (stack.pbk_stack.empty() || q.pbk_stack == stack.pbk_stack); - state.queries.pop_front(); - return all_okay; - } - block.visited = true; - state.stacks.insert_or_assign(q.address, BlockStack{q}); - - Query q2(q); - state.queries.pop_front(); - gather_labels(q2.ssy_stack, state.ssy_labels, block); - gather_labels(q2.pbk_stack, state.pbk_labels, block); - if (std::holds_alternative<SingleBranch>(*block.branch)) { - auto* branch = std::get_if<SingleBranch>(block.branch.get()); - if (!branch->condition.IsUnconditional()) { - q2.address = block.end + 1; - state.queries.push_back(q2); - } - - auto& conditional_query = state.queries.emplace_back(q2); - if (branch->is_sync) { - if (branch->address == unassigned_branch) { - branch->address = conditional_query.ssy_stack.top(); - } - conditional_query.ssy_stack.pop(); - } - if (branch->is_brk) { - if (branch->address == unassigned_branch) { - branch->address = conditional_query.pbk_stack.top(); - } - conditional_query.pbk_stack.pop(); - } - conditional_query.address = branch->address; - return true; - } - - const auto* multi_branch = std::get_if<MultiBranch>(block.branch.get()); - for (const auto& branch_case : multi_branch->branches) { - auto& conditional_query = state.queries.emplace_back(q2); - conditional_query.address = branch_case.address; - } - - return true; -} - -void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) { - const auto get_expr = [](const Condition& cond) -> Expr { - Expr result; - if (cond.cc != ConditionCode::T) { - result = MakeExpr<ExprCondCode>(cond.cc); - } - if (cond.predicate != Pred::UnusedIndex) { - u32 pred = static_cast<u32>(cond.predicate); - bool negate = false; - if (pred > 7) { - negate = true; - pred -= 8; - } - Expr extra = MakeExpr<ExprPredicate>(pred); - if (negate) { - extra = MakeExpr<ExprNot>(std::move(extra)); - } - if (result) { - return MakeExpr<ExprAnd>(std::move(extra), std::move(result)); - } - return extra; - } - if (result) { - return result; - } - return MakeExpr<ExprBoolean>(true); - }; - - if (std::holds_alternative<SingleBranch>(*branch_info)) { - const auto* branch = std::get_if<SingleBranch>(branch_info.get()); - if (branch->address < 0) { - if (branch->kill) { - mm.InsertReturn(get_expr(branch->condition), true); - return; - } - mm.InsertReturn(get_expr(branch->condition), false); - return; - } - mm.InsertGoto(get_expr(branch->condition), branch->address); - return; - } - const auto* multi_branch = std::get_if<MultiBranch>(branch_info.get()); - for (const auto& branch_case : multi_branch->branches) { - mm.InsertGoto(MakeExpr<ExprGprEqual>(multi_branch->gpr, branch_case.cmp_value), - branch_case.address); - } -} - -void DecompileShader(CFGRebuildState& state) { - state.manager->Init(); - for (auto label : state.labels) { - state.manager->DeclareLabel(label); - } - for (const auto& block : state.block_info) { - if (state.labels.contains(block.start)) { - state.manager->InsertLabel(block.start); - } - const bool ignore = BlockBranchIsIgnored(block.branch); - const u32 end = ignore ? 
block.end + 1 : block.end; - state.manager->InsertBlock(block.start, end); - if (!ignore) { - InsertBranch(*state.manager, block.branch); - } - } - state.manager->Decompile(); -} - -} // Anonymous namespace - -std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, - const CompilerSettings& settings, - Registry& registry) { - auto result_out = std::make_unique<ShaderCharacteristics>(); - if (settings.depth == CompileDepth::BruteForce) { - result_out->settings.depth = CompileDepth::BruteForce; - return result_out; - } - - CFGRebuildState state{program_code, start_address, registry}; - // Inspect Code and generate blocks - state.labels.clear(); - state.labels.emplace(start_address); - state.inspect_queries.push_back(state.start); - while (!state.inspect_queries.empty()) { - if (!TryInspectAddress(state)) { - result_out->settings.depth = CompileDepth::BruteForce; - return result_out; - } - } - - bool use_flow_stack = true; - - bool decompiled = false; - - if (settings.depth != CompileDepth::FlowStack) { - // Decompile Stacks - state.queries.push_back(Query{state.start, {}, {}}); - decompiled = true; - while (!state.queries.empty()) { - if (!TryQuery(state)) { - decompiled = false; - break; - } - } - } - - use_flow_stack = !decompiled; - - // Sort and organize results - std::sort(state.block_info.begin(), state.block_info.end(), - [](const BlockInfo& a, const BlockInfo& b) -> bool { return a.start < b.start; }); - if (decompiled && settings.depth != CompileDepth::NoFlowStack) { - ASTManager manager{settings.depth != CompileDepth::DecompileBackwards, - settings.disable_else_derivation}; - state.manager = &manager; - DecompileShader(state); - decompiled = state.manager->IsFullyDecompiled(); - if (!decompiled) { - if (settings.depth == CompileDepth::FullDecompile) { - LOG_CRITICAL(HW_GPU, "Failed to remove all the gotos!:"); - } else { - LOG_CRITICAL(HW_GPU, "Failed to remove all backward gotos!:"); - } - state.manager->ShowCurrentState("Of Shader"); - state.manager->Clear(); - } else { - auto characteristics = std::make_unique<ShaderCharacteristics>(); - characteristics->start = start_address; - characteristics->settings.depth = settings.depth; - characteristics->manager = std::move(manager); - characteristics->end = state.block_info.back().end + 1; - return characteristics; - } - } - - result_out->start = start_address; - result_out->settings.depth = - use_flow_stack ? 
CompileDepth::FlowStack : CompileDepth::NoFlowStack; - result_out->blocks.clear(); - for (auto& block : state.block_info) { - ShaderBlock new_block{}; - new_block.start = block.start; - new_block.end = block.end; - new_block.ignore_branch = BlockBranchIsIgnored(block.branch); - if (!new_block.ignore_branch) { - new_block.branch = block.branch; - } - result_out->end = std::max(result_out->end, block.end); - result_out->blocks.push_back(new_block); - } - if (!use_flow_stack) { - result_out->labels = std::move(state.labels); - return result_out; - } - - auto back = result_out->blocks.begin(); - auto next = std::next(back); - while (next != result_out->blocks.end()) { - if (!state.labels.contains(next->start) && next->start == back->end + 1) { - back->end = next->end; - next = result_out->blocks.erase(next); - continue; - } - back = next; - ++next; - } - - return result_out; -} -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h deleted file mode 100644 index 37bf96492..000000000 --- a/src/video_core/shader/control_flow.h +++ /dev/null @@ -1,117 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <list> -#include <optional> -#include <set> -#include <variant> - -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/ast.h" -#include "video_core/shader/compiler_settings.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::ConditionCode; -using Tegra::Shader::Pred; - -constexpr s32 exit_branch = -1; - -struct Condition { - Pred predicate{Pred::UnusedIndex}; - ConditionCode cc{ConditionCode::T}; - - bool IsUnconditional() const { - return predicate == Pred::UnusedIndex && cc == ConditionCode::T; - } - - bool operator==(const Condition& other) const { - return std::tie(predicate, cc) == std::tie(other.predicate, other.cc); - } - - bool operator!=(const Condition& other) const { - return !operator==(other); - } -}; - -class SingleBranch { -public: - SingleBranch() = default; - explicit SingleBranch(Condition condition_, s32 address_, bool kill_, bool is_sync_, - bool is_brk_, bool ignore_) - : condition{condition_}, address{address_}, kill{kill_}, is_sync{is_sync_}, is_brk{is_brk_}, - ignore{ignore_} {} - - bool operator==(const SingleBranch& b) const { - return std::tie(condition, address, kill, is_sync, is_brk, ignore) == - std::tie(b.condition, b.address, b.kill, b.is_sync, b.is_brk, b.ignore); - } - - bool operator!=(const SingleBranch& b) const { - return !operator==(b); - } - - Condition condition{}; - s32 address{exit_branch}; - bool kill{}; - bool is_sync{}; - bool is_brk{}; - bool ignore{}; -}; - -struct CaseBranch { - explicit CaseBranch(u32 cmp_value_, u32 address_) : cmp_value{cmp_value_}, address{address_} {} - u32 cmp_value; - u32 address; -}; - -class MultiBranch { -public: - explicit MultiBranch(u32 gpr_, std::vector<CaseBranch>&& branches_) - : gpr{gpr_}, branches{std::move(branches_)} {} - - u32 gpr{}; - std::vector<CaseBranch> branches{}; -}; - -using BranchData = std::variant<SingleBranch, MultiBranch>; -using BlockBranchInfo = std::shared_ptr<BranchData>; - -bool BlockBranchInfoAreEqual(BlockBranchInfo first, BlockBranchInfo second); - -struct ShaderBlock { - u32 start{}; - u32 end{}; - bool ignore_branch{}; - BlockBranchInfo branch{}; - - bool operator==(const ShaderBlock& sb) 
const { - return std::tie(start, end, ignore_branch) == - std::tie(sb.start, sb.end, sb.ignore_branch) && - BlockBranchInfoAreEqual(branch, sb.branch); - } - - bool operator!=(const ShaderBlock& sb) const { - return !operator==(sb); - } -}; - -struct ShaderCharacteristics { - std::list<ShaderBlock> blocks{}; - std::set<u32> labels{}; - u32 start{}; - u32 end{}; - ASTManager manager{true, true}; - CompilerSettings settings{}; -}; - -std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, - const CompilerSettings& settings, - Registry& registry); - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp deleted file mode 100644 index 6576d1208..000000000 --- a/src/video_core/shader/decode.cpp +++ /dev/null @@ -1,368 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <cstring> -#include <limits> -#include <set> - -#include <fmt/format.h> - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/engines/shader_header.h" -#include "video_core/shader/control_flow.h" -#include "video_core/shader/memory_util.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -namespace { - -void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver, - const std::list<SamplerEntry>& used_samplers) { - if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) { - return; - } - u32 count{}; - std::vector<u32> bound_offsets; - for (const auto& sampler : used_samplers) { - if (sampler.is_bindless) { - continue; - } - ++count; - bound_offsets.emplace_back(sampler.offset); - } - if (count > 1) { - gpu_driver.DeduceTextureHandlerSize(std::move(bound_offsets)); - } -} - -std::optional<u32> TryDeduceSamplerSize(const SamplerEntry& sampler_to_deduce, - VideoCore::GuestDriverProfile& gpu_driver, - const std::list<SamplerEntry>& used_samplers) { - const u32 base_offset = sampler_to_deduce.offset; - u32 max_offset{std::numeric_limits<u32>::max()}; - for (const auto& sampler : used_samplers) { - if (sampler.is_bindless) { - continue; - } - if (sampler.offset > base_offset) { - max_offset = std::min(sampler.offset, max_offset); - } - } - if (max_offset == std::numeric_limits<u32>::max()) { - return std::nullopt; - } - return ((max_offset - base_offset) * 4) / gpu_driver.GetTextureHandlerSize(); -} - -} // Anonymous namespace - -class ASTDecoder { -public: - explicit ASTDecoder(ShaderIR& ir_) : ir(ir_) {} - - void operator()(ASTProgram& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(ASTIfThen& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(ASTIfElse& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(ASTBlockEncoded& ast) {} - - void operator()(ASTBlockDecoded& ast) {} - - void operator()(ASTVarSet& ast) {} - - void operator()(ASTLabel& ast) {} - - void operator()(ASTGoto& ast) {} - - void operator()(ASTDoWhile& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); 
- current = current->GetNext(); - } - } - - void operator()(ASTReturn& ast) {} - - void operator()(ASTBreak& ast) {} - - void Visit(ASTNode& node) { - std::visit(*this, *node->GetInnerData()); - if (node->IsBlockEncoded()) { - auto block = std::get_if<ASTBlockEncoded>(node->GetInnerData()); - NodeBlock bb = ir.DecodeRange(block->start, block->end); - node->TransformBlockEncoded(std::move(bb)); - } - } - -private: - ShaderIR& ir; -}; - -void ShaderIR::Decode() { - std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); - - decompiled = false; - auto info = ScanFlow(program_code, main_offset, settings, registry); - auto& shader_info = *info; - coverage_begin = shader_info.start; - coverage_end = shader_info.end; - switch (shader_info.settings.depth) { - case CompileDepth::FlowStack: { - for (const auto& block : shader_info.blocks) { - basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)}); - } - break; - } - case CompileDepth::NoFlowStack: { - disable_flow_stack = true; - const auto insert_block = [this](NodeBlock& nodes, u32 label) { - if (label == static_cast<u32>(exit_branch)) { - return; - } - basic_blocks.insert({label, nodes}); - }; - const auto& blocks = shader_info.blocks; - NodeBlock current_block; - u32 current_label = static_cast<u32>(exit_branch); - for (const auto& block : blocks) { - if (shader_info.labels.contains(block.start)) { - insert_block(current_block, current_label); - current_block.clear(); - current_label = block.start; - } - if (!block.ignore_branch) { - DecodeRangeInner(current_block, block.start, block.end); - InsertControlFlow(current_block, block); - } else { - DecodeRangeInner(current_block, block.start, block.end + 1); - } - } - insert_block(current_block, current_label); - break; - } - case CompileDepth::DecompileBackwards: - case CompileDepth::FullDecompile: { - program_manager = std::move(shader_info.manager); - disable_flow_stack = true; - decompiled = true; - ASTDecoder decoder{*this}; - ASTNode program = GetASTProgram(); - decoder.Visit(program); - break; - } - default: - LOG_CRITICAL(HW_GPU, "Unknown decompilation mode!"); - [[fallthrough]]; - case CompileDepth::BruteForce: { - const auto shader_end = static_cast<u32>(program_code.size()); - coverage_begin = main_offset; - coverage_end = shader_end; - for (u32 label = main_offset; label < shader_end; ++label) { - basic_blocks.insert({label, DecodeRange(label, label + 1)}); - } - break; - } - } - if (settings.depth != shader_info.settings.depth) { - LOG_WARNING( - HW_GPU, "Decompiling to this setting \"{}\" failed, downgrading to this setting \"{}\"", - CompileDepthAsString(settings.depth), CompileDepthAsString(shader_info.settings.depth)); - } -} - -NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) { - NodeBlock basic_block; - DecodeRangeInner(basic_block, begin, end); - return basic_block; -} - -void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) { - for (u32 pc = begin; pc < (begin > end ? 
MAX_PROGRAM_LENGTH : end);) { - pc = DecodeInstr(bb, pc); - } -} - -void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { - const auto apply_conditions = [&](const Condition& cond, Node n) -> Node { - Node result = n; - if (cond.cc != ConditionCode::T) { - result = Conditional(GetConditionCode(cond.cc), {result}); - } - if (cond.predicate != Pred::UnusedIndex) { - u32 pred = static_cast<u32>(cond.predicate); - const bool is_neg = pred > 7; - if (is_neg) { - pred -= 8; - } - result = Conditional(GetPredicate(pred, is_neg), {result}); - } - return result; - }; - if (std::holds_alternative<SingleBranch>(*block.branch)) { - auto branch = std::get_if<SingleBranch>(block.branch.get()); - if (branch->address < 0) { - if (branch->kill) { - Node n = Operation(OperationCode::Discard); - n = apply_conditions(branch->condition, n); - bb.push_back(n); - global_code.push_back(n); - return; - } - Node n = Operation(OperationCode::Exit); - n = apply_conditions(branch->condition, n); - bb.push_back(n); - global_code.push_back(n); - return; - } - Node n = Operation(OperationCode::Branch, Immediate(branch->address)); - n = apply_conditions(branch->condition, n); - bb.push_back(n); - global_code.push_back(n); - return; - } - auto multi_branch = std::get_if<MultiBranch>(block.branch.get()); - Node op_a = GetRegister(multi_branch->gpr); - for (auto& branch_case : multi_branch->branches) { - Node n = Operation(OperationCode::Branch, Immediate(branch_case.address)); - Node op_b = Immediate(branch_case.cmp_value); - Node condition = - GetPredicateComparisonInteger(Tegra::Shader::PredCondition::EQ, false, op_a, op_b); - auto result = Conditional(condition, {n}); - bb.push_back(result); - global_code.push_back(result); - } -} - -u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { - // Ignore sched instructions when generating code. 
- if (IsSchedInstruction(pc, main_offset)) { - return pc + 1; - } - - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - const u32 nv_address = ConvertAddressToNvidiaSpace(pc); - - // Decoding failure - if (!opcode) { - UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value); - bb.push_back(Comment(fmt::format("{:05x} Unimplemented Shader instruction (0x{:016x})", - nv_address, instr.value))); - return pc + 1; - } - - bb.push_back(Comment( - fmt::format("{:05x} {} (0x{:016x})", nv_address, opcode->get().GetName(), instr.value))); - - using Tegra::Shader::Pred; - UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute, - "NeverExecute predicate not implemented"); - - static const std::map<OpCode::Type, u32 (ShaderIR::*)(NodeBlock&, u32)> decoders = { - {OpCode::Type::Arithmetic, &ShaderIR::DecodeArithmetic}, - {OpCode::Type::ArithmeticImmediate, &ShaderIR::DecodeArithmeticImmediate}, - {OpCode::Type::Bfe, &ShaderIR::DecodeBfe}, - {OpCode::Type::Bfi, &ShaderIR::DecodeBfi}, - {OpCode::Type::Shift, &ShaderIR::DecodeShift}, - {OpCode::Type::ArithmeticInteger, &ShaderIR::DecodeArithmeticInteger}, - {OpCode::Type::ArithmeticIntegerImmediate, &ShaderIR::DecodeArithmeticIntegerImmediate}, - {OpCode::Type::ArithmeticHalf, &ShaderIR::DecodeArithmeticHalf}, - {OpCode::Type::ArithmeticHalfImmediate, &ShaderIR::DecodeArithmeticHalfImmediate}, - {OpCode::Type::Ffma, &ShaderIR::DecodeFfma}, - {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2}, - {OpCode::Type::Conversion, &ShaderIR::DecodeConversion}, - {OpCode::Type::Warp, &ShaderIR::DecodeWarp}, - {OpCode::Type::Memory, &ShaderIR::DecodeMemory}, - {OpCode::Type::Texture, &ShaderIR::DecodeTexture}, - {OpCode::Type::Image, &ShaderIR::DecodeImage}, - {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate}, - {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate}, - {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate}, - {OpCode::Type::PredicateSetRegister, &ShaderIR::DecodePredicateSetRegister}, - {OpCode::Type::PredicateSetPredicate, &ShaderIR::DecodePredicateSetPredicate}, - {OpCode::Type::RegisterSetPredicate, &ShaderIR::DecodeRegisterSetPredicate}, - {OpCode::Type::FloatSet, &ShaderIR::DecodeFloatSet}, - {OpCode::Type::IntegerSet, &ShaderIR::DecodeIntegerSet}, - {OpCode::Type::HalfSet, &ShaderIR::DecodeHalfSet}, - {OpCode::Type::Video, &ShaderIR::DecodeVideo}, - {OpCode::Type::Xmad, &ShaderIR::DecodeXmad}, - }; - - std::vector<Node> tmp_block; - if (const auto decoder = decoders.find(opcode->get().GetType()); decoder != decoders.end()) { - pc = (this->*decoder->second)(tmp_block, pc); - } else { - pc = DecodeOther(tmp_block, pc); - } - - // Some instructions (like SSY) don't have a predicate field, they are always unconditionally - // executed. 
- const bool can_be_predicated = OpCode::IsPredicatedInstruction(opcode->get().GetId()); - const auto pred_index = static_cast<u32>(instr.pred.pred_index); - - if (can_be_predicated && pred_index != static_cast<u32>(Pred::UnusedIndex)) { - const Node conditional = - Conditional(GetPredicate(pred_index, instr.negate_pred != 0), std::move(tmp_block)); - global_code.push_back(conditional); - bb.push_back(conditional); - } else { - for (auto& node : tmp_block) { - global_code.push_back(node); - bb.push_back(node); - } - } - - return pc + 1; -} - -void ShaderIR::PostDecode() { - // Deduce texture handler size if needed - auto gpu_driver = registry.AccessGuestDriverProfile(); - DeduceTextureHandlerSize(gpu_driver, used_samplers); - // Deduce Indexed Samplers - if (!uses_indexed_samplers) { - return; - } - for (auto& sampler : used_samplers) { - if (!sampler.is_indexed) { - continue; - } - if (const auto size = TryDeduceSamplerSize(sampler, gpu_driver, used_samplers)) { - sampler.size = *size; - } else { - LOG_CRITICAL(HW_GPU, "Failed to deduce size of indexed sampler"); - sampler.size = 1; - } - } -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp deleted file mode 100644 index 15eb700e7..000000000 --- a/src/video_core/shader/decode/arithmetic.cpp +++ /dev/null @@ -1,166 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::SubOp; - -u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - Node op_a = GetRegister(instr.gpr8); - - Node op_b = [&] { - if (instr.is_b_imm) { - return GetImmediate19(instr); - } else if (instr.is_b_gpr) { - return GetRegister(instr.gpr20); - } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - } - }(); - - switch (opcode->get().GetId()) { - case OpCode::Id::MOV_C: - case OpCode::Id::MOV_R: { - // MOV does not have neither 'abs' nor 'neg' bits. - SetRegister(bb, instr.gpr0, op_b); - break; - } - case OpCode::Id::FMUL_C: - case OpCode::Id::FMUL_R: - case OpCode::Id::FMUL_IMM: { - // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit. - if (instr.fmul.tab5cb8_2 != 0) { - LOG_DEBUG(HW_GPU, "FMUL tab5cb8_2({}) is not implemented", - instr.fmul.tab5cb8_2.Value()); - } - if (instr.fmul.tab5c68_0 != 1) { - LOG_DEBUG(HW_GPU, "FMUL tab5cb8_0({}) is not implemented", - instr.fmul.tab5c68_0.Value()); - } - - op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b); - - static constexpr std::array FmulPostFactor = { - 1.000f, // None - 0.500f, // Divide 2 - 0.250f, // Divide 4 - 0.125f, // Divide 8 - 8.000f, // Mul 8 - 4.000f, // Mul 4 - 2.000f, // Mul 2 - }; - - if (instr.fmul.postfactor != 0) { - op_a = Operation(OperationCode::FMul, NO_PRECISE, op_a, - Immediate(FmulPostFactor[instr.fmul.postfactor])); - } - - // TODO(Rodrigo): Should precise be used when there's a postfactor? 
- Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b); - - value = GetSaturatedFloat(value, instr.alu.saturate_d); - - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::FADD_C: - case OpCode::Id::FADD_R: - case OpCode::Id::FADD_IMM: { - op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); - op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); - - Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b); - value = GetSaturatedFloat(value, instr.alu.saturate_d); - - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::MUFU: { - op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); - - Node value = [&]() { - switch (instr.sub_op) { - case SubOp::Cos: - return Operation(OperationCode::FCos, PRECISE, op_a); - case SubOp::Sin: - return Operation(OperationCode::FSin, PRECISE, op_a); - case SubOp::Ex2: - return Operation(OperationCode::FExp2, PRECISE, op_a); - case SubOp::Lg2: - return Operation(OperationCode::FLog2, PRECISE, op_a); - case SubOp::Rcp: - return Operation(OperationCode::FDiv, PRECISE, Immediate(1.0f), op_a); - case SubOp::Rsq: - return Operation(OperationCode::FInverseSqrt, PRECISE, op_a); - case SubOp::Sqrt: - return Operation(OperationCode::FSqrt, PRECISE, op_a); - default: - UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}", instr.sub_op.Value()); - return Immediate(0); - } - }(); - value = GetSaturatedFloat(value, instr.alu.saturate_d); - - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::FMNMX_C: - case OpCode::Id::FMNMX_R: - case OpCode::Id::FMNMX_IMM: { - op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); - op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); - - const Node condition = GetPredicate(instr.alu.fmnmx.pred, instr.alu.fmnmx.negate_pred != 0); - - const Node min = Operation(OperationCode::FMin, NO_PRECISE, op_a, op_b); - const Node max = Operation(OperationCode::FMax, NO_PRECISE, op_a, op_b); - const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max); - - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::FCMP_RR: - case OpCode::Id::FCMP_RC: - case OpCode::Id::FCMP_IMMR: { - UNIMPLEMENTED_IF(instr.fcmp.ftz == 0); - Node op_c = GetRegister(instr.gpr39); - Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f)); - SetRegister( - bb, instr.gpr0, - Operation(OperationCode::Select, std::move(comp), std::move(op_a), std::move(op_b))); - break; - } - case OpCode::Id::RRO_C: - case OpCode::Id::RRO_R: - case OpCode::Id::RRO_IMM: { - LOG_DEBUG(HW_GPU, "(STUBBED) RRO used"); - - // Currently RRO is only implemented as a register move. 
- op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); - SetRegister(bb, instr.gpr0, op_b); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp deleted file mode 100644 index 88103fede..000000000 --- a/src/video_core/shader/decode/arithmetic_half.cpp +++ /dev/null @@ -1,101 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::HalfType; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - bool negate_a = false; - bool negate_b = false; - bool absolute_a = false; - bool absolute_b = false; - - switch (opcode->get().GetId()) { - case OpCode::Id::HADD2_R: - if (instr.alu_half.ftz == 0) { - LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); - } - negate_a = ((instr.value >> 43) & 1) != 0; - negate_b = ((instr.value >> 31) & 1) != 0; - absolute_a = ((instr.value >> 44) & 1) != 0; - absolute_b = ((instr.value >> 30) & 1) != 0; - break; - case OpCode::Id::HADD2_C: - if (instr.alu_half.ftz == 0) { - LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); - } - negate_a = ((instr.value >> 43) & 1) != 0; - negate_b = ((instr.value >> 56) & 1) != 0; - absolute_a = ((instr.value >> 44) & 1) != 0; - absolute_b = ((instr.value >> 54) & 1) != 0; - break; - case OpCode::Id::HMUL2_R: - negate_a = ((instr.value >> 43) & 1) != 0; - absolute_a = ((instr.value >> 44) & 1) != 0; - absolute_b = ((instr.value >> 30) & 1) != 0; - break; - case OpCode::Id::HMUL2_C: - negate_b = ((instr.value >> 31) & 1) != 0; - absolute_a = ((instr.value >> 44) & 1) != 0; - absolute_b = ((instr.value >> 54) & 1) != 0; - break; - default: - UNREACHABLE(); - break; - } - - Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a); - op_a = GetOperandAbsNegHalf(op_a, absolute_a, negate_a); - - auto [type_b, op_b] = [this, instr, opcode]() -> std::pair<HalfType, Node> { - switch (opcode->get().GetId()) { - case OpCode::Id::HADD2_C: - case OpCode::Id::HMUL2_C: - return {HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; - case OpCode::Id::HADD2_R: - case OpCode::Id::HMUL2_R: - return {instr.alu_half.type_b, GetRegister(instr.gpr20)}; - default: - UNREACHABLE(); - return {HalfType::F32, Immediate(0)}; - } - }(); - op_b = UnpackHalfFloat(op_b, type_b); - op_b = GetOperandAbsNegHalf(op_b, absolute_b, negate_b); - - Node value = [this, opcode, op_a, op_b = op_b] { - switch (opcode->get().GetId()) { - case OpCode::Id::HADD2_C: - case OpCode::Id::HADD2_R: - return Operation(OperationCode::HAdd, PRECISE, op_a, op_b); - case OpCode::Id::HMUL2_C: - case OpCode::Id::HMUL2_R: - return Operation(OperationCode::HMul, PRECISE, op_a, op_b); - default: - UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName()); - return Immediate(0); - } - }(); 
- value = GetSaturatedHalfFloat(value, instr.alu_half.saturate); - value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge); - - SetRegister(bb, instr.gpr0, value); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp deleted file mode 100644 index d179b9873..000000000 --- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) { - if (instr.alu_half_imm.ftz == 0) { - LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); - } - } else { - if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::FTZ) { - LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); - } - } - - Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a); - op_a = GetOperandAbsNegHalf(op_a, instr.alu_half_imm.abs_a, instr.alu_half_imm.negate_a); - - const Node op_b = UnpackHalfImmediate(instr, true); - - Node value = [&]() { - switch (opcode->get().GetId()) { - case OpCode::Id::HADD2_IMM: - return Operation(OperationCode::HAdd, PRECISE, op_a, op_b); - case OpCode::Id::HMUL2_IMM: - return Operation(OperationCode::HMul, PRECISE, op_a, op_b); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - value = GetSaturatedHalfFloat(value, instr.alu_half_imm.saturate); - value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half_imm.merge); - SetRegister(bb, instr.gpr0, value); - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic_immediate.cpp b/src/video_core/shader/decode/arithmetic_immediate.cpp deleted file mode 100644 index f1875967c..000000000 --- a/src/video_core/shader/decode/arithmetic_immediate.cpp +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeArithmeticImmediate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - switch (opcode->get().GetId()) { - case OpCode::Id::MOV32_IMM: { - SetRegister(bb, instr.gpr0, GetImmediate32(instr)); - break; - } - case OpCode::Id::FMUL32_IMM: { - Node value = - Operation(OperationCode::FMul, PRECISE, GetRegister(instr.gpr8), GetImmediate32(instr)); - value = GetSaturatedFloat(value, instr.fmul32.saturate); - - SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::FADD32I: { - const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fadd32i.abs_a, - instr.fadd32i.negate_a); - const Node op_b = GetOperandAbsNegFloat(GetImmediate32(instr), instr.fadd32i.abs_b, - instr.fadd32i.negate_b); - - const Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b); - SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled arithmetic immediate instruction: {}", - opcode->get().GetName()); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp deleted file mode 100644 index 7b5bb7003..000000000 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ /dev/null @@ -1,375 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::IAdd3Height; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; -using Tegra::Shader::Register; - -u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - Node op_a = GetRegister(instr.gpr8); - Node op_b = [&]() { - if (instr.is_b_imm) { - return Immediate(instr.alu.GetSignedImm20_20()); - } else if (instr.is_b_gpr) { - return GetRegister(instr.gpr20); - } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - } - }(); - - switch (opcode->get().GetId()) { - case OpCode::Id::IADD_C: - case OpCode::Id::IADD_R: - case OpCode::Id::IADD_IMM: { - UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD.SAT"); - UNIMPLEMENTED_IF_MSG(instr.iadd.x && instr.generates_cc, "IADD.X Rd.CC"); - - op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true); - op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true); - - Node value = Operation(OperationCode::UAdd, op_a, op_b); - - if (instr.iadd.x) { - Node carry = GetInternalFlag(InternalFlag::Carry); - Node x = Operation(OperationCode::Select, std::move(carry), Immediate(1), Immediate(0)); - value = Operation(OperationCode::UAdd, std::move(value), std::move(x)); - } - - if (instr.generates_cc) { - const Node i0 = Immediate(0); - - Node zero = Operation(OperationCode::LogicalIEqual, value, i0); - Node sign = Operation(OperationCode::LogicalILessThan, value, i0); - Node carry = Operation(OperationCode::LogicalAddCarry, op_a, op_b); - - Node pos_a = Operation(OperationCode::LogicalIGreaterThan, op_a, i0); - Node pos_b = Operation(OperationCode::LogicalIGreaterThan, op_b, i0); - Node pos = Operation(OperationCode::LogicalAnd, std::move(pos_a), std::move(pos_b)); - Node overflow = Operation(OperationCode::LogicalAnd, pos, sign); - - SetInternalFlag(bb, InternalFlag::Zero, std::move(zero)); - SetInternalFlag(bb, InternalFlag::Sign, std::move(sign)); - SetInternalFlag(bb, InternalFlag::Carry, std::move(carry)); - SetInternalFlag(bb, InternalFlag::Overflow, std::move(overflow)); - } - SetRegister(bb, instr.gpr0, std::move(value)); - break; - } - case OpCode::Id::IADD3_C: - case OpCode::Id::IADD3_R: - case OpCode::Id::IADD3_IMM: { - Node op_c = GetRegister(instr.gpr39); - - const auto ApplyHeight = [&](IAdd3Height height, Node value) { - switch (height) { - case IAdd3Height::None: - return value; - case IAdd3Height::LowerHalfWord: - return BitfieldExtract(value, 0, 16); - case IAdd3Height::UpperHalfWord: - return BitfieldExtract(value, 16, 16); - default: - UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}", height); - return Immediate(0); - } - }; - - if (opcode->get().GetId() == OpCode::Id::IADD3_R) { - op_a = ApplyHeight(instr.iadd3.height_a, op_a); - op_b = ApplyHeight(instr.iadd3.height_b, op_b); - op_c = ApplyHeight(instr.iadd3.height_c, op_c); - } - - op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd3.neg_a, true); - op_b = GetOperandAbsNegInteger(op_b, false, instr.iadd3.neg_b, true); - op_c = GetOperandAbsNegInteger(op_c, false, instr.iadd3.neg_c, true); - - const Node value = [&] { - Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b); - if (opcode->get().GetId() != 
OpCode::Id::IADD3_R) { - return Operation(OperationCode::IAdd, NO_PRECISE, add_ab, op_c); - } - const Node shifted = [&] { - switch (instr.iadd3.mode) { - case Tegra::Shader::IAdd3Mode::RightShift: - // TODO(tech4me): According to - // https://envytools.readthedocs.io/en/latest/hw/graph/maxwell/cuda/int.html?highlight=iadd3 - // The addition between op_a and op_b should be done in uint33, more - // investigation required - return Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, add_ab, - Immediate(16)); - case Tegra::Shader::IAdd3Mode::LeftShift: - return Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, add_ab, - Immediate(16)); - default: - return add_ab; - } - }(); - return Operation(OperationCode::IAdd, NO_PRECISE, shifted, op_c); - }(); - - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::ISCADD_C: - case OpCode::Id::ISCADD_R: - case OpCode::Id::ISCADD_IMM: { - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in ISCADD is not implemented"); - - op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true); - op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true); - - const Node shift = Immediate(static_cast<u32>(instr.alu_integer.shift_amount)); - const Node shifted_a = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, shift); - const Node value = Operation(OperationCode::IAdd, NO_PRECISE, shifted_a, op_b); - - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::POPC_C: - case OpCode::Id::POPC_R: - case OpCode::Id::POPC_IMM: { - if (instr.popc.invert) { - op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b); - } - const Node value = Operation(OperationCode::IBitCount, PRECISE, op_b); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::FLO_R: - case OpCode::Id::FLO_C: - case OpCode::Id::FLO_IMM: { - Node value; - if (instr.flo.invert) { - op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b)); - } - if (instr.flo.is_signed) { - value = Operation(OperationCode::IBitMSB, NO_PRECISE, std::move(op_b)); - } else { - value = Operation(OperationCode::UBitMSB, NO_PRECISE, std::move(op_b)); - } - if (instr.flo.sh) { - value = - Operation(OperationCode::UBitwiseXor, NO_PRECISE, std::move(value), Immediate(31)); - } - SetRegister(bb, instr.gpr0, std::move(value)); - break; - } - case OpCode::Id::SEL_C: - case OpCode::Id::SEL_R: - case OpCode::Id::SEL_IMM: { - const Node condition = GetPredicate(instr.sel.pred, instr.sel.neg_pred != 0); - const Node value = Operation(OperationCode::Select, PRECISE, condition, op_a, op_b); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::ICMP_CR: - case OpCode::Id::ICMP_R: - case OpCode::Id::ICMP_RC: - case OpCode::Id::ICMP_IMM: { - const Node zero = Immediate(0); - - const auto [op_rhs, test] = [&]() -> std::pair<Node, Node> { - switch (opcode->get().GetId()) { - case OpCode::Id::ICMP_CR: - return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), - GetRegister(instr.gpr39)}; - case OpCode::Id::ICMP_R: - return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; - case OpCode::Id::ICMP_RC: - return {GetRegister(instr.gpr39), - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; - case OpCode::Id::ICMP_IMM: - return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)}; - default: - UNREACHABLE(); - return {zero, 
zero}; - } - }(); - const Node op_lhs = GetRegister(instr.gpr8); - const Node comparison = - GetPredicateComparisonInteger(instr.icmp.cond, instr.icmp.is_signed != 0, test, zero); - SetRegister(bb, instr.gpr0, Operation(OperationCode::Select, comparison, op_lhs, op_rhs)); - break; - } - case OpCode::Id::LOP_C: - case OpCode::Id::LOP_R: - case OpCode::Id::LOP_IMM: { - if (instr.alu.lop.invert_a) - op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a); - if (instr.alu.lop.invert_b) - op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b); - - WriteLogicOperation(bb, instr.gpr0, instr.alu.lop.operation, op_a, op_b, - instr.alu.lop.pred_result_mode, instr.alu.lop.pred48, - instr.generates_cc); - break; - } - case OpCode::Id::LOP3_C: - case OpCode::Id::LOP3_R: - case OpCode::Id::LOP3_IMM: { - const Node op_c = GetRegister(instr.gpr39); - const Node lut = [&]() { - if (opcode->get().GetId() == OpCode::Id::LOP3_R) { - return Immediate(instr.alu.lop3.GetImmLut28()); - } else { - return Immediate(instr.alu.lop3.GetImmLut48()); - } - }(); - - WriteLop3Instruction(bb, instr.gpr0, op_a, op_b, op_c, lut, instr.generates_cc); - break; - } - case OpCode::Id::IMNMX_C: - case OpCode::Id::IMNMX_R: - case OpCode::Id::IMNMX_IMM: { - UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None); - - const bool is_signed = instr.imnmx.is_signed; - - const Node condition = GetPredicate(instr.imnmx.pred, instr.imnmx.negate_pred != 0); - const Node min = SignedOperation(OperationCode::IMin, is_signed, NO_PRECISE, op_a, op_b); - const Node max = SignedOperation(OperationCode::IMax, is_signed, NO_PRECISE, op_a, op_b); - const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max); - - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::LEA_R2: - case OpCode::Id::LEA_R1: - case OpCode::Id::LEA_IMM: - case OpCode::Id::LEA_RZ: - case OpCode::Id::LEA_HI: { - auto [op_a_, op_b_, op_c_] = [&]() -> std::tuple<Node, Node, Node> { - switch (opcode->get().GetId()) { - case OpCode::Id::LEA_R2: { - return {GetRegister(instr.gpr20), GetRegister(instr.gpr39), - Immediate(static_cast<u32>(instr.lea.r2.entry_a))}; - } - case OpCode::Id::LEA_R1: { - const bool neg = instr.lea.r1.neg != 0; - return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), - GetRegister(instr.gpr20), - Immediate(static_cast<u32>(instr.lea.r1.entry_a))}; - } - case OpCode::Id::LEA_IMM: { - const bool neg = instr.lea.imm.neg != 0; - return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), - Immediate(static_cast<u32>(instr.lea.imm.entry_a)), - Immediate(static_cast<u32>(instr.lea.imm.entry_b))}; - } - case OpCode::Id::LEA_RZ: { - const bool neg = instr.lea.rz.neg != 0; - return {GetConstBuffer(instr.lea.rz.cb_index, instr.lea.rz.cb_offset), - GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), - Immediate(static_cast<u32>(instr.lea.rz.entry_a))}; - } - case OpCode::Id::LEA_HI: - default: - UNIMPLEMENTED_MSG("Unhandled LEA subinstruction: {}", opcode->get().GetName()); - - return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)), GetRegister(instr.gpr8), - Immediate(static_cast<u32>(instr.lea.imm.entry_b))}; - } - }(); - - UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex), - "Unhandled LEA Predicate"); - - Node value = - Operation(OperationCode::ILogicalShiftLeft, std::move(op_a_), std::move(op_c_)); - value = Operation(OperationCode::IAdd, 
std::move(op_b_), std::move(value)); - SetRegister(bb, instr.gpr0, std::move(value)); - - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled ArithmeticInteger instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Node op_b, Node op_c, - Node imm_lut, bool sets_cc) { - const Node lop3_fast = [&](const Node na, const Node nb, const Node nc, const Node ttbl) { - Node value = Immediate(0); - const ImmediateNode imm = std::get<ImmediateNode>(*ttbl); - if (imm.GetValue() & 0x01) { - const Node a = Operation(OperationCode::IBitwiseNot, na); - const Node b = Operation(OperationCode::IBitwiseNot, nb); - const Node c = Operation(OperationCode::IBitwiseNot, nc); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x02) { - const Node a = Operation(OperationCode::IBitwiseNot, na); - const Node b = Operation(OperationCode::IBitwiseNot, nb); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x04) { - const Node a = Operation(OperationCode::IBitwiseNot, na); - const Node c = Operation(OperationCode::IBitwiseNot, nc); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x08) { - const Node a = Operation(OperationCode::IBitwiseNot, na); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x10) { - const Node b = Operation(OperationCode::IBitwiseNot, nb); - const Node c = Operation(OperationCode::IBitwiseNot, nc); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x20) { - const Node b = Operation(OperationCode::IBitwiseNot, nb); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x40) { - const Node c = Operation(OperationCode::IBitwiseNot, nc); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x80) { - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - return value; - }(op_a, op_b, op_c, imm_lut); - - SetInternalFlagsFromInteger(bb, lop3_fast, sets_cc); - SetRegister(bb, dest, lop3_fast); -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp deleted file mode 100644 index 73580277a..000000000 --- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp +++ /dev/null @@ -1,99 +0,0 @@ -// Copyright 
2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::LogicOperation; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; -using Tegra::Shader::PredicateResultMode; -using Tegra::Shader::Register; - -u32 ShaderIR::DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - Node op_a = GetRegister(instr.gpr8); - Node op_b = Immediate(static_cast<s32>(instr.alu.imm20_32)); - - switch (opcode->get().GetId()) { - case OpCode::Id::IADD32I: { - UNIMPLEMENTED_IF_MSG(instr.iadd32i.saturate, "IADD32I saturation is not implemented"); - - op_a = GetOperandAbsNegInteger(std::move(op_a), false, instr.iadd32i.negate_a != 0, true); - - Node value = Operation(OperationCode::IAdd, PRECISE, std::move(op_a), std::move(op_b)); - - SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc != 0); - SetRegister(bb, instr.gpr0, std::move(value)); - break; - } - case OpCode::Id::LOP32I: { - if (instr.alu.lop32i.invert_a) { - op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_a)); - } - - if (instr.alu.lop32i.invert_b) { - op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b)); - } - - WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, std::move(op_a), - std::move(op_b), PredicateResultMode::None, Pred::UnusedIndex, - instr.op_32.generates_cc != 0); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled ArithmeticIntegerImmediate instruction: {}", - opcode->get().GetName()); - } - - return pc; -} - -void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation logic_op, Node op_a, - Node op_b, PredicateResultMode predicate_mode, Pred predicate, - bool sets_cc) { - Node result = [&] { - switch (logic_op) { - case LogicOperation::And: - return Operation(OperationCode::IBitwiseAnd, PRECISE, std::move(op_a), std::move(op_b)); - case LogicOperation::Or: - return Operation(OperationCode::IBitwiseOr, PRECISE, std::move(op_a), std::move(op_b)); - case LogicOperation::Xor: - return Operation(OperationCode::IBitwiseXor, PRECISE, std::move(op_a), std::move(op_b)); - case LogicOperation::PassB: - return op_b; - default: - UNIMPLEMENTED_MSG("Unimplemented logic operation={}", logic_op); - return Immediate(0); - } - }(); - - SetInternalFlagsFromInteger(bb, result, sets_cc); - SetRegister(bb, dest, result); - - // Write the predicate value depending on the predicate mode. - switch (predicate_mode) { - case PredicateResultMode::None: - // Do nothing. - return; - case PredicateResultMode::NotZero: { - // Set the predicate to true if the result is not zero. 
- Node compare = Operation(OperationCode::LogicalINotEqual, std::move(result), Immediate(0)); - SetPredicate(bb, static_cast<u64>(predicate), std::move(compare)); - break; - } - default: - UNIMPLEMENTED_MSG("Unimplemented predicate result mode: {}", predicate_mode); - } -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp deleted file mode 100644 index 8e3b46e8e..000000000 --- a/src/video_core/shader/decode/bfe.cpp +++ /dev/null @@ -1,77 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - Node op_a = GetRegister(instr.gpr8); - Node op_b = [&] { - switch (opcode->get().GetId()) { - case OpCode::Id::BFE_R: - return GetRegister(instr.gpr20); - case OpCode::Id::BFE_C: - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - case OpCode::Id::BFE_IMM: - return Immediate(instr.alu.GetSignedImm20_20()); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - UNIMPLEMENTED_IF_MSG(instr.bfe.rd_cc, "Condition codes in BFE is not implemented"); - - const bool is_signed = instr.bfe.is_signed; - - // using reverse parallel method in - // https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel - // note for later if possible to implement faster method. - if (instr.bfe.brev) { - const auto swap = [&](u32 s, u32 mask) { - Node v1 = - SignedOperation(OperationCode::ILogicalShiftRight, is_signed, op_a, Immediate(s)); - if (mask != 0) { - v1 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v1), - Immediate(mask)); - } - Node v2 = op_a; - if (mask != 0) { - v2 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v2), - Immediate(mask)); - } - v2 = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, std::move(v2), - Immediate(s)); - return SignedOperation(OperationCode::IBitwiseOr, is_signed, std::move(v1), - std::move(v2)); - }; - op_a = swap(1, 0x55555555U); - op_a = swap(2, 0x33333333U); - op_a = swap(4, 0x0F0F0F0FU); - op_a = swap(8, 0x00FF00FFU); - op_a = swap(16, 0); - } - - const auto offset = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b, - Immediate(0), Immediate(8)); - const auto bits = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b, - Immediate(8), Immediate(8)); - auto result = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_a, offset, bits); - SetRegister(bb, instr.gpr0, std::move(result)); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp deleted file mode 100644 index 70d1c055b..000000000 --- a/src/video_core/shader/decode/bfi.cpp +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - const auto [packed_shift, base] = [&]() -> std::pair<Node, Node> { - switch (opcode->get().GetId()) { - case OpCode::Id::BFI_RC: - return {GetRegister(instr.gpr39), - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; - case OpCode::Id::BFI_IMM_R: - return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)}; - default: - UNREACHABLE(); - return {Immediate(0), Immediate(0)}; - } - }(); - const Node insert = GetRegister(instr.gpr8); - const Node offset = BitfieldExtract(packed_shift, 0, 8); - const Node bits = BitfieldExtract(packed_shift, 8, 8); - - const Node value = - Operation(OperationCode::UBitfieldInsert, PRECISE, base, insert, offset, bits); - - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp deleted file mode 100644 index fea7a54df..000000000 --- a/src/video_core/shader/decode/conversion.cpp +++ /dev/null @@ -1,321 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <limits> -#include <optional> -#include <utility> - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Register; - -namespace { - -constexpr OperationCode GetFloatSelector(u64 selector) { - return selector == 0 ? 
OperationCode::FCastHalf0 : OperationCode::FCastHalf1; -} - -constexpr u32 SizeInBits(Register::Size size) { - switch (size) { - case Register::Size::Byte: - return 8; - case Register::Size::Short: - return 16; - case Register::Size::Word: - return 32; - case Register::Size::Long: - return 64; - } - return 0; -} - -constexpr std::optional<std::pair<s32, s32>> IntegerSaturateBounds(Register::Size src_size, - Register::Size dst_size, - bool src_signed, - bool dst_signed) { - const u32 dst_bits = SizeInBits(dst_size); - if (src_size == Register::Size::Word && dst_size == Register::Size::Word) { - if (src_signed == dst_signed) { - return std::nullopt; - } - return std::make_pair(0, std::numeric_limits<s32>::max()); - } - if (dst_signed) { - // Signed destination, clamp to [-128, 127] for instance - return std::make_pair(-(1 << (dst_bits - 1)), (1 << (dst_bits - 1)) - 1); - } else { - // Unsigned destination - if (dst_bits == 32) { - // Avoid shifting by 32, that is undefined behavior - return std::make_pair(0, s32(std::numeric_limits<u32>::max())); - } - return std::make_pair(0, (1 << dst_bits) - 1); - } -} - -} // Anonymous namespace - -u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - switch (opcode->get().GetId()) { - case OpCode::Id::I2I_R: - case OpCode::Id::I2I_C: - case OpCode::Id::I2I_IMM: { - const bool src_signed = instr.conversion.is_input_signed; - const bool dst_signed = instr.conversion.is_output_signed; - const Register::Size src_size = instr.conversion.src_size; - const Register::Size dst_size = instr.conversion.dst_size; - const u32 selector = static_cast<u32>(instr.conversion.int_src.selector); - - Node value = [this, instr, opcode] { - switch (opcode->get().GetId()) { - case OpCode::Id::I2I_R: - return GetRegister(instr.gpr20); - case OpCode::Id::I2I_C: - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - case OpCode::Id::I2I_IMM: - return Immediate(instr.alu.GetSignedImm20_20()); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - // Ensure the source selector is valid - switch (instr.conversion.src_size) { - case Register::Size::Byte: - break; - case Register::Size::Short: - ASSERT(selector == 0 || selector == 2); - break; - default: - ASSERT(selector == 0); - break; - } - - if (src_size != Register::Size::Word || selector != 0) { - value = SignedOperation(OperationCode::IBitfieldExtract, src_signed, std::move(value), - Immediate(selector * 8), Immediate(SizeInBits(src_size))); - } - - value = GetOperandAbsNegInteger(std::move(value), instr.conversion.abs_a, - instr.conversion.negate_a, src_signed); - - if (instr.alu.saturate_d) { - if (src_signed && !dst_signed) { - Node is_negative = Operation(OperationCode::LogicalUGreaterEqual, value, - Immediate(1 << (SizeInBits(src_size) - 1))); - value = Operation(OperationCode::Select, std::move(is_negative), Immediate(0), - std::move(value)); - - // Simplify generated expressions, this can be removed without semantic impact - SetTemporary(bb, 0, std::move(value)); - value = GetTemporary(0); - - if (dst_size != Register::Size::Word) { - const Node limit = Immediate((1 << SizeInBits(dst_size)) - 1); - Node is_large = - Operation(OperationCode::LogicalUGreaterThan, std::move(value), limit); - value = Operation(OperationCode::Select, std::move(is_large), limit, - std::move(value)); - } - } else if (const std::optional bounds = - IntegerSaturateBounds(src_size, dst_size, src_signed, dst_signed)) { - 
value = SignedOperation(OperationCode::IMax, src_signed, std::move(value), - Immediate(bounds->first)); - value = SignedOperation(OperationCode::IMin, src_signed, std::move(value), - Immediate(bounds->second)); - } - } else if (dst_size != Register::Size::Word) { - // No saturation, we only have to mask the result - Node mask = Immediate((1 << SizeInBits(dst_size)) - 1); - value = Operation(OperationCode::UBitwiseAnd, std::move(value), std::move(mask)); - } - - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, std::move(value)); - break; - } - case OpCode::Id::I2F_R: - case OpCode::Id::I2F_C: - case OpCode::Id::I2F_IMM: { - UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in I2F is not implemented"); - - Node value = [&] { - switch (opcode->get().GetId()) { - case OpCode::Id::I2F_R: - return GetRegister(instr.gpr20); - case OpCode::Id::I2F_C: - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - case OpCode::Id::I2F_IMM: - return Immediate(instr.alu.GetSignedImm20_20()); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - const bool input_signed = instr.conversion.is_input_signed; - - if (const u32 offset = static_cast<u32>(instr.conversion.int_src.selector); offset > 0) { - ASSERT(instr.conversion.src_size == Register::Size::Byte || - instr.conversion.src_size == Register::Size::Short); - if (instr.conversion.src_size == Register::Size::Short) { - ASSERT(offset == 0 || offset == 2); - } - value = SignedOperation(OperationCode::ILogicalShiftRight, input_signed, - std::move(value), Immediate(offset * 8)); - } - - value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed); - value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, false, input_signed); - value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value); - value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a); - - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - - if (instr.conversion.dst_size == Register::Size::Short) { - value = Operation(OperationCode::HCastFloat, PRECISE, value); - } - - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::F2F_R: - case OpCode::Id::F2F_C: - case OpCode::Id::F2F_IMM: { - UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); - UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long); - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in F2F is not implemented"); - - Node value = [&]() { - switch (opcode->get().GetId()) { - case OpCode::Id::F2F_R: - return GetRegister(instr.gpr20); - case OpCode::Id::F2F_C: - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - case OpCode::Id::F2F_IMM: - return GetImmediate19(instr); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - if (instr.conversion.src_size == Register::Size::Short) { - value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE, - std::move(value)); - } else { - ASSERT(instr.conversion.float_src.selector == 0); - } - - value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); - - value = [&] { - if (instr.conversion.src_size != instr.conversion.dst_size) { - // Rounding operations only matter when the source and destination conversion size - // is the same. 
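The I2I saturation path above clamps the converted value to the destination's representable range; IntegerSaturateBounds yields [-2^(n-1), 2^(n-1)-1] for signed destinations and [0, 2^n-1] for unsigned ones, special-casing the 32-bit unsigned destination to avoid a shift by 32. A plain-integer sketch of the same clamping (illustrative helper, not emulator API):

#include <algorithm>
#include <cstdint>
#include <limits>

// Clamp into the range of an n-bit destination, mirroring IntegerSaturateBounds.
int64_t SaturateToSize(int64_t value, uint32_t dst_bits, bool dst_signed) {
    int64_t lo, hi;
    if (dst_signed) {
        lo = -(int64_t{1} << (dst_bits - 1));    // e.g. -128 for 8 bits
        hi = (int64_t{1} << (dst_bits - 1)) - 1; // e.g.  127 for 8 bits
    } else {
        lo = 0;
        hi = dst_bits == 32 ? std::numeric_limits<uint32_t>::max()
                            : (int64_t{1} << dst_bits) - 1; // e.g. 255 for 8 bits
    }
    return std::clamp(value, lo, hi);
}
// SaturateToSize(300, 8, false) == 255; SaturateToSize(-5, 8, false) == 0.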
- return value; - } - switch (instr.conversion.f2f.GetRoundingMode()) { - case Tegra::Shader::F2fRoundingOp::None: - return value; - case Tegra::Shader::F2fRoundingOp::Round: - return Operation(OperationCode::FRoundEven, value); - case Tegra::Shader::F2fRoundingOp::Floor: - return Operation(OperationCode::FFloor, value); - case Tegra::Shader::F2fRoundingOp::Ceil: - return Operation(OperationCode::FCeil, value); - case Tegra::Shader::F2fRoundingOp::Trunc: - return Operation(OperationCode::FTrunc, value); - default: - UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}", - instr.conversion.f2f.rounding.Value()); - return value; - } - }(); - value = GetSaturatedFloat(value, instr.alu.saturate_d); - - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - - if (instr.conversion.dst_size == Register::Size::Short) { - value = Operation(OperationCode::HCastFloat, PRECISE, value); - } - - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::F2I_R: - case OpCode::Id::F2I_C: - case OpCode::Id::F2I_IMM: { - UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long); - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in F2I is not implemented"); - Node value = [&]() { - switch (opcode->get().GetId()) { - case OpCode::Id::F2I_R: - return GetRegister(instr.gpr20); - case OpCode::Id::F2I_C: - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - case OpCode::Id::F2I_IMM: - return GetImmediate19(instr); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - if (instr.conversion.src_size == Register::Size::Short) { - value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE, - std::move(value)); - } else { - ASSERT(instr.conversion.float_src.selector == 0); - } - - value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); - - value = [&]() { - switch (instr.conversion.f2i.rounding) { - case Tegra::Shader::F2iRoundingOp::RoundEven: - return Operation(OperationCode::FRoundEven, PRECISE, value); - case Tegra::Shader::F2iRoundingOp::Floor: - return Operation(OperationCode::FFloor, PRECISE, value); - case Tegra::Shader::F2iRoundingOp::Ceil: - return Operation(OperationCode::FCeil, PRECISE, value); - case Tegra::Shader::F2iRoundingOp::Trunc: - return Operation(OperationCode::FTrunc, PRECISE, value); - default: - UNIMPLEMENTED_MSG("Unimplemented F2I rounding mode {}", - instr.conversion.f2i.rounding.Value()); - return Immediate(0); - } - }(); - const bool is_signed = instr.conversion.is_output_signed; - value = SignedOperation(OperationCode::ICastFloat, is_signed, PRECISE, value); - value = ConvertIntegerSize(value, instr.conversion.dst_size, is_signed); - - SetRegister(bb, instr.gpr0, value); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp deleted file mode 100644 index 5973588d6..000000000 --- a/src/video_core/shader/decode/ffma.cpp +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
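Both F2F and F2I above pick an explicit rounding operation before converting. In host terms the mapping is roughly Round -> round to nearest even, and Floor/Ceil/Trunc -> their <cmath> namesakes. A hedged sketch; the enum is illustrative and does not mirror the hardware encoding, and RoundEven assumes the default FE_TONEAREST rounding mode:

#include <cmath>

enum class Rounding { None, Round, Floor, Ceil, Trunc }; // illustrative names only

float ApplyRounding(float value, Rounding mode) {
    switch (mode) {
    case Rounding::Round: return std::nearbyint(value); // round to nearest even
    case Rounding::Floor: return std::floor(value);
    case Rounding::Ceil:  return std::ceil(value);
    case Rounding::Trunc: return std::trunc(value);
    case Rounding::None:
    default:              return value;
    }
}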
- -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented"); - if (instr.ffma.tab5980_0 != 1) { - LOG_DEBUG(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value()); - } - if (instr.ffma.tab5980_1 != 0) { - LOG_DEBUG(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value()); - } - - const Node op_a = GetRegister(instr.gpr8); - - auto [op_b, op_c] = [&]() -> std::tuple<Node, Node> { - switch (opcode->get().GetId()) { - case OpCode::Id::FFMA_CR: { - return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), - GetRegister(instr.gpr39)}; - } - case OpCode::Id::FFMA_RR: - return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; - case OpCode::Id::FFMA_RC: { - return {GetRegister(instr.gpr39), - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; - } - case OpCode::Id::FFMA_IMM: - return {GetImmediate19(instr), GetRegister(instr.gpr39)}; - default: - UNIMPLEMENTED_MSG("Unhandled FFMA instruction: {}", opcode->get().GetName()); - return {Immediate(0), Immediate(0)}; - } - }(); - - op_b = GetOperandAbsNegFloat(op_b, false, instr.ffma.negate_b); - op_c = GetOperandAbsNegFloat(op_c, false, instr.ffma.negate_c); - - Node value = Operation(OperationCode::FFma, PRECISE, op_a, op_b, op_c); - value = GetSaturatedFloat(value, instr.alu.saturate_d); - - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp deleted file mode 100644 index 5614e8a0d..000000000 --- a/src/video_core/shader/decode/float_set.cpp +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - - const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fset.abs_a != 0, - instr.fset.neg_a != 0); - - Node op_b = [&]() { - if (instr.is_b_imm) { - return GetImmediate19(instr); - } else if (instr.is_b_gpr) { - return GetRegister(instr.gpr20); - } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - } - }(); - - op_b = GetOperandAbsNegFloat(op_b, instr.fset.abs_b != 0, instr.fset.neg_b != 0); - - // The fset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the - // condition is true, and to 0 otherwise. 
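That FSET result encoding reduces to a small helper: with the BF bit set the destination receives float 1.0/0.0, otherwise an integer all-ones/zero mask. A sketch of the register encoding only (the comparison and predicate combining are handled separately):

#include <cstdint>
#include <cstring>

// Encode FSET's boolean result into the destination register bits.
uint32_t EncodeSetResult(bool condition, bool bf) {
    if (bf) {
        const float f = condition ? 1.0f : 0.0f;
        uint32_t bits;
        std::memcpy(&bits, &f, sizeof(bits)); // reinterpret the float's bit pattern
        return bits;
    }
    return condition ? 0xFFFFFFFFu : 0u; // -1 as an unsigned mask, or 0
}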
- const Node second_pred = GetPredicate(instr.fset.pred39, instr.fset.neg_pred != 0); - - const OperationCode combiner = GetPredicateCombiner(instr.fset.op); - const Node first_pred = GetPredicateComparisonFloat(instr.fset.cond, op_a, op_b); - - const Node predicate = Operation(combiner, first_pred, second_pred); - - const Node true_value = instr.fset.bf ? Immediate(1.0f) : Immediate(-1); - const Node false_value = instr.fset.bf ? Immediate(0.0f) : Immediate(0); - const Node value = - Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); - - if (instr.fset.bf) { - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - } else { - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - } - SetRegister(bb, instr.gpr0, value); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp deleted file mode 100644 index 200c2c983..000000000 --- a/src/video_core/shader/decode/float_set_predicate.cpp +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; - -u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - - Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0, - instr.fsetp.neg_a != 0); - Node op_b = [&]() { - if (instr.is_b_imm) { - return GetImmediate19(instr); - } else if (instr.is_b_gpr) { - return GetRegister(instr.gpr20); - } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - } - }(); - op_b = GetOperandAbsNegFloat(std::move(op_b), instr.fsetp.abs_b, instr.fsetp.neg_b); - - // We can't use the constant predicate as destination. - ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); - - const Node predicate = - GetPredicateComparisonFloat(instr.fsetp.cond, std::move(op_a), std::move(op_b)); - const Node second_pred = GetPredicate(instr.fsetp.pred39, instr.fsetp.neg_pred != 0); - - const OperationCode combiner = GetPredicateCombiner(instr.fsetp.op); - const Node value = Operation(combiner, predicate, second_pred); - - // Set the primary predicate to the result of Predicate OP SecondPredicate - SetPredicate(bb, instr.fsetp.pred3, value); - - if (instr.fsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { - // Set the secondary predicate to the result of !Predicate OP SecondPredicate, - // if enabled - const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate); - const Node second_value = Operation(combiner, negated_pred, second_pred); - SetPredicate(bb, instr.fsetp.pred0, second_value); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp deleted file mode 100644 index fa83108cd..000000000 --- a/src/video_core/shader/decode/half_set.cpp +++ /dev/null @@ -1,115 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
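FSETP above writes the comparison, combined with the auxiliary predicate, into the primary predicate and optionally the negated comparison (combined the same way) into a secondary one. With the combiner shown as logical AND purely for illustration:

#include <utility>

// FSETP-style predicate write-back: {primary, optional secondary}.
std::pair<bool, bool> SetPredicatePair(bool comparison, bool second_pred) {
    return {comparison && second_pred, !comparison && second_pred};
}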
- -#include <array> - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using std::move; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::PredCondition; - -u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - PredCondition cond{}; - bool bf = false; - bool ftz = false; - bool neg_a = false; - bool abs_a = false; - bool neg_b = false; - bool abs_b = false; - switch (opcode->get().GetId()) { - case OpCode::Id::HSET2_C: - case OpCode::Id::HSET2_IMM: - cond = instr.hsetp2.cbuf_and_imm.cond; - bf = instr.Bit(53); - ftz = instr.Bit(54); - neg_a = instr.Bit(43); - abs_a = instr.Bit(44); - neg_b = instr.Bit(56); - abs_b = instr.Bit(54); - break; - case OpCode::Id::HSET2_R: - cond = instr.hsetp2.reg.cond; - bf = instr.Bit(49); - ftz = instr.Bit(50); - neg_a = instr.Bit(43); - abs_a = instr.Bit(44); - neg_b = instr.Bit(31); - abs_b = instr.Bit(30); - break; - default: - UNREACHABLE(); - } - - Node op_b = [this, instr, opcode] { - switch (opcode->get().GetId()) { - case OpCode::Id::HSET2_C: - // Inform as unimplemented as this is not tested. - UNIMPLEMENTED_MSG("HSET2_C is not implemented"); - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - case OpCode::Id::HSET2_R: - return GetRegister(instr.gpr20); - case OpCode::Id::HSET2_IMM: - return UnpackHalfImmediate(instr, true); - default: - UNREACHABLE(); - return Node{}; - } - }(); - - if (!ftz) { - LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); - } - - Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a); - op_a = GetOperandAbsNegHalf(op_a, abs_a, neg_a); - - switch (opcode->get().GetId()) { - case OpCode::Id::HSET2_R: - op_b = GetOperandAbsNegHalf(move(op_b), abs_b, neg_b); - [[fallthrough]]; - case OpCode::Id::HSET2_C: - op_b = UnpackHalfFloat(move(op_b), instr.hset2.type_b); - break; - default: - break; - } - - Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred); - - Node comparison_pair = GetPredicateComparisonHalf(cond, op_a, op_b); - - const OperationCode combiner = GetPredicateCombiner(instr.hset2.op); - - // HSET2 operates on each half float in the pack. - std::array<Node, 2> values; - for (u32 i = 0; i < 2; ++i) { - const u32 raw_value = bf ? 0x3c00 : 0xffff; - Node true_value = Immediate(raw_value << (i * 16)); - Node false_value = Immediate(0); - - Node comparison = Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i)); - Node predicate = Operation(combiner, comparison, second_pred); - values[i] = - Operation(OperationCode::Select, predicate, move(true_value), move(false_value)); - } - - Node value = Operation(OperationCode::UBitwiseOr, values[0], values[1]); - SetRegister(bb, instr.gpr0, move(value)); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp deleted file mode 100644 index 310655619..000000000 --- a/src/video_core/shader/decode/half_set_predicate.cpp +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
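HSET2 above evaluates the condition per 16-bit lane and ORs per-lane masks into the 32-bit destination; 0x3c00 is the half-precision encoding of 1.0, so the BF form produces packed half 1.0/0.0 and the non-BF form 0xffff/0 per lane. The packing step in isolation, taking lane results that are already combined with the auxiliary predicate:

#include <cstdint>

// Build HSET2's packed result from the two per-lane boolean results.
uint32_t PackHset2Result(bool lane0, bool lane1, bool bf) {
    const uint32_t true_bits = bf ? 0x3c00u : 0xffffu;
    uint32_t result = 0;
    if (lane0) result |= true_bits;       // low 16 bits
    if (lane1) result |= true_bits << 16; // high 16 bits
    return result;
}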
- -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; - -u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - if (instr.hsetp2.ftz != 0) { - LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); - } - - Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a); - op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); - - Tegra::Shader::PredCondition cond{}; - bool h_and{}; - Node op_b{}; - switch (opcode->get().GetId()) { - case OpCode::Id::HSETP2_C: - cond = instr.hsetp2.cbuf_and_imm.cond; - h_and = instr.hsetp2.cbuf_and_imm.h_and; - op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), - instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b); - // F32 is hardcoded in hardware - op_b = UnpackHalfFloat(std::move(op_b), Tegra::Shader::HalfType::F32); - break; - case OpCode::Id::HSETP2_IMM: - cond = instr.hsetp2.cbuf_and_imm.cond; - h_and = instr.hsetp2.cbuf_and_imm.h_and; - op_b = UnpackHalfImmediate(instr, true); - break; - case OpCode::Id::HSETP2_R: - cond = instr.hsetp2.reg.cond; - h_and = instr.hsetp2.reg.h_and; - op_b = - GetOperandAbsNegHalf(UnpackHalfFloat(GetRegister(instr.gpr20), instr.hsetp2.reg.type_b), - instr.hsetp2.reg.abs_b, instr.hsetp2.reg.negate_b); - break; - default: - UNREACHABLE(); - op_b = Immediate(0); - } - - const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op); - const Node combined_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred); - - const auto Write = [&](u64 dest, Node src) { - SetPredicate(bb, dest, Operation(combiner, std::move(src), combined_pred)); - }; - - const Node comparison = GetPredicateComparisonHalf(cond, op_a, op_b); - const u64 first = instr.hsetp2.pred3; - const u64 second = instr.hsetp2.pred0; - if (h_and) { - Node joined = Operation(OperationCode::LogicalAnd2, comparison); - Write(first, joined); - Write(second, Operation(OperationCode::LogicalNegate, std::move(joined))); - } else { - Write(first, Operation(OperationCode::LogicalPick2, comparison, Immediate(0U))); - Write(second, Operation(OperationCode::LogicalPick2, comparison, Immediate(1U))); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp deleted file mode 100644 index 5b44cb79c..000000000 --- a/src/video_core/shader/decode/hfma2.cpp +++ /dev/null @@ -1,73 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
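HSETP2's write-back above depends on the H_AND bit: when it is set, both lanes are ANDed into a single result whose negation feeds the second predicate; otherwise each destination predicate receives one lane. A small sketch with the combiner again shown as AND:

#include <utility>

// {first predicate, second predicate} for HSETP2 given per-lane comparisons.
std::pair<bool, bool> Hsetp2Predicates(bool lane0, bool lane1, bool second_pred, bool h_and) {
    if (h_and) {
        const bool joined = lane0 && lane1;
        return {joined && second_pred, !joined && second_pred};
    }
    return {lane0 && second_pred, lane1 && second_pred};
}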
- -#include <tuple> - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::HalfPrecision; -using Tegra::Shader::HalfType; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) { - DEBUG_ASSERT(instr.hfma2.rr.precision == HalfPrecision::None); - } else { - DEBUG_ASSERT(instr.hfma2.precision == HalfPrecision::None); - } - - constexpr auto identity = HalfType::H0_H1; - bool neg_b{}, neg_c{}; - auto [saturate, type_b, op_b, type_c, - op_c] = [&]() -> std::tuple<bool, HalfType, Node, HalfType, Node> { - switch (opcode->get().GetId()) { - case OpCode::Id::HFMA2_CR: - neg_b = instr.hfma2.negate_b; - neg_c = instr.hfma2.negate_c; - return {instr.hfma2.saturate, HalfType::F32, - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), - instr.hfma2.type_reg39, GetRegister(instr.gpr39)}; - case OpCode::Id::HFMA2_RC: - neg_b = instr.hfma2.negate_b; - neg_c = instr.hfma2.negate_c; - return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39), - HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; - case OpCode::Id::HFMA2_RR: - neg_b = instr.hfma2.rr.negate_b; - neg_c = instr.hfma2.rr.negate_c; - return {instr.hfma2.rr.saturate, instr.hfma2.type_b, GetRegister(instr.gpr20), - instr.hfma2.rr.type_c, GetRegister(instr.gpr39)}; - case OpCode::Id::HFMA2_IMM_R: - neg_c = instr.hfma2.negate_c; - return {instr.hfma2.saturate, identity, UnpackHalfImmediate(instr, true), - instr.hfma2.type_reg39, GetRegister(instr.gpr39)}; - default: - return {false, identity, Immediate(0), identity, Immediate(0)}; - } - }(); - - const Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hfma2.type_a); - op_b = GetOperandAbsNegHalf(UnpackHalfFloat(op_b, type_b), false, neg_b); - op_c = GetOperandAbsNegHalf(UnpackHalfFloat(op_c, type_c), false, neg_c); - - Node value = Operation(OperationCode::HFma, PRECISE, op_a, op_b, op_c); - value = GetSaturatedHalfFloat(value, saturate); - value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge); - - SetRegister(bb, instr.gpr0, value); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp deleted file mode 100644 index 5470e8cf4..000000000 --- a/src/video_core/shader/decode/image.cpp +++ /dev/null @@ -1,536 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include <algorithm> -#include <vector> -#include <fmt/format.h> - -#include "common/assert.h" -#include "common/bit_field.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" -#include "video_core/textures/texture.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::PredCondition; -using Tegra::Shader::StoreType; -using Tegra::Texture::ComponentType; -using Tegra::Texture::TextureFormat; -using Tegra::Texture::TICEntry; - -namespace { - -ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor, - std::size_t component) { - const TextureFormat format{descriptor.format}; - switch (format) { - case TextureFormat::R16G16B16A16: - case TextureFormat::R32G32B32A32: - case TextureFormat::R32G32B32: - case TextureFormat::R32G32: - case TextureFormat::R16G16: - case TextureFormat::R32: - case TextureFormat::R16: - case TextureFormat::R8: - case TextureFormat::R1: - if (component == 0) { - return descriptor.r_type; - } - if (component == 1) { - return descriptor.g_type; - } - if (component == 2) { - return descriptor.b_type; - } - if (component == 3) { - return descriptor.a_type; - } - break; - case TextureFormat::A8R8G8B8: - if (component == 0) { - return descriptor.a_type; - } - if (component == 1) { - return descriptor.r_type; - } - if (component == 2) { - return descriptor.g_type; - } - if (component == 3) { - return descriptor.b_type; - } - break; - case TextureFormat::A2B10G10R10: - case TextureFormat::A4B4G4R4: - case TextureFormat::A5B5G5R1: - case TextureFormat::A1B5G5R5: - if (component == 0) { - return descriptor.a_type; - } - if (component == 1) { - return descriptor.b_type; - } - if (component == 2) { - return descriptor.g_type; - } - if (component == 3) { - return descriptor.r_type; - } - break; - case TextureFormat::R32_B24G8: - if (component == 0) { - return descriptor.r_type; - } - if (component == 1) { - return descriptor.b_type; - } - if (component == 2) { - return descriptor.g_type; - } - break; - case TextureFormat::B5G6R5: - case TextureFormat::B6G5R5: - case TextureFormat::B10G11R11: - if (component == 0) { - return descriptor.b_type; - } - if (component == 1) { - return descriptor.g_type; - } - if (component == 2) { - return descriptor.r_type; - } - break; - case TextureFormat::R24G8: - case TextureFormat::R8G24: - case TextureFormat::R8G8: - case TextureFormat::G4R4: - if (component == 0) { - return descriptor.g_type; - } - if (component == 1) { - return descriptor.r_type; - } - break; - default: - break; - } - UNIMPLEMENTED_MSG("Texture format not implemented={}", format); - return ComponentType::FLOAT; -} - -bool IsComponentEnabled(std::size_t component_mask, std::size_t component) { - constexpr u8 R = 0b0001; - constexpr u8 G = 0b0010; - constexpr u8 B = 0b0100; - constexpr u8 A = 0b1000; - constexpr std::array<u8, 16> mask = { - 0, (R), (G), (R | G), (B), (R | B), (G | B), (R | G | B), - (A), (R | A), (G | A), (R | G | A), (B | A), (R | B | A), (G | B | A), (R | G | B | A)}; - return std::bitset<4>{mask.at(component_mask)}.test(component); -} - -u32 GetComponentSize(TextureFormat format, std::size_t component) { - switch (format) { - case TextureFormat::R32G32B32A32: - return 32; - case TextureFormat::R16G16B16A16: - return 16; - case TextureFormat::R32G32B32: - return component <= 2 ? 
32 : 0; - case TextureFormat::R32G32: - return component <= 1 ? 32 : 0; - case TextureFormat::R16G16: - return component <= 1 ? 16 : 0; - case TextureFormat::R32: - return component == 0 ? 32 : 0; - case TextureFormat::R16: - return component == 0 ? 16 : 0; - case TextureFormat::R8: - return component == 0 ? 8 : 0; - case TextureFormat::R1: - return component == 0 ? 1 : 0; - case TextureFormat::A8R8G8B8: - return 8; - case TextureFormat::A2B10G10R10: - return (component == 3 || component == 2 || component == 1) ? 10 : 2; - case TextureFormat::A4B4G4R4: - return 4; - case TextureFormat::A5B5G5R1: - return (component == 0 || component == 1 || component == 2) ? 5 : 1; - case TextureFormat::A1B5G5R5: - return (component == 1 || component == 2 || component == 3) ? 5 : 1; - case TextureFormat::R32_B24G8: - if (component == 0) { - return 32; - } - if (component == 1) { - return 24; - } - if (component == 2) { - return 8; - } - return 0; - case TextureFormat::B5G6R5: - if (component == 0 || component == 2) { - return 5; - } - if (component == 1) { - return 6; - } - return 0; - case TextureFormat::B6G5R5: - if (component == 1 || component == 2) { - return 5; - } - if (component == 0) { - return 6; - } - return 0; - case TextureFormat::B10G11R11: - if (component == 1 || component == 2) { - return 11; - } - if (component == 0) { - return 10; - } - return 0; - case TextureFormat::R24G8: - if (component == 0) { - return 8; - } - if (component == 1) { - return 24; - } - return 0; - case TextureFormat::R8G24: - if (component == 0) { - return 24; - } - if (component == 1) { - return 8; - } - return 0; - case TextureFormat::R8G8: - return (component == 0 || component == 1) ? 8 : 0; - case TextureFormat::G4R4: - return (component == 0 || component == 1) ? 4 : 0; - default: - UNIMPLEMENTED_MSG("Texture format not implemented={}", format); - return 0; - } -} - -std::size_t GetImageComponentMask(TextureFormat format) { - constexpr u8 R = 0b0001; - constexpr u8 G = 0b0010; - constexpr u8 B = 0b0100; - constexpr u8 A = 0b1000; - switch (format) { - case TextureFormat::R32G32B32A32: - case TextureFormat::R16G16B16A16: - case TextureFormat::A8R8G8B8: - case TextureFormat::A2B10G10R10: - case TextureFormat::A4B4G4R4: - case TextureFormat::A5B5G5R1: - case TextureFormat::A1B5G5R5: - return std::size_t{R | G | B | A}; - case TextureFormat::R32G32B32: - case TextureFormat::R32_B24G8: - case TextureFormat::B5G6R5: - case TextureFormat::B6G5R5: - case TextureFormat::B10G11R11: - return std::size_t{R | G | B}; - case TextureFormat::R32G32: - case TextureFormat::R16G16: - case TextureFormat::R24G8: - case TextureFormat::R8G24: - case TextureFormat::R8G8: - case TextureFormat::G4R4: - return std::size_t{R | G}; - case TextureFormat::R32: - case TextureFormat::R16: - case TextureFormat::R8: - case TextureFormat::R1: - return std::size_t{R}; - default: - UNIMPLEMENTED_MSG("Texture format not implemented={}", format); - return std::size_t{R | G | B | A}; - } -} - -std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) { - switch (image_type) { - case Tegra::Shader::ImageType::Texture1D: - case Tegra::Shader::ImageType::TextureBuffer: - return 1; - case Tegra::Shader::ImageType::Texture1DArray: - case Tegra::Shader::ImageType::Texture2D: - return 2; - case Tegra::Shader::ImageType::Texture2DArray: - case Tegra::Shader::ImageType::Texture3D: - return 3; - } - UNREACHABLE(); - return 1; -} -} // Anonymous namespace - -std::pair<Node, bool> ShaderIR::GetComponentValue(ComponentType component_type, u32 
component_size, - Node original_value) { - switch (component_type) { - case ComponentType::SNORM: { - // range [-1.0, 1.0] - auto cnv_value = Operation(OperationCode::FMul, original_value, - Immediate(static_cast<float>(1 << component_size) / 2.f - 1.f)); - cnv_value = Operation(OperationCode::ICastFloat, std::move(cnv_value)); - return {BitfieldExtract(std::move(cnv_value), 0, component_size), true}; - } - case ComponentType::SINT: - case ComponentType::UNORM: { - bool is_signed = component_type == ComponentType::SINT; - // range [0.0, 1.0] - auto cnv_value = Operation(OperationCode::FMul, original_value, - Immediate(static_cast<float>(1 << component_size) - 1.f)); - return {SignedOperation(OperationCode::ICastFloat, is_signed, std::move(cnv_value)), - is_signed}; - } - case ComponentType::UINT: // range [0, (1 << component_size) - 1] - return {std::move(original_value), false}; - case ComponentType::FLOAT: - if (component_size == 16) { - return {Operation(OperationCode::HCastFloat, original_value), true}; - } else { - return {std::move(original_value), true}; - } - default: - UNIMPLEMENTED_MSG("Unimplemented component type={}", component_type); - return {std::move(original_value), true}; - } -} - -u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - const auto GetCoordinates = [this, instr](Tegra::Shader::ImageType image_type) { - std::vector<Node> coords; - const std::size_t num_coords{GetImageTypeNumCoordinates(image_type)}; - coords.reserve(num_coords); - for (std::size_t i = 0; i < num_coords; ++i) { - coords.push_back(GetRegister(instr.gpr8.Value() + i)); - } - return coords; - }; - - switch (opcode->get().GetId()) { - case OpCode::Id::SULD: { - UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store != - Tegra::Shader::OutOfBoundsStore::Ignore); - - const auto type{instr.suldst.image_type}; - auto& image{instr.suldst.is_immediate ? 
GetImage(instr.image, type) - : GetBindlessImage(instr.gpr39, type)}; - image.MarkRead(); - - if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::P) { - u32 indexer = 0; - for (u32 element = 0; element < 4; ++element) { - if (!instr.suldst.IsComponentEnabled(element)) { - continue; - } - MetaImage meta{image, {}, element}; - Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)); - SetTemporary(bb, indexer++, std::move(value)); - } - for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - } else if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::D_BA) { - UNIMPLEMENTED_IF(instr.suldst.GetStoreDataLayout() != StoreType::Bits32 && - instr.suldst.GetStoreDataLayout() != StoreType::Bits64); - - auto descriptor = [this, instr] { - std::optional<Tegra::Engines::SamplerDescriptor> sampler_descriptor; - if (instr.suldst.is_immediate) { - sampler_descriptor = - registry.ObtainBoundSampler(static_cast<u32>(instr.image.index.Value())); - } else { - const Node image_register = GetRegister(instr.gpr39); - const auto result = TrackCbuf(image_register, global_code, - static_cast<s64>(global_code.size())); - const auto buffer = std::get<1>(result); - const auto offset = std::get<2>(result); - sampler_descriptor = registry.ObtainBindlessSampler(buffer, offset); - } - if (!sampler_descriptor) { - UNREACHABLE_MSG("Failed to obtain image descriptor"); - } - return *sampler_descriptor; - }(); - - const auto comp_mask = GetImageComponentMask(descriptor.format); - - switch (instr.suldst.GetStoreDataLayout()) { - case StoreType::Bits32: - case StoreType::Bits64: { - u32 indexer = 0; - u32 shifted_counter = 0; - Node value = Immediate(0); - for (u32 element = 0; element < 4; ++element) { - if (!IsComponentEnabled(comp_mask, element)) { - continue; - } - const auto component_type = GetComponentType(descriptor, element); - const auto component_size = GetComponentSize(descriptor.format, element); - MetaImage meta{image, {}, element}; - - auto [converted_value, is_signed] = GetComponentValue( - component_type, component_size, - Operation(OperationCode::ImageLoad, meta, GetCoordinates(type))); - - // shift element to correct position - const auto shifted = shifted_counter; - if (shifted > 0) { - converted_value = - SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, - std::move(converted_value), Immediate(shifted)); - } - shifted_counter += component_size; - - // add value into result - value = Operation(OperationCode::UBitwiseOr, value, std::move(converted_value)); - - // if we shifted enough for 1 byte -> we save it into temp - if (shifted_counter >= 32) { - SetTemporary(bb, indexer++, std::move(value)); - // reset counter and value to prepare pack next byte - value = Immediate(0); - shifted_counter = 0; - } - } - for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - break; - } - default: - UNREACHABLE(); - break; - } - } - break; - } - case OpCode::Id::SUST: { - UNIMPLEMENTED_IF(instr.suldst.mode != Tegra::Shader::SurfaceDataMode::P); - UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store != - Tegra::Shader::OutOfBoundsStore::Ignore); - UNIMPLEMENTED_IF(instr.suldst.component_mask_selector != 0xf); // Ensure we have RGBA - - std::vector<Node> values; - constexpr std::size_t hardcoded_size{4}; - for (std::size_t i = 0; i < hardcoded_size; ++i) { - values.push_back(GetRegister(instr.gpr0.Value() + i)); - } - - const auto type{instr.suldst.image_type}; - auto& 
image{instr.suldst.is_immediate ? GetImage(instr.image, type) - : GetBindlessImage(instr.gpr39, type)}; - image.MarkWrite(); - - MetaImage meta{image, std::move(values)}; - bb.push_back(Operation(OperationCode::ImageStore, meta, GetCoordinates(type))); - break; - } - case OpCode::Id::SUATOM: { - UNIMPLEMENTED_IF(instr.suatom_d.is_ba != 0); - - const OperationCode operation_code = [instr] { - switch (instr.suatom_d.operation_type) { - case Tegra::Shader::ImageAtomicOperationType::S32: - case Tegra::Shader::ImageAtomicOperationType::U32: - switch (instr.suatom_d.operation) { - case Tegra::Shader::ImageAtomicOperation::Add: - return OperationCode::AtomicImageAdd; - case Tegra::Shader::ImageAtomicOperation::And: - return OperationCode::AtomicImageAnd; - case Tegra::Shader::ImageAtomicOperation::Or: - return OperationCode::AtomicImageOr; - case Tegra::Shader::ImageAtomicOperation::Xor: - return OperationCode::AtomicImageXor; - case Tegra::Shader::ImageAtomicOperation::Exch: - return OperationCode::AtomicImageExchange; - default: - break; - } - break; - default: - break; - } - UNIMPLEMENTED_MSG("Unimplemented operation={}, type={}", - static_cast<u64>(instr.suatom_d.operation.Value()), - static_cast<u64>(instr.suatom_d.operation_type.Value())); - return OperationCode::AtomicImageAdd; - }(); - - Node value = GetRegister(instr.gpr0); - - const auto type = instr.suatom_d.image_type; - auto& image = GetImage(instr.image, type); - image.MarkAtomic(); - - MetaImage meta{image, {std::move(value)}}; - SetRegister(bb, instr.gpr0, Operation(operation_code, meta, GetCoordinates(type))); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled image instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -ImageEntry& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) { - const auto offset = static_cast<u32>(image.index.Value()); - - const auto it = - std::find_if(std::begin(used_images), std::end(used_images), - [offset](const ImageEntry& entry) { return entry.offset == offset; }); - if (it != std::end(used_images)) { - ASSERT(!it->is_bindless && it->type == type); - return *it; - } - - const auto next_index = static_cast<u32>(used_images.size()); - return used_images.emplace_back(next_index, offset, type); -} - -ImageEntry& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) { - const Node image_register = GetRegister(reg); - const auto result = - TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size())); - - const auto buffer = std::get<1>(result); - const auto offset = std::get<2>(result); - - const auto it = std::find_if(std::begin(used_images), std::end(used_images), - [buffer, offset](const ImageEntry& entry) { - return entry.buffer == buffer && entry.offset == offset; - }); - if (it != std::end(used_images)) { - ASSERT(it->is_bindless && it->type == type); - return *it; - } - - const auto next_index = static_cast<u32>(used_images.size()); - return used_images.emplace_back(next_index, offset, buffer, type); -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp deleted file mode 100644 index 59809bcd8..000000000 --- a/src/video_core/shader/decode/integer_set.cpp +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
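The SULD .D_BA path in the deleted image.cpp above rebuilds raw texel words: each enabled component is converted back to its integer encoding (GetComponentValue), shifted to its bit position, ORed into an accumulator, and the accumulator is flushed to a temporary register every time 32 bits have been filled. The packing loop in plain C++ (hypothetical helper; the per-component conversion is omitted):

#include <cstdint>
#include <vector>

// Pack per-component raw values into 32-bit words, mirroring the SULD .D accumulation.
// 'sizes' holds each enabled component's bit width, e.g. {5, 6, 5} for B5G6R5.
std::vector<uint32_t> PackComponents(const std::vector<uint32_t>& raw_values,
                                     const std::vector<uint32_t>& sizes) {
    std::vector<uint32_t> words;
    uint32_t accum = 0;
    uint32_t shift = 0;
    for (std::size_t i = 0; i < raw_values.size(); ++i) {
        accum |= raw_values[i] << shift; // place component at its bit offset
        shift += sizes[i];
        if (shift >= 32) {               // a full 32-bit word has been assembled
            words.push_back(accum);
            accum = 0;
            shift = 0;
        }
    }
    return words;
}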
- -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - - const Node op_a = GetRegister(instr.gpr8); - const Node op_b = [&]() { - if (instr.is_b_imm) { - return Immediate(instr.alu.GetSignedImm20_20()); - } else if (instr.is_b_gpr) { - return GetRegister(instr.gpr20); - } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - } - }(); - - // The iset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the condition - // is true, and to 0 otherwise. - const Node second_pred = GetPredicate(instr.iset.pred39, instr.iset.neg_pred != 0); - const Node first_pred = - GetPredicateComparisonInteger(instr.iset.cond, instr.iset.is_signed, op_a, op_b); - - const OperationCode combiner = GetPredicateCombiner(instr.iset.op); - - const Node predicate = Operation(combiner, first_pred, second_pred); - - const Node true_value = instr.iset.bf ? Immediate(1.0f) : Immediate(-1); - const Node false_value = instr.iset.bf ? Immediate(0.0f) : Immediate(0); - const Node value = - Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); - - SetRegister(bb, instr.gpr0, value); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp deleted file mode 100644 index 25e48fef8..000000000 --- a/src/video_core/shader/decode/integer_set_predicate.cpp +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; - -u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - - const Node op_a = GetRegister(instr.gpr8); - - const Node op_b = [&]() { - if (instr.is_b_imm) { - return Immediate(instr.alu.GetSignedImm20_20()); - } else if (instr.is_b_gpr) { - return GetRegister(instr.gpr20); - } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - } - }(); - - // We can't use the constant predicate as destination. 
- ASSERT(instr.isetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); - - const Node second_pred = GetPredicate(instr.isetp.pred39, instr.isetp.neg_pred != 0); - const Node predicate = - GetPredicateComparisonInteger(instr.isetp.cond, instr.isetp.is_signed, op_a, op_b); - - // Set the primary predicate to the result of Predicate OP SecondPredicate - const OperationCode combiner = GetPredicateCombiner(instr.isetp.op); - const Node value = Operation(combiner, predicate, second_pred); - SetPredicate(bb, instr.isetp.pred3, value); - - if (instr.isetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { - // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled - const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate); - SetPredicate(bb, instr.isetp.pred0, Operation(combiner, negated_pred, second_pred)); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp deleted file mode 100644 index 7728f600e..000000000 --- a/src/video_core/shader/decode/memory.cpp +++ /dev/null @@ -1,493 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <algorithm> -#include <utility> -#include <vector> - -#include <fmt/format.h> - -#include "common/alignment.h" -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using std::move; -using Tegra::Shader::AtomicOp; -using Tegra::Shader::AtomicType; -using Tegra::Shader::Attribute; -using Tegra::Shader::GlobalAtomicType; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Register; -using Tegra::Shader::StoreType; - -namespace { - -OperationCode GetAtomOperation(AtomicOp op) { - switch (op) { - case AtomicOp::Add: - return OperationCode::AtomicIAdd; - case AtomicOp::Min: - return OperationCode::AtomicIMin; - case AtomicOp::Max: - return OperationCode::AtomicIMax; - case AtomicOp::And: - return OperationCode::AtomicIAnd; - case AtomicOp::Or: - return OperationCode::AtomicIOr; - case AtomicOp::Xor: - return OperationCode::AtomicIXor; - case AtomicOp::Exch: - return OperationCode::AtomicIExchange; - default: - UNIMPLEMENTED_MSG("op={}", op); - return OperationCode::AtomicIAdd; - } -} - -bool IsUnaligned(Tegra::Shader::UniformType uniform_type) { - return uniform_type == Tegra::Shader::UniformType::UnsignedByte || - uniform_type == Tegra::Shader::UniformType::UnsignedShort; -} - -u32 GetUnalignedMask(Tegra::Shader::UniformType uniform_type) { - switch (uniform_type) { - case Tegra::Shader::UniformType::UnsignedByte: - return 0b11; - case Tegra::Shader::UniformType::UnsignedShort: - return 0b10; - default: - UNREACHABLE(); - return 0; - } -} - -u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) { - switch (uniform_type) { - case Tegra::Shader::UniformType::UnsignedByte: - return 8; - case Tegra::Shader::UniformType::UnsignedShort: - return 16; - case Tegra::Shader::UniformType::Single: - return 32; - case Tegra::Shader::UniformType::Double: - return 64; - case Tegra::Shader::UniformType::Quad: - case Tegra::Shader::UniformType::UnsignedQuad: - return 128; - default: - UNIMPLEMENTED_MSG("Unimplemented size={}!", uniform_type); - return 32; - } -} - -Node ExtractUnaligned(Node value, 
Node address, u32 mask, u32 size) { - Node offset = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask)); - offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3)); - return Operation(OperationCode::UBitfieldExtract, move(value), move(offset), Immediate(size)); -} - -Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) { - Node offset = Operation(OperationCode::UBitwiseAnd, move(address), Immediate(mask)); - offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3)); - return Operation(OperationCode::UBitfieldInsert, move(dest), move(value), move(offset), - Immediate(size)); -} - -Node Sign16Extend(Node value) { - Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15)); - Node is_sign = Operation(OperationCode::LogicalUEqual, move(sign), Immediate(1U << 15)); - Node extend = Operation(OperationCode::Select, is_sign, Immediate(0xFFFF0000), Immediate(0)); - return Operation(OperationCode::UBitwiseOr, move(value), move(extend)); -} - -} // Anonymous namespace - -u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - switch (opcode->get().GetId()) { - case OpCode::Id::LD_A: { - // Note: Shouldn't this be interp mode flat? As in no interpolation made. - UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, - "Indirect attribute loads are not supported"); - UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, - "Unaligned attribute loads are not supported"); - UNIMPLEMENTED_IF_MSG(instr.attribute.fmt20.IsPhysical() && - instr.attribute.fmt20.size != Tegra::Shader::AttributeSize::Word, - "Non-32 bits PHYS reads are not implemented"); - - const Node buffer{GetRegister(instr.gpr39)}; - - u64 next_element = instr.attribute.fmt20.element; - auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value()); - - const auto LoadNextElement = [&](u32 reg_offset) { - const Node attribute{instr.attribute.fmt20.IsPhysical() - ? GetPhysicalInputAttribute(instr.gpr8, buffer) - : GetInputAttribute(static_cast<Attribute::Index>(next_index), - next_element, buffer)}; - - SetRegister(bb, instr.gpr0.Value() + reg_offset, attribute); - - // Load the next attribute element into the following register. If the element - // to load goes beyond the vec4 size, load the first element of the next - // attribute. - next_element = (next_element + 1) % 4; - next_index = next_index + (next_element == 0 ? 
1 : 0); - }; - - const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1; - for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) { - LoadNextElement(reg_offset); - } - break; - } - case OpCode::Id::LD_C: { - UNIMPLEMENTED_IF(instr.ld_c.unknown != 0); - - Node index = GetRegister(instr.gpr8); - - const Node op_a = - GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index); - - switch (instr.ld_c.type.Value()) { - case Tegra::Shader::UniformType::Single: - SetRegister(bb, instr.gpr0, op_a); - break; - - case Tegra::Shader::UniformType::Double: { - const Node op_b = - GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index); - - SetTemporary(bb, 0, op_a); - SetTemporary(bb, 1, op_b); - SetRegister(bb, instr.gpr0, GetTemporary(0)); - SetRegister(bb, instr.gpr0.Value() + 1, GetTemporary(1)); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled type: {}", instr.ld_c.type.Value()); - } - break; - } - case OpCode::Id::LD_L: - LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", instr.ld_l.unknown); - [[fallthrough]]; - case OpCode::Id::LD_S: { - const auto GetAddress = [&](s32 offset) { - ASSERT(offset % 4 == 0); - const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset); - return Operation(OperationCode::IAdd, GetRegister(instr.gpr8), immediate_offset); - }; - const auto GetMemory = [&](s32 offset) { - return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(GetAddress(offset)) - : GetLocalMemory(GetAddress(offset)); - }; - - switch (instr.ldst_sl.type.Value()) { - case StoreType::Signed16: - SetRegister(bb, instr.gpr0, - Sign16Extend(ExtractUnaligned(GetMemory(0), GetAddress(0), 0b10, 16))); - break; - case StoreType::Bits32: - case StoreType::Bits64: - case StoreType::Bits128: { - const u32 count = [&] { - switch (instr.ldst_sl.type.Value()) { - case StoreType::Bits32: - return 1; - case StoreType::Bits64: - return 2; - case StoreType::Bits128: - return 4; - default: - UNREACHABLE(); - return 0; - } - }(); - for (u32 i = 0; i < count; ++i) { - SetTemporary(bb, i, GetMemory(i * 4)); - } - for (u32 i = 0; i < count; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - break; - } - default: - UNIMPLEMENTED_MSG("{} Unhandled type: {}", opcode->get().GetName(), - instr.ldst_sl.type.Value()); - } - break; - } - case OpCode::Id::LD: - case OpCode::Id::LDG: { - const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType { - switch (opcode->get().GetId()) { - case OpCode::Id::LD: - UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended LD is not implemented"); - return instr.generic.type; - case OpCode::Id::LDG: - return instr.ldg.type; - default: - UNREACHABLE(); - return {}; - } - }(); - - const auto [real_address_base, base_address, descriptor] = - TrackGlobalMemory(bb, instr, true, false); - - const u32 size = GetMemorySize(type); - const u32 count = Common::AlignUp(size, 32) / 32; - if (!real_address_base || !base_address) { - // Tracking failed, load zeroes. - for (u32 i = 0; i < count; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, Immediate(0.0f)); - } - break; - } - - for (u32 i = 0; i < count; ++i) { - const Node it_offset = Immediate(i * 4); - const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); - Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); - - // To handle unaligned loads get the bytes used to dereference global memory and extract - // those bytes from the loaded u32. 
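In plain integer terms, that extraction (and the 16-bit sign extension LD_S/LD_L apply through Sign16Extend) looks like the following; size is 8 or 16 here, never 32, and mask is 0b11 for bytes and 0b10 for half-words:

#include <cstdint>

// Extract an unaligned 8/16-bit value from the aligned 32-bit word it lives in.
uint32_t ExtractUnaligned(uint32_t word, uint32_t address, uint32_t mask, uint32_t size) {
    const uint32_t bit_offset = (address & mask) * 8;  // byte offset -> bit offset
    return (word >> bit_offset) & ((1u << size) - 1u); // pull out 'size' bits
}

// Sign-extend a 16-bit value held in the low bits of a 32-bit register.
uint32_t Sign16Extend(uint32_t value) {
    return (value & 0x8000u) ? (value | 0xFFFF0000u) : value;
}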
- if (IsUnaligned(type)) { - gmem = ExtractUnaligned(gmem, real_address, GetUnalignedMask(type), size); - } - - SetTemporary(bb, i, gmem); - } - - for (u32 i = 0; i < count; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - break; - } - case OpCode::Id::ST_A: { - UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, - "Indirect attribute loads are not supported"); - UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, - "Unaligned attribute loads are not supported"); - - u64 element = instr.attribute.fmt20.element; - auto index = static_cast<u64>(instr.attribute.fmt20.index.Value()); - - const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1; - for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) { - Node dest; - if (instr.attribute.fmt20.patch) { - const u32 offset = static_cast<u32>(index) * 4 + static_cast<u32>(element); - dest = MakeNode<PatchNode>(offset); - } else { - dest = GetOutputAttribute(static_cast<Attribute::Index>(index), element, - GetRegister(instr.gpr39)); - } - const auto src = GetRegister(instr.gpr0.Value() + reg_offset); - - bb.push_back(Operation(OperationCode::Assign, dest, src)); - - // Load the next attribute element into the following register. If the element to load - // goes beyond the vec4 size, load the first element of the next attribute. - element = (element + 1) % 4; - index = index + (element == 0 ? 1 : 0); - } - break; - } - case OpCode::Id::ST_L: - LOG_DEBUG(HW_GPU, "ST_L cache management mode: {}", instr.st_l.cache_management.Value()); - [[fallthrough]]; - case OpCode::Id::ST_S: { - const auto GetAddress = [&](s32 offset) { - ASSERT(offset % 4 == 0); - const Node immediate = Immediate(static_cast<s32>(instr.smem_imm) + offset); - return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate); - }; - - const bool is_local = opcode->get().GetId() == OpCode::Id::ST_L; - const auto set_memory = is_local ? &ShaderIR::SetLocalMemory : &ShaderIR::SetSharedMemory; - const auto get_memory = is_local ? &ShaderIR::GetLocalMemory : &ShaderIR::GetSharedMemory; - - switch (instr.ldst_sl.type.Value()) { - case StoreType::Bits128: - (this->*set_memory)(bb, GetAddress(12), GetRegister(instr.gpr0.Value() + 3)); - (this->*set_memory)(bb, GetAddress(8), GetRegister(instr.gpr0.Value() + 2)); - [[fallthrough]]; - case StoreType::Bits64: - (this->*set_memory)(bb, GetAddress(4), GetRegister(instr.gpr0.Value() + 1)); - [[fallthrough]]; - case StoreType::Bits32: - (this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0)); - break; - case StoreType::Unsigned16: - case StoreType::Signed16: { - Node address = GetAddress(0); - Node memory = (this->*get_memory)(address); - (this->*set_memory)( - bb, address, InsertUnaligned(memory, GetRegister(instr.gpr0), address, 0b10, 16)); - break; - } - default: - UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(), - instr.ldst_sl.type.Value()); - } - break; - } - case OpCode::Id::ST: - case OpCode::Id::STG: { - const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType { - switch (opcode->get().GetId()) { - case OpCode::Id::ST: - UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended ST is not implemented"); - return instr.generic.type; - case OpCode::Id::STG: - return instr.stg.type; - default: - UNREACHABLE(); - return {}; - } - }(); - - // For unaligned reads we have to read memory too. 
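
The comment above points at the store-side counterpart: a sub-word store has to read the containing 32-bit word, splice the new bits in, and write the whole word back. A minimal sketch of that read-modify-write, mirroring the InsertUnaligned helper at the top of this file (plain C++; the load32/store32 accessors are hypothetical, only to show where the splice sits):

#include <cstdint>

// Splice `size` bits of `value` into `dest` at the byte selected by the low
// address bits; all other bits of the destination word are preserved.
uint32_t InsertUnaligned(uint32_t dest, uint32_t value, uint32_t address, uint32_t mask,
                         uint32_t size) {
    const uint32_t bit_offset = (address & mask) * 8;
    const uint32_t field = ((1u << size) - 1) << bit_offset;   // assumes size < 32
    return (dest & ~field) | ((value << bit_offset) & field);
}

// A 16-bit store then becomes: load the word, insert, store the word back:
//   uint32_t word = load32(addr);                               // hypothetical accessor
//   store32(addr, InsertUnaligned(word, new_value, addr, 0b10, 16));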
- const bool is_read = IsUnaligned(type); - const auto [real_address_base, base_address, descriptor] = - TrackGlobalMemory(bb, instr, is_read, true); - if (!real_address_base || !base_address) { - // Tracking failed, skip the store. - break; - } - - const u32 size = GetMemorySize(type); - const u32 count = Common::AlignUp(size, 32) / 32; - for (u32 i = 0; i < count; ++i) { - const Node it_offset = Immediate(i * 4); - const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); - const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); - Node value = GetRegister(instr.gpr0.Value() + i); - - if (IsUnaligned(type)) { - const u32 mask = GetUnalignedMask(type); - value = InsertUnaligned(gmem, move(value), real_address, mask, size); - } - - bb.push_back(Operation(OperationCode::Assign, gmem, value)); - } - break; - } - case OpCode::Id::RED: { - UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32, "type={}", - instr.red.type.Value()); - const auto [real_address, base_address, descriptor] = - TrackGlobalMemory(bb, instr, true, true); - if (!real_address || !base_address) { - // Tracking failed, skip atomic. - break; - } - Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); - Node value = GetRegister(instr.gpr0); - bb.push_back(Operation(GetAtomOperation(instr.red.operation), move(gmem), move(value))); - break; - } - case OpCode::Id::ATOM: { - UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc || - instr.atom.operation == AtomicOp::Dec || - instr.atom.operation == AtomicOp::SafeAdd, - "operation={}", instr.atom.operation.Value()); - UNIMPLEMENTED_IF_MSG(instr.atom.type == GlobalAtomicType::S64 || - instr.atom.type == GlobalAtomicType::U64 || - instr.atom.type == GlobalAtomicType::F16x2_FTZ_RN || - instr.atom.type == GlobalAtomicType::F32_FTZ_RN, - "type={}", instr.atom.type.Value()); - - const auto [real_address, base_address, descriptor] = - TrackGlobalMemory(bb, instr, true, true); - if (!real_address || !base_address) { - // Tracking failed, skip atomic. - break; - } - - const bool is_signed = - instr.atom.type == GlobalAtomicType::S32 || instr.atom.type == GlobalAtomicType::S64; - Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); - SetRegister(bb, instr.gpr0, - SignedOperation(GetAtomOperation(instr.atom.operation), is_signed, gmem, - GetRegister(instr.gpr20))); - break; - } - case OpCode::Id::ATOMS: { - UNIMPLEMENTED_IF_MSG(instr.atoms.operation == AtomicOp::Inc || - instr.atoms.operation == AtomicOp::Dec, - "operation={}", instr.atoms.operation.Value()); - UNIMPLEMENTED_IF_MSG(instr.atoms.type == AtomicType::S64 || - instr.atoms.type == AtomicType::U64, - "type={}", instr.atoms.type.Value()); - const bool is_signed = - instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64; - const s32 offset = instr.atoms.GetImmediateOffset(); - Node address = GetRegister(instr.gpr8); - address = Operation(OperationCode::IAdd, move(address), Immediate(offset)); - SetRegister(bb, instr.gpr0, - SignedOperation(GetAtomOperation(instr.atoms.operation), is_signed, - GetSharedMemory(move(address)), GetRegister(instr.gpr20))); - break; - } - case OpCode::Id::AL2P: { - // Ignore al2p.direction since we don't care about it. - - // Calculate emulation fake physical address. 
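
For context on the RED/ATOM/ATOMS cases decoded above: RED is a reduction whose old value is discarded, ATOM returns the previous memory contents into a register, and ATOMS is the shared-memory variant of the latter. A rough host-side analogy using std::atomic, purely to illustrate that distinction rather than how the IR lowers it:

#include <atomic>
#include <cstdint>

// RED-style reduction: perform the atomic update, ignore the old value.
void red_add(std::atomic<uint32_t>& mem, uint32_t value) {
    mem.fetch_add(value, std::memory_order_relaxed);
}

// ATOM-style atomic: perform the update and hand the previous value back.
uint32_t atom_add(std::atomic<uint32_t>& mem, uint32_t value) {
    return mem.fetch_add(value, std::memory_order_relaxed);
}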
- const Node fixed_address{Immediate(static_cast<u32>(instr.al2p.address))}; - const Node reg{GetRegister(instr.gpr8)}; - const Node fake_address{Operation(OperationCode::IAdd, NO_PRECISE, reg, fixed_address)}; - - // Set the fake address to target register. - SetRegister(bb, instr.gpr0, fake_address); - - // Signal the shader IR to declare all possible attributes and varyings - uses_physical_attributes = true; - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb, - Instruction instr, - bool is_read, bool is_write) { - const auto addr_register{GetRegister(instr.gmem.gpr)}; - const auto immediate_offset{static_cast<u32>(instr.gmem.offset)}; - - const auto [base_address, index, offset] = - TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size())); - ASSERT_OR_EXECUTE_MSG( - base_address != nullptr, { return std::make_tuple(nullptr, nullptr, GlobalMemoryBase{}); }, - "Global memory tracking failed"); - - bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset))); - - const GlobalMemoryBase descriptor{index, offset}; - const auto& entry = used_global_memory.try_emplace(descriptor).first; - auto& usage = entry->second; - usage.is_written |= is_write; - usage.is_read |= is_read; - - const auto real_address = - Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register); - - return {real_address, base_address, descriptor}; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp deleted file mode 100644 index 5f88537bc..000000000 --- a/src/video_core/shader/decode/other.cpp +++ /dev/null @@ -1,322 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using std::move; -using Tegra::Shader::ConditionCode; -using Tegra::Shader::Instruction; -using Tegra::Shader::IpaInterpMode; -using Tegra::Shader::OpCode; -using Tegra::Shader::PixelImap; -using Tegra::Shader::Register; -using Tegra::Shader::SystemVariable; - -using Index = Tegra::Shader::Attribute::Index; - -u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - switch (opcode->get().GetId()) { - case OpCode::Id::NOP: { - UNIMPLEMENTED_IF(instr.nop.cc != Tegra::Shader::ConditionCode::T); - UNIMPLEMENTED_IF(instr.nop.trigger != 0); - // With the previous preconditions, this instruction is a no-operation. - break; - } - case OpCode::Id::EXIT: { - const ConditionCode cc = instr.flow_condition_code; - UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "EXIT condition code used: {}", cc); - - switch (instr.flow.cond) { - case Tegra::Shader::FlowCondition::Always: - bb.push_back(Operation(OperationCode::Exit)); - if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) { - // If this is an unconditional exit then just end processing here, - // otherwise we have to account for the possibility of the condition - // not being met, so continue processing the next instruction. 
- pc = MAX_PROGRAM_LENGTH - 1; - } - break; - - case Tegra::Shader::FlowCondition::Fcsm_Tr: - // TODO(bunnei): What is this used for? If we assume this conditon is not - // satisifed, dual vertex shaders in Farming Simulator make more sense - UNIMPLEMENTED_MSG("Skipping unknown FlowCondition::Fcsm_Tr"); - break; - - default: - UNIMPLEMENTED_MSG("Unhandled flow condition: {}", instr.flow.cond.Value()); - } - break; - } - case OpCode::Id::KIL: { - UNIMPLEMENTED_IF(instr.flow.cond != Tegra::Shader::FlowCondition::Always); - - const ConditionCode cc = instr.flow_condition_code; - UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "KIL condition code used: {}", cc); - - bb.push_back(Operation(OperationCode::Discard)); - break; - } - case OpCode::Id::S2R: { - const Node value = [this, instr] { - switch (instr.sys20) { - case SystemVariable::LaneId: - return Operation(OperationCode::ThreadId); - case SystemVariable::InvocationId: - return Operation(OperationCode::InvocationId); - case SystemVariable::Ydirection: - uses_y_negate = true; - return Operation(OperationCode::YNegate); - case SystemVariable::InvocationInfo: - LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete"); - return Immediate(0x00ff'0000U); - case SystemVariable::WscaleFactorXY: - UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented"); - return Immediate(0U); - case SystemVariable::WscaleFactorZ: - UNIMPLEMENTED_MSG("S2R WscaleFactorZ is not implemented"); - return Immediate(0U); - case SystemVariable::Tid: { - Node val = Immediate(0); - val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdX), 0, 9); - val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdY), 16, 9); - val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdZ), 26, 5); - return val; - } - case SystemVariable::TidX: - return Operation(OperationCode::LocalInvocationIdX); - case SystemVariable::TidY: - return Operation(OperationCode::LocalInvocationIdY); - case SystemVariable::TidZ: - return Operation(OperationCode::LocalInvocationIdZ); - case SystemVariable::CtaIdX: - return Operation(OperationCode::WorkGroupIdX); - case SystemVariable::CtaIdY: - return Operation(OperationCode::WorkGroupIdY); - case SystemVariable::CtaIdZ: - return Operation(OperationCode::WorkGroupIdZ); - case SystemVariable::EqMask: - case SystemVariable::LtMask: - case SystemVariable::LeMask: - case SystemVariable::GtMask: - case SystemVariable::GeMask: - uses_warps = true; - switch (instr.sys20) { - case SystemVariable::EqMask: - return Operation(OperationCode::ThreadEqMask); - case SystemVariable::LtMask: - return Operation(OperationCode::ThreadLtMask); - case SystemVariable::LeMask: - return Operation(OperationCode::ThreadLeMask); - case SystemVariable::GtMask: - return Operation(OperationCode::ThreadGtMask); - case SystemVariable::GeMask: - return Operation(OperationCode::ThreadGeMask); - default: - UNREACHABLE(); - return Immediate(0u); - } - default: - UNIMPLEMENTED_MSG("Unhandled system move: {}", instr.sys20.Value()); - return Immediate(0u); - } - }(); - SetRegister(bb, instr.gpr0, value); - - break; - } - case OpCode::Id::BRA: { - Node branch; - if (instr.bra.constant_buffer == 0) { - const u32 target = pc + instr.bra.GetBranchTarget(); - branch = Operation(OperationCode::Branch, Immediate(target)); - } else { - const u32 target = pc + 1; - const Node op_a = GetConstBuffer(instr.cbuf36.index, instr.cbuf36.GetOffset()); - const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, - PRECISE, op_a, 
Immediate(3)); - const Node operand = - Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); - branch = Operation(OperationCode::BranchIndirect, operand); - } - - const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; - if (cc != Tegra::Shader::ConditionCode::T) { - bb.push_back(Conditional(GetConditionCode(cc), {branch})); - } else { - bb.push_back(branch); - } - break; - } - case OpCode::Id::BRX: { - Node operand; - if (instr.brx.constant_buffer != 0) { - const s32 target = pc + 1; - const Node index = GetRegister(instr.gpr8); - const Node op_a = - GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index); - const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, - PRECISE, op_a, Immediate(3)); - operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); - } else { - const s32 target = pc + instr.brx.GetBranchExtend(); - const Node op_a = GetRegister(instr.gpr8); - const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, - PRECISE, op_a, Immediate(3)); - operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); - } - const Node branch = Operation(OperationCode::BranchIndirect, operand); - - const ConditionCode cc = instr.flow_condition_code; - if (cc != ConditionCode::T) { - bb.push_back(Conditional(GetConditionCode(cc), {branch})); - } else { - bb.push_back(branch); - } - break; - } - case OpCode::Id::SSY: { - UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, - "Constant buffer flow is not supported"); - - if (disable_flow_stack) { - break; - } - - // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC. - const u32 target = pc + instr.bra.GetBranchTarget(); - bb.push_back( - Operation(OperationCode::PushFlowStack, MetaStackClass::Ssy, Immediate(target))); - break; - } - case OpCode::Id::PBK: { - UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, - "Constant buffer PBK is not supported"); - - if (disable_flow_stack) { - break; - } - - // PBK pushes to a stack the address where BRK will jump to. - const u32 target = pc + instr.bra.GetBranchTarget(); - bb.push_back( - Operation(OperationCode::PushFlowStack, MetaStackClass::Pbk, Immediate(target))); - break; - } - case OpCode::Id::SYNC: { - const ConditionCode cc = instr.flow_condition_code; - UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "SYNC condition code used: {}", cc); - - if (decompiled) { - break; - } - - // The SYNC opcode jumps to the address previously set by the SSY opcode - bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy)); - break; - } - case OpCode::Id::BRK: { - const ConditionCode cc = instr.flow_condition_code; - UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "BRK condition code used: {}", cc); - if (decompiled) { - break; - } - - // The BRK opcode jumps to the address previously set by the PBK opcode - bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk)); - break; - } - case OpCode::Id::IPA: { - const bool is_physical = instr.ipa.idx && instr.gpr8.Value() != 0xff; - const auto attribute = instr.attribute.fmt28; - const Index index = attribute.index; - - Node value = is_physical ? GetPhysicalInputAttribute(instr.gpr8) - : GetInputAttribute(index, attribute.element); - - // Code taken from Ryujinx. 
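
The SSY/SYNC and PBK/BRK cases above model the hardware's two flow stacks: SSY and PBK push a target address, SYNC and BRK pop it and jump there. A minimal sketch of that stack discipline (illustrative only; the IR itself just emits PushFlowStack/PopFlowStack operations for the backends to lower):

#include <cstdint>
#include <stack>

struct FlowStacks {
    std::stack<uint32_t> ssy;  // reconvergence targets: pushed by SSY, popped by SYNC
    std::stack<uint32_t> pbk;  // break targets: pushed by PBK, popped by BRK
};

// SSY/PBK: remember where execution should resume later.
void Push(std::stack<uint32_t>& stack, uint32_t target_pc) {
    stack.push(target_pc);
}

// SYNC/BRK: jump to the most recently pushed address.
uint32_t Pop(std::stack<uint32_t>& stack) {
    const uint32_t target_pc = stack.top();
    stack.pop();
    return target_pc;
}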
- if (index >= Index::Attribute_0 && index <= Index::Attribute_31) { - const u32 location = static_cast<u32>(index) - static_cast<u32>(Index::Attribute_0); - if (header.ps.GetPixelImap(location) == PixelImap::Perspective) { - Node position_w = GetInputAttribute(Index::Position, 3); - value = Operation(OperationCode::FMul, move(value), move(position_w)); - } - } - - if (instr.ipa.interp_mode == IpaInterpMode::Multiply) { - value = Operation(OperationCode::FMul, move(value), GetRegister(instr.gpr20)); - } - - value = GetSaturatedFloat(move(value), instr.ipa.saturate); - - SetRegister(bb, instr.gpr0, move(value)); - break; - } - case OpCode::Id::OUT_R: { - UNIMPLEMENTED_IF_MSG(instr.gpr20.Value() != Register::ZeroIndex, - "Stream buffer is not supported"); - - if (instr.out.emit) { - // gpr0 is used to store the next address and gpr8 contains the address to emit. - // Hardware uses pointers here but we just ignore it - bb.push_back(Operation(OperationCode::EmitVertex)); - SetRegister(bb, instr.gpr0, Immediate(0)); - } - if (instr.out.cut) { - bb.push_back(Operation(OperationCode::EndPrimitive)); - } - break; - } - case OpCode::Id::ISBERD: { - UNIMPLEMENTED_IF(instr.isberd.o != 0); - UNIMPLEMENTED_IF(instr.isberd.skew != 0); - UNIMPLEMENTED_IF(instr.isberd.shift != Tegra::Shader::IsberdShift::None); - UNIMPLEMENTED_IF(instr.isberd.mode != Tegra::Shader::IsberdMode::None); - LOG_WARNING(HW_GPU, "ISBERD instruction is incomplete"); - SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8)); - break; - } - case OpCode::Id::BAR: { - UNIMPLEMENTED_IF_MSG(instr.value != 0xF0A81B8000070000ULL, "BAR is not BAR.SYNC 0x0"); - bb.push_back(Operation(OperationCode::Barrier)); - break; - } - case OpCode::Id::MEMBAR: { - UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default); - const OperationCode type = [instr] { - switch (instr.membar.type) { - case Tegra::Shader::MembarType::CTA: - return OperationCode::MemoryBarrierGroup; - case Tegra::Shader::MembarType::GL: - return OperationCode::MemoryBarrierGlobal; - default: - UNIMPLEMENTED_MSG("MEMBAR type={}", instr.membar.type.Value()); - return OperationCode::MemoryBarrierGlobal; - } - }(); - bb.push_back(Operation(type)); - break; - } - case OpCode::Id::DEPBAR: { - LOG_DEBUG(HW_GPU, "DEPBAR instruction is stubbed"); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/predicate_set_predicate.cpp b/src/video_core/shader/decode/predicate_set_predicate.cpp deleted file mode 100644 index 9290d22eb..000000000 --- a/src/video_core/shader/decode/predicate_set_predicate.cpp +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; - -u32 ShaderIR::DecodePredicateSetPredicate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - switch (opcode->get().GetId()) { - case OpCode::Id::PSETP: { - const Node op_a = GetPredicate(instr.psetp.pred12, instr.psetp.neg_pred12 != 0); - const Node op_b = GetPredicate(instr.psetp.pred29, instr.psetp.neg_pred29 != 0); - - // We can't use the constant predicate as destination. - ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); - - const Node second_pred = GetPredicate(instr.psetp.pred39, instr.psetp.neg_pred39 != 0); - - const OperationCode combiner = GetPredicateCombiner(instr.psetp.op); - const Node predicate = Operation(combiner, op_a, op_b); - - // Set the primary predicate to the result of Predicate OP SecondPredicate - SetPredicate(bb, instr.psetp.pred3, Operation(combiner, predicate, second_pred)); - - if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { - // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if - // enabled - SetPredicate(bb, instr.psetp.pred0, - Operation(combiner, Operation(OperationCode::LogicalNegate, predicate), - second_pred)); - } - break; - } - case OpCode::Id::CSETP: { - const Node pred = GetPredicate(instr.csetp.pred39, instr.csetp.neg_pred39 != 0); - const Node condition_code = GetConditionCode(instr.csetp.cc); - - const OperationCode combiner = GetPredicateCombiner(instr.csetp.op); - - if (instr.csetp.pred3 != static_cast<u64>(Pred::UnusedIndex)) { - SetPredicate(bb, instr.csetp.pred3, Operation(combiner, condition_code, pred)); - } - if (instr.csetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { - const Node neg_cc = Operation(OperationCode::LogicalNegate, condition_code); - SetPredicate(bb, instr.csetp.pred0, Operation(combiner, neg_cc, pred)); - } - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled predicate instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp deleted file mode 100644 index 84dbc50fe..000000000 --- a/src/video_core/shader/decode/predicate_set_register.cpp +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in PSET is not implemented"); - - const Node op_a = GetPredicate(instr.pset.pred12, instr.pset.neg_pred12 != 0); - const Node op_b = GetPredicate(instr.pset.pred29, instr.pset.neg_pred29 != 0); - const Node first_pred = Operation(GetPredicateCombiner(instr.pset.cond), op_a, op_b); - - const Node second_pred = GetPredicate(instr.pset.pred39, instr.pset.neg_pred39 != 0); - - const OperationCode combiner = GetPredicateCombiner(instr.pset.op); - const Node predicate = Operation(combiner, first_pred, second_pred); - - const Node true_value = instr.pset.bf ? Immediate(1.0f) : Immediate(0xffffffff); - const Node false_value = instr.pset.bf ? Immediate(0.0f) : Immediate(0); - const Node value = - Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); - - if (instr.pset.bf) { - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - } else { - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - } - SetRegister(bb, instr.gpr0, value); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp deleted file mode 100644 index 6116c31aa..000000000 --- a/src/video_core/shader/decode/register_set_predicate.cpp +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <utility> - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using std::move; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -namespace { -constexpr u64 NUM_CONDITION_CODES = 4; -constexpr u64 NUM_PREDICATES = 7; -} // namespace - -u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - Node apply_mask = [this, opcode, instr] { - switch (opcode->get().GetId()) { - case OpCode::Id::R2P_IMM: - case OpCode::Id::P2R_IMM: - return Immediate(static_cast<u32>(instr.p2r_r2p.immediate_mask)); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - const u32 offset = static_cast<u32>(instr.p2r_r2p.byte) * 8; - - const bool cc = instr.p2r_r2p.mode == Tegra::Shader::R2pMode::Cc; - const u64 num_entries = cc ? NUM_CONDITION_CODES : NUM_PREDICATES; - const auto get_entry = [this, cc](u64 entry) { - return cc ? 
GetInternalFlag(static_cast<InternalFlag>(entry)) : GetPredicate(entry); - }; - - switch (opcode->get().GetId()) { - case OpCode::Id::R2P_IMM: { - Node mask = GetRegister(instr.gpr8); - - for (u64 entry = 0; entry < num_entries; ++entry) { - const u32 shift = static_cast<u32>(entry); - - Node apply = BitfieldExtract(apply_mask, shift, 1); - Node condition = Operation(OperationCode::LogicalUNotEqual, apply, Immediate(0)); - - Node compare = BitfieldExtract(mask, offset + shift, 1); - Node value = Operation(OperationCode::LogicalUNotEqual, move(compare), Immediate(0)); - - Node code = Operation(OperationCode::LogicalAssign, get_entry(entry), move(value)); - bb.push_back(Conditional(condition, {move(code)})); - } - break; - } - case OpCode::Id::P2R_IMM: { - Node value = Immediate(0); - for (u64 entry = 0; entry < num_entries; ++entry) { - Node bit = Operation(OperationCode::Select, get_entry(entry), Immediate(1U << entry), - Immediate(0)); - value = Operation(OperationCode::UBitwiseOr, move(value), move(bit)); - } - value = Operation(OperationCode::UBitwiseAnd, move(value), apply_mask); - value = BitfieldInsert(GetRegister(instr.gpr8), move(value), offset, 8); - - SetRegister(bb, instr.gpr0, move(value)); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled P2R/R2R instruction: {}", opcode->get().GetName()); - break; - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp deleted file mode 100644 index a53819c15..000000000 --- a/src/video_core/shader/decode/shift.cpp +++ /dev/null @@ -1,153 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using std::move; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::ShfType; -using Tegra::Shader::ShfXmode; - -namespace { - -Node IsFull(Node shift) { - return Operation(OperationCode::LogicalIEqual, move(shift), Immediate(32)); -} - -Node Shift(OperationCode opcode, Node value, Node shift) { - Node shifted = Operation(opcode, move(value), shift); - return Operation(OperationCode::Select, IsFull(move(shift)), Immediate(0), move(shifted)); -} - -Node ClampShift(Node shift, s32 size = 32) { - shift = Operation(OperationCode::IMax, move(shift), Immediate(0)); - return Operation(OperationCode::IMin, move(shift), Immediate(size)); -} - -Node WrapShift(Node shift, s32 size = 32) { - return Operation(OperationCode::UBitwiseAnd, move(shift), Immediate(size - 1)); -} - -Node ShiftRight(Node low, Node high, Node shift, Node low_shift, ShfType type) { - // These values are used when the shift value is less than 32 - Node less_low = Shift(OperationCode::ILogicalShiftRight, low, shift); - Node less_high = Shift(OperationCode::ILogicalShiftLeft, high, low_shift); - Node less = Operation(OperationCode::IBitwiseOr, move(less_high), move(less_low)); - - if (type == ShfType::Bits32) { - // On 32 bit shifts we are either full (shifting 32) or shifting less than 32 bits - return Operation(OperationCode::Select, IsFull(move(shift)), move(high), move(less)); - } - - // And these when it's larger than or 32 - const bool is_signed = type == ShfType::S64; - const auto opcode = 
SignedToUnsignedCode(OperationCode::IArithmeticShiftRight, is_signed); - Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32)); - Node greater = Shift(opcode, high, move(reduced)); - - Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32)); - Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0)); - - Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater)); - return Operation(OperationCode::Select, move(is_zero), move(high), move(value)); -} - -Node ShiftLeft(Node low, Node high, Node shift, Node low_shift, ShfType type) { - // These values are used when the shift value is less than 32 - Node less_low = Operation(OperationCode::ILogicalShiftRight, low, low_shift); - Node less_high = Operation(OperationCode::ILogicalShiftLeft, high, shift); - Node less = Operation(OperationCode::IBitwiseOr, move(less_low), move(less_high)); - - if (type == ShfType::Bits32) { - // On 32 bit shifts we are either full (shifting 32) or shifting less than 32 bits - return Operation(OperationCode::Select, IsFull(move(shift)), move(low), move(less)); - } - - // And these when it's larger than or 32 - Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32)); - Node greater = Shift(OperationCode::ILogicalShiftLeft, move(low), move(reduced)); - - Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32)); - Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0)); - - Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater)); - return Operation(OperationCode::Select, move(is_zero), move(high), move(value)); -} - -} // Anonymous namespace - -u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - Node op_a = GetRegister(instr.gpr8); - Node op_b = [this, instr] { - if (instr.is_b_imm) { - return Immediate(instr.alu.GetSignedImm20_20()); - } else if (instr.is_b_gpr) { - return GetRegister(instr.gpr20); - } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - } - }(); - - switch (const auto opid = opcode->get().GetId(); opid) { - case OpCode::Id::SHR_C: - case OpCode::Id::SHR_R: - case OpCode::Id::SHR_IMM: { - op_b = instr.shr.wrap ? WrapShift(move(op_b)) : ClampShift(move(op_b)); - - Node value = SignedOperation(OperationCode::IArithmeticShiftRight, instr.shift.is_signed, - move(op_a), move(op_b)); - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, move(value)); - break; - } - case OpCode::Id::SHL_C: - case OpCode::Id::SHL_R: - case OpCode::Id::SHL_IMM: { - Node value = Operation(OperationCode::ILogicalShiftLeft, op_a, op_b); - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, move(value)); - break; - } - case OpCode::Id::SHF_RIGHT_R: - case OpCode::Id::SHF_RIGHT_IMM: - case OpCode::Id::SHF_LEFT_R: - case OpCode::Id::SHF_LEFT_IMM: { - UNIMPLEMENTED_IF(instr.generates_cc); - UNIMPLEMENTED_IF_MSG(instr.shf.xmode != ShfXmode::None, "xmode={}", - instr.shf.xmode.Value()); - - if (instr.is_b_imm) { - op_b = Immediate(static_cast<u32>(instr.shf.immediate)); - } - const s32 size = instr.shf.type == ShfType::Bits32 ? 32 : 64; - Node shift = instr.shf.wrap ? 
WrapShift(move(op_b), size) : ClampShift(move(op_b), size); - - Node negated_shift = Operation(OperationCode::INegate, shift); - Node low_shift = Operation(OperationCode::IAdd, move(negated_shift), Immediate(32)); - - const bool is_right = opid == OpCode::Id::SHF_RIGHT_R || opid == OpCode::Id::SHF_RIGHT_IMM; - Node value = (is_right ? ShiftRight : ShiftLeft)( - move(op_a), GetRegister(instr.gpr39), move(shift), move(low_shift), instr.shf.type); - - SetRegister(bb, instr.gpr0, move(value)); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp deleted file mode 100644 index c69681e8d..000000000 --- a/src/video_core/shader/decode/texture.cpp +++ /dev/null @@ -1,935 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <algorithm> -#include <vector> -#include <fmt/format.h> - -#include "common/assert.h" -#include "common/bit_field.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Register; -using Tegra::Shader::TextureMiscMode; -using Tegra::Shader::TextureProcessMode; -using Tegra::Shader::TextureType; - -static std::size_t GetCoordCount(TextureType texture_type) { - switch (texture_type) { - case TextureType::Texture1D: - return 1; - case TextureType::Texture2D: - return 2; - case TextureType::Texture3D: - case TextureType::TextureCube: - return 3; - default: - UNIMPLEMENTED_MSG("Unhandled texture type: {}", texture_type); - return 0; - } -} - -u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - bool is_bindless = false; - switch (opcode->get().GetId()) { - case OpCode::Id::TEX: { - const TextureType texture_type{instr.tex.texture_type}; - const bool is_array = instr.tex.array != 0; - const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI); - const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC); - const auto process_mode = instr.tex.GetTextureProcessMode(); - WriteTexInstructionFloat( - bb, instr, - GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi, {})); - break; - } - case OpCode::Id::TEX_B: { - UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI), - "AOFFI is not implemented"); - - const TextureType texture_type{instr.tex_b.texture_type}; - const bool is_array = instr.tex_b.array != 0; - const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI); - const bool depth_compare = instr.tex_b.UsesMiscMode(TextureMiscMode::DC); - const auto process_mode = instr.tex_b.GetTextureProcessMode(); - WriteTexInstructionFloat(bb, instr, - GetTexCode(instr, texture_type, process_mode, depth_compare, - is_array, is_aoffi, {instr.gpr20})); - break; - } - case OpCode::Id::TEXS: { - const TextureType texture_type{instr.texs.GetTextureType()}; - const bool is_array{instr.texs.IsArrayTexture()}; - const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC); - const auto process_mode = 
instr.texs.GetTextureProcessMode(); - - const Node4 components = - GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array); - - if (instr.texs.fp32_flag) { - WriteTexsInstructionFloat(bb, instr, components); - } else { - WriteTexsInstructionHalfFloat(bb, instr, components); - } - break; - } - case OpCode::Id::TLD4_B: { - is_bindless = true; - [[fallthrough]]; - } - case OpCode::Id::TLD4: { - UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV), - "NDV is not implemented"); - const auto texture_type = instr.tld4.texture_type.Value(); - const bool depth_compare = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::DC) - : instr.tld4.UsesMiscMode(TextureMiscMode::DC); - const bool is_array = instr.tld4.array != 0; - const bool is_aoffi = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::AOFFI) - : instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI); - const bool is_ptp = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::PTP) - : instr.tld4.UsesMiscMode(TextureMiscMode::PTP); - WriteTexInstructionFloat(bb, instr, - GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi, - is_ptp, is_bindless)); - break; - } - case OpCode::Id::TLD4S: { - constexpr std::size_t num_coords = 2; - const bool is_aoffi = instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI); - const bool is_depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC); - const Node op_a = GetRegister(instr.gpr8); - const Node op_b = GetRegister(instr.gpr20); - - // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. - std::vector<Node> coords; - std::vector<Node> aoffi; - Node depth_compare; - if (is_depth_compare) { - // Note: TLD4S coordinate encoding works just like TEXS's - const Node op_y = GetRegister(instr.gpr8.Value() + 1); - coords.push_back(op_a); - coords.push_back(op_y); - if (is_aoffi) { - aoffi = GetAoffiCoordinates(op_b, num_coords, true); - depth_compare = GetRegister(instr.gpr20.Value() + 1); - } else { - depth_compare = op_b; - } - } else { - // There's no depth compare - coords.push_back(op_a); - if (is_aoffi) { - coords.push_back(GetRegister(instr.gpr8.Value() + 1)); - aoffi = GetAoffiCoordinates(op_b, num_coords, true); - } else { - coords.push_back(op_b); - } - } - const Node component = Immediate(static_cast<u32>(instr.tld4s.component)); - - SamplerInfo info; - info.is_shadow = is_depth_compare; - const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info); - - Node4 values; - for (u32 element = 0; element < values.size(); ++element) { - MetaTexture meta{*sampler, {}, depth_compare, aoffi, {}, {}, - {}, {}, component, element, {}}; - values[element] = Operation(OperationCode::TextureGather, meta, coords); - } - - if (instr.tld4s.fp16_flag) { - WriteTexsInstructionHalfFloat(bb, instr, values, true); - } else { - WriteTexsInstructionFloat(bb, instr, values, true); - } - break; - } - case OpCode::Id::TXD_B: - is_bindless = true; - [[fallthrough]]; - case OpCode::Id::TXD: { - UNIMPLEMENTED_IF_MSG(instr.txd.UsesMiscMode(TextureMiscMode::AOFFI), - "AOFFI is not implemented"); - - const bool is_array = instr.txd.is_array != 0; - const auto derivate_reg = instr.gpr20.Value(); - const auto texture_type = instr.txd.texture_type.Value(); - const auto coord_count = GetCoordCount(texture_type); - u64 base_reg = instr.gpr8.Value(); - Node index_var; - SamplerInfo info; - info.type = texture_type; - info.is_array = is_array; - const std::optional<SamplerEntry> sampler = - is_bindless ? 
GetBindlessSampler(base_reg, info, index_var) - : GetSampler(instr.sampler, info); - Node4 values; - if (!sampler) { - std::generate(values.begin(), values.end(), [this] { return Immediate(0); }); - WriteTexInstructionFloat(bb, instr, values); - break; - } - - if (is_bindless) { - base_reg++; - } - - std::vector<Node> coords; - std::vector<Node> derivates; - for (std::size_t i = 0; i < coord_count; ++i) { - coords.push_back(GetRegister(base_reg + i)); - const std::size_t derivate = i * 2; - derivates.push_back(GetRegister(derivate_reg + derivate)); - derivates.push_back(GetRegister(derivate_reg + derivate + 1)); - } - - Node array_node = {}; - if (is_array) { - const Node info_reg = GetRegister(base_reg + coord_count); - array_node = BitfieldExtract(info_reg, 0, 16); - } - - for (u32 element = 0; element < values.size(); ++element) { - MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates, - {}, {}, {}, element, index_var}; - values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords); - } - - WriteTexInstructionFloat(bb, instr, values); - - break; - } - case OpCode::Id::TXQ_B: - is_bindless = true; - [[fallthrough]]; - case OpCode::Id::TXQ: { - Node index_var; - const std::optional<SamplerEntry> sampler = - is_bindless ? GetBindlessSampler(instr.gpr8, {}, index_var) - : GetSampler(instr.sampler, {}); - - if (!sampler) { - u32 indexer = 0; - for (u32 element = 0; element < 4; ++element) { - if (!instr.txq.IsComponentEnabled(element)) { - continue; - } - const Node value = Immediate(0); - SetTemporary(bb, indexer++, value); - } - for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - break; - } - - u32 indexer = 0; - switch (instr.txq.query_type) { - case Tegra::Shader::TextureQueryType::Dimension: { - for (u32 element = 0; element < 4; ++element) { - if (!instr.txq.IsComponentEnabled(element)) { - continue; - } - MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var}; - const Node value = - Operation(OperationCode::TextureQueryDimensions, meta, - GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); - SetTemporary(bb, indexer++, value); - } - for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled texture query type: {}", instr.txq.query_type.Value()); - } - break; - } - case OpCode::Id::TMML_B: - is_bindless = true; - [[fallthrough]]; - case OpCode::Id::TMML: { - UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), - "NDV is not implemented"); - - const auto texture_type = instr.tmml.texture_type.Value(); - const bool is_array = instr.tmml.array != 0; - SamplerInfo info; - info.type = texture_type; - info.is_array = is_array; - Node index_var; - const std::optional<SamplerEntry> sampler = - is_bindless ? GetBindlessSampler(instr.gpr20, info, index_var) - : GetSampler(instr.sampler, info); - - if (!sampler) { - u32 indexer = 0; - for (u32 element = 0; element < 2; ++element) { - if (!instr.tmml.IsComponentEnabled(element)) { - continue; - } - const Node value = Immediate(0); - SetTemporary(bb, indexer++, value); - } - for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - break; - } - - const u64 base_index = is_array ? 
1 : 0; - const u64 num_components = [texture_type] { - switch (texture_type) { - case TextureType::Texture1D: - return 1; - case TextureType::Texture2D: - return 2; - case TextureType::TextureCube: - return 3; - default: - UNIMPLEMENTED_MSG("Unhandled texture type {}", texture_type); - return 2; - } - }(); - // TODO: What's the array component used for? - - std::vector<Node> coords; - coords.reserve(num_components); - for (u64 component = 0; component < num_components; ++component) { - coords.push_back(GetRegister(instr.gpr8.Value() + base_index + component)); - } - - u32 indexer = 0; - for (u32 element = 0; element < 2; ++element) { - if (!instr.tmml.IsComponentEnabled(element)) { - continue; - } - MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var}; - Node value = Operation(OperationCode::TextureQueryLod, meta, coords); - SetTemporary(bb, indexer++, std::move(value)); - } - for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - break; - } - case OpCode::Id::TLD: { - UNIMPLEMENTED_IF_MSG(instr.tld.aoffi, "AOFFI is not implemented"); - UNIMPLEMENTED_IF_MSG(instr.tld.ms, "MS is not implemented"); - UNIMPLEMENTED_IF_MSG(instr.tld.cl, "CL is not implemented"); - - WriteTexInstructionFloat(bb, instr, GetTldCode(instr)); - break; - } - case OpCode::Id::TLDS: { - const TextureType texture_type{instr.tlds.GetTextureType()}; - const bool is_array{instr.tlds.IsArrayTexture()}; - - UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI), - "AOFFI is not implemented"); - UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented"); - - const Node4 components = GetTldsCode(instr, texture_type, is_array); - - if (instr.tlds.fp32_flag) { - WriteTexsInstructionFloat(bb, instr, components); - } else { - WriteTexsInstructionHalfFloat(bb, instr, components); - } - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo( - SamplerInfo info, std::optional<Tegra::Engines::SamplerDescriptor> sampler) { - if (info.IsComplete()) { - return info; - } - if (!sampler) { - LOG_WARNING(HW_GPU, "Unknown sampler info"); - info.type = info.type.value_or(Tegra::Shader::TextureType::Texture2D); - info.is_array = info.is_array.value_or(false); - info.is_shadow = info.is_shadow.value_or(false); - info.is_buffer = info.is_buffer.value_or(false); - return info; - } - info.type = info.type.value_or(sampler->texture_type); - info.is_array = info.is_array.value_or(sampler->is_array != 0); - info.is_shadow = info.is_shadow.value_or(sampler->is_shadow != 0); - info.is_buffer = info.is_buffer.value_or(sampler->is_buffer != 0); - return info; -} - -std::optional<SamplerEntry> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler, - SamplerInfo sampler_info) { - const u32 offset = static_cast<u32>(sampler.index.Value()); - const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset)); - - // If this sampler has already been used, return the existing mapping. 
- const auto it = - std::find_if(used_samplers.begin(), used_samplers.end(), - [offset](const SamplerEntry& entry) { return entry.offset == offset; }); - if (it != used_samplers.end()) { - ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array && - it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer); - return *it; - } - - // Otherwise create a new mapping for this sampler - const auto next_index = static_cast<u32>(used_samplers.size()); - return used_samplers.emplace_back(next_index, offset, *info.type, *info.is_array, - *info.is_shadow, *info.is_buffer, false); -} - -std::optional<SamplerEntry> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, - SamplerInfo info, Node& index_var) { - const Node sampler_register = GetRegister(reg); - const auto [base_node, tracked_sampler_info] = - TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size())); - if (!base_node) { - UNREACHABLE(); - return std::nullopt; - } - - if (const auto sampler_info = std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) { - const u32 buffer = sampler_info->index; - const u32 offset = sampler_info->offset; - info = GetSamplerInfo(info, registry.ObtainBindlessSampler(buffer, offset)); - - // If this sampler has already been used, return the existing mapping. - const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), - [buffer, offset](const SamplerEntry& entry) { - return entry.buffer == buffer && entry.offset == offset; - }); - if (it != used_samplers.end()) { - ASSERT(it->is_bindless && it->type == info.type && it->is_array == info.is_array && - it->is_shadow == info.is_shadow); - return *it; - } - - // Otherwise create a new mapping for this sampler - const auto next_index = static_cast<u32>(used_samplers.size()); - return used_samplers.emplace_back(next_index, offset, buffer, *info.type, *info.is_array, - *info.is_shadow, *info.is_buffer, false); - } - if (const auto sampler_info = std::get_if<SeparateSamplerNode>(&*tracked_sampler_info)) { - const std::pair indices = sampler_info->indices; - const std::pair offsets = sampler_info->offsets; - info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets)); - - // Try to use an already created sampler if it exists - const auto it = - std::find_if(used_samplers.begin(), used_samplers.end(), - [indices, offsets](const SamplerEntry& entry) { - return offsets == std::pair{entry.offset, entry.secondary_offset} && - indices == std::pair{entry.buffer, entry.secondary_buffer}; - }); - if (it != used_samplers.end()) { - ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array && - it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer); - return *it; - } - - // Otherwise create a new mapping for this sampler - const u32 next_index = static_cast<u32>(used_samplers.size()); - return used_samplers.emplace_back(next_index, offsets, indices, *info.type, *info.is_array, - *info.is_shadow, *info.is_buffer); - } - if (const auto sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) { - const u32 base_offset = sampler_info->base_offset / 4; - index_var = GetCustomVariable(sampler_info->bindless_var); - info = GetSamplerInfo(info, registry.ObtainBoundSampler(base_offset)); - - // If this sampler has already been used, return the existing mapping. 
- const auto it = std::find_if( - used_samplers.begin(), used_samplers.end(), - [base_offset](const SamplerEntry& entry) { return entry.offset == base_offset; }); - if (it != used_samplers.end()) { - ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array && - it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer && - it->is_indexed); - return *it; - } - - uses_indexed_samplers = true; - // Otherwise create a new mapping for this sampler - const auto next_index = static_cast<u32>(used_samplers.size()); - return used_samplers.emplace_back(next_index, base_offset, *info.type, *info.is_array, - *info.is_shadow, *info.is_buffer, true); - } - return std::nullopt; -} - -void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { - u32 dest_elem = 0; - for (u32 elem = 0; elem < 4; ++elem) { - if (!instr.tex.IsComponentEnabled(elem)) { - // Skip disabled components - continue; - } - SetTemporary(bb, dest_elem++, components[elem]); - } - // After writing values in temporals, move them to the real registers - for (u32 i = 0; i < dest_elem; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } -} - -void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components, - bool ignore_mask) { - // TEXS has two destination registers and a swizzle. The first two elements in the swizzle - // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 - - u32 dest_elem = 0; - for (u32 component = 0; component < 4; ++component) { - if (!instr.texs.IsComponentEnabled(component) && !ignore_mask) - continue; - SetTemporary(bb, dest_elem++, components[component]); - } - - for (u32 i = 0; i < dest_elem; ++i) { - if (i < 2) { - // Write the first two swizzle components to gpr0 and gpr0+1 - SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporary(i)); - } else { - ASSERT(instr.texs.HasTwoDestinations()); - // Write the rest of the swizzle components to gpr28 and gpr28+1 - SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporary(i)); - } - } -} - -void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr, - const Node4& components, bool ignore_mask) { - // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half - // float instruction). 
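
As the comment above says, TEXS.F16 results come back as pairs of half floats packed into 32-bit registers, which the code below expresses with HPack2 operations. A tiny sketch of the packing itself, assuming the components have already been converted to IEEE half-precision bit patterns and assuming low-component-first ordering (both assumptions, not taken from this file):

#include <cstdint>

// Pack two 16-bit half-float bit patterns into one 32-bit register value:
// the first component in the low half, the second in the high half.
uint32_t HPack2(uint16_t first_half, uint16_t second_half) {
    return static_cast<uint32_t>(first_half) | (static_cast<uint32_t>(second_half) << 16);
}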
- - Node4 values; - u32 dest_elem = 0; - for (u32 component = 0; component < 4; ++component) { - if (!instr.texs.IsComponentEnabled(component) && !ignore_mask) - continue; - values[dest_elem++] = components[component]; - } - if (dest_elem == 0) - return; - - std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); }); - - const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]); - if (dest_elem <= 2) { - SetRegister(bb, instr.gpr0, first_value); - return; - } - - SetTemporary(bb, 0, first_value); - SetTemporary(bb, 1, Operation(OperationCode::HPack2, values[2], values[3])); - - SetRegister(bb, instr.gpr0, GetTemporary(0)); - SetRegister(bb, instr.gpr28, GetTemporary(1)); -} - -Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, - TextureProcessMode process_mode, std::vector<Node> coords, - Node array, Node depth_compare, u32 bias_offset, - std::vector<Node> aoffi, - std::optional<Tegra::Shader::Register> bindless_reg) { - const bool is_array = array != nullptr; - const bool is_shadow = depth_compare != nullptr; - const bool is_bindless = bindless_reg.has_value(); - - ASSERT_MSG(texture_type != TextureType::Texture3D || !is_array || !is_shadow, - "Illegal texture type"); - - SamplerInfo info; - info.type = texture_type; - info.is_array = is_array; - info.is_shadow = is_shadow; - info.is_buffer = false; - - Node index_var; - const std::optional<SamplerEntry> sampler = - is_bindless ? GetBindlessSampler(*bindless_reg, info, index_var) - : GetSampler(instr.sampler, info); - if (!sampler) { - return {Immediate(0), Immediate(0), Immediate(0), Immediate(0)}; - } - - const bool lod_needed = process_mode == TextureProcessMode::LZ || - process_mode == TextureProcessMode::LL || - process_mode == TextureProcessMode::LLA; - const OperationCode opcode = lod_needed ? OperationCode::TextureLod : OperationCode::Texture; - - Node bias; - Node lod; - switch (process_mode) { - case TextureProcessMode::None: - break; - case TextureProcessMode::LZ: - lod = Immediate(0.0f); - break; - case TextureProcessMode::LB: - // If present, lod or bias are always stored in the register indexed by the gpr20 field with - // an offset depending on the usage of the other registers. - bias = GetRegister(instr.gpr20.Value() + bias_offset); - break; - case TextureProcessMode::LL: - lod = GetRegister(instr.gpr20.Value() + bias_offset); - break; - default: - UNIMPLEMENTED_MSG("Unimplemented process mode={}", process_mode); - break; - } - - Node4 values; - for (u32 element = 0; element < values.size(); ++element) { - MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias, - lod, {}, element, index_var}; - values[element] = Operation(opcode, meta, coords); - } - - return values; -} - -Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, - TextureProcessMode process_mode, bool depth_compare, bool is_array, - bool is_aoffi, std::optional<Tegra::Shader::Register> bindless_reg) { - const bool lod_bias_enabled{ - (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)}; - - const bool is_bindless = bindless_reg.has_value(); - - u64 parameter_register = instr.gpr20.Value(); - if (is_bindless) { - ++parameter_register; - } - - const u32 bias_lod_offset = (is_bindless ? 
1 : 0); - if (lod_bias_enabled) { - ++parameter_register; - } - - const auto coord_counts = ValidateAndGetCoordinateElement(texture_type, depth_compare, is_array, - lod_bias_enabled, 4, 5); - const auto coord_count = std::get<0>(coord_counts); - // If enabled arrays index is always stored in the gpr8 field - const u64 array_register = instr.gpr8.Value(); - // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used - const u64 coord_register = array_register + (is_array ? 1 : 0); - - std::vector<Node> coords; - for (std::size_t i = 0; i < coord_count; ++i) { - coords.push_back(GetRegister(coord_register + i)); - } - // 1D.DC in OpenGL the 2nd component is ignored. - if (depth_compare && !is_array && texture_type == TextureType::Texture1D) { - coords.push_back(Immediate(0.0f)); - } - - const Node array = is_array ? GetRegister(array_register) : nullptr; - - std::vector<Node> aoffi; - if (is_aoffi) { - aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, false); - } - - Node dc; - if (depth_compare) { - // Depth is always stored in the register signaled by gpr20 or in the next register if lod - // or bias are used - dc = GetRegister(parameter_register++); - } - - return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_lod_offset, - aoffi, bindless_reg); -} - -Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, - TextureProcessMode process_mode, bool depth_compare, bool is_array) { - const bool lod_bias_enabled = - (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); - - const auto coord_counts = ValidateAndGetCoordinateElement(texture_type, depth_compare, is_array, - lod_bias_enabled, 4, 4); - const auto coord_count = std::get<0>(coord_counts); - - // If enabled arrays index is always stored in the gpr8 field - const u64 array_register = instr.gpr8.Value(); - // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used - const u64 coord_register = array_register + (is_array ? 1 : 0); - const u64 last_coord_register = - (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2)) - ? static_cast<u64>(instr.gpr20.Value()) - : coord_register + 1; - const u32 bias_offset = coord_count > 2 ? 1 : 0; - - std::vector<Node> coords; - for (std::size_t i = 0; i < coord_count; ++i) { - const bool last = (i == (coord_count - 1)) && (coord_count > 1); - coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); - } - - const Node array = is_array ? GetRegister(array_register) : nullptr; - - Node dc; - if (depth_compare) { - // Depth is always stored in the register signaled by gpr20 or in the next register if lod - // or bias are used - const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); - dc = GetRegister(depth_register); - } - - return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {}, - {}); -} - -Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, - bool is_array, bool is_aoffi, bool is_ptp, bool is_bindless) { - ASSERT_MSG(!(is_aoffi && is_ptp), "AOFFI and PTP can't be enabled at the same time"); - - const std::size_t coord_count = GetCoordCount(texture_type); - - // If enabled arrays index is always stored in the gpr8 field - const u64 array_register = instr.gpr8.Value(); - // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used - const u64 coord_register = array_register + (is_array ? 
1 : 0); - - std::vector<Node> coords; - for (std::size_t i = 0; i < coord_count; ++i) { - coords.push_back(GetRegister(coord_register + i)); - } - - u64 parameter_register = instr.gpr20.Value(); - - SamplerInfo info; - info.type = texture_type; - info.is_array = is_array; - info.is_shadow = depth_compare; - - Node index_var; - const std::optional<SamplerEntry> sampler = - is_bindless ? GetBindlessSampler(parameter_register++, info, index_var) - : GetSampler(instr.sampler, info); - Node4 values; - if (!sampler) { - for (u32 element = 0; element < values.size(); ++element) { - values[element] = Immediate(0); - } - return values; - } - - std::vector<Node> aoffi, ptp; - if (is_aoffi) { - aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true); - } else if (is_ptp) { - ptp = GetPtpCoordinates( - {GetRegister(parameter_register++), GetRegister(parameter_register++)}); - } - - Node dc; - if (depth_compare) { - dc = GetRegister(parameter_register++); - } - - const Node component = is_bindless ? Immediate(static_cast<u32>(instr.tld4_b.component)) - : Immediate(static_cast<u32>(instr.tld4.component)); - - for (u32 element = 0; element < values.size(); ++element) { - auto coords_copy = coords; - MetaTexture meta{ - *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element, - index_var}; - values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); - } - - return values; -} - -Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { - const auto texture_type{instr.tld.texture_type}; - const bool is_array{instr.tld.is_array != 0}; - const bool lod_enabled{instr.tld.GetTextureProcessMode() == TextureProcessMode::LL}; - const std::size_t coord_count{GetCoordCount(texture_type)}; - - u64 gpr8_cursor{instr.gpr8.Value()}; - const Node array_register{is_array ? GetRegister(gpr8_cursor++) : nullptr}; - - std::vector<Node> coords; - coords.reserve(coord_count); - for (std::size_t i = 0; i < coord_count; ++i) { - coords.push_back(GetRegister(gpr8_cursor++)); - } - - u64 gpr20_cursor{instr.gpr20.Value()}; - // const Node bindless_register{is_bindless ? GetRegister(gpr20_cursor++) : nullptr}; - const Node lod{lod_enabled ? GetRegister(gpr20_cursor++) : Immediate(0u)}; - // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr}; - // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr}; - - const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, {}); - - Node4 values; - for (u32 element = 0; element < values.size(); ++element) { - auto coords_copy = coords; - MetaTexture meta{*sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element, {}}; - values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); - } - - return values; -} - -Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { - SamplerInfo info; - info.type = texture_type; - info.is_array = is_array; - info.is_shadow = false; - const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info); - - const std::size_t type_coord_count = GetCoordCount(texture_type); - const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; - const bool aoffi_enabled = instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI); - - // If enabled arrays index is always stored in the gpr8 field - const u64 array_register = instr.gpr8.Value(); - // if is array gpr20 is used - const u64 coord_register = is_array ? 
instr.gpr20.Value() : instr.gpr8.Value(); - - const u64 last_coord_register = - ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array - ? static_cast<u64>(instr.gpr20.Value()) - : coord_register + 1; - - std::vector<Node> coords; - for (std::size_t i = 0; i < type_coord_count; ++i) { - const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1); - coords.push_back( - GetRegister(last && !aoffi_enabled ? last_coord_register : coord_register + i)); - } - - const Node array = is_array ? GetRegister(array_register) : nullptr; - // When lod is used always is in gpr20 - const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); - - std::vector<Node> aoffi; - if (aoffi_enabled) { - aoffi = GetAoffiCoordinates(GetRegister(instr.gpr20), type_coord_count, false); - } - - Node4 values; - for (u32 element = 0; element < values.size(); ++element) { - auto coords_copy = coords; - MetaTexture meta{*sampler, array, {}, aoffi, {}, {}, {}, lod, {}, element, {}}; - values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); - } - return values; -} - -std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement( - TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled, - std::size_t max_coords, std::size_t max_inputs) { - const std::size_t coord_count = GetCoordCount(texture_type); - - std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0); - const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0); - if (total_coord_count > max_coords || total_reg_count > max_inputs) { - UNIMPLEMENTED_MSG("Unsupported Texture operation"); - total_coord_count = std::min(total_coord_count, max_coords); - } - // 1D.DC OpenGL is using a vec3 but 2nd component is ignored later. - total_coord_count += - (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0; - - return {coord_count, total_coord_count}; -} - -std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, - bool is_tld4) { - const std::array coord_offsets = is_tld4 ? std::array{0U, 8U, 16U} : std::array{0U, 4U, 8U}; - const u32 size = is_tld4 ? 6 : 4; - const s32 wrap_value = is_tld4 ? 32 : 8; - const s32 diff_value = is_tld4 ? 64 : 16; - const u32 mask = (1U << size) - 1; - - std::vector<Node> aoffi; - aoffi.reserve(coord_count); - - const auto aoffi_immediate{ - TrackImmediate(aoffi_reg, global_code, static_cast<s64>(global_code.size()))}; - if (!aoffi_immediate) { - // Variable access, not supported on AMD. 
- LOG_WARNING(HW_GPU, - "AOFFI constant folding failed, some hardware might have graphical issues"); - for (std::size_t coord = 0; coord < coord_count; ++coord) { - const Node value = BitfieldExtract(aoffi_reg, coord_offsets[coord], size); - const Node condition = - Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value)); - const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value)); - aoffi.push_back(Operation(OperationCode::Select, condition, negative, value)); - } - return aoffi; - } - - for (std::size_t coord = 0; coord < coord_count; ++coord) { - s32 value = (*aoffi_immediate >> coord_offsets[coord]) & mask; - if (value >= wrap_value) { - value -= diff_value; - } - aoffi.push_back(Immediate(value)); - } - return aoffi; -} - -std::vector<Node> ShaderIR::GetPtpCoordinates(std::array<Node, 2> ptp_regs) { - static constexpr u32 num_entries = 8; - - std::vector<Node> ptp; - ptp.reserve(num_entries); - - const auto global_size = static_cast<s64>(global_code.size()); - const std::optional low = TrackImmediate(ptp_regs[0], global_code, global_size); - const std::optional high = TrackImmediate(ptp_regs[1], global_code, global_size); - if (!low || !high) { - for (u32 entry = 0; entry < num_entries; ++entry) { - const u32 reg = entry / 4; - const u32 offset = entry % 4; - const Node value = BitfieldExtract(ptp_regs[reg], offset * 8, 6); - const Node condition = - Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(32)); - const Node negative = Operation(OperationCode::IAdd, value, Immediate(-64)); - ptp.push_back(Operation(OperationCode::Select, condition, negative, value)); - } - return ptp; - } - - const u64 immediate = (static_cast<u64>(*high) << 32) | static_cast<u64>(*low); - for (u32 entry = 0; entry < num_entries; ++entry) { - s32 value = (immediate >> (entry * 8)) & 0b111111; - if (value >= 32) { - value -= 64; - } - ptp.push_back(Immediate(value)); - } - - return ptp; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp deleted file mode 100644 index 1c0957277..000000000 --- a/src/video_core/shader/decode/video.cpp +++ /dev/null @@ -1,169 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using std::move; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; -using Tegra::Shader::VideoType; -using Tegra::Shader::VmadShr; -using Tegra::Shader::VmnmxOperation; -using Tegra::Shader::VmnmxType; - -u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - if (opcode->get().GetId() == OpCode::Id::VMNMX) { - DecodeVMNMX(bb, instr); - return pc; - } - - const Node op_a = - GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a, - instr.video.type_a, instr.video.byte_height_a); - const Node op_b = [this, instr] { - if (instr.video.use_register_b) { - return GetVideoOperand(GetRegister(instr.gpr20), instr.video.is_byte_chunk_b, - instr.video.signed_b, instr.video.type_b, - instr.video.byte_height_b); - } - if (instr.video.signed_b) { - const auto imm = static_cast<s16>(instr.alu.GetImm20_16()); - return Immediate(static_cast<u32>(imm)); - } else { - return Immediate(instr.alu.GetImm20_16()); - } - }(); - - switch (opcode->get().GetId()) { - case OpCode::Id::VMAD: { - const bool result_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1; - const Node op_c = GetRegister(instr.gpr39); - - Node value = SignedOperation(OperationCode::IMul, result_signed, NO_PRECISE, op_a, op_b); - value = SignedOperation(OperationCode::IAdd, result_signed, NO_PRECISE, value, op_c); - - if (instr.vmad.shr == VmadShr::Shr7 || instr.vmad.shr == VmadShr::Shr15) { - const Node shift = Immediate(instr.vmad.shr == VmadShr::Shr7 ? 7 : 15); - value = - SignedOperation(OperationCode::IArithmeticShiftRight, result_signed, value, shift); - } - - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::VSETP: { - // We can't use the constant predicate as destination. 
- ASSERT(instr.vsetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); - - const bool sign = instr.video.signed_a == 1 || instr.video.signed_b == 1; - const Node first_pred = GetPredicateComparisonInteger(instr.vsetp.cond, sign, op_a, op_b); - const Node second_pred = GetPredicate(instr.vsetp.pred39, false); - - const OperationCode combiner = GetPredicateCombiner(instr.vsetp.op); - - // Set the primary predicate to the result of Predicate OP SecondPredicate - SetPredicate(bb, instr.vsetp.pred3, Operation(combiner, first_pred, second_pred)); - - if (instr.vsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { - // Set the secondary predicate to the result of !Predicate OP SecondPredicate, - // if enabled - const Node negate_pred = Operation(OperationCode::LogicalNegate, first_pred); - SetPredicate(bb, instr.vsetp.pred0, Operation(combiner, negate_pred, second_pred)); - } - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled video instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed, VideoType type, - u64 byte_height) { - if (!is_chunk) { - return BitfieldExtract(op, static_cast<u32>(byte_height * 8), 8); - } - - switch (type) { - case VideoType::Size16_Low: - return BitfieldExtract(op, 0, 16); - case VideoType::Size16_High: - return BitfieldExtract(op, 16, 16); - case VideoType::Size32: - // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when this type is used - // (1 * 1 + 0 == 0x5b800000). Until a better explanation is found: abort. - UNIMPLEMENTED(); - return Immediate(0); - case VideoType::Invalid: - UNREACHABLE_MSG("Invalid instruction encoding"); - return Immediate(0); - default: - UNREACHABLE(); - return Immediate(0); - } -} - -void ShaderIR::DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr) { - UNIMPLEMENTED_IF(!instr.vmnmx.is_op_b_register); - UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatA() != VmnmxType::Bits32); - UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatB() != VmnmxType::Bits32); - UNIMPLEMENTED_IF(instr.vmnmx.is_src_a_signed != instr.vmnmx.is_src_b_signed); - UNIMPLEMENTED_IF(instr.vmnmx.sat); - UNIMPLEMENTED_IF(instr.generates_cc); - - Node op_a = GetRegister(instr.gpr8); - Node op_b = GetRegister(instr.gpr20); - Node op_c = GetRegister(instr.gpr39); - - const bool is_oper1_signed = instr.vmnmx.is_src_a_signed; // Stubbed - const bool is_oper2_signed = instr.vmnmx.is_dest_signed; - - const auto operation_a = instr.vmnmx.mx ? 
OperationCode::IMax : OperationCode::IMin; - Node value = SignedOperation(operation_a, is_oper1_signed, move(op_a), move(op_b)); - - switch (instr.vmnmx.operation) { - case VmnmxOperation::Mrg_16H: - value = BitfieldInsert(move(op_c), move(value), 16, 16); - break; - case VmnmxOperation::Mrg_16L: - value = BitfieldInsert(move(op_c), move(value), 0, 16); - break; - case VmnmxOperation::Mrg_8B0: - value = BitfieldInsert(move(op_c), move(value), 0, 8); - break; - case VmnmxOperation::Mrg_8B2: - value = BitfieldInsert(move(op_c), move(value), 16, 8); - break; - case VmnmxOperation::Acc: - value = Operation(OperationCode::IAdd, move(value), move(op_c)); - break; - case VmnmxOperation::Min: - value = SignedOperation(OperationCode::IMin, is_oper2_signed, move(value), move(op_c)); - break; - case VmnmxOperation::Max: - value = SignedOperation(OperationCode::IMax, is_oper2_signed, move(value), move(op_c)); - break; - case VmnmxOperation::Nop: - break; - default: - UNREACHABLE(); - break; - } - - SetRegister(bb, instr.gpr0, move(value)); -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp deleted file mode 100644 index 37433d783..000000000 --- a/src/video_core/shader/decode/warp.cpp +++ /dev/null @@ -1,117 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; -using Tegra::Shader::ShuffleOperation; -using Tegra::Shader::VoteOperation; - -namespace { - -OperationCode GetOperationCode(VoteOperation vote_op) { - switch (vote_op) { - case VoteOperation::All: - return OperationCode::VoteAll; - case VoteOperation::Any: - return OperationCode::VoteAny; - case VoteOperation::Eq: - return OperationCode::VoteEqual; - default: - UNREACHABLE_MSG("Invalid vote operation={}", vote_op); - return OperationCode::VoteAll; - } -} - -} // Anonymous namespace - -u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - // Signal the backend that this shader uses warp instructions. - uses_warps = true; - - switch (opcode->get().GetId()) { - case OpCode::Id::VOTE: { - const Node value = GetPredicate(instr.vote.value, instr.vote.negate_value != 0); - const Node active = Operation(OperationCode::BallotThread, value); - const Node vote = Operation(GetOperationCode(instr.vote.operation), value); - SetRegister(bb, instr.gpr0, active); - SetPredicate(bb, instr.vote.dest_pred, vote); - break; - } - case OpCode::Id::SHFL: { - Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm)) - : GetRegister(instr.gpr39); - Node index = instr.shfl.is_index_imm ? 
Immediate(static_cast<u32>(instr.shfl.index_imm)) - : GetRegister(instr.gpr20); - - Node thread_id = Operation(OperationCode::ThreadId); - Node clamp = Operation(OperationCode::IBitwiseAnd, mask, Immediate(0x1FU)); - Node seg_mask = BitfieldExtract(mask, 8, 16); - - Node neg_seg_mask = Operation(OperationCode::IBitwiseNot, seg_mask); - Node min_thread_id = Operation(OperationCode::IBitwiseAnd, thread_id, seg_mask); - Node max_thread_id = Operation(OperationCode::IBitwiseOr, min_thread_id, - Operation(OperationCode::IBitwiseAnd, clamp, neg_seg_mask)); - - Node src_thread_id = [instr, index, neg_seg_mask, min_thread_id, thread_id] { - switch (instr.shfl.operation) { - case ShuffleOperation::Idx: - return Operation(OperationCode::IBitwiseOr, - Operation(OperationCode::IBitwiseAnd, index, neg_seg_mask), - min_thread_id); - case ShuffleOperation::Down: - return Operation(OperationCode::IAdd, thread_id, index); - case ShuffleOperation::Up: - return Operation(OperationCode::IAdd, thread_id, - Operation(OperationCode::INegate, index)); - case ShuffleOperation::Bfly: - return Operation(OperationCode::IBitwiseXor, thread_id, index); - } - UNREACHABLE(); - return Immediate(0U); - }(); - - Node in_bounds = [instr, src_thread_id, min_thread_id, max_thread_id] { - if (instr.shfl.operation == ShuffleOperation::Up) { - return Operation(OperationCode::LogicalIGreaterEqual, src_thread_id, min_thread_id); - } else { - return Operation(OperationCode::LogicalILessEqual, src_thread_id, max_thread_id); - } - }(); - - SetPredicate(bb, instr.shfl.pred48, in_bounds); - SetRegister( - bb, instr.gpr0, - Operation(OperationCode::ShuffleIndexed, GetRegister(instr.gpr8), src_thread_id)); - break; - } - case OpCode::Id::FSWZADD: { - UNIMPLEMENTED_IF(instr.fswzadd.ndv); - - Node op_a = GetRegister(instr.gpr8); - Node op_b = GetRegister(instr.gpr20); - Node mask = Immediate(static_cast<u32>(instr.fswzadd.swizzle)); - SetRegister(bb, instr.gpr0, Operation(OperationCode::FSwizzleAdd, op_a, op_b, mask)); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName()); - break; - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp deleted file mode 100644 index 233b8fa42..000000000 --- a/src/video_core/shader/decode/xmad.cpp +++ /dev/null @@ -1,156 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::PredCondition; - -u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - UNIMPLEMENTED_IF(instr.xmad.sign_a); - UNIMPLEMENTED_IF(instr.xmad.sign_b); - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in XMAD is not implemented"); - - Node op_a = GetRegister(instr.gpr8); - - // TODO(bunnei): Needs to be fixed once op_a or op_b is signed - UNIMPLEMENTED_IF(instr.xmad.sign_a != instr.xmad.sign_b); - const bool is_signed_a = instr.xmad.sign_a == 1; - const bool is_signed_b = instr.xmad.sign_b == 1; - const bool is_signed_c = is_signed_a; - - auto [is_merge, is_psl, is_high_b, mode, op_b_binding, - op_c] = [&]() -> std::tuple<bool, bool, bool, Tegra::Shader::XmadMode, Node, Node> { - switch (opcode->get().GetId()) { - case OpCode::Id::XMAD_CR: - return {instr.xmad.merge_56, - instr.xmad.product_shift_left_second, - instr.xmad.high_b, - instr.xmad.mode_cbf, - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), - GetRegister(instr.gpr39)}; - case OpCode::Id::XMAD_RR: - return {instr.xmad.merge_37, instr.xmad.product_shift_left, instr.xmad.high_b_rr, - instr.xmad.mode, GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; - case OpCode::Id::XMAD_RC: - return {false, - false, - instr.xmad.high_b, - instr.xmad.mode_cbf, - GetRegister(instr.gpr39), - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; - case OpCode::Id::XMAD_IMM: - return {instr.xmad.merge_37, - instr.xmad.product_shift_left, - false, - instr.xmad.mode, - Immediate(static_cast<u32>(instr.xmad.imm20_16)), - GetRegister(instr.gpr39)}; - default: - UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName()); - return {false, false, false, Tegra::Shader::XmadMode::None, Immediate(0), Immediate(0)}; - } - }(); - - op_a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(op_a), - instr.xmad.high_a ? Immediate(16) : Immediate(0), Immediate(16)); - - const Node original_b = op_b_binding; - const Node op_b = - SignedOperation(OperationCode::IBitfieldExtract, is_signed_b, std::move(op_b_binding), - is_high_b ? Immediate(16) : Immediate(0), Immediate(16)); - - // we already check sign_a and sign_b is difference or not before so just use one in here. 
- Node product = SignedOperation(OperationCode::IMul, is_signed_a, op_a, op_b); - if (is_psl) { - product = - SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_a, product, Immediate(16)); - } - SetTemporary(bb, 0, product); - product = GetTemporary(0); - - Node original_c = op_c; - const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error - op_c = [&] { - switch (set_mode) { - case Tegra::Shader::XmadMode::None: - return original_c; - case Tegra::Shader::XmadMode::CLo: - return BitfieldExtract(std::move(original_c), 0, 16); - case Tegra::Shader::XmadMode::CHi: - return BitfieldExtract(std::move(original_c), 16, 16); - case Tegra::Shader::XmadMode::CBcc: { - Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, - original_b, Immediate(16)); - return SignedOperation(OperationCode::IAdd, is_signed_c, std::move(original_c), - std::move(shifted_b)); - } - case Tegra::Shader::XmadMode::CSfu: { - const Node comp_a = - GetPredicateComparisonInteger(PredCondition::EQ, is_signed_a, op_a, Immediate(0)); - const Node comp_b = - GetPredicateComparisonInteger(PredCondition::EQ, is_signed_b, op_b, Immediate(0)); - const Node comp = Operation(OperationCode::LogicalOr, comp_a, comp_b); - - const Node comp_minus_a = GetPredicateComparisonInteger( - PredCondition::NE, is_signed_a, - SignedOperation(OperationCode::IBitwiseAnd, is_signed_a, op_a, - Immediate(0x80000000)), - Immediate(0)); - const Node comp_minus_b = GetPredicateComparisonInteger( - PredCondition::NE, is_signed_b, - SignedOperation(OperationCode::IBitwiseAnd, is_signed_b, op_b, - Immediate(0x80000000)), - Immediate(0)); - - Node new_c = Operation( - OperationCode::Select, comp_minus_a, - SignedOperation(OperationCode::IAdd, is_signed_c, original_c, Immediate(-65536)), - original_c); - new_c = Operation( - OperationCode::Select, comp_minus_b, - SignedOperation(OperationCode::IAdd, is_signed_c, new_c, Immediate(-65536)), - std::move(new_c)); - - return Operation(OperationCode::Select, comp, original_c, std::move(new_c)); - } - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - SetTemporary(bb, 1, op_c); - op_c = GetTemporary(1); - - // TODO(Rodrigo): Use an appropiate sign for this operation - Node sum = SignedOperation(OperationCode::IAdd, is_signed_a, product, std::move(op_c)); - SetTemporary(bb, 2, sum); - sum = GetTemporary(2); - if (is_merge) { - const Node a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(sum), - Immediate(0), Immediate(16)); - const Node b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, original_b, - Immediate(16)); - sum = SignedOperation(OperationCode::IBitwiseOr, is_signed_a, a, b); - } - - SetInternalFlagsFromInteger(bb, sum, instr.generates_cc); - SetRegister(bb, instr.gpr0, std::move(sum)); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/expr.cpp b/src/video_core/shader/expr.cpp deleted file mode 100644 index 2647865d4..000000000 --- a/src/video_core/shader/expr.cpp +++ /dev/null @@ -1,93 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include <memory> -#include <variant> - -#include "video_core/shader/expr.h" - -namespace VideoCommon::Shader { -namespace { -bool ExprIsBoolean(const Expr& expr) { - return std::holds_alternative<ExprBoolean>(*expr); -} - -bool ExprBooleanGet(const Expr& expr) { - return std::get_if<ExprBoolean>(expr.get())->value; -} -} // Anonymous namespace - -bool ExprAnd::operator==(const ExprAnd& b) const { - return (*operand1 == *b.operand1) && (*operand2 == *b.operand2); -} - -bool ExprAnd::operator!=(const ExprAnd& b) const { - return !operator==(b); -} - -bool ExprOr::operator==(const ExprOr& b) const { - return (*operand1 == *b.operand1) && (*operand2 == *b.operand2); -} - -bool ExprOr::operator!=(const ExprOr& b) const { - return !operator==(b); -} - -bool ExprNot::operator==(const ExprNot& b) const { - return *operand1 == *b.operand1; -} - -bool ExprNot::operator!=(const ExprNot& b) const { - return !operator==(b); -} - -Expr MakeExprNot(Expr first) { - if (std::holds_alternative<ExprNot>(*first)) { - return std::get_if<ExprNot>(first.get())->operand1; - } - return MakeExpr<ExprNot>(std::move(first)); -} - -Expr MakeExprAnd(Expr first, Expr second) { - if (ExprIsBoolean(first)) { - return ExprBooleanGet(first) ? second : first; - } - if (ExprIsBoolean(second)) { - return ExprBooleanGet(second) ? first : second; - } - return MakeExpr<ExprAnd>(std::move(first), std::move(second)); -} - -Expr MakeExprOr(Expr first, Expr second) { - if (ExprIsBoolean(first)) { - return ExprBooleanGet(first) ? first : second; - } - if (ExprIsBoolean(second)) { - return ExprBooleanGet(second) ? second : first; - } - return MakeExpr<ExprOr>(std::move(first), std::move(second)); -} - -bool ExprAreEqual(const Expr& first, const Expr& second) { - return (*first) == (*second); -} - -bool ExprAreOpposite(const Expr& first, const Expr& second) { - if (std::holds_alternative<ExprNot>(*first)) { - return ExprAreEqual(std::get_if<ExprNot>(first.get())->operand1, second); - } - if (std::holds_alternative<ExprNot>(*second)) { - return ExprAreEqual(std::get_if<ExprNot>(second.get())->operand1, first); - } - return false; -} - -bool ExprIsTrue(const Expr& first) { - if (ExprIsBoolean(first)) { - return ExprBooleanGet(first); - } - return false; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/expr.h b/src/video_core/shader/expr.h deleted file mode 100644 index cda284c72..000000000 --- a/src/video_core/shader/expr.h +++ /dev/null @@ -1,156 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include <memory> -#include <variant> - -#include "video_core/engines/shader_bytecode.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::ConditionCode; -using Tegra::Shader::Pred; - -class ExprAnd; -class ExprBoolean; -class ExprCondCode; -class ExprGprEqual; -class ExprNot; -class ExprOr; -class ExprPredicate; -class ExprVar; - -using ExprData = std::variant<ExprVar, ExprCondCode, ExprPredicate, ExprNot, ExprOr, ExprAnd, - ExprBoolean, ExprGprEqual>; -using Expr = std::shared_ptr<ExprData>; - -class ExprAnd final { -public: - explicit ExprAnd(Expr a, Expr b) : operand1{std::move(a)}, operand2{std::move(b)} {} - - bool operator==(const ExprAnd& b) const; - bool operator!=(const ExprAnd& b) const; - - Expr operand1; - Expr operand2; -}; - -class ExprOr final { -public: - explicit ExprOr(Expr a, Expr b) : operand1{std::move(a)}, operand2{std::move(b)} {} - - bool operator==(const ExprOr& b) const; - bool operator!=(const ExprOr& b) const; - - Expr operand1; - Expr operand2; -}; - -class ExprNot final { -public: - explicit ExprNot(Expr a) : operand1{std::move(a)} {} - - bool operator==(const ExprNot& b) const; - bool operator!=(const ExprNot& b) const; - - Expr operand1; -}; - -class ExprVar final { -public: - explicit ExprVar(u32 index) : var_index{index} {} - - bool operator==(const ExprVar& b) const { - return var_index == b.var_index; - } - - bool operator!=(const ExprVar& b) const { - return !operator==(b); - } - - u32 var_index; -}; - -class ExprPredicate final { -public: - explicit ExprPredicate(u32 predicate_) : predicate{predicate_} {} - - bool operator==(const ExprPredicate& b) const { - return predicate == b.predicate; - } - - bool operator!=(const ExprPredicate& b) const { - return !operator==(b); - } - - u32 predicate; -}; - -class ExprCondCode final { -public: - explicit ExprCondCode(ConditionCode condition_code) : cc{condition_code} {} - - bool operator==(const ExprCondCode& b) const { - return cc == b.cc; - } - - bool operator!=(const ExprCondCode& b) const { - return !operator==(b); - } - - ConditionCode cc; -}; - -class ExprBoolean final { -public: - explicit ExprBoolean(bool val) : value{val} {} - - bool operator==(const ExprBoolean& b) const { - return value == b.value; - } - - bool operator!=(const ExprBoolean& b) const { - return !operator==(b); - } - - bool value; -}; - -class ExprGprEqual final { -public: - explicit ExprGprEqual(u32 gpr_, u32 value_) : gpr{gpr_}, value{value_} {} - - bool operator==(const ExprGprEqual& b) const { - return gpr == b.gpr && value == b.value; - } - - bool operator!=(const ExprGprEqual& b) const { - return !operator==(b); - } - - u32 gpr; - u32 value; -}; - -template <typename T, typename... Args> -Expr MakeExpr(Args&&... 
args) { - static_assert(std::is_convertible_v<T, ExprData>); - return std::make_shared<ExprData>(T(std::forward<Args>(args)...)); -} - -bool ExprAreEqual(const Expr& first, const Expr& second); - -bool ExprAreOpposite(const Expr& first, const Expr& second); - -Expr MakeExprNot(Expr first); - -Expr MakeExprAnd(Expr first, Expr second); - -Expr MakeExprOr(Expr first, Expr second); - -bool ExprIsTrue(const Expr& first); - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/memory_util.cpp b/src/video_core/shader/memory_util.cpp deleted file mode 100644 index e18ccba8e..000000000 --- a/src/video_core/shader/memory_util.cpp +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <algorithm> -#include <cstddef> - -#include <boost/container_hash/hash.hpp> - -#include "common/common_types.h" -#include "core/core.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/memory_manager.h" -#include "video_core/shader/memory_util.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d, - Tegra::Engines::Maxwell3D::Regs::ShaderProgram program) { - const auto& shader_config{maxwell3d.regs.shader_config[static_cast<std::size_t>(program)]}; - return maxwell3d.regs.code_address.CodeAddress() + shader_config.offset; -} - -bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) { - // Sched instructions appear once every 4 instructions. - constexpr std::size_t SchedPeriod = 4; - const std::size_t absolute_offset = offset - main_offset; - return (absolute_offset % SchedPeriod) == 0; -} - -std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute) { - // This is the encoded version of BRA that jumps to itself. All Nvidia - // shaders end with one. - static constexpr u64 SELF_JUMPING_BRANCH = 0xE2400FFFFF07000FULL; - static constexpr u64 MASK = 0xFFFFFFFFFF7FFFFFULL; - - const std::size_t start_offset = is_compute ? 
KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; - std::size_t offset = start_offset; - while (offset < program.size()) { - const u64 instruction = program[offset]; - if (!IsSchedInstruction(offset, start_offset)) { - if ((instruction & MASK) == SELF_JUMPING_BRANCH) { - // End on Maxwell's "nop" instruction - break; - } - if (instruction == 0) { - break; - } - } - ++offset; - } - // The last instruction is included in the program size - return std::min(offset + 1, program.size()); -} - -ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_addr, - const u8* host_ptr, bool is_compute) { - ProgramCode code(VideoCommon::Shader::MAX_PROGRAM_LENGTH); - ASSERT_OR_EXECUTE(host_ptr != nullptr, { return code; }); - memory_manager.ReadBlockUnsafe(gpu_addr, code.data(), code.size() * sizeof(u64)); - code.resize(CalculateProgramSize(code, is_compute)); - return code; -} - -u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code, - const ProgramCode& code_b) { - size_t unique_identifier = boost::hash_value(code); - if (is_a) { - // VertexA programs include two programs - boost::hash_combine(unique_identifier, boost::hash_value(code_b)); - } - return static_cast<u64>(unique_identifier); -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/memory_util.h b/src/video_core/shader/memory_util.h deleted file mode 100644 index 4624d38e6..000000000 --- a/src/video_core/shader/memory_util.h +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <cstddef> -#include <vector> - -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" - -namespace Tegra { -class MemoryManager; -} - -namespace VideoCommon::Shader { - -using ProgramCode = std::vector<u64>; - -constexpr u32 STAGE_MAIN_OFFSET = 10; -constexpr u32 KERNEL_MAIN_OFFSET = 0; - -/// Gets the address for the specified shader stage program -GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d, - Tegra::Engines::Maxwell3D::Regs::ShaderProgram program); - -/// Gets if the current instruction offset is a scheduler instruction -bool IsSchedInstruction(std::size_t offset, std::size_t main_offset); - -/// Calculates the size of a program stream -std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute); - -/// Gets the shader program code from memory for the specified address -ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_addr, - const u8* host_ptr, bool is_compute); - -/// Hashes one (or two) program streams -u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code, - const ProgramCode& code_b = {}); - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h deleted file mode 100644 index b54d33763..000000000 --- a/src/video_core/shader/node.h +++ /dev/null @@ -1,701 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include <array> -#include <cstddef> -#include <memory> -#include <optional> -#include <string> -#include <tuple> -#include <utility> -#include <variant> -#include <vector> - -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" - -namespace VideoCommon::Shader { - -enum class OperationCode { - Assign, /// (float& dest, float src) -> void - - Select, /// (MetaArithmetic, bool pred, float a, float b) -> float - - FAdd, /// (MetaArithmetic, float a, float b) -> float - FMul, /// (MetaArithmetic, float a, float b) -> float - FDiv, /// (MetaArithmetic, float a, float b) -> float - FFma, /// (MetaArithmetic, float a, float b, float c) -> float - FNegate, /// (MetaArithmetic, float a) -> float - FAbsolute, /// (MetaArithmetic, float a) -> float - FClamp, /// (MetaArithmetic, float value, float min, float max) -> float - FCastHalf0, /// (MetaArithmetic, f16vec2 a) -> float - FCastHalf1, /// (MetaArithmetic, f16vec2 a) -> float - FMin, /// (MetaArithmetic, float a, float b) -> float - FMax, /// (MetaArithmetic, float a, float b) -> float - FCos, /// (MetaArithmetic, float a) -> float - FSin, /// (MetaArithmetic, float a) -> float - FExp2, /// (MetaArithmetic, float a) -> float - FLog2, /// (MetaArithmetic, float a) -> float - FInverseSqrt, /// (MetaArithmetic, float a) -> float - FSqrt, /// (MetaArithmetic, float a) -> float - FRoundEven, /// (MetaArithmetic, float a) -> float - FFloor, /// (MetaArithmetic, float a) -> float - FCeil, /// (MetaArithmetic, float a) -> float - FTrunc, /// (MetaArithmetic, float a) -> float - FCastInteger, /// (MetaArithmetic, int a) -> float - FCastUInteger, /// (MetaArithmetic, uint a) -> float - FSwizzleAdd, /// (float a, float b, uint mask) -> float - - IAdd, /// (MetaArithmetic, int a, int b) -> int - IMul, /// (MetaArithmetic, int a, int b) -> int - IDiv, /// (MetaArithmetic, int a, int b) -> int - INegate, /// (MetaArithmetic, int a) -> int - IAbsolute, /// (MetaArithmetic, int a) -> int - IMin, /// (MetaArithmetic, int a, int b) -> int - IMax, /// (MetaArithmetic, int a, int b) -> int - ICastFloat, /// (MetaArithmetic, float a) -> int - ICastUnsigned, /// (MetaArithmetic, uint a) -> int - ILogicalShiftLeft, /// (MetaArithmetic, int a, uint b) -> int - ILogicalShiftRight, /// (MetaArithmetic, int a, uint b) -> int - IArithmeticShiftRight, /// (MetaArithmetic, int a, uint b) -> int - IBitwiseAnd, /// (MetaArithmetic, int a, int b) -> int - IBitwiseOr, /// (MetaArithmetic, int a, int b) -> int - IBitwiseXor, /// (MetaArithmetic, int a, int b) -> int - IBitwiseNot, /// (MetaArithmetic, int a) -> int - IBitfieldInsert, /// (MetaArithmetic, int base, int insert, int offset, int bits) -> int - IBitfieldExtract, /// (MetaArithmetic, int value, int offset, int offset) -> int - IBitCount, /// (MetaArithmetic, int) -> int - IBitMSB, /// (MetaArithmetic, int) -> int - - UAdd, /// (MetaArithmetic, uint a, uint b) -> uint - UMul, /// (MetaArithmetic, uint a, uint b) -> uint - UDiv, /// (MetaArithmetic, uint a, uint b) -> uint - UMin, /// (MetaArithmetic, uint a, uint b) -> uint - UMax, /// (MetaArithmetic, uint a, uint b) -> uint - UCastFloat, /// (MetaArithmetic, float a) -> uint - UCastSigned, /// (MetaArithmetic, int a) -> uint - ULogicalShiftLeft, /// (MetaArithmetic, uint a, uint b) -> uint - ULogicalShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint - UArithmeticShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint - UBitwiseAnd, /// (MetaArithmetic, uint a, uint b) -> uint - UBitwiseOr, /// (MetaArithmetic, 
uint a, uint b) -> uint - UBitwiseXor, /// (MetaArithmetic, uint a, uint b) -> uint - UBitwiseNot, /// (MetaArithmetic, uint a) -> uint - UBitfieldInsert, /// (MetaArithmetic, uint base, uint insert, int offset, int bits) -> uint - UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint - UBitCount, /// (MetaArithmetic, uint) -> uint - UBitMSB, /// (MetaArithmetic, uint) -> uint - - HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 - HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 - HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2 - HAbsolute, /// (f16vec2 a) -> f16vec2 - HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2 - HClamp, /// (f16vec2 src, float min, float max) -> f16vec2 - HCastFloat, /// (MetaArithmetic, float a) -> f16vec2 - HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2 - HMergeF32, /// (f16vec2 src) -> float - HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2 - HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2 - HPack2, /// (float a, float b) -> f16vec2 - - LogicalAssign, /// (bool& dst, bool src) -> void - LogicalAnd, /// (bool a, bool b) -> bool - LogicalOr, /// (bool a, bool b) -> bool - LogicalXor, /// (bool a, bool b) -> bool - LogicalNegate, /// (bool a) -> bool - LogicalPick2, /// (bool2 pair, uint index) -> bool - LogicalAnd2, /// (bool2 a) -> bool - - LogicalFOrdLessThan, /// (float a, float b) -> bool - LogicalFOrdEqual, /// (float a, float b) -> bool - LogicalFOrdLessEqual, /// (float a, float b) -> bool - LogicalFOrdGreaterThan, /// (float a, float b) -> bool - LogicalFOrdNotEqual, /// (float a, float b) -> bool - LogicalFOrdGreaterEqual, /// (float a, float b) -> bool - LogicalFOrdered, /// (float a, float b) -> bool - LogicalFUnordered, /// (float a, float b) -> bool - LogicalFUnordLessThan, /// (float a, float b) -> bool - LogicalFUnordEqual, /// (float a, float b) -> bool - LogicalFUnordLessEqual, /// (float a, float b) -> bool - LogicalFUnordGreaterThan, /// (float a, float b) -> bool - LogicalFUnordNotEqual, /// (float a, float b) -> bool - LogicalFUnordGreaterEqual, /// (float a, float b) -> bool - - LogicalILessThan, /// (int a, int b) -> bool - LogicalIEqual, /// (int a, int b) -> bool - LogicalILessEqual, /// (int a, int b) -> bool - LogicalIGreaterThan, /// (int a, int b) -> bool - LogicalINotEqual, /// (int a, int b) -> bool - LogicalIGreaterEqual, /// (int a, int b) -> bool - - LogicalULessThan, /// (uint a, uint b) -> bool - LogicalUEqual, /// (uint a, uint b) -> bool - LogicalULessEqual, /// (uint a, uint b) -> bool - LogicalUGreaterThan, /// (uint a, uint b) -> bool - LogicalUNotEqual, /// (uint a, uint b) -> bool - LogicalUGreaterEqual, /// (uint a, uint b) -> bool - - LogicalAddCarry, /// (uint a, uint b) -> bool - - Logical2HLessThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HLessEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HGreaterThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HLessThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HLessEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HGreaterThanWithNan, /// 
(MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HNotEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HGreaterEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - - Texture, /// (MetaTexture, float[N] coords) -> float4 - TextureLod, /// (MetaTexture, float[N] coords) -> float4 - TextureGather, /// (MetaTexture, float[N] coords) -> float4 - TextureQueryDimensions, /// (MetaTexture, float a) -> float4 - TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 - TexelFetch, /// (MetaTexture, int[N], int) -> float4 - TextureGradient, /// (MetaTexture, float[N] coords, float[N*2] derivates) -> float4 - - ImageLoad, /// (MetaImage, int[N] coords) -> void - ImageStore, /// (MetaImage, int[N] coords) -> void - - AtomicImageAdd, /// (MetaImage, int[N] coords) -> void - AtomicImageAnd, /// (MetaImage, int[N] coords) -> void - AtomicImageOr, /// (MetaImage, int[N] coords) -> void - AtomicImageXor, /// (MetaImage, int[N] coords) -> void - AtomicImageExchange, /// (MetaImage, int[N] coords) -> void - - AtomicUExchange, /// (memory, uint) -> uint - AtomicUAdd, /// (memory, uint) -> uint - AtomicUMin, /// (memory, uint) -> uint - AtomicUMax, /// (memory, uint) -> uint - AtomicUAnd, /// (memory, uint) -> uint - AtomicUOr, /// (memory, uint) -> uint - AtomicUXor, /// (memory, uint) -> uint - - AtomicIExchange, /// (memory, int) -> int - AtomicIAdd, /// (memory, int) -> int - AtomicIMin, /// (memory, int) -> int - AtomicIMax, /// (memory, int) -> int - AtomicIAnd, /// (memory, int) -> int - AtomicIOr, /// (memory, int) -> int - AtomicIXor, /// (memory, int) -> int - - ReduceUAdd, /// (memory, uint) -> void - ReduceUMin, /// (memory, uint) -> void - ReduceUMax, /// (memory, uint) -> void - ReduceUAnd, /// (memory, uint) -> void - ReduceUOr, /// (memory, uint) -> void - ReduceUXor, /// (memory, uint) -> void - - ReduceIAdd, /// (memory, int) -> void - ReduceIMin, /// (memory, int) -> void - ReduceIMax, /// (memory, int) -> void - ReduceIAnd, /// (memory, int) -> void - ReduceIOr, /// (memory, int) -> void - ReduceIXor, /// (memory, int) -> void - - Branch, /// (uint branch_target) -> void - BranchIndirect, /// (uint branch_target) -> void - PushFlowStack, /// (uint branch_target) -> void - PopFlowStack, /// () -> void - Exit, /// () -> void - Discard, /// () -> void - - EmitVertex, /// () -> void - EndPrimitive, /// () -> void - - InvocationId, /// () -> int - YNegate, /// () -> float - LocalInvocationIdX, /// () -> uint - LocalInvocationIdY, /// () -> uint - LocalInvocationIdZ, /// () -> uint - WorkGroupIdX, /// () -> uint - WorkGroupIdY, /// () -> uint - WorkGroupIdZ, /// () -> uint - - BallotThread, /// (bool) -> uint - VoteAll, /// (bool) -> bool - VoteAny, /// (bool) -> bool - VoteEqual, /// (bool) -> bool - - ThreadId, /// () -> uint - ThreadEqMask, /// () -> uint - ThreadGeMask, /// () -> uint - ThreadGtMask, /// () -> uint - ThreadLeMask, /// () -> uint - ThreadLtMask, /// () -> uint - ShuffleIndexed, /// (uint value, uint index) -> uint - - Barrier, /// () -> void - MemoryBarrierGroup, /// () -> void - MemoryBarrierGlobal, /// () -> void - - Amount, -}; - -enum class InternalFlag { - Zero = 0, - Sign = 1, - Carry = 2, - Overflow = 3, - Amount = 4, -}; - -enum class MetaStackClass { - Ssy, - Pbk, -}; - -class OperationNode; -class ConditionalNode; -class GprNode; -class CustomVarNode; -class ImmediateNode; -class InternalFlagNode; -class PredicateNode; -class AbufNode; -class CbufNode; -class LmemNode; -class PatchNode; -class SmemNode; 
-class GmemNode; -class CommentNode; - -using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, CustomVarNode, ImmediateNode, - InternalFlagNode, PredicateNode, AbufNode, PatchNode, CbufNode, - LmemNode, SmemNode, GmemNode, CommentNode>; -using Node = std::shared_ptr<NodeData>; -using Node4 = std::array<Node, 4>; -using NodeBlock = std::vector<Node>; - -struct ArraySamplerNode; -struct BindlessSamplerNode; -struct SeparateSamplerNode; - -using TrackSamplerData = std::variant<BindlessSamplerNode, SeparateSamplerNode, ArraySamplerNode>; -using TrackSampler = std::shared_ptr<TrackSamplerData>; - -struct SamplerEntry { - /// Bound samplers constructor - explicit SamplerEntry(u32 index_, u32 offset_, Tegra::Shader::TextureType type_, bool is_array_, - bool is_shadow_, bool is_buffer_, bool is_indexed_) - : index{index_}, offset{offset_}, type{type_}, is_array{is_array_}, is_shadow{is_shadow_}, - is_buffer{is_buffer_}, is_indexed{is_indexed_} {} - - /// Separate sampler constructor - explicit SamplerEntry(u32 index_, std::pair<u32, u32> offsets, std::pair<u32, u32> buffers, - Tegra::Shader::TextureType type_, bool is_array_, bool is_shadow_, - bool is_buffer_) - : index{index_}, offset{offsets.first}, secondary_offset{offsets.second}, - buffer{buffers.first}, secondary_buffer{buffers.second}, type{type_}, is_array{is_array_}, - is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_separated{true} {} - - /// Bindless samplers constructor - explicit SamplerEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::TextureType type_, - bool is_array_, bool is_shadow_, bool is_buffer_, bool is_indexed_) - : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_array{is_array_}, - is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_bindless{true}, is_indexed{is_indexed_} { - } - - u32 index = 0; ///< Emulated index given for the this sampler. - u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read. - u32 secondary_offset = 0; ///< Secondary offset in the const buffer. - u32 buffer = 0; ///< Buffer where the bindless sampler is read. - u32 secondary_buffer = 0; ///< Secondary buffer where the bindless sampler is read. - u32 size = 1; ///< Size of the sampler. - - Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) - bool is_array = false; ///< Whether the texture is being sampled as an array texture or not. - bool is_shadow = false; ///< Whether the texture is being sampled as a depth texture or not. - bool is_buffer = false; ///< Whether the texture is a texture buffer without sampler. - bool is_bindless = false; ///< Whether this sampler belongs to a bindless texture or not. - bool is_indexed = false; ///< Whether this sampler is an indexed array of textures. - bool is_separated = false; ///< Whether the image and sampler is separated or not. 
-}; - -/// Represents a tracked bindless sampler into a direct const buffer -struct ArraySamplerNode { - u32 index; - u32 base_offset; - u32 bindless_var; -}; - -/// Represents a tracked separate sampler image pair that was folded statically -struct SeparateSamplerNode { - std::pair<u32, u32> indices; - std::pair<u32, u32> offsets; -}; - -/// Represents a tracked bindless sampler into a direct const buffer -struct BindlessSamplerNode { - u32 index; - u32 offset; -}; - -struct ImageEntry { -public: - /// Bound images constructor - explicit ImageEntry(u32 index_, u32 offset_, Tegra::Shader::ImageType type_) - : index{index_}, offset{offset_}, type{type_} {} - - /// Bindless samplers constructor - explicit ImageEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::ImageType type_) - : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_bindless{true} {} - - void MarkWrite() { - is_written = true; - } - - void MarkRead() { - is_read = true; - } - - void MarkAtomic() { - MarkWrite(); - MarkRead(); - is_atomic = true; - } - - u32 index = 0; - u32 offset = 0; - u32 buffer = 0; - - Tegra::Shader::ImageType type{}; - bool is_bindless = false; - bool is_written = false; - bool is_read = false; - bool is_atomic = false; -}; - -struct GlobalMemoryBase { - u32 cbuf_index = 0; - u32 cbuf_offset = 0; - - [[nodiscard]] bool operator<(const GlobalMemoryBase& rhs) const { - return std::tie(cbuf_index, cbuf_offset) < std::tie(rhs.cbuf_index, rhs.cbuf_offset); - } -}; - -/// Parameters describing an arithmetic operation -struct MetaArithmetic { - bool precise{}; ///< Whether the operation can be constraint or not -}; - -/// Parameters describing a texture sampler -struct MetaTexture { - SamplerEntry sampler; - Node array; - Node depth_compare; - std::vector<Node> aoffi; - std::vector<Node> ptp; - std::vector<Node> derivates; - Node bias; - Node lod; - Node component; - u32 element{}; - Node index; -}; - -struct MetaImage { - const ImageEntry& image; - std::vector<Node> values; - u32 element{}; -}; - -/// Parameters that modify an operation but are not part of any particular operand -using Meta = - std::variant<MetaArithmetic, MetaTexture, MetaImage, MetaStackClass, Tegra::Shader::HalfType>; - -class AmendNode { -public: - [[nodiscard]] std::optional<std::size_t> GetAmendIndex() const { - if (amend_index == amend_null_index) { - return std::nullopt; - } - return {amend_index}; - } - - void SetAmendIndex(std::size_t index) { - amend_index = index; - } - - void ClearAmend() { - amend_index = amend_null_index; - } - -private: - static constexpr std::size_t amend_null_index = 0xFFFFFFFFFFFFFFFFULL; - std::size_t amend_index{amend_null_index}; -}; - -/// Holds any kind of operation that can be done in the IR -class OperationNode final : public AmendNode { -public: - explicit OperationNode(OperationCode code_) : OperationNode(code_, Meta{}) {} - - explicit OperationNode(OperationCode code_, Meta meta_) - : OperationNode(code_, std::move(meta_), std::vector<Node>{}) {} - - explicit OperationNode(OperationCode code_, std::vector<Node> operands_) - : OperationNode(code_, Meta{}, std::move(operands_)) {} - - explicit OperationNode(OperationCode code_, Meta meta_, std::vector<Node> operands_) - : code{code_}, meta{std::move(meta_)}, operands{std::move(operands_)} {} - - template <typename... Args> - explicit OperationNode(OperationCode code_, Meta meta_, Args&&... 
operands_) - : code{code_}, meta{std::move(meta_)}, operands{operands_...} {} - - [[nodiscard]] OperationCode GetCode() const { - return code; - } - - [[nodiscard]] const Meta& GetMeta() const { - return meta; - } - - [[nodiscard]] std::size_t GetOperandsCount() const { - return operands.size(); - } - - [[nodiscard]] const Node& operator[](std::size_t operand_index) const { - return operands.at(operand_index); - } - -private: - OperationCode code{}; - Meta meta{}; - std::vector<Node> operands; -}; - -/// Encloses inside any kind of node that returns a boolean conditionally-executed code -class ConditionalNode final : public AmendNode { -public: - explicit ConditionalNode(Node condition_, std::vector<Node>&& code_) - : condition{std::move(condition_)}, code{std::move(code_)} {} - - [[nodiscard]] const Node& GetCondition() const { - return condition; - } - - [[nodiscard]] const std::vector<Node>& GetCode() const { - return code; - } - -private: - Node condition; ///< Condition to be satisfied - std::vector<Node> code; ///< Code to execute -}; - -/// A general purpose register -class GprNode final { -public: - explicit constexpr GprNode(Tegra::Shader::Register index_) : index{index_} {} - - [[nodiscard]] constexpr u32 GetIndex() const { - return static_cast<u32>(index); - } - -private: - Tegra::Shader::Register index{}; -}; - -/// A custom variable -class CustomVarNode final { -public: - explicit constexpr CustomVarNode(u32 index_) : index{index_} {} - - [[nodiscard]] constexpr u32 GetIndex() const { - return index; - } - -private: - u32 index{}; -}; - -/// A 32-bits value that represents an immediate value -class ImmediateNode final { -public: - explicit constexpr ImmediateNode(u32 value_) : value{value_} {} - - [[nodiscard]] constexpr u32 GetValue() const { - return value; - } - -private: - u32 value{}; -}; - -/// One of Maxwell's internal flags -class InternalFlagNode final { -public: - explicit constexpr InternalFlagNode(InternalFlag flag_) : flag{flag_} {} - - [[nodiscard]] constexpr InternalFlag GetFlag() const { - return flag; - } - -private: - InternalFlag flag{}; -}; - -/// A predicate register, it can be negated without additional nodes -class PredicateNode final { -public: - explicit constexpr PredicateNode(Tegra::Shader::Pred index_, bool negated_) - : index{index_}, negated{negated_} {} - - [[nodiscard]] constexpr Tegra::Shader::Pred GetIndex() const { - return index; - } - - [[nodiscard]] constexpr bool IsNegated() const { - return negated; - } - -private: - Tegra::Shader::Pred index{}; - bool negated{}; -}; - -/// Attribute buffer memory (known as attributes or varyings in GLSL terms) -class AbufNode final { -public: - // Initialize for standard attributes (index is explicit). - explicit AbufNode(Tegra::Shader::Attribute::Index index_, u32 element_, Node buffer_ = {}) - : buffer{std::move(buffer_)}, index{index_}, element{element_} {} - - // Initialize for physical attributes (index is a variable value). 
- explicit AbufNode(Node physical_address_, Node buffer_ = {}) - : physical_address{std::move(physical_address_)}, buffer{std::move(buffer_)} {} - - [[nodiscard]] Tegra::Shader::Attribute::Index GetIndex() const { - return index; - } - - [[nodiscard]] u32 GetElement() const { - return element; - } - - [[nodiscard]] const Node& GetBuffer() const { - return buffer; - } - - [[nodiscard]] bool IsPhysicalBuffer() const { - return static_cast<bool>(physical_address); - } - - [[nodiscard]] const Node& GetPhysicalAddress() const { - return physical_address; - } - -private: - Node physical_address; - Node buffer; - Tegra::Shader::Attribute::Index index{}; - u32 element{}; -}; - -/// Patch memory (used to communicate tessellation stages). -class PatchNode final { -public: - explicit constexpr PatchNode(u32 offset_) : offset{offset_} {} - - [[nodiscard]] constexpr u32 GetOffset() const { - return offset; - } - -private: - u32 offset{}; -}; - -/// Constant buffer node, usually mapped to uniform buffers in GLSL -class CbufNode final { -public: - explicit CbufNode(u32 index_, Node offset_) : index{index_}, offset{std::move(offset_)} {} - - [[nodiscard]] u32 GetIndex() const { - return index; - } - - [[nodiscard]] const Node& GetOffset() const { - return offset; - } - -private: - u32 index{}; - Node offset; -}; - -/// Local memory node -class LmemNode final { -public: - explicit LmemNode(Node address_) : address{std::move(address_)} {} - - [[nodiscard]] const Node& GetAddress() const { - return address; - } - -private: - Node address; -}; - -/// Shared memory node -class SmemNode final { -public: - explicit SmemNode(Node address_) : address{std::move(address_)} {} - - [[nodiscard]] const Node& GetAddress() const { - return address; - } - -private: - Node address; -}; - -/// Global memory node -class GmemNode final { -public: - explicit GmemNode(Node real_address_, Node base_address_, const GlobalMemoryBase& descriptor_) - : real_address{std::move(real_address_)}, base_address{std::move(base_address_)}, - descriptor{descriptor_} {} - - [[nodiscard]] const Node& GetRealAddress() const { - return real_address; - } - - [[nodiscard]] const Node& GetBaseAddress() const { - return base_address; - } - - [[nodiscard]] const GlobalMemoryBase& GetDescriptor() const { - return descriptor; - } - -private: - Node real_address; - Node base_address; - GlobalMemoryBase descriptor; -}; - -/// Commentary, can be dropped -class CommentNode final { -public: - explicit CommentNode(std::string text_) : text{std::move(text_)} {} - - [[nodiscard]] const std::string& GetText() const { - return text; - } - -private: - std::string text; -}; - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp deleted file mode 100644 index 6a5b6940d..000000000 --- a/src/video_core/shader/node_helper.cpp +++ /dev/null @@ -1,115 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include <cstring> -#include <vector> - -#include "common/common_types.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -Node Conditional(Node condition, std::vector<Node> code) { - return MakeNode<ConditionalNode>(std::move(condition), std::move(code)); -} - -Node Comment(std::string text) { - return MakeNode<CommentNode>(std::move(text)); -} - -Node Immediate(u32 value) { - return MakeNode<ImmediateNode>(value); -} - -Node Immediate(s32 value) { - return Immediate(static_cast<u32>(value)); -} - -Node Immediate(f32 value) { - u32 integral; - std::memcpy(&integral, &value, sizeof(u32)); - return Immediate(integral); -} - -OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed) { - if (is_signed) { - return operation_code; - } - switch (operation_code) { - case OperationCode::FCastInteger: - return OperationCode::FCastUInteger; - case OperationCode::IAdd: - return OperationCode::UAdd; - case OperationCode::IMul: - return OperationCode::UMul; - case OperationCode::IDiv: - return OperationCode::UDiv; - case OperationCode::IMin: - return OperationCode::UMin; - case OperationCode::IMax: - return OperationCode::UMax; - case OperationCode::ICastFloat: - return OperationCode::UCastFloat; - case OperationCode::ICastUnsigned: - return OperationCode::UCastSigned; - case OperationCode::ILogicalShiftLeft: - return OperationCode::ULogicalShiftLeft; - case OperationCode::ILogicalShiftRight: - return OperationCode::ULogicalShiftRight; - case OperationCode::IArithmeticShiftRight: - return OperationCode::UArithmeticShiftRight; - case OperationCode::IBitwiseAnd: - return OperationCode::UBitwiseAnd; - case OperationCode::IBitwiseOr: - return OperationCode::UBitwiseOr; - case OperationCode::IBitwiseXor: - return OperationCode::UBitwiseXor; - case OperationCode::IBitwiseNot: - return OperationCode::UBitwiseNot; - case OperationCode::IBitfieldExtract: - return OperationCode::UBitfieldExtract; - case OperationCode::IBitfieldInsert: - return OperationCode::UBitfieldInsert; - case OperationCode::IBitCount: - return OperationCode::UBitCount; - case OperationCode::LogicalILessThan: - return OperationCode::LogicalULessThan; - case OperationCode::LogicalIEqual: - return OperationCode::LogicalUEqual; - case OperationCode::LogicalILessEqual: - return OperationCode::LogicalULessEqual; - case OperationCode::LogicalIGreaterThan: - return OperationCode::LogicalUGreaterThan; - case OperationCode::LogicalINotEqual: - return OperationCode::LogicalUNotEqual; - case OperationCode::LogicalIGreaterEqual: - return OperationCode::LogicalUGreaterEqual; - case OperationCode::AtomicIExchange: - return OperationCode::AtomicUExchange; - case OperationCode::AtomicIAdd: - return OperationCode::AtomicUAdd; - case OperationCode::AtomicIMin: - return OperationCode::AtomicUMin; - case OperationCode::AtomicIMax: - return OperationCode::AtomicUMax; - case OperationCode::AtomicIAnd: - return OperationCode::AtomicUAnd; - case OperationCode::AtomicIOr: - return OperationCode::AtomicUOr; - case OperationCode::AtomicIXor: - return OperationCode::AtomicUXor; - case OperationCode::INegate: - UNREACHABLE_MSG("Can't negate an unsigned integer"); - return {}; - case OperationCode::IAbsolute: - UNREACHABLE_MSG("Can't apply absolute to an unsigned integer"); - return {}; - default: - UNREACHABLE_MSG("Unknown signed operation with code={}", operation_code); - return {}; - } -} - -} // namespace VideoCommon::Shader diff --git 
a/src/video_core/shader/node_helper.h b/src/video_core/shader/node_helper.h
deleted file mode 100644
index 1e0886185..000000000
--- a/src/video_core/shader/node_helper.h
+++ /dev/null
@@ -1,71 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <memory>
-#include <string>
-#include <tuple>
-#include <type_traits>
-#include <utility>
-#include <vector>
-
-#include "common/common_types.h"
-#include "video_core/shader/node.h"
-
-namespace VideoCommon::Shader {
-
-/// This arithmetic operation cannot be constraint
-inline constexpr MetaArithmetic PRECISE = {true};
-/// This arithmetic operation can be optimized away
-inline constexpr MetaArithmetic NO_PRECISE = {false};
-
-/// Creates a conditional node
-Node Conditional(Node condition, std::vector<Node> code);
-
-/// Creates a commentary node
-Node Comment(std::string text);
-
-/// Creates an u32 immediate
-Node Immediate(u32 value);
-
-/// Creates a s32 immediate
-Node Immediate(s32 value);
-
-/// Creates a f32 immediate
-Node Immediate(f32 value);
-
-/// Converts an signed operation code to an unsigned operation code
-OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed);
-
-template <typename T, typename... Args>
-Node MakeNode(Args&&... args) {
-    static_assert(std::is_convertible_v<T, NodeData>);
-    return std::make_shared<NodeData>(T(std::forward<Args>(args)...));
-}
-
-template <typename T, typename... Args>
-TrackSampler MakeTrackSampler(Args&&... args) {
-    static_assert(std::is_convertible_v<T, TrackSamplerData>);
-    return std::make_shared<TrackSamplerData>(T{std::forward<Args>(args)...});
-}
-
-template <typename... Args>
-Node Operation(OperationCode code, Args&&... args) {
-    if constexpr (sizeof...(args) == 0) {
-        return MakeNode<OperationNode>(code);
-    } else if constexpr (std::is_convertible_v<std::tuple_element_t<0, std::tuple<Args...>>,
-                                               Meta>) {
-        return MakeNode<OperationNode>(code, std::forward<Args>(args)...);
-    } else {
-        return MakeNode<OperationNode>(code, Meta{}, std::forward<Args>(args)...);
-    }
-}
-
-template <typename... Args>
-Node SignedOperation(OperationCode code, bool is_signed, Args&&... args) {
-    return Operation(SignedToUnsignedCode(code, is_signed), std::forward<Args>(args)...);
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp
deleted file mode 100644
index 148d91fcb..000000000
--- a/src/video_core/shader/registry.cpp
+++ /dev/null
@@ -1,181 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
- -#include <algorithm> -#include <tuple> - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/kepler_compute.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" -#include "video_core/shader/registry.h" - -namespace VideoCommon::Shader { - -using Tegra::Engines::ConstBufferEngineInterface; -using Tegra::Engines::SamplerDescriptor; -using Tegra::Engines::ShaderType; - -namespace { - -GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) { - if (shader_stage == ShaderType::Compute) { - return {}; - } - - auto& graphics = dynamic_cast<Tegra::Engines::Maxwell3D&>(engine); - - return { - .tfb_layouts = graphics.regs.tfb_layouts, - .tfb_varying_locs = graphics.regs.tfb_varying_locs, - .primitive_topology = graphics.regs.draw.topology, - .tessellation_primitive = graphics.regs.tess_mode.prim, - .tessellation_spacing = graphics.regs.tess_mode.spacing, - .tfb_enabled = graphics.regs.tfb_enabled != 0, - .tessellation_clockwise = graphics.regs.tess_mode.cw.Value() != 0, - }; -} - -ComputeInfo MakeComputeInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) { - if (shader_stage != ShaderType::Compute) { - return {}; - } - - auto& compute = dynamic_cast<Tegra::Engines::KeplerCompute&>(engine); - const auto& launch = compute.launch_description; - - return { - .workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z}, - .shared_memory_size_in_words = launch.shared_alloc, - .local_memory_size_in_words = launch.local_pos_alloc, - }; -} - -} // Anonymous namespace - -Registry::Registry(ShaderType shader_stage, const SerializedRegistryInfo& info) - : stage{shader_stage}, stored_guest_driver_profile{info.guest_driver_profile}, - bound_buffer{info.bound_buffer}, graphics_info{info.graphics}, compute_info{info.compute} {} - -Registry::Registry(ShaderType shader_stage, ConstBufferEngineInterface& engine_) - : stage{shader_stage}, engine{&engine_}, bound_buffer{engine_.GetBoundBuffer()}, - graphics_info{MakeGraphicsInfo(shader_stage, engine_)}, compute_info{MakeComputeInfo( - shader_stage, engine_)} {} - -Registry::~Registry() = default; - -std::optional<u32> Registry::ObtainKey(u32 buffer, u32 offset) { - const std::pair<u32, u32> key = {buffer, offset}; - const auto iter = keys.find(key); - if (iter != keys.end()) { - return iter->second; - } - if (!engine) { - return std::nullopt; - } - const u32 value = engine->AccessConstBuffer32(stage, buffer, offset); - keys.emplace(key, value); - return value; -} - -std::optional<SamplerDescriptor> Registry::ObtainBoundSampler(u32 offset) { - const u32 key = offset; - const auto iter = bound_samplers.find(key); - if (iter != bound_samplers.end()) { - return iter->second; - } - if (!engine) { - return std::nullopt; - } - const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset); - bound_samplers.emplace(key, value); - return value; -} - -std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainSeparateSampler( - std::pair<u32, u32> buffers, std::pair<u32, u32> offsets) { - SeparateSamplerKey key; - key.buffers = buffers; - key.offsets = offsets; - const auto iter = separate_samplers.find(key); - if (iter != separate_samplers.end()) { - return iter->second; - } - if (!engine) { - return std::nullopt; - } - - const u32 handle_1 = engine->AccessConstBuffer32(stage, key.buffers.first, key.offsets.first); - const u32 handle_2 = engine->AccessConstBuffer32(stage, key.buffers.second, key.offsets.second); - 
const SamplerDescriptor value = engine->AccessSampler(handle_1 | handle_2); - separate_samplers.emplace(key, value); - return value; -} - -std::optional<SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer, u32 offset) { - const std::pair key = {buffer, offset}; - const auto iter = bindless_samplers.find(key); - if (iter != bindless_samplers.end()) { - return iter->second; - } - if (!engine) { - return std::nullopt; - } - const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset); - bindless_samplers.emplace(key, value); - return value; -} - -void Registry::InsertKey(u32 buffer, u32 offset, u32 value) { - keys.insert_or_assign({buffer, offset}, value); -} - -void Registry::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) { - bound_samplers.insert_or_assign(offset, sampler); -} - -void Registry::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) { - bindless_samplers.insert_or_assign({buffer, offset}, sampler); -} - -bool Registry::IsConsistent() const { - if (!engine) { - return true; - } - return std::all_of(keys.begin(), keys.end(), - [this](const auto& pair) { - const auto [cbuf, offset] = pair.first; - const auto value = pair.second; - return value == engine->AccessConstBuffer32(stage, cbuf, offset); - }) && - std::all_of(bound_samplers.begin(), bound_samplers.end(), - [this](const auto& sampler) { - const auto [key, value] = sampler; - return value == engine->AccessBoundSampler(stage, key); - }) && - std::all_of(bindless_samplers.begin(), bindless_samplers.end(), - [this](const auto& sampler) { - const auto [cbuf, offset] = sampler.first; - const auto value = sampler.second; - return value == engine->AccessBindlessSampler(stage, cbuf, offset); - }); -} - -bool Registry::HasEqualKeys(const Registry& rhs) const { - return std::tie(keys, bound_samplers, bindless_samplers) == - std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers); -} - -const GraphicsInfo& Registry::GetGraphicsInfo() const { - ASSERT(stage != Tegra::Engines::ShaderType::Compute); - return graphics_info; -} - -const ComputeInfo& Registry::GetComputeInfo() const { - ASSERT(stage == Tegra::Engines::ShaderType::Compute); - return compute_info; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h deleted file mode 100644 index 4bebefdde..000000000 --- a/src/video_core/shader/registry.h +++ /dev/null @@ -1,172 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
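Registry::ObtainKey and the Obtain*Sampler helpers deleted above all follow the same memoization shape: consult a map first and only fall back to the engine when the value has not been recorded, so a registry restored from disk (with no engine attached) can still replay identical answers. A stand-alone sketch of that shape, with a plain callback standing in for ConstBufferEngineInterface::AccessConstBuffer32:

    #include <cstdint>
    #include <functional>
    #include <map>
    #include <optional>
    #include <utility>

    class KeyCache {
    public:
        using Fetch = std::function<std::uint32_t(std::uint32_t buffer, std::uint32_t offset)>;

        // `fetch` may be empty when replaying a serialized cache (no engine attached).
        explicit KeyCache(Fetch fetch_ = {}) : fetch{std::move(fetch_)} {}

        std::optional<std::uint32_t> ObtainKey(std::uint32_t buffer, std::uint32_t offset) {
            const std::pair key{buffer, offset};
            if (const auto it = keys.find(key); it != keys.end()) {
                return it->second;              // already recorded
            }
            if (!fetch) {
                return std::nullopt;            // no engine to ask
            }
            const std::uint32_t value = fetch(buffer, offset);
            keys.emplace(key, value);           // record for later serialization/consistency checks
            return value;
        }

    private:
        Fetch fetch;
        std::map<std::pair<std::uint32_t, std::uint32_t>, std::uint32_t> keys;
    };

The real Registry keeps these maps in unordered_maps with a pair hash; a std::map is used here only to keep the sketch free of custom hashers.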
- -#pragma once - -#include <array> -#include <optional> -#include <type_traits> -#include <unordered_map> -#include <utility> - -#include "common/common_types.h" -#include "common/hash.h" -#include "video_core/engines/const_buffer_engine_interface.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" -#include "video_core/guest_driver.h" - -namespace VideoCommon::Shader { - -struct SeparateSamplerKey { - std::pair<u32, u32> buffers; - std::pair<u32, u32> offsets; -}; - -} // namespace VideoCommon::Shader - -namespace std { - -template <> -struct hash<VideoCommon::Shader::SeparateSamplerKey> { - std::size_t operator()(const VideoCommon::Shader::SeparateSamplerKey& key) const noexcept { - return std::hash<u32>{}(key.buffers.first ^ key.buffers.second ^ key.offsets.first ^ - key.offsets.second); - } -}; - -template <> -struct equal_to<VideoCommon::Shader::SeparateSamplerKey> { - bool operator()(const VideoCommon::Shader::SeparateSamplerKey& lhs, - const VideoCommon::Shader::SeparateSamplerKey& rhs) const noexcept { - return lhs.buffers == rhs.buffers && lhs.offsets == rhs.offsets; - } -}; - -} // namespace std - -namespace VideoCommon::Shader { - -using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>; -using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>; -using SeparateSamplerMap = - std::unordered_map<SeparateSamplerKey, Tegra::Engines::SamplerDescriptor>; -using BindlessSamplerMap = - std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>; - -struct GraphicsInfo { - using Maxwell = Tegra::Engines::Maxwell3D::Regs; - - std::array<Maxwell::TransformFeedbackLayout, Maxwell::NumTransformFeedbackBuffers> - tfb_layouts{}; - std::array<std::array<u8, 128>, Maxwell::NumTransformFeedbackBuffers> tfb_varying_locs{}; - Maxwell::PrimitiveTopology primitive_topology{}; - Maxwell::TessellationPrimitive tessellation_primitive{}; - Maxwell::TessellationSpacing tessellation_spacing{}; - bool tfb_enabled = false; - bool tessellation_clockwise = false; -}; -static_assert(std::is_trivially_copyable_v<GraphicsInfo> && - std::is_standard_layout_v<GraphicsInfo>); - -struct ComputeInfo { - std::array<u32, 3> workgroup_size{}; - u32 shared_memory_size_in_words = 0; - u32 local_memory_size_in_words = 0; -}; -static_assert(std::is_trivially_copyable_v<ComputeInfo> && std::is_standard_layout_v<ComputeInfo>); - -struct SerializedRegistryInfo { - VideoCore::GuestDriverProfile guest_driver_profile; - u32 bound_buffer = 0; - GraphicsInfo graphics; - ComputeInfo compute; -}; - -/** - * The Registry is a class use to interface the 3D and compute engines with the shader compiler. - * With it, the shader can obtain required data from GPU state and store it for disk shader - * compilation. - */ -class Registry { -public: - explicit Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info); - - explicit Registry(Tegra::Engines::ShaderType shader_stage, - Tegra::Engines::ConstBufferEngineInterface& engine_); - - ~Registry(); - - /// Retrieves a key from the registry, if it's registered, it will give the registered value, if - /// not it will obtain it from maxwell3d and register it. 
- std::optional<u32> ObtainKey(u32 buffer, u32 offset); - - std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset); - - std::optional<Tegra::Engines::SamplerDescriptor> ObtainSeparateSampler( - std::pair<u32, u32> buffers, std::pair<u32, u32> offsets); - - std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); - - /// Inserts a key. - void InsertKey(u32 buffer, u32 offset, u32 value); - - /// Inserts a bound sampler key. - void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler); - - /// Inserts a bindless sampler key. - void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); - - /// Checks keys and samplers against engine's current const buffers. - /// Returns true if they are the same value, false otherwise. - bool IsConsistent() const; - - /// Returns true if the keys are equal to the other ones in the registry. - bool HasEqualKeys(const Registry& rhs) const; - - /// Returns graphics information from this shader - const GraphicsInfo& GetGraphicsInfo() const; - - /// Returns compute information from this shader - const ComputeInfo& GetComputeInfo() const; - - /// Gives an getter to the const buffer keys in the database. - const KeyMap& GetKeys() const { - return keys; - } - - /// Gets samplers database. - const BoundSamplerMap& GetBoundSamplers() const { - return bound_samplers; - } - - /// Gets bindless samplers database. - const BindlessSamplerMap& GetBindlessSamplers() const { - return bindless_samplers; - } - - /// Gets bound buffer used on this shader - u32 GetBoundBuffer() const { - return bound_buffer; - } - - /// Obtains access to the guest driver's profile. - VideoCore::GuestDriverProfile& AccessGuestDriverProfile() { - return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile; - } - -private: - const Tegra::Engines::ShaderType stage; - VideoCore::GuestDriverProfile stored_guest_driver_profile; - Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; - KeyMap keys; - BoundSamplerMap bound_samplers; - SeparateSamplerMap separate_samplers; - BindlessSamplerMap bindless_samplers; - u32 bound_buffer; - GraphicsInfo graphics_info; - ComputeInfo compute_info; -}; - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp deleted file mode 100644 index a4987ffc6..000000000 --- a/src/video_core/shader/shader_ir.cpp +++ /dev/null @@ -1,464 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include <algorithm> -#include <array> -#include <cmath> - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Attribute; -using Tegra::Shader::Instruction; -using Tegra::Shader::IpaMode; -using Tegra::Shader::Pred; -using Tegra::Shader::PredCondition; -using Tegra::Shader::PredOperation; -using Tegra::Shader::Register; - -ShaderIR::ShaderIR(const ProgramCode& program_code_, u32 main_offset_, CompilerSettings settings_, - Registry& registry_) - : program_code{program_code_}, main_offset{main_offset_}, settings{settings_}, registry{ - registry_} { - Decode(); - PostDecode(); -} - -ShaderIR::~ShaderIR() = default; - -Node ShaderIR::GetRegister(Register reg) { - if (reg != Register::ZeroIndex) { - used_registers.insert(static_cast<u32>(reg)); - } - return MakeNode<GprNode>(reg); -} - -Node ShaderIR::GetCustomVariable(u32 id) { - return MakeNode<CustomVarNode>(id); -} - -Node ShaderIR::GetImmediate19(Instruction instr) { - return Immediate(instr.alu.GetImm20_19()); -} - -Node ShaderIR::GetImmediate32(Instruction instr) { - return Immediate(instr.alu.GetImm20_32()); -} - -Node ShaderIR::GetConstBuffer(u64 index_, u64 offset_) { - const auto index = static_cast<u32>(index_); - const auto offset = static_cast<u32>(offset_); - - used_cbufs.try_emplace(index).first->second.MarkAsUsed(offset); - - return MakeNode<CbufNode>(index, Immediate(offset)); -} - -Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) { - const auto index = static_cast<u32>(index_); - const auto offset = static_cast<u32>(offset_); - - used_cbufs.try_emplace(index).first->second.MarkAsUsedIndirect(); - - Node final_offset = [&] { - // Attempt to inline constant buffer without a variable offset. This is done to allow - // tracking LDC calls. - if (const auto gpr = std::get_if<GprNode>(&*node)) { - if (gpr->GetIndex() == Register::ZeroIndex) { - return Immediate(offset); - } - } - return Operation(OperationCode::UAdd, NO_PRECISE, std::move(node), Immediate(offset)); - }(); - return MakeNode<CbufNode>(index, std::move(final_offset)); -} - -Node ShaderIR::GetPredicate(u64 pred_, bool negated) { - const auto pred = static_cast<Pred>(pred_); - if (pred != Pred::UnusedIndex && pred != Pred::NeverExecute) { - used_predicates.insert(pred); - } - - return MakeNode<PredicateNode>(pred, negated); -} - -Node ShaderIR::GetPredicate(bool immediate) { - return GetPredicate(static_cast<u64>(immediate ? 
Pred::UnusedIndex : Pred::NeverExecute)); -} - -Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) { - MarkAttributeUsage(index, element); - used_input_attributes.emplace(index); - return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer)); -} - -Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) { - uses_physical_attributes = true; - return MakeNode<AbufNode>(GetRegister(physical_address), buffer); -} - -Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) { - MarkAttributeUsage(index, element); - used_output_attributes.insert(index); - return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer)); -} - -Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) const { - Node node = MakeNode<InternalFlagNode>(flag); - if (negated) { - return Operation(OperationCode::LogicalNegate, std::move(node)); - } - return node; -} - -Node ShaderIR::GetLocalMemory(Node address) { - return MakeNode<LmemNode>(std::move(address)); -} - -Node ShaderIR::GetSharedMemory(Node address) { - return MakeNode<SmemNode>(std::move(address)); -} - -Node ShaderIR::GetTemporary(u32 id) { - return GetRegister(Register::ZeroIndex + 1 + id); -} - -Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) { - if (absolute) { - value = Operation(OperationCode::FAbsolute, NO_PRECISE, std::move(value)); - } - if (negate) { - value = Operation(OperationCode::FNegate, NO_PRECISE, std::move(value)); - } - return value; -} - -Node ShaderIR::GetSaturatedFloat(Node value, bool saturate) { - if (!saturate) { - return value; - } - - Node positive_zero = Immediate(std::copysignf(0, 1)); - Node positive_one = Immediate(1.0f); - return Operation(OperationCode::FClamp, NO_PRECISE, std::move(value), std::move(positive_zero), - std::move(positive_one)); -} - -Node ShaderIR::ConvertIntegerSize(Node value, Register::Size size, bool is_signed) { - switch (size) { - case Register::Size::Byte: - value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, - std::move(value), Immediate(24)); - value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, - std::move(value), Immediate(24)); - return value; - case Register::Size::Short: - value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, - std::move(value), Immediate(16)); - value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, - std::move(value), Immediate(16)); - return value; - case Register::Size::Word: - // Default - do nothing - return value; - default: - UNREACHABLE_MSG("Unimplemented conversion size: {}", size); - return value; - } -} - -Node ShaderIR::GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed) { - if (!is_signed) { - // Absolute or negate on an unsigned is pointless - return value; - } - if (absolute) { - value = Operation(OperationCode::IAbsolute, NO_PRECISE, std::move(value)); - } - if (negate) { - value = Operation(OperationCode::INegate, NO_PRECISE, std::move(value)); - } - return value; -} - -Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) { - Node value = Immediate(instr.half_imm.PackImmediates()); - if (!has_negation) { - return value; - } - - Node first_negate = GetPredicate(instr.half_imm.first_negate != 0); - Node second_negate = GetPredicate(instr.half_imm.second_negate != 0); - - return Operation(OperationCode::HNegate, NO_PRECISE, 
std::move(value), std::move(first_negate), - std::move(second_negate)); -} - -Node ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) { - return Operation(OperationCode::HUnpack, type, std::move(value)); -} - -Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { - switch (merge) { - case Tegra::Shader::HalfMerge::H0_H1: - return src; - case Tegra::Shader::HalfMerge::F32: - return Operation(OperationCode::HMergeF32, std::move(src)); - case Tegra::Shader::HalfMerge::Mrg_H0: - return Operation(OperationCode::HMergeH0, std::move(dest), std::move(src)); - case Tegra::Shader::HalfMerge::Mrg_H1: - return Operation(OperationCode::HMergeH1, std::move(dest), std::move(src)); - } - UNREACHABLE(); - return src; -} - -Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) { - if (absolute) { - value = Operation(OperationCode::HAbsolute, NO_PRECISE, std::move(value)); - } - if (negate) { - value = Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), GetPredicate(true), - GetPredicate(true)); - } - return value; -} - -Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) { - if (!saturate) { - return value; - } - - Node positive_zero = Immediate(std::copysignf(0, 1)); - Node positive_one = Immediate(1.0f); - return Operation(OperationCode::HClamp, NO_PRECISE, std::move(value), std::move(positive_zero), - std::move(positive_one)); -} - -Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) { - if (condition == PredCondition::T) { - return GetPredicate(true); - } else if (condition == PredCondition::F) { - return GetPredicate(false); - } - - static constexpr std::array comparison_table{ - OperationCode(0), - OperationCode::LogicalFOrdLessThan, // LT - OperationCode::LogicalFOrdEqual, // EQ - OperationCode::LogicalFOrdLessEqual, // LE - OperationCode::LogicalFOrdGreaterThan, // GT - OperationCode::LogicalFOrdNotEqual, // NE - OperationCode::LogicalFOrdGreaterEqual, // GE - OperationCode::LogicalFOrdered, // NUM - OperationCode::LogicalFUnordered, // NAN - OperationCode::LogicalFUnordLessThan, // LTU - OperationCode::LogicalFUnordEqual, // EQU - OperationCode::LogicalFUnordLessEqual, // LEU - OperationCode::LogicalFUnordGreaterThan, // GTU - OperationCode::LogicalFUnordNotEqual, // NEU - OperationCode::LogicalFUnordGreaterEqual, // GEU - }; - const std::size_t index = static_cast<std::size_t>(condition); - ASSERT_MSG(index < std::size(comparison_table), "Invalid condition={}", index); - - return Operation(comparison_table[index], op_a, op_b); -} - -Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a, - Node op_b) { - static constexpr std::array comparison_table{ - std::pair{PredCondition::LT, OperationCode::LogicalILessThan}, - std::pair{PredCondition::EQ, OperationCode::LogicalIEqual}, - std::pair{PredCondition::LE, OperationCode::LogicalILessEqual}, - std::pair{PredCondition::GT, OperationCode::LogicalIGreaterThan}, - std::pair{PredCondition::NE, OperationCode::LogicalINotEqual}, - std::pair{PredCondition::GE, OperationCode::LogicalIGreaterEqual}, - }; - - const auto comparison = - std::find_if(comparison_table.cbegin(), comparison_table.cend(), - [condition](const auto entry) { return condition == entry.first; }); - UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(), - "Unknown predicate comparison operation"); - - return SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a), - std::move(op_b)); -} - -Node 
ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, - Node op_b) { - static constexpr std::array comparison_table{ - std::pair{PredCondition::LT, OperationCode::Logical2HLessThan}, - std::pair{PredCondition::EQ, OperationCode::Logical2HEqual}, - std::pair{PredCondition::LE, OperationCode::Logical2HLessEqual}, - std::pair{PredCondition::GT, OperationCode::Logical2HGreaterThan}, - std::pair{PredCondition::NE, OperationCode::Logical2HNotEqual}, - std::pair{PredCondition::GE, OperationCode::Logical2HGreaterEqual}, - std::pair{PredCondition::LTU, OperationCode::Logical2HLessThanWithNan}, - std::pair{PredCondition::LEU, OperationCode::Logical2HLessEqualWithNan}, - std::pair{PredCondition::GTU, OperationCode::Logical2HGreaterThanWithNan}, - std::pair{PredCondition::NEU, OperationCode::Logical2HNotEqualWithNan}, - std::pair{PredCondition::GEU, OperationCode::Logical2HGreaterEqualWithNan}, - }; - - const auto comparison = - std::find_if(comparison_table.cbegin(), comparison_table.cend(), - [condition](const auto entry) { return condition == entry.first; }); - UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(), - "Unknown predicate comparison operation"); - - return Operation(comparison->second, NO_PRECISE, std::move(op_a), std::move(op_b)); -} - -OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) { - static constexpr std::array operation_table{ - OperationCode::LogicalAnd, - OperationCode::LogicalOr, - OperationCode::LogicalXor, - }; - - const auto index = static_cast<std::size_t>(operation); - if (index >= operation_table.size()) { - UNIMPLEMENTED_MSG("Unknown predicate operation."); - return {}; - } - - return operation_table[index]; -} - -Node ShaderIR::GetConditionCode(ConditionCode cc) const { - switch (cc) { - case ConditionCode::NEU: - return GetInternalFlag(InternalFlag::Zero, true); - case ConditionCode::FCSM_TR: - UNIMPLEMENTED_MSG("EXIT.FCSM_TR is not implemented"); - return MakeNode<PredicateNode>(Pred::NeverExecute, false); - default: - UNIMPLEMENTED_MSG("Unimplemented condition code: {}", cc); - return MakeNode<PredicateNode>(Pred::NeverExecute, false); - } -} - -void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) { - bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), std::move(src))); -} - -void ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) { - bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), std::move(src))); -} - -void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) { - bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), std::move(value))); -} - -void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) { - bb.push_back( - Operation(OperationCode::Assign, GetLocalMemory(std::move(address)), std::move(value))); -} - -void ShaderIR::SetSharedMemory(NodeBlock& bb, Node address, Node value) { - bb.push_back( - Operation(OperationCode::Assign, GetSharedMemory(std::move(address)), std::move(value))); -} - -void ShaderIR::SetTemporary(NodeBlock& bb, u32 id, Node value) { - SetRegister(bb, Register::ZeroIndex + 1 + id, std::move(value)); -} - -void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) { - if (!sets_cc) { - return; - } - Node zerop = Operation(OperationCode::LogicalFOrdEqual, std::move(value), Immediate(0.0f)); - SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop)); - LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); -} - -void 
ShaderIR::SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc) { - if (!sets_cc) { - return; - } - Node zerop = Operation(OperationCode::LogicalIEqual, std::move(value), Immediate(0)); - SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop)); - LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); -} - -Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) { - return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, std::move(value), - Immediate(offset), Immediate(bits)); -} - -Node ShaderIR::BitfieldInsert(Node base, Node insert, u32 offset, u32 bits) { - return Operation(OperationCode::UBitfieldInsert, NO_PRECISE, base, insert, Immediate(offset), - Immediate(bits)); -} - -void ShaderIR::MarkAttributeUsage(Attribute::Index index, u64 element) { - switch (index) { - case Attribute::Index::LayerViewportPointSize: - switch (element) { - case 0: - UNIMPLEMENTED(); - break; - case 1: - uses_layer = true; - break; - case 2: - uses_viewport_index = true; - break; - case 3: - uses_point_size = true; - break; - } - break; - case Attribute::Index::TessCoordInstanceIDVertexID: - switch (element) { - case 2: - uses_instance_id = true; - break; - case 3: - uses_vertex_id = true; - break; - } - break; - case Attribute::Index::ClipDistances0123: - case Attribute::Index::ClipDistances4567: { - const u64 clip_index = (index == Attribute::Index::ClipDistances4567 ? 4 : 0) + element; - used_clip_distances.at(clip_index) = true; - break; - } - case Attribute::Index::FrontColor: - case Attribute::Index::FrontSecondaryColor: - case Attribute::Index::BackColor: - case Attribute::Index::BackSecondaryColor: - uses_legacy_varyings = true; - break; - default: - if (index >= Attribute::Index::TexCoord_0 && index <= Attribute::Index::TexCoord_7) { - uses_legacy_varyings = true; - } - break; - } -} - -std::size_t ShaderIR::DeclareAmend(Node new_amend) { - const auto id = amend_code.size(); - amend_code.push_back(std::move(new_amend)); - return id; -} - -u32 ShaderIR::NewCustomVariable() { - return num_custom_variables++; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h deleted file mode 100644 index 1cd7c14d7..000000000 --- a/src/video_core/shader/shader_ir.h +++ /dev/null @@ -1,479 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
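ConvertIntegerSize in the shader_ir.cpp hunk above narrows a value to 8 or 16 bits by shifting it up to the top of the word and shifting it back down, letting the right shift (arithmetic when signed, logical when unsigned) perform the sign- or zero-extension. The same trick in plain C++, shown only as an illustration of the emitted operations (the signed case assumes two's-complement, arithmetic right-shift behaviour):

    #include <cstdint>

    // Sign-extend the low `bits` bits of `value` to a full 32-bit signed integer (1 <= bits <= 32).
    std::int32_t SignExtend(std::uint32_t value, unsigned bits) {
        const unsigned shift = 32 - bits;
        return static_cast<std::int32_t>(value << shift) >> shift;
    }

    // Zero-extend: the logical right shift of an unsigned value discards the upper bits.
    std::uint32_t ZeroExtend(std::uint32_t value, unsigned bits) {
        const unsigned shift = 32 - bits;
        return (value << shift) >> shift;
    }

    // Example: SignExtend(0xFF, 8) == -1, ZeroExtend(0xFF, 8) == 255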
- -#pragma once - -#include <array> -#include <list> -#include <map> -#include <optional> -#include <set> -#include <tuple> -#include <vector> - -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/engines/shader_header.h" -#include "video_core/shader/ast.h" -#include "video_core/shader/compiler_settings.h" -#include "video_core/shader/memory_util.h" -#include "video_core/shader/node.h" -#include "video_core/shader/registry.h" - -namespace VideoCommon::Shader { - -struct ShaderBlock; - -constexpr u32 MAX_PROGRAM_LENGTH = 0x1000; - -struct ConstBuffer { - constexpr explicit ConstBuffer(u32 max_offset_, bool is_indirect_) - : max_offset{max_offset_}, is_indirect{is_indirect_} {} - - constexpr ConstBuffer() = default; - - void MarkAsUsed(u64 offset) { - max_offset = std::max(max_offset, static_cast<u32>(offset)); - } - - void MarkAsUsedIndirect() { - is_indirect = true; - } - - bool IsIndirect() const { - return is_indirect; - } - - u32 GetSize() const { - return max_offset + static_cast<u32>(sizeof(float)); - } - - u32 GetMaxOffset() const { - return max_offset; - } - -private: - u32 max_offset = 0; - bool is_indirect = false; -}; - -struct GlobalMemoryUsage { - bool is_read{}; - bool is_written{}; -}; - -class ShaderIR final { -public: - explicit ShaderIR(const ProgramCode& program_code_, u32 main_offset_, - CompilerSettings settings_, Registry& registry_); - ~ShaderIR(); - - const std::map<u32, NodeBlock>& GetBasicBlocks() const { - return basic_blocks; - } - - const std::set<u32>& GetRegisters() const { - return used_registers; - } - - const std::set<Tegra::Shader::Pred>& GetPredicates() const { - return used_predicates; - } - - const std::set<Tegra::Shader::Attribute::Index>& GetInputAttributes() const { - return used_input_attributes; - } - - const std::set<Tegra::Shader::Attribute::Index>& GetOutputAttributes() const { - return used_output_attributes; - } - - const std::map<u32, ConstBuffer>& GetConstantBuffers() const { - return used_cbufs; - } - - const std::list<SamplerEntry>& GetSamplers() const { - return used_samplers; - } - - const std::list<ImageEntry>& GetImages() const { - return used_images; - } - - const std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances>& GetClipDistances() - const { - return used_clip_distances; - } - - const std::map<GlobalMemoryBase, GlobalMemoryUsage>& GetGlobalMemory() const { - return used_global_memory; - } - - std::size_t GetLength() const { - return static_cast<std::size_t>(coverage_end * sizeof(u64)); - } - - bool UsesLayer() const { - return uses_layer; - } - - bool UsesViewportIndex() const { - return uses_viewport_index; - } - - bool UsesPointSize() const { - return uses_point_size; - } - - bool UsesInstanceId() const { - return uses_instance_id; - } - - bool UsesVertexId() const { - return uses_vertex_id; - } - - bool UsesLegacyVaryings() const { - return uses_legacy_varyings; - } - - bool UsesYNegate() const { - return uses_y_negate; - } - - bool UsesWarps() const { - return uses_warps; - } - - bool HasPhysicalAttributes() const { - return uses_physical_attributes; - } - - const Tegra::Shader::Header& GetHeader() const { - return header; - } - - bool IsFlowStackDisabled() const { - return disable_flow_stack; - } - - bool IsDecompiled() const { - return decompiled; - } - - const ASTManager& GetASTManager() const { - return program_manager; - } - - ASTNode GetASTProgram() const { - return program_manager.GetProgram(); - } - - u32 
GetASTNumVariables() const { - return program_manager.GetVariables(); - } - - u32 ConvertAddressToNvidiaSpace(u32 address) const { - return (address - main_offset) * static_cast<u32>(sizeof(Tegra::Shader::Instruction)); - } - - /// Returns a condition code evaluated from internal flags - Node GetConditionCode(Tegra::Shader::ConditionCode cc) const; - - const Node& GetAmendNode(std::size_t index) const { - return amend_code[index]; - } - - u32 GetNumCustomVariables() const { - return num_custom_variables; - } - -private: - friend class ASTDecoder; - - struct SamplerInfo { - std::optional<Tegra::Shader::TextureType> type; - std::optional<bool> is_array; - std::optional<bool> is_shadow; - std::optional<bool> is_buffer; - - constexpr bool IsComplete() const noexcept { - return type && is_array && is_shadow && is_buffer; - } - }; - - void Decode(); - void PostDecode(); - - NodeBlock DecodeRange(u32 begin, u32 end); - void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end); - void InsertControlFlow(NodeBlock& bb, const ShaderBlock& block); - - /** - * Decodes a single instruction from Tegra to IR. - * @param bb Basic block where the nodes will be written to. - * @param pc Program counter. Offset to decode. - * @return Next address to decode. - */ - u32 DecodeInstr(NodeBlock& bb, u32 pc); - - u32 DecodeArithmetic(NodeBlock& bb, u32 pc); - u32 DecodeArithmeticImmediate(NodeBlock& bb, u32 pc); - u32 DecodeBfe(NodeBlock& bb, u32 pc); - u32 DecodeBfi(NodeBlock& bb, u32 pc); - u32 DecodeShift(NodeBlock& bb, u32 pc); - u32 DecodeArithmeticInteger(NodeBlock& bb, u32 pc); - u32 DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc); - u32 DecodeArithmeticHalf(NodeBlock& bb, u32 pc); - u32 DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc); - u32 DecodeFfma(NodeBlock& bb, u32 pc); - u32 DecodeHfma2(NodeBlock& bb, u32 pc); - u32 DecodeConversion(NodeBlock& bb, u32 pc); - u32 DecodeWarp(NodeBlock& bb, u32 pc); - u32 DecodeMemory(NodeBlock& bb, u32 pc); - u32 DecodeTexture(NodeBlock& bb, u32 pc); - u32 DecodeImage(NodeBlock& bb, u32 pc); - u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc); - u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc); - u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc); - u32 DecodePredicateSetRegister(NodeBlock& bb, u32 pc); - u32 DecodePredicateSetPredicate(NodeBlock& bb, u32 pc); - u32 DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc); - u32 DecodeFloatSet(NodeBlock& bb, u32 pc); - u32 DecodeIntegerSet(NodeBlock& bb, u32 pc); - u32 DecodeHalfSet(NodeBlock& bb, u32 pc); - u32 DecodeVideo(NodeBlock& bb, u32 pc); - u32 DecodeXmad(NodeBlock& bb, u32 pc); - u32 DecodeOther(NodeBlock& bb, u32 pc); - - /// Generates a node for a passed register. - Node GetRegister(Tegra::Shader::Register reg); - /// Generates a node for a custom variable - Node GetCustomVariable(u32 id); - /// Generates a node representing a 19-bit immediate value - Node GetImmediate19(Tegra::Shader::Instruction instr); - /// Generates a node representing a 32-bit immediate value - Node GetImmediate32(Tegra::Shader::Instruction instr); - /// Generates a node representing a constant buffer - Node GetConstBuffer(u64 index, u64 offset); - /// Generates a node representing a constant buffer with a variadic offset - Node GetConstBufferIndirect(u64 index, u64 offset, Node node); - /// Generates a node for a passed predicate. 
It can be optionally negated - Node GetPredicate(u64 pred, bool negated = false); - /// Generates a predicate node for an immediate true or false value - Node GetPredicate(bool immediate); - /// Generates a node representing an input attribute. Keeps track of used attributes. - Node GetInputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer = {}); - /// Generates a node representing a physical input attribute. - Node GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer = {}); - /// Generates a node representing an output attribute. Keeps track of used attributes. - Node GetOutputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer); - /// Generates a node representing an internal flag - Node GetInternalFlag(InternalFlag flag, bool negated = false) const; - /// Generates a node representing a local memory address - Node GetLocalMemory(Node address); - /// Generates a node representing a shared memory address - Node GetSharedMemory(Node address); - /// Generates a temporary, internally it uses a post-RZ register - Node GetTemporary(u32 id); - - /// Sets a register. src value must be a number-evaluated node. - void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src); - /// Sets a predicate. src value must be a bool-evaluated node - void SetPredicate(NodeBlock& bb, u64 dest, Node src); - /// Sets an internal flag. src value must be a bool-evaluated node - void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value); - /// Sets a local memory address with a value. - void SetLocalMemory(NodeBlock& bb, Node address, Node value); - /// Sets a shared memory address with a value. - void SetSharedMemory(NodeBlock& bb, Node address, Node value); - /// Sets a temporary. Internally it uses a post-RZ register - void SetTemporary(NodeBlock& bb, u32 id, Node value); - - /// Sets internal flags from a float - void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true); - /// Sets internal flags from an integer - void SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc = true); - - /// Conditionally absolute/negated float. Absolute is applied first - Node GetOperandAbsNegFloat(Node value, bool absolute, bool negate); - /// Conditionally saturates a float - Node GetSaturatedFloat(Node value, bool saturate = true); - - /// Converts an integer to different sizes. - Node ConvertIntegerSize(Node value, Tegra::Shader::Register::Size size, bool is_signed); - /// Conditionally absolute/negated integer. Absolute is applied first - Node GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed); - - /// Unpacks a half immediate from an instruction - Node UnpackHalfImmediate(Tegra::Shader::Instruction instr, bool has_negation); - /// Unpacks a binary value into a half float pair with a type format - Node UnpackHalfFloat(Node value, Tegra::Shader::HalfType type); - /// Merges a half pair into another value - Node HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge); - /// Conditionally absolute/negated half float pair. 
Absolute is applied first - Node GetOperandAbsNegHalf(Node value, bool absolute, bool negate); - /// Conditionally saturates a half float pair - Node GetSaturatedHalfFloat(Node value, bool saturate = true); - - /// Get image component value by type and size - std::pair<Node, bool> GetComponentValue(Tegra::Texture::ComponentType component_type, - u32 component_size, Node original_value); - - /// Returns a predicate comparing two floats - Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b); - /// Returns a predicate comparing two integers - Node GetPredicateComparisonInteger(Tegra::Shader::PredCondition condition, bool is_signed, - Node op_a, Node op_b); - /// Returns a predicate comparing two half floats. meta consumes how both pairs will be compared - Node GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, Node op_b); - - /// Returns a predicate combiner operation - OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation); - - /// Queries the missing sampler info from the execution context. - SamplerInfo GetSamplerInfo(SamplerInfo info, - std::optional<Tegra::Engines::SamplerDescriptor> sampler); - - /// Accesses a texture sampler. - std::optional<SamplerEntry> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info); - - /// Accesses a texture sampler for a bindless texture. - std::optional<SamplerEntry> GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info, - Node& index_var); - - /// Accesses an image. - ImageEntry& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); - - /// Access a bindless image sampler. - ImageEntry& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type); - - /// Extracts a sequence of bits from a node - Node BitfieldExtract(Node value, u32 offset, u32 bits); - - /// Inserts a sequence of bits from a node - Node BitfieldInsert(Node base, Node insert, u32 offset, u32 bits); - - /// Marks the usage of a input or output attribute. - void MarkAttributeUsage(Tegra::Shader::Attribute::Index index, u64 element); - - /// Decodes VMNMX instruction and inserts its code into the passed basic block. 
- void DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr); - - void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, - const Node4& components); - - void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, - const Node4& components, bool ignore_mask = false); - void WriteTexsInstructionHalfFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, - const Node4& components, bool ignore_mask = false); - - Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, - Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, - bool is_array, bool is_aoffi, - std::optional<Tegra::Shader::Register> bindless_reg); - - Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, - Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, - bool is_array); - - Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, - bool depth_compare, bool is_array, bool is_aoffi, bool is_ptp, - bool is_bindless); - - Node4 GetTldCode(Tegra::Shader::Instruction instr); - - Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, - bool is_array); - - std::tuple<std::size_t, std::size_t> ValidateAndGetCoordinateElement( - Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array, - bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs); - - std::vector<Node> GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4); - - std::vector<Node> GetPtpCoordinates(std::array<Node, 2> ptp_regs); - - Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, - Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords, - Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi, - std::optional<Tegra::Shader::Register> bindless_reg); - - Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, - u64 byte_height); - - void WriteLogicOperation(NodeBlock& bb, Tegra::Shader::Register dest, - Tegra::Shader::LogicOperation logic_op, Node op_a, Node op_b, - Tegra::Shader::PredicateResultMode predicate_mode, - Tegra::Shader::Pred predicate, bool sets_cc); - void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b, - Node op_c, Node imm_lut, bool sets_cc); - - std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; - - std::pair<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code, - s64 cursor); - - std::pair<Node, TrackSampler> HandleBindlessIndirectRead(const CbufNode& cbuf, - const OperationNode& operation, - Node gpr, Node base_offset, - Node tracked, const NodeBlock& code, - s64 cursor); - - std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; - - std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, - s64 cursor) const; - - std::tuple<Node, Node, GlobalMemoryBase> TrackGlobalMemory(NodeBlock& bb, - Tegra::Shader::Instruction instr, - bool is_read, bool is_write); - - /// Register new amending code and obtain the reference id. 
- std::size_t DeclareAmend(Node new_amend); - - u32 NewCustomVariable(); - - const ProgramCode& program_code; - const u32 main_offset; - const CompilerSettings settings; - Registry& registry; - - bool decompiled{}; - bool disable_flow_stack{}; - - u32 coverage_begin{}; - u32 coverage_end{}; - - std::map<u32, NodeBlock> basic_blocks; - NodeBlock global_code; - ASTManager program_manager{true, true}; - std::vector<Node> amend_code; - u32 num_custom_variables{}; - - std::set<u32> used_registers; - std::set<Tegra::Shader::Pred> used_predicates; - std::set<Tegra::Shader::Attribute::Index> used_input_attributes; - std::set<Tegra::Shader::Attribute::Index> used_output_attributes; - std::map<u32, ConstBuffer> used_cbufs; - std::list<SamplerEntry> used_samplers; - std::list<ImageEntry> used_images; - std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; - std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory; - bool uses_layer{}; - bool uses_viewport_index{}; - bool uses_point_size{}; - bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes - bool uses_instance_id{}; - bool uses_vertex_id{}; - bool uses_legacy_varyings{}; - bool uses_y_negate{}; - bool uses_warps{}; - bool uses_indexed_samplers{}; - - Tegra::Shader::Header header; -}; - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp deleted file mode 100644 index 6be3ea92b..000000000 --- a/src/video_core/shader/track.cpp +++ /dev/null @@ -1,236 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <algorithm> -#include <utility> -#include <variant> - -#include "common/common_types.h" -#include "video_core/shader/node.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -namespace { - -std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, - OperationCode operation_code) { - for (; cursor >= 0; --cursor) { - Node node = code.at(cursor); - - if (const auto operation = std::get_if<OperationNode>(&*node)) { - if (operation->GetCode() == operation_code) { - return {std::move(node), cursor}; - } - } - - if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { - const auto& conditional_code = conditional->GetCode(); - auto result = FindOperation( - conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code); - auto& found = result.first; - if (found) { - return {std::move(found), cursor}; - } - } - } - return {}; -} - -std::optional<std::pair<Node, Node>> DecoupleIndirectRead(const OperationNode& operation) { - if (operation.GetCode() != OperationCode::UAdd) { - return std::nullopt; - } - Node gpr; - Node offset; - ASSERT(operation.GetOperandsCount() == 2); - for (std::size_t i = 0; i < operation.GetOperandsCount(); i++) { - Node operand = operation[i]; - if (std::holds_alternative<ImmediateNode>(*operand)) { - offset = operation[i]; - } else if (std::holds_alternative<GprNode>(*operand)) { - gpr = operation[i]; - } - } - if (offset && gpr) { - return std::make_pair(gpr, offset); - } - return std::nullopt; -} - -bool AmendNodeCv(std::size_t amend_index, Node node) { - if (const auto operation = std::get_if<OperationNode>(&*node)) { - operation->SetAmendIndex(amend_index); - return true; - } - if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { - 
conditional->SetAmendIndex(amend_index); - return true; - } - return false; -} - -} // Anonymous namespace - -std::pair<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code, - s64 cursor) { - if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { - const u32 cbuf_index = cbuf->GetIndex(); - - // Constant buffer found, test if it's an immediate - const auto& offset = cbuf->GetOffset(); - if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { - auto track = MakeTrackSampler<BindlessSamplerNode>(cbuf_index, immediate->GetValue()); - return {tracked, track}; - } - if (const auto operation = std::get_if<OperationNode>(&*offset)) { - const u32 bound_buffer = registry.GetBoundBuffer(); - if (bound_buffer != cbuf_index) { - return {}; - } - if (const std::optional pair = DecoupleIndirectRead(*operation)) { - auto [gpr, base_offset] = *pair; - return HandleBindlessIndirectRead(*cbuf, *operation, gpr, base_offset, tracked, - code, cursor); - } - } - return {}; - } - if (const auto gpr = std::get_if<GprNode>(&*tracked)) { - if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { - return {}; - } - // Reduce the cursor in one to avoid infinite loops when the instruction sets the same - // register that it uses as operand - const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); - if (!source) { - return {}; - } - return TrackBindlessSampler(source, code, new_cursor); - } - if (const auto operation = std::get_if<OperationNode>(&*tracked)) { - const OperationNode& op = *operation; - - const OperationCode opcode = operation->GetCode(); - if (opcode == OperationCode::IBitwiseOr || opcode == OperationCode::UBitwiseOr) { - ASSERT(op.GetOperandsCount() == 2); - auto [node_a, index_a, offset_a] = TrackCbuf(op[0], code, cursor); - auto [node_b, index_b, offset_b] = TrackCbuf(op[1], code, cursor); - if (node_a && node_b) { - auto track = MakeTrackSampler<SeparateSamplerNode>(std::pair{index_a, index_b}, - std::pair{offset_a, offset_b}); - return {tracked, std::move(track)}; - } - } - std::size_t i = op.GetOperandsCount(); - while (i--) { - if (auto found = TrackBindlessSampler(op[i - 1], code, cursor); std::get<0>(found)) { - // Constant buffer found in operand. 
- return found; - } - } - return {}; - } - if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) { - const auto& conditional_code = conditional->GetCode(); - return TrackBindlessSampler(tracked, conditional_code, - static_cast<s64>(conditional_code.size())); - } - return {}; -} - -std::pair<Node, TrackSampler> ShaderIR::HandleBindlessIndirectRead( - const CbufNode& cbuf, const OperationNode& operation, Node gpr, Node base_offset, Node tracked, - const NodeBlock& code, s64 cursor) { - const auto offset_imm = std::get<ImmediateNode>(*base_offset); - const auto& gpu_driver = registry.AccessGuestDriverProfile(); - const u32 bindless_cv = NewCustomVariable(); - const u32 texture_handler_size = gpu_driver.GetTextureHandlerSize(); - Node op = Operation(OperationCode::UDiv, gpr, Immediate(texture_handler_size)); - - Node cv_node = GetCustomVariable(bindless_cv); - Node amend_op = Operation(OperationCode::Assign, std::move(cv_node), std::move(op)); - const std::size_t amend_index = DeclareAmend(std::move(amend_op)); - AmendNodeCv(amend_index, code[cursor]); - - // TODO: Implement bindless index custom variable - auto track = - MakeTrackSampler<ArraySamplerNode>(cbuf.GetIndex(), offset_imm.GetValue(), bindless_cv); - return {tracked, track}; -} - -std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, - s64 cursor) const { - if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { - // Constant buffer found, test if it's an immediate - const auto& offset = cbuf->GetOffset(); - if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { - return {tracked, cbuf->GetIndex(), immediate->GetValue()}; - } - return {}; - } - if (const auto gpr = std::get_if<GprNode>(&*tracked)) { - if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { - return {}; - } - // Reduce the cursor in one to avoid infinite loops when the instruction sets the same - // register that it uses as operand - const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); - if (!source) { - return {}; - } - return TrackCbuf(source, code, new_cursor); - } - if (const auto operation = std::get_if<OperationNode>(&*tracked)) { - for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) { - if (auto found = TrackCbuf((*operation)[i - 1], code, cursor); std::get<0>(found)) { - // Cbuf found in operand. 
- return found; - } - } - return {}; - } - if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) { - const auto& conditional_code = conditional->GetCode(); - return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size())); - } - return {}; -} - -std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const { - // Reduce the cursor in one to avoid infinite loops when the instruction sets the same register - // that it uses as operand - const auto result = TrackRegister(&std::get<GprNode>(*tracked), code, cursor - 1); - const auto& found = result.first; - if (!found) { - return std::nullopt; - } - if (const auto immediate = std::get_if<ImmediateNode>(&*found)) { - return immediate->GetValue(); - } - return std::nullopt; -} - -std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code, - s64 cursor) const { - for (; cursor >= 0; --cursor) { - const auto [found_node, new_cursor] = FindOperation(code, cursor, OperationCode::Assign); - if (!found_node) { - return {}; - } - const auto operation = std::get_if<OperationNode>(&*found_node); - ASSERT(operation); - - const auto& target = (*operation)[0]; - if (const auto gpr_target = std::get_if<GprNode>(&*target)) { - if (gpr_target->GetIndex() == tracked->GetIndex()) { - return {(*operation)[1], new_cursor}; - } - } - } - return {}; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/transform_feedback.cpp b/src/video_core/shader/transform_feedback.cpp deleted file mode 100644 index 22a933761..000000000 --- a/src/video_core/shader/transform_feedback.cpp +++ /dev/null @@ -1,115 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include <algorithm> -#include <array> -#include <unordered_map> - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/transform_feedback.h" - -namespace VideoCommon::Shader { - -namespace { - -using Maxwell = Tegra::Engines::Maxwell3D::Regs; - -// TODO(Rodrigo): Change this to constexpr std::unordered_set in C++20 - -/// Attribute offsets that describe a vector -constexpr std::array VECTORS = { - 28, // gl_Position - 32, // Generic 0 - 36, // Generic 1 - 40, // Generic 2 - 44, // Generic 3 - 48, // Generic 4 - 52, // Generic 5 - 56, // Generic 6 - 60, // Generic 7 - 64, // Generic 8 - 68, // Generic 9 - 72, // Generic 10 - 76, // Generic 11 - 80, // Generic 12 - 84, // Generic 13 - 88, // Generic 14 - 92, // Generic 15 - 96, // Generic 16 - 100, // Generic 17 - 104, // Generic 18 - 108, // Generic 19 - 112, // Generic 20 - 116, // Generic 21 - 120, // Generic 22 - 124, // Generic 23 - 128, // Generic 24 - 132, // Generic 25 - 136, // Generic 26 - 140, // Generic 27 - 144, // Generic 28 - 148, // Generic 29 - 152, // Generic 30 - 156, // Generic 31 - 160, // gl_FrontColor - 164, // gl_FrontSecondaryColor - 160, // gl_BackColor - 164, // gl_BackSecondaryColor - 192, // gl_TexCoord[0] - 196, // gl_TexCoord[1] - 200, // gl_TexCoord[2] - 204, // gl_TexCoord[3] - 208, // gl_TexCoord[4] - 212, // gl_TexCoord[5] - 216, // gl_TexCoord[6] - 220, // gl_TexCoord[7] -}; -} // namespace - -std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info) { - - std::unordered_map<u8, VaryingTFB> tfb; - - for (std::size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) { - const auto& locations = info.tfb_varying_locs[buffer]; - const auto& layout = info.tfb_layouts[buffer]; - const std::size_t varying_count = layout.varying_count; - - std::size_t highest = 0; - - for (std::size_t offset = 0; offset < varying_count; ++offset) { - const std::size_t base_offset = offset; - const u8 location = locations[offset]; - - VaryingTFB varying; - varying.buffer = layout.stream; - varying.stride = layout.stride; - varying.offset = offset * sizeof(u32); - varying.components = 1; - - if (std::find(VECTORS.begin(), VECTORS.end(), location / 4 * 4) != VECTORS.end()) { - UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB"); - - const u8 base_index = location / 4; - while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) { - ++offset; - ++varying.components; - } - } - - [[maybe_unused]] const bool inserted = tfb.emplace(location, varying).second; - UNIMPLEMENTED_IF_MSG(!inserted, "Varying already stored"); - - highest = std::max(highest, (base_offset + varying.components) * sizeof(u32)); - } - - UNIMPLEMENTED_IF(highest != layout.stride); - } - return tfb; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/transform_feedback.h b/src/video_core/shader/transform_feedback.h deleted file mode 100644 index 77d05f64c..000000000 --- a/src/video_core/shader/transform_feedback.h +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include <unordered_map> - -#include "common/common_types.h" -#include "video_core/shader/registry.h" - -namespace VideoCommon::Shader { - -struct VaryingTFB { - std::size_t buffer; - std::size_t stride; - std::size_t offset; - std::size_t components; -}; - -std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info); - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp new file mode 100644 index 000000000..78bf90c48 --- /dev/null +++ b/src/video_core/shader_cache.cpp @@ -0,0 +1,250 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <array> +#include <vector> + +#include "common/assert.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/object_pool.h" +#include "video_core/dirty_flags.h" +#include "video_core/engines/kepler_compute.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/memory_manager.h" +#include "video_core/shader_cache.h" +#include "video_core/shader_environment.h" + +namespace VideoCommon { + +void ShaderCache::InvalidateRegion(VAddr addr, size_t size) { + std::scoped_lock lock{invalidation_mutex}; + InvalidatePagesInRegion(addr, size); + RemovePendingShaders(); +} + +void ShaderCache::OnCPUWrite(VAddr addr, size_t size) { + std::lock_guard lock{invalidation_mutex}; + InvalidatePagesInRegion(addr, size); +} + +void ShaderCache::SyncGuestHost() { + std::scoped_lock lock{invalidation_mutex}; + RemovePendingShaders(); +} + +ShaderCache::ShaderCache(VideoCore::RasterizerInterface& rasterizer_, + Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_) + : gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, + rasterizer{rasterizer_} {} + +bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) { + auto& dirty{maxwell3d.dirty.flags}; + if (!dirty[VideoCommon::Dirty::Shaders]) { + return last_shaders_valid; + } + dirty[VideoCommon::Dirty::Shaders] = false; + + const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; + for (size_t index = 0; index < Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; ++index) { + if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { + unique_hashes[index] = 0; + continue; + } + const auto& shader_config{maxwell3d.regs.shader_config[index]}; + const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderProgram>(index)}; + const GPUVAddr shader_addr{base_addr + shader_config.offset}; + const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; + if (!cpu_shader_addr) { + LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr); + last_shaders_valid = false; + return false; + } + const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)}; + if (!shader_info) { + const u32 start_address{shader_config.offset}; + GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address}; + shader_info = MakeShaderInfo(env, *cpu_shader_addr); + } + shader_infos[index] = shader_info; + unique_hashes[index] = shader_info->unique_hash; + } + last_shaders_valid = true; + return true; +} + +const ShaderInfo* ShaderCache::ComputeShader() { + const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; + const auto& qmd{kepler_compute.launch_description}; + const GPUVAddr shader_addr{program_base + 
qmd.program_start}; + const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; + if (!cpu_shader_addr) { + LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr); + return nullptr; + } + if (const ShaderInfo* const shader = TryGet(*cpu_shader_addr)) { + return shader; + } + ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; + return MakeShaderInfo(env, *cpu_shader_addr); +} + +void ShaderCache::GetGraphicsEnvironments(GraphicsEnvironments& result, + const std::array<u64, NUM_PROGRAMS>& unique_hashes) { + size_t env_index{}; + const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; + for (size_t index = 0; index < NUM_PROGRAMS; ++index) { + if (unique_hashes[index] == 0) { + continue; + } + const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderProgram>(index)}; + auto& env{result.envs[index]}; + const u32 start_address{maxwell3d.regs.shader_config[index].offset}; + env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; + env.SetCachedSize(shader_infos[index]->size_bytes); + result.env_ptrs[env_index++] = &env; + } +} + +ShaderInfo* ShaderCache::TryGet(VAddr addr) const { + std::scoped_lock lock{lookup_mutex}; + + const auto it = lookup_cache.find(addr); + if (it == lookup_cache.end()) { + return nullptr; + } + return it->second->data; +} + +void ShaderCache::Register(std::unique_ptr<ShaderInfo> data, VAddr addr, size_t size) { + std::scoped_lock lock{invalidation_mutex, lookup_mutex}; + + const VAddr addr_end = addr + size; + Entry* const entry = NewEntry(addr, addr_end, data.get()); + + const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS; + for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) { + invalidation_cache[page].push_back(entry); + } + + storage.push_back(std::move(data)); + + rasterizer.UpdatePagesCachedCount(addr, size, 1); +} + +void ShaderCache::InvalidatePagesInRegion(VAddr addr, size_t size) { + const VAddr addr_end = addr + size; + const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS; + for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) { + auto it = invalidation_cache.find(page); + if (it == invalidation_cache.end()) { + continue; + } + InvalidatePageEntries(it->second, addr, addr_end); + } +} + +void ShaderCache::RemovePendingShaders() { + if (marked_for_removal.empty()) { + return; + } + // Remove duplicates + std::ranges::sort(marked_for_removal); + marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()), + marked_for_removal.end()); + + std::vector<ShaderInfo*> removed_shaders; + removed_shaders.reserve(marked_for_removal.size()); + + std::scoped_lock lock{lookup_mutex}; + + for (Entry* const entry : marked_for_removal) { + removed_shaders.push_back(entry->data); + + const auto it = lookup_cache.find(entry->addr_start); + ASSERT(it != lookup_cache.end()); + lookup_cache.erase(it); + } + marked_for_removal.clear(); + + if (!removed_shaders.empty()) { + RemoveShadersFromStorage(std::move(removed_shaders)); + } +} + +void ShaderCache::InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end) { + size_t index = 0; + while (index < entries.size()) { + Entry* const entry = entries[index]; + if (!entry->Overlaps(addr, addr_end)) { + ++index; + continue; + } + + UnmarkMemory(entry); + RemoveEntryFromInvalidationCache(entry); + marked_for_removal.push_back(entry); + } +} + +void ShaderCache::RemoveEntryFromInvalidationCache(const Entry* entry) { + 
const u64 page_end = (entry->addr_end + PAGE_SIZE - 1) >> PAGE_BITS; + for (u64 page = entry->addr_start >> PAGE_BITS; page < page_end; ++page) { + const auto entries_it = invalidation_cache.find(page); + ASSERT(entries_it != invalidation_cache.end()); + std::vector<Entry*>& entries = entries_it->second; + + const auto entry_it = std::ranges::find(entries, entry); + ASSERT(entry_it != entries.end()); + entries.erase(entry_it); + } +} + +void ShaderCache::UnmarkMemory(Entry* entry) { + if (!entry->is_memory_marked) { + return; + } + entry->is_memory_marked = false; + + const VAddr addr = entry->addr_start; + const size_t size = entry->addr_end - addr; + rasterizer.UpdatePagesCachedCount(addr, size, -1); +} + +void ShaderCache::RemoveShadersFromStorage(std::vector<ShaderInfo*> removed_shaders) { + // Remove them from the cache + std::erase_if(storage, [&removed_shaders](const std::unique_ptr<ShaderInfo>& shader) { + return std::ranges::find(removed_shaders, shader.get()) != removed_shaders.end(); + }); +} + +ShaderCache::Entry* ShaderCache::NewEntry(VAddr addr, VAddr addr_end, ShaderInfo* data) { + auto entry = std::make_unique<Entry>(Entry{addr, addr_end, data}); + Entry* const entry_pointer = entry.get(); + + lookup_cache.emplace(addr, std::move(entry)); + return entry_pointer; +} + +const ShaderInfo* ShaderCache::MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr) { + auto info = std::make_unique<ShaderInfo>(); + if (const std::optional<u64> cached_hash{env.Analyze()}) { + info->unique_hash = *cached_hash; + info->size_bytes = env.CachedSize(); + } else { + // Slow path, not really hit on commercial games + // Build a control flow graph to get the real shader size + Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block; + Shader::Maxwell::Flow::CFG cfg{env, flow_block, env.StartAddress()}; + info->unique_hash = env.CalculateHash(); + info->size_bytes = env.ReadSize(); + } + const size_t size_bytes{info->size_bytes}; + const ShaderInfo* const result{info.get()}; + Register(std::move(info), cpu_addr, size_bytes); + return result; +} + +} // namespace VideoCommon diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h index 015a789d6..136fe294c 100644 --- a/src/video_core/shader_cache.h +++ b/src/video_core/shader_cache.h @@ -5,226 +5,147 @@ #pragma once #include <algorithm> +#include <array> #include <memory> #include <mutex> +#include <span> #include <unordered_map> #include <utility> #include <vector> -#include "common/assert.h" #include "common/common_types.h" #include "video_core/rasterizer_interface.h" +#include "video_core/shader_environment.h" + +namespace Tegra { +class MemoryManager; +} namespace VideoCommon { -template <class T> +class GenericEnvironment; + +struct ShaderInfo { + u64 unique_hash{}; + size_t size_bytes{}; +}; + class ShaderCache { static constexpr u64 PAGE_BITS = 14; static constexpr u64 PAGE_SIZE = u64(1) << PAGE_BITS; + static constexpr size_t NUM_PROGRAMS = 6; + struct Entry { VAddr addr_start; VAddr addr_end; - T* data; + ShaderInfo* data; bool is_memory_marked = true; - constexpr bool Overlaps(VAddr start, VAddr end) const noexcept { + bool Overlaps(VAddr start, VAddr end) const noexcept { return start < addr_end && addr_start < end; } }; public: - virtual ~ShaderCache() = default; - /// @brief Removes shaders inside a given region /// @note Checks for ranges /// @param addr Start address of the invalidation /// @param size Number of bytes of the invalidation - void InvalidateRegion(VAddr addr, std::size_t size) { - 
std::scoped_lock lock{invalidation_mutex}; - InvalidatePagesInRegion(addr, size); - RemovePendingShaders(); - } + void InvalidateRegion(VAddr addr, size_t size); /// @brief Unmarks a memory region as cached and marks it for removal /// @param addr Start address of the CPU write operation /// @param size Number of bytes of the CPU write operation - void OnCPUWrite(VAddr addr, std::size_t size) { - std::lock_guard lock{invalidation_mutex}; - InvalidatePagesInRegion(addr, size); - } + void OnCPUWrite(VAddr addr, size_t size); /// @brief Flushes delayed removal operations - void SyncGuestHost() { - std::scoped_lock lock{invalidation_mutex}; - RemovePendingShaders(); - } + void SyncGuestHost(); - /// @brief Tries to obtain a cached shader starting in a given address - /// @note Doesn't check for ranges, the given address has to be the start of the shader - /// @param addr Start address of the shader, this doesn't cache for region - /// @return Pointer to a valid shader, nullptr when nothing is found - T* TryGet(VAddr addr) const { - std::scoped_lock lock{lookup_mutex}; +protected: + struct GraphicsEnvironments { + std::array<GraphicsEnvironment, NUM_PROGRAMS> envs; + std::array<Shader::Environment*, NUM_PROGRAMS> env_ptrs; - const auto it = lookup_cache.find(addr); - if (it == lookup_cache.end()) { - return nullptr; + std::span<Shader::Environment* const> Span() const noexcept { + return std::span(env_ptrs.begin(), std::ranges::find(env_ptrs, nullptr)); } - return it->second->data; - } - -protected: - explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_) : rasterizer{rasterizer_} {} + }; - /// @brief Register in the cache a given entry - /// @param data Shader to store in the cache - /// @param addr Start address of the shader that will be registered - /// @param size Size in bytes of the shader - void Register(std::unique_ptr<T> data, VAddr addr, std::size_t size) { - std::scoped_lock lock{invalidation_mutex, lookup_mutex}; + explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_, + Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_); - const VAddr addr_end = addr + size; - Entry* const entry = NewEntry(addr, addr_end, data.get()); + /// @brief Update the hashes and information of shader stages + /// @param unique_hashes Shader hashes to store into when a stage is enabled + /// @return True no success, false on error + bool RefreshStages(std::array<u64, NUM_PROGRAMS>& unique_hashes); - const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS; - for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) { - invalidation_cache[page].push_back(entry); - } + /// @brief Returns information about the current compute shader + /// @return Pointer to a valid shader, nullptr on error + const ShaderInfo* ComputeShader(); - storage.push_back(std::move(data)); + /// @brief Collect the current graphics environments + void GetGraphicsEnvironments(GraphicsEnvironments& result, + const std::array<u64, NUM_PROGRAMS>& unique_hashes); - rasterizer.UpdatePagesCachedCount(addr, size, 1); - } + Tegra::MemoryManager& gpu_memory; + Tegra::Engines::Maxwell3D& maxwell3d; + Tegra::Engines::KeplerCompute& kepler_compute; - /// @brief Called when a shader is going to be removed - /// @param shader Shader that will be removed - /// @pre invalidation_cache is locked - /// @pre lookup_mutex is locked - virtual void OnShaderRemoval([[maybe_unused]] T* shader) {} + std::array<const ShaderInfo*, NUM_PROGRAMS> shader_infos{}; + bool 
last_shaders_valid = false; private: + /// @brief Tries to obtain a cached shader starting in a given address + /// @note Doesn't check for ranges, the given address has to be the start of the shader + /// @param addr Start address of the shader, this doesn't cache for region + /// @return Pointer to a valid shader, nullptr when nothing is found + ShaderInfo* TryGet(VAddr addr) const; + + /// @brief Register in the cache a given entry + /// @param data Shader to store in the cache + /// @param addr Start address of the shader that will be registered + /// @param size Size in bytes of the shader + void Register(std::unique_ptr<ShaderInfo> data, VAddr addr, size_t size); + /// @brief Invalidate pages in a given region /// @pre invalidation_mutex is locked - void InvalidatePagesInRegion(VAddr addr, std::size_t size) { - const VAddr addr_end = addr + size; - const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS; - for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) { - auto it = invalidation_cache.find(page); - if (it == invalidation_cache.end()) { - continue; - } - InvalidatePageEntries(it->second, addr, addr_end); - } - } + void InvalidatePagesInRegion(VAddr addr, size_t size); /// @brief Remove shaders marked for deletion /// @pre invalidation_mutex is locked - void RemovePendingShaders() { - if (marked_for_removal.empty()) { - return; - } - // Remove duplicates - std::sort(marked_for_removal.begin(), marked_for_removal.end()); - marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()), - marked_for_removal.end()); - - std::vector<T*> removed_shaders; - removed_shaders.reserve(marked_for_removal.size()); - - std::scoped_lock lock{lookup_mutex}; - - for (Entry* const entry : marked_for_removal) { - removed_shaders.push_back(entry->data); - - const auto it = lookup_cache.find(entry->addr_start); - ASSERT(it != lookup_cache.end()); - lookup_cache.erase(it); - } - marked_for_removal.clear(); - - if (!removed_shaders.empty()) { - RemoveShadersFromStorage(std::move(removed_shaders)); - } - } + void RemovePendingShaders(); /// @brief Invalidates entries in a given range for the passed page /// @param entries Vector of entries in the page, it will be modified on overlaps /// @param addr Start address of the invalidation /// @param addr_end Non-inclusive end address of the invalidation /// @pre invalidation_mutex is locked - void InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end) { - std::size_t index = 0; - while (index < entries.size()) { - Entry* const entry = entries[index]; - if (!entry->Overlaps(addr, addr_end)) { - ++index; - continue; - } - - UnmarkMemory(entry); - RemoveEntryFromInvalidationCache(entry); - marked_for_removal.push_back(entry); - } - } + void InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end); /// @brief Removes all references to an entry in the invalidation cache /// @param entry Entry to remove from the invalidation cache /// @pre invalidation_mutex is locked - void RemoveEntryFromInvalidationCache(const Entry* entry) { - const u64 page_end = (entry->addr_end + PAGE_SIZE - 1) >> PAGE_BITS; - for (u64 page = entry->addr_start >> PAGE_BITS; page < page_end; ++page) { - const auto entries_it = invalidation_cache.find(page); - ASSERT(entries_it != invalidation_cache.end()); - std::vector<Entry*>& entries = entries_it->second; - - const auto entry_it = std::find(entries.begin(), entries.end(), entry); - ASSERT(entry_it != entries.end()); - entries.erase(entry_it); - } - } + 
void RemoveEntryFromInvalidationCache(const Entry* entry); /// @brief Unmarks an entry from the rasterizer cache /// @param entry Entry to unmark from memory - void UnmarkMemory(Entry* entry) { - if (!entry->is_memory_marked) { - return; - } - entry->is_memory_marked = false; - - const VAddr addr = entry->addr_start; - const std::size_t size = entry->addr_end - addr; - rasterizer.UpdatePagesCachedCount(addr, size, -1); - } + void UnmarkMemory(Entry* entry); /// @brief Removes a vector of shaders from a list /// @param removed_shaders Shaders to be removed from the storage /// @pre invalidation_mutex is locked /// @pre lookup_mutex is locked - void RemoveShadersFromStorage(std::vector<T*> removed_shaders) { - // Notify removals - for (T* const shader : removed_shaders) { - OnShaderRemoval(shader); - } - - // Remove them from the cache - const auto is_removed = [&removed_shaders](const std::unique_ptr<T>& shader) { - return std::find(removed_shaders.begin(), removed_shaders.end(), shader.get()) != - removed_shaders.end(); - }; - std::erase_if(storage, is_removed); - } + void RemoveShadersFromStorage(std::vector<ShaderInfo*> removed_shaders); /// @brief Creates a new entry in the lookup cache and returns its pointer /// @pre lookup_mutex is locked - Entry* NewEntry(VAddr addr, VAddr addr_end, T* data) { - auto entry = std::make_unique<Entry>(Entry{addr, addr_end, data}); - Entry* const entry_pointer = entry.get(); + Entry* NewEntry(VAddr addr, VAddr addr_end, ShaderInfo* data); - lookup_cache.emplace(addr, std::move(entry)); - return entry_pointer; - } + /// @brief Create a new shader entry and register it + const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr); VideoCore::RasterizerInterface& rasterizer; @@ -233,7 +154,7 @@ private: std::unordered_map<u64, std::unique_ptr<Entry>> lookup_cache; std::unordered_map<u64, std::vector<Entry*>> invalidation_cache; - std::vector<std::unique_ptr<T>> storage; + std::vector<std::unique_ptr<ShaderInfo>> storage; std::vector<Entry*> marked_for_removal; }; diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp new file mode 100644 index 000000000..8a4581c19 --- /dev/null +++ b/src/video_core/shader_environment.cpp @@ -0,0 +1,460 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <filesystem> +#include <fstream> +#include <memory> +#include <optional> +#include <utility> + +#include "common/assert.h" +#include "common/cityhash.h" +#include "common/common_types.h" +#include "common/div_ceil.h" +#include "common/fs/fs.h" +#include "common/logging/log.h" +#include "shader_recompiler/environment.h" +#include "video_core/memory_manager.h" +#include "video_core/shader_environment.h" +#include "video_core/textures/texture.h" + +namespace VideoCommon { + +constexpr std::array<char, 8> MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'}; + +constexpr size_t INST_SIZE = sizeof(u64); + +using Maxwell = Tegra::Engines::Maxwell3D::Regs; + +static u64 MakeCbufKey(u32 index, u32 offset) { + return (static_cast<u64>(index) << 32) | offset; +} + +static Shader::TextureType ConvertType(const Tegra::Texture::TICEntry& entry) { + switch (entry.texture_type) { + case Tegra::Texture::TextureType::Texture1D: + return Shader::TextureType::Color1D; + case Tegra::Texture::TextureType::Texture2D: + case Tegra::Texture::TextureType::Texture2DNoMipmap: + return Shader::TextureType::Color2D; + case Tegra::Texture::TextureType::Texture3D: + return Shader::TextureType::Color3D; + case Tegra::Texture::TextureType::TextureCubemap: + return Shader::TextureType::ColorCube; + case Tegra::Texture::TextureType::Texture1DArray: + return Shader::TextureType::ColorArray1D; + case Tegra::Texture::TextureType::Texture2DArray: + return Shader::TextureType::ColorArray2D; + case Tegra::Texture::TextureType::Texture1DBuffer: + return Shader::TextureType::Buffer; + case Tegra::Texture::TextureType::TextureCubeArray: + return Shader::TextureType::ColorArrayCube; + default: + throw Shader::NotImplementedException("Unknown texture type"); + } +} + +GenericEnvironment::GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, + u32 start_address_) + : gpu_memory{&gpu_memory_}, program_base{program_base_} { + start_address = start_address_; +} + +GenericEnvironment::~GenericEnvironment() = default; + +u32 GenericEnvironment::TextureBoundBuffer() const { + return texture_bound; +} + +u32 GenericEnvironment::LocalMemorySize() const { + return local_memory_size; +} + +u32 GenericEnvironment::SharedMemorySize() const { + return shared_memory_size; +} + +std::array<u32, 3> GenericEnvironment::WorkgroupSize() const { + return workgroup_size; +} + +u64 GenericEnvironment::ReadInstruction(u32 address) { + read_lowest = std::min(read_lowest, address); + read_highest = std::max(read_highest, address); + + if (address >= cached_lowest && address < cached_highest) { + return code[(address - cached_lowest) / INST_SIZE]; + } + has_unbound_instructions = true; + return gpu_memory->Read<u64>(program_base + address); +} + +std::optional<u64> GenericEnvironment::Analyze() { + const std::optional<u64> size{TryFindSize()}; + if (!size) { + return std::nullopt; + } + cached_lowest = start_address; + cached_highest = start_address + static_cast<u32>(*size); + return Common::CityHash64(reinterpret_cast<const char*>(code.data()), *size); +} + +void GenericEnvironment::SetCachedSize(size_t size_bytes) { + cached_lowest = start_address; + cached_highest = start_address + static_cast<u32>(size_bytes); + code.resize(CachedSize()); + gpu_memory->ReadBlock(program_base + cached_lowest, code.data(), code.size() * sizeof(u64)); +} + +size_t GenericEnvironment::CachedSize() const noexcept { + return cached_highest - cached_lowest + INST_SIZE; +} + +size_t GenericEnvironment::ReadSize() const noexcept { + return 
read_highest - read_lowest + INST_SIZE; +} + +bool GenericEnvironment::CanBeSerialized() const noexcept { + return !has_unbound_instructions; +} + +u64 GenericEnvironment::CalculateHash() const { + const size_t size{ReadSize()}; + const auto data{std::make_unique<char[]>(size)}; + gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size); + return Common::CityHash64(data.get(), size); +} + +void GenericEnvironment::Serialize(std::ofstream& file) const { + const u64 code_size{static_cast<u64>(CachedSize())}; + const u64 num_texture_types{static_cast<u64>(texture_types.size())}; + const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())}; + + file.write(reinterpret_cast<const char*>(&code_size), sizeof(code_size)) + .write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types)) + .write(reinterpret_cast<const char*>(&num_cbuf_values), sizeof(num_cbuf_values)) + .write(reinterpret_cast<const char*>(&local_memory_size), sizeof(local_memory_size)) + .write(reinterpret_cast<const char*>(&texture_bound), sizeof(texture_bound)) + .write(reinterpret_cast<const char*>(&start_address), sizeof(start_address)) + .write(reinterpret_cast<const char*>(&cached_lowest), sizeof(cached_lowest)) + .write(reinterpret_cast<const char*>(&cached_highest), sizeof(cached_highest)) + .write(reinterpret_cast<const char*>(&stage), sizeof(stage)) + .write(reinterpret_cast<const char*>(code.data()), code_size); + for (const auto [key, type] : texture_types) { + file.write(reinterpret_cast<const char*>(&key), sizeof(key)) + .write(reinterpret_cast<const char*>(&type), sizeof(type)); + } + for (const auto [key, type] : cbuf_values) { + file.write(reinterpret_cast<const char*>(&key), sizeof(key)) + .write(reinterpret_cast<const char*>(&type), sizeof(type)); + } + if (stage == Shader::Stage::Compute) { + file.write(reinterpret_cast<const char*>(&workgroup_size), sizeof(workgroup_size)) + .write(reinterpret_cast<const char*>(&shared_memory_size), sizeof(shared_memory_size)); + } else { + file.write(reinterpret_cast<const char*>(&sph), sizeof(sph)); + if (stage == Shader::Stage::Geometry) { + file.write(reinterpret_cast<const char*>(&gp_passthrough_mask), + sizeof(gp_passthrough_mask)); + } + } +} + +std::optional<u64> GenericEnvironment::TryFindSize() { + static constexpr size_t BLOCK_SIZE = 0x1000; + static constexpr size_t MAXIMUM_SIZE = 0x100000; + + static constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL; + static constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL; + + GPUVAddr guest_addr{program_base + start_address}; + size_t offset{0}; + size_t size{BLOCK_SIZE}; + while (size <= MAXIMUM_SIZE) { + code.resize(size / INST_SIZE); + u64* const data = code.data() + offset / INST_SIZE; + gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE); + for (size_t index = 0; index < BLOCK_SIZE; index += INST_SIZE) { + const u64 inst = data[index / INST_SIZE]; + if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) { + return offset + index; + } + } + guest_addr += BLOCK_SIZE; + size += BLOCK_SIZE; + offset += BLOCK_SIZE; + } + return std::nullopt; +} + +Shader::TextureType GenericEnvironment::ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, + bool via_header_index, u32 raw) { + const auto handle{Tegra::Texture::TexturePair(raw, via_header_index)}; + const GPUVAddr descriptor_addr{tic_addr + handle.first * sizeof(Tegra::Texture::TICEntry)}; + Tegra::Texture::TICEntry entry; + gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry)); + const Shader::TextureType result{ConvertType(entry)}; 
+ texture_types.emplace(raw, result); + return result; +} + +GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::MemoryManager& gpu_memory_, + Maxwell::ShaderProgram program, GPUVAddr program_base_, + u32 start_address_) + : GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} { + gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph)); + gp_passthrough_mask = maxwell3d->regs.gp_passthrough_mask; + switch (program) { + case Maxwell::ShaderProgram::VertexA: + stage = Shader::Stage::VertexA; + stage_index = 0; + break; + case Maxwell::ShaderProgram::VertexB: + stage = Shader::Stage::VertexB; + stage_index = 0; + break; + case Maxwell::ShaderProgram::TesselationControl: + stage = Shader::Stage::TessellationControl; + stage_index = 1; + break; + case Maxwell::ShaderProgram::TesselationEval: + stage = Shader::Stage::TessellationEval; + stage_index = 2; + break; + case Maxwell::ShaderProgram::Geometry: + stage = Shader::Stage::Geometry; + stage_index = 3; + break; + case Maxwell::ShaderProgram::Fragment: + stage = Shader::Stage::Fragment; + stage_index = 4; + break; + default: + UNREACHABLE_MSG("Invalid program={}", program); + break; + } + const u64 local_size{sph.LocalMemorySize()}; + ASSERT(local_size <= std::numeric_limits<u32>::max()); + local_memory_size = static_cast<u32>(local_size) + sph.common3.shader_local_memory_crs_size; + texture_bound = maxwell3d->regs.tex_cb_index; +} + +u32 GraphicsEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) { + const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]}; + ASSERT(cbuf.enabled); + u32 value{}; + if (cbuf_offset < cbuf.size) { + value = gpu_memory->Read<u32>(cbuf.address + cbuf_offset); + } + cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value); + return value; +} + +Shader::TextureType GraphicsEnvironment::ReadTextureType(u32 handle) { + const auto& regs{maxwell3d->regs}; + const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; + return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, via_header_index, handle); +} + +ComputeEnvironment::ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, + u32 start_address_) + : GenericEnvironment{gpu_memory_, program_base_, start_address_}, kepler_compute{ + &kepler_compute_} { + const auto& qmd{kepler_compute->launch_description}; + stage = Shader::Stage::Compute; + local_memory_size = qmd.local_pos_alloc + qmd.local_crs_alloc; + texture_bound = kepler_compute->regs.tex_cb_index; + shared_memory_size = qmd.shared_alloc; + workgroup_size = {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; +} + +u32 ComputeEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) { + const auto& qmd{kepler_compute->launch_description}; + ASSERT(((qmd.const_buffer_enable_mask.Value() >> cbuf_index) & 1) != 0); + const auto& cbuf{qmd.const_buffer_config[cbuf_index]}; + u32 value{}; + if (cbuf_offset < cbuf.size) { + value = gpu_memory->Read<u32>(cbuf.Address() + cbuf_offset); + } + cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value); + return value; +} + +Shader::TextureType ComputeEnvironment::ReadTextureType(u32 handle) { + const auto& regs{kepler_compute->regs}; + const auto& qmd{kepler_compute->launch_description}; + return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle); +} + +void 
FileEnvironment::Deserialize(std::ifstream& file) { + u64 code_size{}; + u64 num_texture_types{}; + u64 num_cbuf_values{}; + file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size)) + .read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types)) + .read(reinterpret_cast<char*>(&num_cbuf_values), sizeof(num_cbuf_values)) + .read(reinterpret_cast<char*>(&local_memory_size), sizeof(local_memory_size)) + .read(reinterpret_cast<char*>(&texture_bound), sizeof(texture_bound)) + .read(reinterpret_cast<char*>(&start_address), sizeof(start_address)) + .read(reinterpret_cast<char*>(&read_lowest), sizeof(read_lowest)) + .read(reinterpret_cast<char*>(&read_highest), sizeof(read_highest)) + .read(reinterpret_cast<char*>(&stage), sizeof(stage)); + code = std::make_unique<u64[]>(Common::DivCeil(code_size, sizeof(u64))); + file.read(reinterpret_cast<char*>(code.get()), code_size); + for (size_t i = 0; i < num_texture_types; ++i) { + u32 key; + Shader::TextureType type; + file.read(reinterpret_cast<char*>(&key), sizeof(key)) + .read(reinterpret_cast<char*>(&type), sizeof(type)); + texture_types.emplace(key, type); + } + for (size_t i = 0; i < num_cbuf_values; ++i) { + u64 key; + u32 value; + file.read(reinterpret_cast<char*>(&key), sizeof(key)) + .read(reinterpret_cast<char*>(&value), sizeof(value)); + cbuf_values.emplace(key, value); + } + if (stage == Shader::Stage::Compute) { + file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size)) + .read(reinterpret_cast<char*>(&shared_memory_size), sizeof(shared_memory_size)); + } else { + file.read(reinterpret_cast<char*>(&sph), sizeof(sph)); + if (stage == Shader::Stage::Geometry) { + file.read(reinterpret_cast<char*>(&gp_passthrough_mask), sizeof(gp_passthrough_mask)); + } + } +} + +u64 FileEnvironment::ReadInstruction(u32 address) { + if (address < read_lowest || address > read_highest) { + throw Shader::LogicError("Out of bounds address {}", address); + } + return code[(address - read_lowest) / sizeof(u64)]; +} + +u32 FileEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) { + const auto it{cbuf_values.find(MakeCbufKey(cbuf_index, cbuf_offset))}; + if (it == cbuf_values.end()) { + throw Shader::LogicError("Uncached read texture type"); + } + return it->second; +} + +Shader::TextureType FileEnvironment::ReadTextureType(u32 handle) { + const auto it{texture_types.find(handle)}; + if (it == texture_types.end()) { + throw Shader::LogicError("Uncached read texture type"); + } + return it->second; +} + +u32 FileEnvironment::LocalMemorySize() const { + return local_memory_size; +} + +u32 FileEnvironment::SharedMemorySize() const { + return shared_memory_size; +} + +u32 FileEnvironment::TextureBoundBuffer() const { + return texture_bound; +} + +std::array<u32, 3> FileEnvironment::WorkgroupSize() const { + return workgroup_size; +} + +void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs, + const std::filesystem::path& filename, u32 cache_version) try { + std::ofstream file(filename, std::ios::binary | std::ios::ate | std::ios::app); + file.exceptions(std::ifstream::failbit); + if (!file.is_open()) { + LOG_ERROR(Common_Filesystem, "Failed to open pipeline cache file {}", + Common::FS::PathToUTF8String(filename)); + return; + } + if (file.tellp() == 0) { + // Write header + file.write(MAGIC_NUMBER.data(), MAGIC_NUMBER.size()) + .write(reinterpret_cast<const char*>(&cache_version), sizeof(cache_version)); + } + if (!std::ranges::all_of(envs, 
&GenericEnvironment::CanBeSerialized)) { + return; + } + const u32 num_envs{static_cast<u32>(envs.size())}; + file.write(reinterpret_cast<const char*>(&num_envs), sizeof(num_envs)); + for (const GenericEnvironment* const env : envs) { + env->Serialize(file); + } + file.write(key.data(), key.size_bytes()); + +} catch (const std::ios_base::failure& e) { + LOG_ERROR(Common_Filesystem, "{}", e.what()); + if (!Common::FS::RemoveFile(filename)) { + LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}", + Common::FS::PathToUTF8String(filename)); + } +} + +void LoadPipelines( + std::stop_token stop_loading, const std::filesystem::path& filename, u32 expected_cache_version, + Common::UniqueFunction<void, std::ifstream&, FileEnvironment> load_compute, + Common::UniqueFunction<void, std::ifstream&, std::vector<FileEnvironment>> load_graphics) try { + std::ifstream file(filename, std::ios::binary | std::ios::ate); + if (!file.is_open()) { + return; + } + file.exceptions(std::ifstream::failbit); + const auto end{file.tellg()}; + file.seekg(0, std::ios::beg); + + std::array<char, 8> magic_number; + u32 cache_version; + file.read(magic_number.data(), magic_number.size()) + .read(reinterpret_cast<char*>(&cache_version), sizeof(cache_version)); + if (magic_number != MAGIC_NUMBER || cache_version != expected_cache_version) { + file.close(); + if (Common::FS::RemoveFile(filename)) { + if (magic_number != MAGIC_NUMBER) { + LOG_ERROR(Common_Filesystem, "Invalid pipeline cache file"); + } + if (cache_version != expected_cache_version) { + LOG_INFO(Common_Filesystem, "Deleting old pipeline cache"); + } + } else { + LOG_ERROR(Common_Filesystem, + "Invalid pipeline cache file and failed to delete it in \"{}\"", + Common::FS::PathToUTF8String(filename)); + } + return; + } + while (file.tellg() != end) { + if (stop_loading.stop_requested()) { + return; + } + u32 num_envs{}; + file.read(reinterpret_cast<char*>(&num_envs), sizeof(num_envs)); + std::vector<FileEnvironment> envs(num_envs); + for (FileEnvironment& env : envs) { + env.Deserialize(file); + } + if (envs.front().ShaderStage() == Shader::Stage::Compute) { + load_compute(file, std::move(envs.front())); + } else { + load_graphics(file, std::move(envs)); + } + } + +} catch (const std::ios_base::failure& e) { + LOG_ERROR(Common_Filesystem, "{}", e.what()); + if (!Common::FS::RemoveFile(filename)) { + LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}", + Common::FS::PathToUTF8String(filename)); + } +} + +} // namespace VideoCommon diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h new file mode 100644 index 000000000..2079979db --- /dev/null +++ b/src/video_core/shader_environment.h @@ -0,0 +1,183 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <array> +#include <atomic> +#include <filesystem> +#include <iosfwd> +#include <limits> +#include <memory> +#include <optional> +#include <span> +#include <type_traits> +#include <unordered_map> +#include <vector> + +#include "common/common_types.h" +#include "common/unique_function.h" +#include "shader_recompiler/environment.h" +#include "video_core/engines/kepler_compute.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/textures/texture.h" + +namespace Tegra { +class Memorymanager; +} + +namespace VideoCommon { + +class GenericEnvironment : public Shader::Environment { +public: + explicit GenericEnvironment() = default; + explicit GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, + u32 start_address_); + + ~GenericEnvironment() override; + + [[nodiscard]] u32 TextureBoundBuffer() const final; + + [[nodiscard]] u32 LocalMemorySize() const final; + + [[nodiscard]] u32 SharedMemorySize() const final; + + [[nodiscard]] std::array<u32, 3> WorkgroupSize() const final; + + [[nodiscard]] u64 ReadInstruction(u32 address) final; + + [[nodiscard]] std::optional<u64> Analyze(); + + void SetCachedSize(size_t size_bytes); + + [[nodiscard]] size_t CachedSize() const noexcept; + + [[nodiscard]] size_t ReadSize() const noexcept; + + [[nodiscard]] bool CanBeSerialized() const noexcept; + + [[nodiscard]] u64 CalculateHash() const; + + void Serialize(std::ofstream& file) const; + +protected: + std::optional<u64> TryFindSize(); + + Shader::TextureType ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index, + u32 raw); + + Tegra::MemoryManager* gpu_memory{}; + GPUVAddr program_base{}; + + std::vector<u64> code; + std::unordered_map<u32, Shader::TextureType> texture_types; + std::unordered_map<u64, u32> cbuf_values; + + u32 local_memory_size{}; + u32 texture_bound{}; + u32 shared_memory_size{}; + std::array<u32, 3> workgroup_size{}; + + u32 read_lowest = std::numeric_limits<u32>::max(); + u32 read_highest = 0; + + u32 cached_lowest = std::numeric_limits<u32>::max(); + u32 cached_highest = 0; + + bool has_unbound_instructions = false; +}; + +class GraphicsEnvironment final : public GenericEnvironment { +public: + explicit GraphicsEnvironment() = default; + explicit GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::Maxwell3D::Regs::ShaderProgram program, + GPUVAddr program_base_, u32 start_address_); + + ~GraphicsEnvironment() override = default; + + u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override; + + Shader::TextureType ReadTextureType(u32 handle) override; + +private: + Tegra::Engines::Maxwell3D* maxwell3d{}; + size_t stage_index{}; +}; + +class ComputeEnvironment final : public GenericEnvironment { +public: + explicit ComputeEnvironment() = default; + explicit ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, + u32 start_address_); + + ~ComputeEnvironment() override = default; + + u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override; + + Shader::TextureType ReadTextureType(u32 handle) override; + +private: + Tegra::Engines::KeplerCompute* kepler_compute{}; +}; + +class FileEnvironment final : public Shader::Environment { +public: + FileEnvironment() = default; + ~FileEnvironment() override = default; + + FileEnvironment& operator=(FileEnvironment&&) noexcept = default; + FileEnvironment(FileEnvironment&&) noexcept = default; + + FileEnvironment& 
operator=(const FileEnvironment&) = delete; + FileEnvironment(const FileEnvironment&) = delete; + + void Deserialize(std::ifstream& file); + + [[nodiscard]] u64 ReadInstruction(u32 address) override; + + [[nodiscard]] u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override; + + [[nodiscard]] Shader::TextureType ReadTextureType(u32 handle) override; + + [[nodiscard]] u32 LocalMemorySize() const override; + + [[nodiscard]] u32 SharedMemorySize() const override; + + [[nodiscard]] u32 TextureBoundBuffer() const override; + + [[nodiscard]] std::array<u32, 3> WorkgroupSize() const override; + +private: + std::unique_ptr<u64[]> code; + std::unordered_map<u32, Shader::TextureType> texture_types; + std::unordered_map<u64, u32> cbuf_values; + std::array<u32, 3> workgroup_size{}; + u32 local_memory_size{}; + u32 shared_memory_size{}; + u32 texture_bound{}; + u32 read_lowest{}; + u32 read_highest{}; +}; + +void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs, + const std::filesystem::path& filename, u32 cache_version); + +template <typename Key, typename Envs> +void SerializePipeline(const Key& key, const Envs& envs, const std::filesystem::path& filename, + u32 cache_version) { + static_assert(std::is_trivially_copyable_v<Key>); + static_assert(std::has_unique_object_representations_v<Key>); + SerializePipeline(std::span(reinterpret_cast<const char*>(&key), sizeof(key)), + std::span(envs.data(), envs.size()), filename, cache_version); +} + +void LoadPipelines( + std::stop_token stop_loading, const std::filesystem::path& filename, u32 expected_cache_version, + Common::UniqueFunction<void, std::ifstream&, FileEnvironment> load_compute, + Common::UniqueFunction<void, std::ifstream&, std::vector<FileEnvironment>> load_graphics); + +} // namespace VideoCommon diff --git a/src/video_core/shader_notify.cpp b/src/video_core/shader_notify.cpp index 693e47158..dc6995b46 100644 --- a/src/video_core/shader_notify.cpp +++ b/src/video_core/shader_notify.cpp @@ -2,42 +2,35 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
-#include <mutex> +#include <atomic> +#include <chrono> +#include <optional> + #include "video_core/shader_notify.h" using namespace std::chrono_literals; namespace VideoCore { -namespace { -constexpr auto UPDATE_TICK = 32ms; -} - -ShaderNotify::ShaderNotify() = default; -ShaderNotify::~ShaderNotify() = default; -std::size_t ShaderNotify::GetShadersBuilding() { - const auto now = std::chrono::high_resolution_clock::now(); - const auto diff = now - last_update; - if (diff > UPDATE_TICK) { - std::shared_lock lock(mutex); - last_updated_count = accurate_count; +const auto TIME_TO_STOP_REPORTING = 2s; + +int ShaderNotify::ShadersBuilding() noexcept { + const int now_complete = num_complete.load(std::memory_order::relaxed); + const int now_building = num_building.load(std::memory_order::relaxed); + if (now_complete == now_building) { + const auto now = std::chrono::high_resolution_clock::now(); + if (completed && num_complete == num_when_completed) { + if (now - complete_time > TIME_TO_STOP_REPORTING) { + report_base = now_complete; + completed = false; + } + } else { + completed = true; + num_when_completed = num_complete; + complete_time = now; + } } - return last_updated_count; -} - -std::size_t ShaderNotify::GetShadersBuildingAccurate() { - std::shared_lock lock{mutex}; - return accurate_count; -} - -void ShaderNotify::MarkShaderComplete() { - std::unique_lock lock{mutex}; - accurate_count--; -} - -void ShaderNotify::MarkSharderBuilding() { - std::unique_lock lock{mutex}; - accurate_count++; + return now_building - report_base; } } // namespace VideoCore diff --git a/src/video_core/shader_notify.h b/src/video_core/shader_notify.h index a9c92d179..ad363bfb5 100644 --- a/src/video_core/shader_notify.h +++ b/src/video_core/shader_notify.h @@ -4,26 +4,30 @@ #pragma once +#include <atomic> #include <chrono> -#include <shared_mutex> -#include "common/common_types.h" +#include <optional> namespace VideoCore { class ShaderNotify { public: - ShaderNotify(); - ~ShaderNotify(); + [[nodiscard]] int ShadersBuilding() noexcept; - std::size_t GetShadersBuilding(); - std::size_t GetShadersBuildingAccurate(); + void MarkShaderComplete() noexcept { + ++num_complete; + } - void MarkShaderComplete(); - void MarkSharderBuilding(); + void MarkShaderBuilding() noexcept { + ++num_building; + } private: - std::size_t last_updated_count{}; - std::size_t accurate_count{}; - std::shared_mutex mutex; - std::chrono::high_resolution_clock::time_point last_update{}; + std::atomic_int num_building{}; + std::atomic_int num_complete{}; + int report_base{}; + + bool completed{}; + int num_when_completed{}; + std::chrono::high_resolution_clock::time_point complete_time; }; } // namespace VideoCore diff --git a/src/video_core/texture_cache/formatter.cpp b/src/video_core/texture_cache/formatter.cpp index d10ba4ccd..249cc4d0f 100644 --- a/src/video_core/texture_cache/formatter.cpp +++ b/src/video_core/texture_cache/formatter.cpp @@ -43,7 +43,7 @@ std::string Name(const ImageBase& image) { return "Invalid"; } -std::string Name(const ImageViewBase& image_view, std::optional<ImageViewType> type) { +std::string Name(const ImageViewBase& image_view) { const u32 width = image_view.size.width; const u32 height = image_view.size.height; const u32 depth = image_view.size.depth; @@ -51,7 +51,7 @@ std::string Name(const ImageViewBase& image_view, std::optional<ImageViewType> t const u32 num_layers = image_view.range.extent.layers; const std::string level = num_levels > 1 ? 
fmt::format(":{}", num_levels) : ""; - switch (type.value_or(image_view.type)) { + switch (image_view.type) { case ImageViewType::e1D: return fmt::format("ImageView 1D {}{}", width, level); case ImageViewType::e2D: diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h index a48413983..c6cf0583f 100644 --- a/src/video_core/texture_cache/formatter.h +++ b/src/video_core/texture_cache/formatter.h @@ -255,8 +255,7 @@ struct RenderTargets; [[nodiscard]] std::string Name(const ImageBase& image); -[[nodiscard]] std::string Name(const ImageViewBase& image_view, - std::optional<ImageViewType> type = std::nullopt); +[[nodiscard]] std::string Name(const ImageViewBase& image_view); [[nodiscard]] std::string Name(const RenderTargets& render_targets); diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp index e8d632f9e..450becbeb 100644 --- a/src/video_core/texture_cache/image_view_base.cpp +++ b/src/video_core/texture_cache/image_view_base.cpp @@ -36,6 +36,15 @@ ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_i } } +ImageViewBase::ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info) + : format{info.format}, type{ImageViewType::Buffer}, size{ + .width = info.size.width, + .height = 1, + .depth = 1, + } { + ASSERT_MSG(view_info.type == ImageViewType::Buffer, "Expected texture buffer"); +} + ImageViewBase::ImageViewBase(const NullImageParams&) {} } // namespace VideoCommon diff --git a/src/video_core/texture_cache/image_view_base.h b/src/video_core/texture_cache/image_view_base.h index 73954167e..903f715c5 100644 --- a/src/video_core/texture_cache/image_view_base.h +++ b/src/video_core/texture_cache/image_view_base.h @@ -27,6 +27,7 @@ DECLARE_ENUM_FLAG_OPERATORS(ImageViewFlagBits) struct ImageViewBase { explicit ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info, ImageId image_id); + explicit ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info); explicit ImageViewBase(const NullImageParams&); [[nodiscard]] bool IsBuffer() const noexcept { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 85ce06d56..f34c9d9ca 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -117,6 +117,9 @@ public: /// Return a reference to the given image view id [[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept; + /// Mark an image as modified from the GPU + void MarkModification(ImageId id) noexcept; + /// Fill image_view_ids with the graphics images in indices void FillGraphicsImageViews(std::span<const u32> indices, std::span<ImageViewId> image_view_ids); @@ -527,6 +530,11 @@ typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) noexcept { } template <class P> +void TextureCache<P>::MarkModification(ImageId id) noexcept { + MarkModification(slot_images[id]); +} + +template <class P> void TextureCache<P>::FillGraphicsImageViews(std::span<const u32> indices, std::span<ImageViewId> image_view_ids) { FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids); @@ -540,13 +548,13 @@ void TextureCache<P>::FillComputeImageViews(std::span<const u32> indices, template <class P> typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) { - [[unlikely]] if (index > graphics_sampler_table.Limit()) { - LOG_ERROR(HW_GPU, "Invalid sampler index={}", index); + if (index > 
graphics_sampler_table.Limit()) { + LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); return &slot_samplers[NULL_SAMPLER_ID]; } const auto [descriptor, is_new] = graphics_sampler_table.Read(index); SamplerId& id = graphics_sampler_ids[index]; - [[unlikely]] if (is_new) { + if (is_new) { id = FindSampler(descriptor); } return &slot_samplers[id]; @@ -554,13 +562,13 @@ typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) { template <class P> typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) { - [[unlikely]] if (index > compute_sampler_table.Limit()) { - LOG_ERROR(HW_GPU, "Invalid sampler index={}", index); + if (index > compute_sampler_table.Limit()) { + LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); return &slot_samplers[NULL_SAMPLER_ID]; } const auto [descriptor, is_new] = compute_sampler_table.Read(index); SamplerId& id = compute_sampler_ids[index]; - [[unlikely]] if (is_new) { + if (is_new) { id = FindSampler(descriptor); } return &slot_samplers[id]; @@ -661,7 +669,7 @@ ImageViewId TextureCache<P>::VisitImageView(DescriptorTable<TICEntry>& table, std::span<ImageViewId> cached_image_view_ids, u32 index) { if (index > table.Limit()) { - LOG_ERROR(HW_GPU, "Invalid image view index={}", index); + LOG_DEBUG(HW_GPU, "Invalid image view index={}", index); return NULL_IMAGE_VIEW_ID; } const auto [descriptor, is_new] = table.Read(index); @@ -968,9 +976,6 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging) auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); ConvertImage(unswizzled_data, image.info, mapped_span, copies); image.UploadMemory(staging, copies); - } else if (image.info.type == ImageType::Buffer) { - const std::array copies{UploadBufferCopy(gpu_memory, gpu_addr, image, mapped_span)}; - image.UploadMemory(staging, copies); } else { const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); image.UploadMemory(staging, copies); @@ -993,7 +998,12 @@ ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) { template <class P> ImageViewId TextureCache<P>::CreateImageView(const TICEntry& config) { const ImageInfo info(config); - const GPUVAddr image_gpu_addr = config.Address() - config.BaseLayer() * info.layer_stride; + if (info.type == ImageType::Buffer) { + const ImageViewInfo view_info(config, 0); + return slot_image_views.insert(runtime, info, view_info, config.Address()); + } + const u32 layer_offset = config.BaseLayer() * info.layer_stride; + const GPUVAddr image_gpu_addr = config.Address() - layer_offset; const ImageId image_id = FindOrInsertImage(info, image_gpu_addr); if (!image_id) { return NULL_IMAGE_VIEW_ID; @@ -1801,6 +1811,9 @@ void TextureCache<P>::PrepareImageView(ImageViewId image_view_id, bool is_modifi return; } const ImageViewBase& image_view = slot_image_views[image_view_id]; + if (image_view.IsBuffer()) { + return; + } PrepareImage(image_view.image_id, is_modification, invalidate); } diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index c1d14335e..1a9399455 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h @@ -154,6 +154,15 @@ union TextureHandle { }; static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size"); +[[nodiscard]] inline std::pair<u32, u32> TexturePair(u32 raw, bool via_header_index) { + if (via_header_index) { + return {raw, raw}; + } else { + const Tegra::Texture::TextureHandle handle{raw}; + return {handle.tic_id, via_header_index ? 
handle.tic_id : handle.tsc_id};
+    }
+}
+
 struct TICEntry {
     union {
         struct {
diff --git a/src/video_core/transform_feedback.cpp b/src/video_core/transform_feedback.cpp
new file mode 100644
index 000000000..ba26ac3f1
--- /dev/null
+++ b/src/video_core/transform_feedback.cpp
@@ -0,0 +1,99 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <vector>
+
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "shader_recompiler/shader_info.h"
+#include "video_core/transform_feedback.h"
+
+namespace VideoCommon {
+
+std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings(
+    const TransformFeedbackState& state) {
+    static constexpr std::array VECTORS{
+        28,  // gl_Position
+        32,  // Generic 0
+        36,  // Generic 1
+        40,  // Generic 2
+        44,  // Generic 3
+        48,  // Generic 4
+        52,  // Generic 5
+        56,  // Generic 6
+        60,  // Generic 7
+        64,  // Generic 8
+        68,  // Generic 9
+        72,  // Generic 10
+        76,  // Generic 11
+        80,  // Generic 12
+        84,  // Generic 13
+        88,  // Generic 14
+        92,  // Generic 15
+        96,  // Generic 16
+        100, // Generic 17
+        104, // Generic 18
+        108, // Generic 19
+        112, // Generic 20
+        116, // Generic 21
+        120, // Generic 22
+        124, // Generic 23
+        128, // Generic 24
+        132, // Generic 25
+        136, // Generic 26
+        140, // Generic 27
+        144, // Generic 28
+        148, // Generic 29
+        152, // Generic 30
+        156, // Generic 31
+        160, // gl_FrontColor
+        164, // gl_FrontSecondaryColor
+        160, // gl_BackColor
+        164, // gl_BackSecondaryColor
+        192, // gl_TexCoord[0]
+        196, // gl_TexCoord[1]
+        200, // gl_TexCoord[2]
+        204, // gl_TexCoord[3]
+        208, // gl_TexCoord[4]
+        212, // gl_TexCoord[5]
+        216, // gl_TexCoord[6]
+        220, // gl_TexCoord[7]
+    };
+    std::vector<Shader::TransformFeedbackVarying> xfb(256);
+    for (size_t buffer = 0; buffer < state.layouts.size(); ++buffer) {
+        const auto& locations = state.varyings[buffer];
+        const auto& layout = state.layouts[buffer];
+        const u32 varying_count = layout.varying_count;
+        u32 highest = 0;
+        for (u32 offset = 0; offset < varying_count; ++offset) {
+            const u32 base_offset = offset;
+            const u8 location = locations[offset];
+
+            UNIMPLEMENTED_IF_MSG(layout.stream != 0, "Stream is not zero: {}", layout.stream);
+            Shader::TransformFeedbackVarying varying{
+                .buffer = static_cast<u32>(buffer),
+                .stride = layout.stride,
+                .offset = offset * 4,
+                .components = 1,
+            };
+            if (std::ranges::find(VECTORS, Common::AlignDown(location, 4)) != VECTORS.end()) {
+                UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB");
+
+                const u8 base_index = location / 4;
+                while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) {
+                    ++offset;
+                    ++varying.components;
+                }
+            }
+            xfb[location] = varying;
+            highest = std::max(highest, (base_offset + varying.components) * 4);
+        }
+        UNIMPLEMENTED_IF(highest != layout.stride);
+    }
+    return xfb;
+}
+
+} // namespace VideoCommon
diff --git a/src/video_core/transform_feedback.h b/src/video_core/transform_feedback.h
new file mode 100644
index 000000000..8f6946d65
--- /dev/null
+++ b/src/video_core/transform_feedback.h
@@ -0,0 +1,30 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+ +#pragma once + +#include <array> +#include <vector> + +#include "common/common_types.h" +#include "shader_recompiler/runtime_info.h" +#include "video_core/engines/maxwell_3d.h" + +namespace VideoCommon { + +struct TransformFeedbackState { + struct Layout { + u32 stream; + u32 varying_count; + u32 stride; + }; + std::array<Layout, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> layouts; + std::array<std::array<u8, 128>, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> + varyings; +}; + +std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings( + const TransformFeedbackState& state); + +} // namespace VideoCommon diff --git a/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp index 758c038ba..fdd1a5081 100644 --- a/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp +++ b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp @@ -73,12 +73,11 @@ NsightAftermathTracker::~NsightAftermathTracker() { } } -void NsightAftermathTracker::SaveShader(const std::vector<u32>& spirv) const { +void NsightAftermathTracker::SaveShader(std::span<const u32> spirv) const { if (!initialized) { return; } - - std::vector<u32> spirv_copy = spirv; + std::vector<u32> spirv_copy(spirv.begin(), spirv.end()); GFSDK_Aftermath_SpirvCode shader; shader.pData = spirv_copy.data(); shader.size = static_cast<u32>(spirv_copy.size() * 4); @@ -100,7 +99,7 @@ void NsightAftermathTracker::SaveShader(const std::vector<u32>& spirv) const { LOG_ERROR(Render_Vulkan, "Failed to dump SPIR-V module with hash={:016x}", hash.hash); return; } - if (file.Write(spirv) != spirv.size()) { + if (file.WriteSpan(spirv) != spirv.size()) { LOG_ERROR(Render_Vulkan, "Failed to write SPIR-V module with hash={:016x}", hash.hash); return; } diff --git a/src/video_core/vulkan_common/nsight_aftermath_tracker.h b/src/video_core/vulkan_common/nsight_aftermath_tracker.h index 4fe2b14d9..eae1891dd 100644 --- a/src/video_core/vulkan_common/nsight_aftermath_tracker.h +++ b/src/video_core/vulkan_common/nsight_aftermath_tracker.h @@ -6,6 +6,7 @@ #include <filesystem> #include <mutex> +#include <span> #include <string> #include <vector> @@ -33,7 +34,7 @@ public: NsightAftermathTracker(NsightAftermathTracker&&) = delete; NsightAftermathTracker& operator=(NsightAftermathTracker&&) = delete; - void SaveShader(const std::vector<u32>& spirv) const; + void SaveShader(std::span<const u32> spirv) const; private: #ifdef HAS_NSIGHT_AFTERMATH @@ -61,21 +62,21 @@ private: bool initialized = false; Common::DynamicLibrary dl; - PFN_GFSDK_Aftermath_DisableGpuCrashDumps GFSDK_Aftermath_DisableGpuCrashDumps; - PFN_GFSDK_Aftermath_EnableGpuCrashDumps GFSDK_Aftermath_EnableGpuCrashDumps; - PFN_GFSDK_Aftermath_GetShaderDebugInfoIdentifier GFSDK_Aftermath_GetShaderDebugInfoIdentifier; - PFN_GFSDK_Aftermath_GetShaderHashSpirv GFSDK_Aftermath_GetShaderHashSpirv; - PFN_GFSDK_Aftermath_GpuCrashDump_CreateDecoder GFSDK_Aftermath_GpuCrashDump_CreateDecoder; - PFN_GFSDK_Aftermath_GpuCrashDump_DestroyDecoder GFSDK_Aftermath_GpuCrashDump_DestroyDecoder; - PFN_GFSDK_Aftermath_GpuCrashDump_GenerateJSON GFSDK_Aftermath_GpuCrashDump_GenerateJSON; - PFN_GFSDK_Aftermath_GpuCrashDump_GetJSON GFSDK_Aftermath_GpuCrashDump_GetJSON; + PFN_GFSDK_Aftermath_DisableGpuCrashDumps GFSDK_Aftermath_DisableGpuCrashDumps{}; + PFN_GFSDK_Aftermath_EnableGpuCrashDumps GFSDK_Aftermath_EnableGpuCrashDumps{}; + PFN_GFSDK_Aftermath_GetShaderDebugInfoIdentifier 
GFSDK_Aftermath_GetShaderDebugInfoIdentifier{}; + PFN_GFSDK_Aftermath_GetShaderHashSpirv GFSDK_Aftermath_GetShaderHashSpirv{}; + PFN_GFSDK_Aftermath_GpuCrashDump_CreateDecoder GFSDK_Aftermath_GpuCrashDump_CreateDecoder{}; + PFN_GFSDK_Aftermath_GpuCrashDump_DestroyDecoder GFSDK_Aftermath_GpuCrashDump_DestroyDecoder{}; + PFN_GFSDK_Aftermath_GpuCrashDump_GenerateJSON GFSDK_Aftermath_GpuCrashDump_GenerateJSON{}; + PFN_GFSDK_Aftermath_GpuCrashDump_GetJSON GFSDK_Aftermath_GpuCrashDump_GetJSON{}; #endif }; #ifndef HAS_NSIGHT_AFTERMATH inline NsightAftermathTracker::NsightAftermathTracker() = default; inline NsightAftermathTracker::~NsightAftermathTracker() = default; -inline void NsightAftermathTracker::SaveShader(const std::vector<u32>&) const {} +inline void NsightAftermathTracker::SaveShader(std::span<const u32>) const {} #endif } // namespace Vulkan diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index f214510da..44afdc1cd 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <algorithm> #include <bitset> #include <chrono> #include <optional> @@ -33,6 +34,12 @@ constexpr std::array DEPTH16_UNORM_STENCIL8_UINT{ }; } // namespace Alternatives +enum class NvidiaArchitecture { + AmpereOrNewer, + Turing, + VoltaOrOlder, +}; + constexpr std::array REQUIRED_EXTENSIONS{ VK_KHR_MAINTENANCE1_EXTENSION_NAME, VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME, @@ -43,11 +50,14 @@ constexpr std::array REQUIRED_EXTENSIONS{ VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME, VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME, + VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME, + VK_KHR_VARIABLE_POINTERS_EXTENSION_NAME, VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, VK_EXT_ROBUSTNESS_2_EXTENSION_NAME, VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME, + VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME, #ifdef _WIN32 VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, #endif @@ -112,6 +122,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica VK_FORMAT_R16G16_SFLOAT, VK_FORMAT_R16G16_SINT, VK_FORMAT_R16_UNORM, + VK_FORMAT_R16_SNORM, VK_FORMAT_R16_UINT, VK_FORMAT_R8G8B8A8_SRGB, VK_FORMAT_R8G8_UNORM, @@ -191,15 +202,47 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica return format_properties; } +std::vector<std::string> GetSupportedExtensions(vk::PhysicalDevice physical) { + const std::vector extensions = physical.EnumerateDeviceExtensionProperties(); + std::vector<std::string> supported_extensions; + supported_extensions.reserve(extensions.size()); + for (const auto& extension : extensions) { + supported_extensions.emplace_back(extension.extensionName); + } + return supported_extensions; +} + +NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical, + std::span<const std::string> exts) { + if (std::ranges::find(exts, VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME) != exts.end()) { + VkPhysicalDeviceFragmentShadingRatePropertiesKHR shading_rate_props{}; + shading_rate_props.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR; + VkPhysicalDeviceProperties2KHR physical_properties{}; + physical_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; + 
physical_properties.pNext = &shading_rate_props; + physical.GetProperties2KHR(physical_properties); + if (shading_rate_props.primitiveFragmentShadingRateWithMultipleViewports) { + // Only Ampere and newer support this feature + return NvidiaArchitecture::AmpereOrNewer; + } + } + if (std::ranges::find(exts, VK_NV_SHADING_RATE_IMAGE_EXTENSION_NAME) != exts.end()) { + return NvidiaArchitecture::Turing; + } + return NvidiaArchitecture::VoltaOrOlder; +} } // Anonymous namespace Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR surface, const vk::InstanceDispatch& dld_) : instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()}, - format_properties{GetFormatProperties(physical)} { + supported_extensions{GetSupportedExtensions(physical)}, + format_properties(GetFormatProperties(physical)) { CheckSuitability(surface != nullptr); SetupFamilies(surface); SetupFeatures(); + SetupProperties(); const auto queue_cis = GetDeviceQueueCreateInfos(); const std::vector extensions = LoadExtensions(surface != nullptr); @@ -214,16 +257,16 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR .independentBlend = true, .geometryShader = true, .tessellationShader = true, - .sampleRateShading = false, - .dualSrcBlend = false, + .sampleRateShading = true, + .dualSrcBlend = true, .logicOp = false, .multiDrawIndirect = false, .drawIndirectFirstInstance = false, .depthClamp = true, .depthBiasClamp = true, - .fillModeNonSolid = false, - .depthBounds = false, - .wideLines = false, + .fillModeNonSolid = true, + .depthBounds = is_depth_bounds_supported, + .wideLines = true, .largePoints = true, .alphaToOne = false, .multiViewport = true, @@ -245,11 +288,11 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR .shaderSampledImageArrayDynamicIndexing = false, .shaderStorageBufferArrayDynamicIndexing = false, .shaderStorageImageArrayDynamicIndexing = false, - .shaderClipDistance = false, - .shaderCullDistance = false, - .shaderFloat64 = false, - .shaderInt64 = false, - .shaderInt16 = false, + .shaderClipDistance = true, + .shaderCullDistance = true, + .shaderFloat64 = is_shader_float64_supported, + .shaderInt64 = is_shader_int64_supported, + .shaderInt16 = is_shader_int16_supported, .shaderResourceResidency = false, .shaderResourceMinLod = false, .sparseBinding = false, @@ -278,7 +321,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR VkPhysicalDevice16BitStorageFeaturesKHR bit16_storage{ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR, .pNext = nullptr, - .storageBuffer16BitAccess = false, + .storageBuffer16BitAccess = true, .uniformAndStorageBuffer16BitAccess = true, .storagePushConstant16 = false, .storageInputOutput16 = false, @@ -310,6 +353,21 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR }; SetNext(next, host_query_reset); + VkPhysicalDeviceVariablePointerFeaturesKHR variable_pointers{ + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES_KHR, + .pNext = nullptr, + .variablePointersStorageBuffer = VK_TRUE, + .variablePointers = VK_TRUE, + }; + SetNext(next, variable_pointers); + + VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote{ + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT, + .pNext = nullptr, + .shaderDemoteToHelperInvocation = true, + }; + SetNext(next, demote); + VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8; if 
(is_float16_supported) { float16_int8 = { @@ -327,6 +385,14 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_INFO(Render_Vulkan, "Device doesn't support viewport swizzles"); } + if (!nv_viewport_array2) { + LOG_INFO(Render_Vulkan, "Device doesn't support viewport masks"); + } + + if (!nv_geometry_shader_passthrough) { + LOG_INFO(Render_Vulkan, "Device doesn't support passthrough geometry shaders"); + } + VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR std430_layout; if (khr_uniform_buffer_standard_layout) { std430_layout = { @@ -389,12 +455,83 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); } + VkPhysicalDeviceLineRasterizationFeaturesEXT line_raster; + if (ext_line_rasterization) { + line_raster = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT, + .pNext = nullptr, + .rectangularLines = VK_TRUE, + .bresenhamLines = VK_FALSE, + .smoothLines = VK_TRUE, + .stippledRectangularLines = VK_FALSE, + .stippledBresenhamLines = VK_FALSE, + .stippledSmoothLines = VK_FALSE, + }; + SetNext(next, line_raster); + } else { + LOG_INFO(Render_Vulkan, "Device doesn't support smooth lines"); + } + + if (!ext_conservative_rasterization) { + LOG_INFO(Render_Vulkan, "Device doesn't support conservative rasterization"); + } + + VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex; + if (ext_provoking_vertex) { + provoking_vertex = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT, + .pNext = nullptr, + .provokingVertexLast = VK_TRUE, + .transformFeedbackPreservesProvokingVertex = VK_TRUE, + }; + SetNext(next, provoking_vertex); + } else { + LOG_INFO(Render_Vulkan, "Device doesn't support provoking vertex last"); + } + + VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT vertex_input_dynamic; + if (ext_vertex_input_dynamic_state) { + vertex_input_dynamic = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_INPUT_DYNAMIC_STATE_FEATURES_EXT, + .pNext = nullptr, + .vertexInputDynamicState = VK_TRUE, + }; + SetNext(next, vertex_input_dynamic); + } else { + LOG_INFO(Render_Vulkan, "Device doesn't support vertex input dynamic state"); + } + + VkPhysicalDeviceShaderAtomicInt64FeaturesKHR atomic_int64; + if (ext_shader_atomic_int64) { + atomic_int64 = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR, + .pNext = nullptr, + .shaderBufferInt64Atomics = VK_TRUE, + .shaderSharedInt64Atomics = VK_TRUE, + }; + SetNext(next, atomic_int64); + } + + VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR workgroup_layout; + if (khr_workgroup_memory_explicit_layout) { + workgroup_layout = { + .sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR, + .pNext = nullptr, + .workgroupMemoryExplicitLayout = VK_TRUE, + .workgroupMemoryExplicitLayoutScalarBlockLayout = VK_TRUE, + .workgroupMemoryExplicitLayout8BitAccess = VK_TRUE, + .workgroupMemoryExplicitLayout16BitAccess = VK_TRUE, + }; + SetNext(next, workgroup_layout); + } + if (!ext_depth_range_unrestricted) { LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); } VkDeviceDiagnosticsConfigCreateInfoNV diagnostics_nv; - if (nv_device_diagnostics_config) { + if (Settings::values.enable_nsight_aftermath && nv_device_diagnostics_config) { nsight_aftermath_tracker = std::make_unique<NsightAftermathTracker>(); diagnostics_nv = { @@ -412,11 +549,33 @@ Device::Device(VkInstance 
instance_, vk::PhysicalDevice physical_, VkSurfaceKHR CollectTelemetryParameters(); CollectToolingInfo(); + if (driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR) { + const auto arch = GetNvidiaArchitecture(physical, supported_extensions); + switch (arch) { + case NvidiaArchitecture::AmpereOrNewer: + LOG_WARNING(Render_Vulkan, "Blacklisting Ampere devices from float16 math"); + is_float16_supported = false; + break; + case NvidiaArchitecture::Turing: + break; + case NvidiaArchitecture::VoltaOrOlder: + LOG_WARNING(Render_Vulkan, "Blacklisting Volta and older from VK_KHR_push_descriptor"); + khr_push_descriptor = false; + break; + } + } if (ext_extended_dynamic_state && driver_id == VK_DRIVER_ID_MESA_RADV) { - LOG_WARNING( - Render_Vulkan, - "Blacklisting RADV for VK_EXT_extended_dynamic state, likely due to a bug in yuzu"); - ext_extended_dynamic_state = false; + // Mask driver version variant + const u32 version = (properties.driverVersion << 3) >> 3; + if (version < VK_MAKE_API_VERSION(0, 21, 2, 0)) { + LOG_WARNING(Render_Vulkan, + "RADV versions older than 21.2 have broken VK_EXT_extended_dynamic_state"); + ext_extended_dynamic_state = false; + } + } + if (ext_vertex_input_dynamic_state && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { + LOG_WARNING(Render_Vulkan, "Blacklisting Intel for VK_EXT_vertex_input_dynamic_state"); + ext_vertex_input_dynamic_state = false; } if (is_float16_supported && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { // Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being. @@ -426,8 +585,6 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR graphics_queue = logical.GetQueue(graphics_family); present_queue = logical.GetQueue(present_family); - - use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue(); } Device::~Device() = default; @@ -471,7 +628,7 @@ void Device::ReportLoss() const { std::this_thread::sleep_for(std::chrono::seconds{15}); } -void Device::SaveShader(const std::vector<u32>& spirv) const { +void Device::SaveShader(std::span<const u32> spirv) const { if (nsight_aftermath_tracker) { nsight_aftermath_tracker->SaveShader(spirv); } @@ -597,10 +754,20 @@ void Device::CheckSuitability(bool requires_swapchain) const { throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); } } + VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote{}; + demote.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT; + demote.pNext = nullptr; + + VkPhysicalDeviceVariablePointerFeaturesKHR variable_pointers{}; + variable_pointers.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES_KHR; + variable_pointers.pNext = &demote; + VkPhysicalDeviceRobustness2FeaturesEXT robustness2{}; robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; + robustness2.pNext = &variable_pointers; - VkPhysicalDeviceFeatures2 features2{}; + VkPhysicalDeviceFeatures2KHR features2{}; features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; features2.pNext = &robustness2; @@ -610,7 +777,6 @@ void Device::CheckSuitability(bool requires_swapchain) const { const std::array feature_report{ std::make_pair(features.robustBufferAccess, "robustBufferAccess"), std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), - std::make_pair(features.robustBufferAccess, "robustBufferAccess"), std::make_pair(features.imageCubeArray, "imageCubeArray"), std::make_pair(features.independentBlend, 
"independentBlend"), std::make_pair(features.depthClamp, "depthClamp"), @@ -618,13 +784,23 @@ void Device::CheckSuitability(bool requires_swapchain) const { std::make_pair(features.largePoints, "largePoints"), std::make_pair(features.multiViewport, "multiViewport"), std::make_pair(features.depthBiasClamp, "depthBiasClamp"), + std::make_pair(features.fillModeNonSolid, "fillModeNonSolid"), + std::make_pair(features.wideLines, "wideLines"), std::make_pair(features.geometryShader, "geometryShader"), std::make_pair(features.tessellationShader, "tessellationShader"), + std::make_pair(features.sampleRateShading, "sampleRateShading"), + std::make_pair(features.dualSrcBlend, "dualSrcBlend"), std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"), std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"), std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), std::make_pair(features.shaderStorageImageWriteWithoutFormat, "shaderStorageImageWriteWithoutFormat"), + std::make_pair(features.shaderClipDistance, "shaderClipDistance"), + std::make_pair(features.shaderCullDistance, "shaderCullDistance"), + std::make_pair(demote.shaderDemoteToHelperInvocation, "shaderDemoteToHelperInvocation"), + std::make_pair(variable_pointers.variablePointers, "variablePointers"), + std::make_pair(variable_pointers.variablePointersStorageBuffer, + "variablePointersStorageBuffer"), std::make_pair(robustness2.robustBufferAccess2, "robustBufferAccess2"), std::make_pair(robustness2.robustImageAccess2, "robustImageAccess2"), std::make_pair(robustness2.nullDescriptor, "nullDescriptor"), @@ -647,14 +823,19 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { } bool has_khr_shader_float16_int8{}; + bool has_khr_workgroup_memory_explicit_layout{}; bool has_ext_subgroup_size_control{}; bool has_ext_transform_feedback{}; bool has_ext_custom_border_color{}; bool has_ext_extended_dynamic_state{}; - for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) { + bool has_ext_shader_atomic_int64{}; + bool has_ext_provoking_vertex{}; + bool has_ext_vertex_input_dynamic_state{}; + bool has_ext_line_rasterization{}; + for (const std::string& extension : supported_extensions) { const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name, bool push) { - if (extension.extensionName != std::string_view(name)) { + if (extension != name) { return; } if (push) { @@ -665,8 +846,13 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { } }; test(nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true); + test(nv_viewport_array2, VK_NV_VIEWPORT_ARRAY2_EXTENSION_NAME, true); + test(nv_geometry_shader_passthrough, VK_NV_GEOMETRY_SHADER_PASSTHROUGH_EXTENSION_NAME, + true); test(khr_uniform_buffer_standard_layout, VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); + test(khr_spirv_1_4, VK_KHR_SPIRV_1_4_EXTENSION_NAME, true); + test(khr_push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, true); test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); @@ -675,16 +861,25 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { true); test(ext_tooling_info, VK_EXT_TOOLING_INFO_EXTENSION_NAME, true); test(ext_shader_stencil_export, 
VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME, true); + test(ext_conservative_rasterization, VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME, + true); test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false); test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); - if (Settings::values.renderer_debug) { + test(has_ext_provoking_vertex, VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME, false); + test(has_ext_vertex_input_dynamic_state, VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME, + false); + test(has_ext_shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME, false); + test(has_khr_workgroup_memory_explicit_layout, + VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false); + test(has_ext_line_rasterization, VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME, false); + if (Settings::values.enable_nsight_aftermath) { test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, true); } } - VkPhysicalDeviceFeatures2KHR features; + VkPhysicalDeviceFeatures2KHR features{}; features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR; VkPhysicalDeviceProperties2KHR physical_properties; @@ -722,10 +917,49 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { subgroup_properties.maxSubgroupSize >= GuestWarpSize) { extensions.push_back(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME); guest_warp_stages = subgroup_properties.requiredSubgroupSizeStages; + ext_subgroup_size_control = true; } } else { is_warp_potentially_bigger = true; } + if (has_ext_provoking_vertex) { + VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex; + provoking_vertex.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT; + provoking_vertex.pNext = nullptr; + features.pNext = &provoking_vertex; + physical.GetFeatures2KHR(features); + + if (provoking_vertex.provokingVertexLast && + provoking_vertex.transformFeedbackPreservesProvokingVertex) { + extensions.push_back(VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME); + ext_provoking_vertex = true; + } + } + if (has_ext_vertex_input_dynamic_state) { + VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT vertex_input; + vertex_input.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_INPUT_DYNAMIC_STATE_FEATURES_EXT; + vertex_input.pNext = nullptr; + features.pNext = &vertex_input; + physical.GetFeatures2KHR(features); + + if (vertex_input.vertexInputDynamicState) { + extensions.push_back(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME); + ext_vertex_input_dynamic_state = true; + } + } + if (has_ext_shader_atomic_int64) { + VkPhysicalDeviceShaderAtomicInt64Features atomic_int64; + atomic_int64.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT; + atomic_int64.pNext = nullptr; + features.pNext = &atomic_int64; + physical.GetFeatures2KHR(features); + + if (atomic_int64.shaderBufferInt64Atomics && atomic_int64.shaderSharedInt64Atomics) { + extensions.push_back(VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME); + ext_shader_atomic_int64 = true; + } + } if (has_ext_transform_feedback) { VkPhysicalDeviceTransformFeedbackFeaturesEXT tfb_features; tfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT; @@ -760,17 +994,55 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { } } if (has_ext_extended_dynamic_state) { - 
VkPhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state; - dynamic_state.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT; - dynamic_state.pNext = nullptr; - features.pNext = &dynamic_state; + VkPhysicalDeviceExtendedDynamicStateFeaturesEXT extended_dynamic_state; + extended_dynamic_state.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT; + extended_dynamic_state.pNext = nullptr; + features.pNext = &extended_dynamic_state; physical.GetFeatures2KHR(features); - if (dynamic_state.extendedDynamicState) { + if (extended_dynamic_state.extendedDynamicState) { extensions.push_back(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME); ext_extended_dynamic_state = true; } } + if (has_ext_line_rasterization) { + VkPhysicalDeviceLineRasterizationFeaturesEXT line_raster; + line_raster.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT; + line_raster.pNext = nullptr; + features.pNext = &line_raster; + physical.GetFeatures2KHR(features); + if (line_raster.rectangularLines && line_raster.smoothLines) { + extensions.push_back(VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME); + ext_line_rasterization = true; + } + } + if (has_khr_workgroup_memory_explicit_layout) { + VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR layout; + layout.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR; + layout.pNext = nullptr; + features.pNext = &layout; + physical.GetFeatures2KHR(features); + + if (layout.workgroupMemoryExplicitLayout && + layout.workgroupMemoryExplicitLayout8BitAccess && + layout.workgroupMemoryExplicitLayout16BitAccess && + layout.workgroupMemoryExplicitLayoutScalarBlockLayout) { + extensions.push_back(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME); + khr_workgroup_memory_explicit_layout = true; + } + } + if (khr_push_descriptor) { + VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor; + push_descriptor.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR; + push_descriptor.pNext = nullptr; + + physical_properties.pNext = &push_descriptor; + physical.GetProperties2KHR(physical_properties); + + max_push_descriptors = push_descriptor.maxPushDescriptors; + } return extensions; } @@ -806,11 +1078,25 @@ void Device::SetupFamilies(VkSurfaceKHR surface) { } void Device::SetupFeatures() { - const auto supported_features{physical.GetFeatures()}; - is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat; - is_shader_storage_image_multisample = supported_features.shaderStorageImageMultisample; + const VkPhysicalDeviceFeatures features{physical.GetFeatures()}; + is_depth_bounds_supported = features.depthBounds; + is_formatless_image_load_supported = features.shaderStorageImageReadWithoutFormat; + is_shader_float64_supported = features.shaderFloat64; + is_shader_int64_supported = features.shaderInt64; + is_shader_int16_supported = features.shaderInt16; + is_shader_storage_image_multisample = features.shaderStorageImageMultisample; is_blit_depth_stencil_supported = TestDepthStencilBlits(); - is_optimal_astc_supported = IsOptimalAstcSupported(supported_features); + is_optimal_astc_supported = IsOptimalAstcSupported(features); +} + +void Device::SetupProperties() { + float_controls.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES_KHR; + + VkPhysicalDeviceProperties2KHR properties2{}; + properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; + properties2.pNext = &float_controls; + + 
physical.GetProperties2KHR(properties2); } void Device::CollectTelemetryParameters() { @@ -832,12 +1118,6 @@ void Device::CollectTelemetryParameters() { driver_id = driver.driverID; vendor_name = driver.driverName; - - const std::vector extensions = physical.EnumerateDeviceExtensionProperties(); - reported_extensions.reserve(std::size(extensions)); - for (const auto& extension : extensions) { - reported_extensions.emplace_back(extension.extensionName); - } } void Device::CollectPhysicalMemoryInfo() { diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 96c0f8c60..df394e384 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -4,6 +4,7 @@ #pragma once +#include <span> #include <string> #include <string_view> #include <unordered_map> @@ -43,7 +44,7 @@ public: void ReportLoss() const; /// Reports a shader to Nsight Aftermath. - void SaveShader(const std::vector<u32>& spirv) const; + void SaveShader(std::span<const u32> spirv) const; /// Returns the name of the VkDriverId reported from Vulkan. std::string GetDriverName() const; @@ -128,6 +129,11 @@ public: return properties.limits.maxComputeSharedMemorySize; } + /// Returns float control properties of the device. + const VkPhysicalDeviceFloatControlsPropertiesKHR& FloatControlProperties() const { + return float_controls; + } + /// Returns true if ASTC is natively supported. bool IsOptimalAstcSupported() const { return is_optimal_astc_supported; @@ -148,11 +154,31 @@ public: return guest_warp_stages & stage; } + /// Returns the maximum number of push descriptors. + u32 MaxPushDescriptors() const { + return max_push_descriptors; + } + /// Returns true if formatless image load is supported. bool IsFormatlessImageLoadSupported() const { return is_formatless_image_load_supported; } + /// Returns true if shader int64 is supported. + bool IsShaderInt64Supported() const { + return is_shader_int64_supported; + } + + /// Returns true if shader int16 is supported. + bool IsShaderInt16Supported() const { + return is_shader_int16_supported; + } + + // Returns true if depth bounds is supported. + bool IsDepthBoundsSupported() const { + return is_depth_bounds_supported; + } + /// Returns true when blitting from and to depth stencil images is supported. bool IsBlitDepthStencilSupported() const { return is_blit_depth_stencil_supported; @@ -163,11 +189,36 @@ public: return nv_viewport_swizzle; } - /// Returns true if the device supports VK_EXT_scalar_block_layout. + /// Returns true if the device supports VK_NV_viewport_array2. + bool IsNvViewportArray2Supported() const { + return nv_viewport_array2; + } + + /// Returns true if the device supports VK_NV_geometry_shader_passthrough. + bool IsNvGeometryShaderPassthroughSupported() const { + return nv_geometry_shader_passthrough; + } + + /// Returns true if the device supports VK_KHR_uniform_buffer_standard_layout. bool IsKhrUniformBufferStandardLayoutSupported() const { return khr_uniform_buffer_standard_layout; } + /// Returns true if the device supports VK_KHR_spirv_1_4. + bool IsKhrSpirv1_4Supported() const { + return khr_spirv_1_4; + } + + /// Returns true if the device supports VK_KHR_push_descriptor. + bool IsKhrPushDescriptorSupported() const { + return khr_push_descriptor; + } + + /// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout. 
+ bool IsKhrWorkgroupMemoryExplicitLayoutSupported() const { + return khr_workgroup_memory_explicit_layout; + } + /// Returns true if the device supports VK_EXT_index_type_uint8. bool IsExtIndexTypeUint8Supported() const { return ext_index_type_uint8; @@ -188,6 +239,11 @@ public: return ext_shader_viewport_index_layer; } + /// Returns true if the device supports VK_EXT_subgroup_size_control. + bool IsExtSubgroupSizeControlSupported() const { + return ext_subgroup_size_control; + } + /// Returns true if the device supports VK_EXT_transform_feedback. bool IsExtTransformFeedbackSupported() const { return ext_transform_feedback; @@ -203,11 +259,36 @@ public: return ext_extended_dynamic_state; } + /// Returns true if the device supports VK_EXT_line_rasterization. + bool IsExtLineRasterizationSupported() const { + return ext_line_rasterization; + } + + /// Returns true if the device supports VK_EXT_vertex_input_dynamic_state. + bool IsExtVertexInputDynamicStateSupported() const { + return ext_vertex_input_dynamic_state; + } + /// Returns true if the device supports VK_EXT_shader_stencil_export. bool IsExtShaderStencilExportSupported() const { return ext_shader_stencil_export; } + /// Returns true if the device supports VK_EXT_conservative_rasterization. + bool IsExtConservativeRasterizationSupported() const { + return ext_conservative_rasterization; + } + + /// Returns true if the device supports VK_EXT_provoking_vertex. + bool IsExtProvokingVertexSupported() const { + return ext_provoking_vertex; + } + + /// Returns true if the device supports VK_KHR_shader_atomic_int64. + bool IsExtShaderAtomicInt64Supported() const { + return ext_shader_atomic_int64; + } + /// Returns true when a known debugging tool is attached. bool HasDebuggingToolAttached() const { return has_renderdoc || has_nsight_graphics; @@ -220,12 +301,7 @@ public: /// Returns the list of available extensions. const std::vector<std::string>& GetAvailableExtensions() const { - return reported_extensions; - } - - /// Returns true if the setting for async shader compilation is enabled. - bool UseAsynchronousShaders() const { - return use_asynchronous_shaders; + return supported_extensions; } u64 GetDeviceLocalMemory() const { @@ -245,6 +321,9 @@ private: /// Sets up device features. void SetupFeatures(); + /// Sets up device properties. + void SetupProperties(); + /// Collects telemetry information from the device. void CollectTelemetryParameters(); @@ -267,46 +346,60 @@ private: bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, FormatType format_type) const; - VkInstance instance; ///< Vulkan instance. - vk::DeviceDispatch dld; ///< Device function pointers. - vk::PhysicalDevice physical; ///< Physical device. - VkPhysicalDeviceProperties properties; ///< Device properties. - vk::Device logical; ///< Logical device. - vk::Queue graphics_queue; ///< Main graphics queue. - vk::Queue present_queue; ///< Main present queue. - u32 instance_version{}; ///< Vulkan onstance version. + VkInstance instance; ///< Vulkan instance. + vk::DeviceDispatch dld; ///< Device function pointers. + vk::PhysicalDevice physical; ///< Physical device. + VkPhysicalDeviceProperties properties; ///< Device properties. + VkPhysicalDeviceFloatControlsPropertiesKHR float_controls{}; ///< Float control properties. + vk::Device logical; ///< Logical device. + vk::Queue graphics_queue; ///< Main graphics queue. + vk::Queue present_queue; ///< Main present queue. + u32 instance_version{}; ///< Vulkan onstance version. 
u32 graphics_family{}; ///< Main graphics queue family index. u32 present_family{}; ///< Main present queue family index. VkDriverIdKHR driver_id{}; ///< Driver ID. VkShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced. u64 device_access_memory{}; ///< Total size of device local memory in bytes. + u32 max_push_descriptors{}; ///< Maximum number of push descriptors bool is_optimal_astc_supported{}; ///< Support for native ASTC. bool is_float16_supported{}; ///< Support for float16 arithmetics. bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. bool is_formatless_image_load_supported{}; ///< Support for shader image read without format. + bool is_depth_bounds_supported{}; ///< Support for depth bounds. + bool is_shader_float64_supported{}; ///< Support for float64. + bool is_shader_int64_supported{}; ///< Support for int64. + bool is_shader_int16_supported{}; ///< Support for int16. bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images. bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. - bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs. - bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. - bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax. - bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. - bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. - bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info. - bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. - bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. - bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. - bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. - bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. - bool has_renderdoc{}; ///< Has RenderDoc attached - bool has_nsight_graphics{}; ///< Has Nsight Graphics attached - - // Asynchronous Graphics Pipeline setting - bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline + bool nv_viewport_array2{}; ///< Support for VK_NV_viewport_array2. + bool nv_geometry_shader_passthrough{}; ///< Support for VK_NV_geometry_shader_passthrough. + bool khr_uniform_buffer_standard_layout{}; ///< Support for scalar uniform buffer layouts. + bool khr_spirv_1_4{}; ///< Support for VK_KHR_spirv_1_4. + bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts. + bool khr_push_descriptor{}; ///< Support for VK_KHR_push_descritor. + bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. + bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax. + bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. + bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. + bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info. + bool ext_subgroup_size_control{}; ///< Support for VK_EXT_subgroup_size_control. + bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. + bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. 
+ bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. + bool ext_line_rasterization{}; ///< Support for VK_EXT_line_rasterization. + bool ext_vertex_input_dynamic_state{}; ///< Support for VK_EXT_vertex_input_dynamic_state. + bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. + bool ext_shader_atomic_int64{}; ///< Support for VK_KHR_shader_atomic_int64. + bool ext_conservative_rasterization{}; ///< Support for VK_EXT_conservative_rasterization. + bool ext_provoking_vertex{}; ///< Support for VK_EXT_provoking_vertex. + bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. + bool has_renderdoc{}; ///< Has RenderDoc attached + bool has_nsight_graphics{}; ///< Has Nsight Graphics attached // Telemetry parameters - std::string vendor_name; ///< Device's driver name. - std::vector<std::string> reported_extensions; ///< Reported Vulkan extensions. + std::string vendor_name; ///< Device's driver name. + std::vector<std::string> supported_extensions; ///< Reported Vulkan extensions. /// Format properties dictionary. std::unordered_map<VkFormat, VkFormatProperties> format_properties; diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index 2aa0ffbe6..bbf0fccae 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp @@ -103,6 +103,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkCmdFillBuffer); X(vkCmdPipelineBarrier); X(vkCmdPushConstants); + X(vkCmdPushDescriptorSetWithTemplateKHR); X(vkCmdSetBlendConstants); X(vkCmdSetDepthBias); X(vkCmdSetDepthBounds); @@ -120,9 +121,11 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkCmdSetDepthTestEnableEXT); X(vkCmdSetDepthWriteEnableEXT); X(vkCmdSetFrontFaceEXT); + X(vkCmdSetLineWidth); X(vkCmdSetPrimitiveTopologyEXT); X(vkCmdSetStencilOpEXT); X(vkCmdSetStencilTestEnableEXT); + X(vkCmdSetVertexInputEXT); X(vkCmdResolveImage); X(vkCreateBuffer); X(vkCreateBufferView); @@ -311,8 +314,6 @@ const char* ToString(VkResult result) noexcept { return "VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT"; case VkResult::VK_ERROR_UNKNOWN: return "VK_ERROR_UNKNOWN"; - case VkResult::VK_ERROR_INCOMPATIBLE_VERSION_KHR: - return "VK_ERROR_INCOMPATIBLE_VERSION_KHR"; case VkResult::VK_THREAD_IDLE_KHR: return "VK_THREAD_IDLE_KHR"; case VkResult::VK_THREAD_DONE_KHR: diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index 3e36d356a..d76bb4324 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h @@ -193,15 +193,16 @@ struct DeviceDispatch : InstanceDispatch { PFN_vkBeginCommandBuffer vkBeginCommandBuffer{}; PFN_vkBindBufferMemory vkBindBufferMemory{}; PFN_vkBindImageMemory vkBindImageMemory{}; + PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT{}; PFN_vkCmdBeginQuery vkCmdBeginQuery{}; PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass{}; PFN_vkCmdBeginTransformFeedbackEXT vkCmdBeginTransformFeedbackEXT{}; - PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT{}; PFN_vkCmdBindDescriptorSets vkCmdBindDescriptorSets{}; PFN_vkCmdBindIndexBuffer vkCmdBindIndexBuffer{}; PFN_vkCmdBindPipeline vkCmdBindPipeline{}; PFN_vkCmdBindTransformFeedbackBuffersEXT vkCmdBindTransformFeedbackBuffersEXT{}; PFN_vkCmdBindVertexBuffers vkCmdBindVertexBuffers{}; + PFN_vkCmdBindVertexBuffers2EXT vkCmdBindVertexBuffers2EXT{}; 
PFN_vkCmdBlitImage vkCmdBlitImage{}; PFN_vkCmdClearAttachments vkCmdClearAttachments{}; PFN_vkCmdCopyBuffer vkCmdCopyBuffer{}; @@ -211,34 +212,36 @@ struct DeviceDispatch : InstanceDispatch { PFN_vkCmdDispatch vkCmdDispatch{}; PFN_vkCmdDraw vkCmdDraw{}; PFN_vkCmdDrawIndexed vkCmdDrawIndexed{}; + PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{}; PFN_vkCmdEndQuery vkCmdEndQuery{}; PFN_vkCmdEndRenderPass vkCmdEndRenderPass{}; PFN_vkCmdEndTransformFeedbackEXT vkCmdEndTransformFeedbackEXT{}; - PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{}; PFN_vkCmdFillBuffer vkCmdFillBuffer{}; PFN_vkCmdPipelineBarrier vkCmdPipelineBarrier{}; PFN_vkCmdPushConstants vkCmdPushConstants{}; + PFN_vkCmdPushDescriptorSetWithTemplateKHR vkCmdPushDescriptorSetWithTemplateKHR{}; + PFN_vkCmdResolveImage vkCmdResolveImage{}; PFN_vkCmdSetBlendConstants vkCmdSetBlendConstants{}; + PFN_vkCmdSetCullModeEXT vkCmdSetCullModeEXT{}; PFN_vkCmdSetDepthBias vkCmdSetDepthBias{}; PFN_vkCmdSetDepthBounds vkCmdSetDepthBounds{}; - PFN_vkCmdSetEvent vkCmdSetEvent{}; - PFN_vkCmdSetScissor vkCmdSetScissor{}; - PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask{}; - PFN_vkCmdSetStencilReference vkCmdSetStencilReference{}; - PFN_vkCmdSetStencilWriteMask vkCmdSetStencilWriteMask{}; - PFN_vkCmdSetViewport vkCmdSetViewport{}; - PFN_vkCmdWaitEvents vkCmdWaitEvents{}; - PFN_vkCmdBindVertexBuffers2EXT vkCmdBindVertexBuffers2EXT{}; - PFN_vkCmdSetCullModeEXT vkCmdSetCullModeEXT{}; PFN_vkCmdSetDepthBoundsTestEnableEXT vkCmdSetDepthBoundsTestEnableEXT{}; PFN_vkCmdSetDepthCompareOpEXT vkCmdSetDepthCompareOpEXT{}; PFN_vkCmdSetDepthTestEnableEXT vkCmdSetDepthTestEnableEXT{}; PFN_vkCmdSetDepthWriteEnableEXT vkCmdSetDepthWriteEnableEXT{}; + PFN_vkCmdSetEvent vkCmdSetEvent{}; PFN_vkCmdSetFrontFaceEXT vkCmdSetFrontFaceEXT{}; + PFN_vkCmdSetLineWidth vkCmdSetLineWidth{}; PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT{}; + PFN_vkCmdSetScissor vkCmdSetScissor{}; + PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask{}; PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT{}; + PFN_vkCmdSetStencilReference vkCmdSetStencilReference{}; PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT{}; - PFN_vkCmdResolveImage vkCmdResolveImage{}; + PFN_vkCmdSetStencilWriteMask vkCmdSetStencilWriteMask{}; + PFN_vkCmdSetVertexInputEXT vkCmdSetVertexInputEXT{}; + PFN_vkCmdSetViewport vkCmdSetViewport{}; + PFN_vkCmdWaitEvents vkCmdWaitEvents{}; PFN_vkCreateBuffer vkCreateBuffer{}; PFN_vkCreateBufferView vkCreateBufferView{}; PFN_vkCreateCommandPool vkCreateCommandPool{}; @@ -989,6 +992,12 @@ public: dynamic_offsets.size(), dynamic_offsets.data()); } + void PushDescriptorSetWithTemplateKHR(VkDescriptorUpdateTemplateKHR update_template, + VkPipelineLayout layout, u32 set, + const void* data) const noexcept { + dld->vkCmdPushDescriptorSetWithTemplateKHR(handle, update_template, layout, set, data); + } + void BindPipeline(VkPipelineBindPoint bind_point, VkPipeline pipeline) const noexcept { dld->vkCmdBindPipeline(handle, bind_point, pipeline); } @@ -1190,6 +1199,10 @@ public: dld->vkCmdSetFrontFaceEXT(handle, front_face); } + void SetLineWidth(float line_width) const noexcept { + dld->vkCmdSetLineWidth(handle, line_width); + } + void SetPrimitiveTopologyEXT(VkPrimitiveTopology primitive_topology) const noexcept { dld->vkCmdSetPrimitiveTopologyEXT(handle, primitive_topology); } @@ -1203,6 +1216,13 @@ public: dld->vkCmdSetStencilTestEnableEXT(handle, enable ? 
VK_TRUE : VK_FALSE); } + void SetVertexInputEXT( + vk::Span<VkVertexInputBindingDescription2EXT> bindings, + vk::Span<VkVertexInputAttributeDescription2EXT> attributes) const noexcept { + dld->vkCmdSetVertexInputEXT(handle, bindings.size(), bindings.data(), attributes.size(), + attributes.data()); + } + void BindTransformFeedbackBuffersEXT(u32 first, u32 count, const VkBuffer* buffers, const VkDeviceSize* offsets, const VkDeviceSize* sizes) const noexcept { diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp index d72ca5acc..25b658b2a 100644 --- a/src/yuzu/bootmanager.cpp +++ b/src/yuzu/bootmanager.cpp @@ -64,12 +64,13 @@ void EmuThread::run() { emit LoadProgress(VideoCore::LoadCallbackStage::Prepare, 0, 0); - system.Renderer().ReadRasterizer()->LoadDiskResources( - system.CurrentProcess()->GetTitleID(), stop_token, - [this](VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total) { - emit LoadProgress(stage, value, total); - }); - + if (Settings::values.use_disk_shader_cache.GetValue()) { + system.Renderer().ReadRasterizer()->LoadDiskResources( + system.CurrentProcess()->GetTitleID(), stop_token, + [this](VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total) { + emit LoadProgress(stage, value, total); + }); + } emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0); gpu.ReleaseContext(); diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index a5e032959..52b3ed02e 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -814,7 +814,7 @@ void Config::ReadRendererValues() { ReadGlobalSetting(Settings::values.use_nvdec_emulation); ReadGlobalSetting(Settings::values.accelerate_astc); ReadGlobalSetting(Settings::values.use_vsync); - ReadGlobalSetting(Settings::values.use_assembly_shaders); + ReadGlobalSetting(Settings::values.shader_backend); ReadGlobalSetting(Settings::values.use_asynchronous_shaders); ReadGlobalSetting(Settings::values.use_fast_gpu_time); ReadGlobalSetting(Settings::values.use_caches_gc); @@ -824,6 +824,8 @@ void Config::ReadRendererValues() { if (global) { ReadBasicSetting(Settings::values.renderer_debug); + ReadBasicSetting(Settings::values.enable_nsight_aftermath); + ReadBasicSetting(Settings::values.disable_shader_loop_safety_checks); } qt_config->endGroup(); @@ -1343,7 +1345,10 @@ void Config::SaveRendererValues() { WriteGlobalSetting(Settings::values.use_nvdec_emulation); WriteGlobalSetting(Settings::values.accelerate_astc); WriteGlobalSetting(Settings::values.use_vsync); - WriteGlobalSetting(Settings::values.use_assembly_shaders); + WriteSetting(QString::fromStdString(Settings::values.shader_backend.GetLabel()), + static_cast<u32>(Settings::values.shader_backend.GetValue(global)), + static_cast<u32>(Settings::values.shader_backend.GetDefault()), + Settings::values.shader_backend.UsingGlobal()); WriteGlobalSetting(Settings::values.use_asynchronous_shaders); WriteGlobalSetting(Settings::values.use_fast_gpu_time); WriteGlobalSetting(Settings::values.use_caches_gc); @@ -1353,6 +1358,8 @@ void Config::SaveRendererValues() { if (global) { WriteBasicSetting(Settings::values.renderer_debug); + WriteBasicSetting(Settings::values.enable_nsight_aftermath); + WriteBasicSetting(Settings::values.disable_shader_loop_safety_checks); } qt_config->endGroup(); diff --git a/src/yuzu/configuration/config.h b/src/yuzu/configuration/config.h index 96f9b6de1..4bbb9f1cd 100644 --- a/src/yuzu/configuration/config.h +++ b/src/yuzu/configuration/config.h @@ -180,5 +180,6 @@ 
private: // These metatype declarations cannot be in common/settings.h because core is devoid of QT Q_DECLARE_METATYPE(Settings::CPUAccuracy); -Q_DECLARE_METATYPE(Settings::RendererBackend); Q_DECLARE_METATYPE(Settings::GPUAccuracy); +Q_DECLARE_METATYPE(Settings::RendererBackend); +Q_DECLARE_METATYPE(Settings::ShaderBackend); diff --git a/src/yuzu/configuration/configure_debug.cpp b/src/yuzu/configuration/configure_debug.cpp index 8fceb3878..f7e29dbd7 100644 --- a/src/yuzu/configuration/configure_debug.cpp +++ b/src/yuzu/configuration/configure_debug.cpp @@ -45,8 +45,13 @@ void ConfigureDebug::SetConfiguration() { ui->enable_graphics_debugging->setChecked(Settings::values.renderer_debug.GetValue()); ui->enable_cpu_debugging->setEnabled(runtime_lock); ui->enable_cpu_debugging->setChecked(Settings::values.cpu_debug_mode.GetValue()); + ui->enable_nsight_aftermath->setEnabled(runtime_lock); + ui->enable_nsight_aftermath->setChecked(Settings::values.enable_nsight_aftermath.GetValue()); ui->disable_macro_jit->setEnabled(runtime_lock); ui->disable_macro_jit->setChecked(Settings::values.disable_macro_jit.GetValue()); + ui->disable_loop_safety_checks->setEnabled(runtime_lock); + ui->disable_loop_safety_checks->setChecked( + Settings::values.disable_shader_loop_safety_checks.GetValue()); ui->extended_logging->setChecked(Settings::values.extended_logging.GetValue()); } @@ -61,6 +66,9 @@ void ConfigureDebug::ApplyConfiguration() { Settings::values.use_auto_stub = ui->use_auto_stub->isChecked(); Settings::values.renderer_debug = ui->enable_graphics_debugging->isChecked(); Settings::values.cpu_debug_mode = ui->enable_cpu_debugging->isChecked(); + Settings::values.enable_nsight_aftermath = ui->enable_nsight_aftermath->isChecked(); + Settings::values.disable_shader_loop_safety_checks = + ui->disable_loop_safety_checks->isChecked(); Settings::values.disable_macro_jit = ui->disable_macro_jit->isChecked(); Settings::values.extended_logging = ui->extended_logging->isChecked(); Debugger::ToggleConsole(); diff --git a/src/yuzu/configuration/configure_debug.ui b/src/yuzu/configuration/configure_debug.ui index 1260ad6f0..c8baf2921 100644 --- a/src/yuzu/configuration/configure_debug.ui +++ b/src/yuzu/configuration/configure_debug.ui @@ -126,6 +126,16 @@ </widget> </item> <item> + <widget class="QCheckBox" name="enable_nsight_aftermath"> + <property name="toolTip"> + <string>When checked, it enables Nsight Aftermath crash dumps</string> + </property> + <property name="text"> + <string>Enable Nsight Aftermath</string> + </property> + </widget> + </item> + <item> <widget class="QCheckBox" name="disable_macro_jit"> <property name="enabled"> <bool>true</bool> @@ -138,6 +148,16 @@ </property> </widget> </item> + <item> + <widget class="QCheckBox" name="disable_loop_safety_checks"> + <property name="toolTip"> + <string>When checked, it executes shaders without loop logic changes</string> + </property> + <property name="text"> + <string>Disable Loop safety checks</string> + </property> + </widget> + </item> </layout> </widget> </item> @@ -252,11 +272,17 @@ <tabstops> <tabstop>log_filter_edit</tabstop> <tabstop>toggle_console</tabstop> + <tabstop>extended_logging</tabstop> <tabstop>open_log_button</tabstop> <tabstop>homebrew_args_edit</tabstop> <tabstop>enable_graphics_debugging</tabstop> + <tabstop>enable_nsight_aftermath</tabstop> + <tabstop>disable_macro_jit</tabstop> + <tabstop>disable_loop_safety_checks</tabstop> <tabstop>reporting_services</tabstop> <tabstop>quest_flag</tabstop> + <tabstop>use_debug_asserts</tabstop> 
+ <tabstop>use_auto_stub</tabstop> </tabstops> <resources/> <connections/> diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index 4d5b4c0e6..fef211707 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp @@ -26,19 +26,29 @@ ConfigureGraphics::ConfigureGraphics(QWidget* parent) ui->setupUi(this); + for (const auto& device : vulkan_devices) { + ui->device->addItem(device); + } + + ui->backend->addItem(QStringLiteral("GLSL")); + ui->backend->addItem(tr("GLASM (NVIDIA Only)")); + ui->backend->addItem(QStringLiteral("SPIR-V (Experimental, Mesa Only)")); + SetupPerGameUI(); SetConfiguration(); connect(ui->api, qOverload<int>(&QComboBox::currentIndexChanged), this, [this] { - UpdateDeviceComboBox(); + UpdateAPILayout(); if (!Settings::IsConfiguringGlobal()) { ConfigurationShared::SetHighlight( - ui->api_layout, ui->api->currentIndex() != ConfigurationShared::USE_GLOBAL_INDEX); + ui->api_widget, ui->api->currentIndex() != ConfigurationShared::USE_GLOBAL_INDEX); } }); connect(ui->device, qOverload<int>(&QComboBox::activated), this, [this](int device) { UpdateDeviceSelection(device); }); + connect(ui->backend, qOverload<int>(&QComboBox::activated), this, + [this](int backend) { UpdateShaderBackendSelection(backend); }); connect(ui->bg_button, &QPushButton::clicked, this, [this] { const QColor new_bg_color = QColorDialog::getColor(bg_color); @@ -61,12 +71,21 @@ void ConfigureGraphics::UpdateDeviceSelection(int device) { } } +void ConfigureGraphics::UpdateShaderBackendSelection(int backend) { + if (backend == -1) { + return; + } + if (GetCurrentGraphicsBackend() == Settings::RendererBackend::OpenGL) { + shader_backend = static_cast<Settings::ShaderBackend>(backend); + } +} + ConfigureGraphics::~ConfigureGraphics() = default; void ConfigureGraphics::SetConfiguration() { const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn(); - ui->api->setEnabled(runtime_lock); + ui->api_widget->setEnabled(runtime_lock); ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock); ui->use_disk_shader_cache->setEnabled(runtime_lock); ui->use_nvdec_emulation->setEnabled(runtime_lock); @@ -83,7 +102,7 @@ void ConfigureGraphics::SetConfiguration() { ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio.GetValue()); } else { ConfigurationShared::SetPerGameSetting(ui->api, &Settings::values.renderer_backend); - ConfigurationShared::SetHighlight(ui->api_layout, + ConfigurationShared::SetHighlight(ui->api_widget, !Settings::values.renderer_backend.UsingGlobal()); ConfigurationShared::SetPerGameSetting(ui->fullscreen_mode_combobox, @@ -100,11 +119,10 @@ void ConfigureGraphics::SetConfiguration() { ui->bg_button->setEnabled(!Settings::values.bg_red.UsingGlobal()); ConfigurationShared::SetHighlight(ui->bg_layout, !Settings::values.bg_red.UsingGlobal()); } - UpdateBackgroundColorButton(QColor::fromRgb(Settings::values.bg_red.GetValue(), Settings::values.bg_green.GetValue(), Settings::values.bg_blue.GetValue())); - UpdateDeviceComboBox(); + UpdateAPILayout(); } void ConfigureGraphics::ApplyConfiguration() { @@ -128,6 +146,9 @@ void ConfigureGraphics::ApplyConfiguration() { if (Settings::values.renderer_backend.UsingGlobal()) { Settings::values.renderer_backend.SetValue(GetCurrentGraphicsBackend()); } + if (Settings::values.shader_backend.UsingGlobal()) { + Settings::values.shader_backend.SetValue(shader_backend); + } if (Settings::values.vulkan_device.UsingGlobal()) { 
Settings::values.vulkan_device.SetValue(vulkan_device); } @@ -139,15 +160,22 @@ void ConfigureGraphics::ApplyConfiguration() { } else { if (ui->api->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { Settings::values.renderer_backend.SetGlobal(true); + Settings::values.shader_backend.SetGlobal(true); Settings::values.vulkan_device.SetGlobal(true); } else { Settings::values.renderer_backend.SetGlobal(false); Settings::values.renderer_backend.SetValue(GetCurrentGraphicsBackend()); - if (GetCurrentGraphicsBackend() == Settings::RendererBackend::Vulkan) { + switch (GetCurrentGraphicsBackend()) { + case Settings::RendererBackend::OpenGL: + Settings::values.shader_backend.SetGlobal(false); + Settings::values.vulkan_device.SetGlobal(true); + Settings::values.shader_backend.SetValue(shader_backend); + break; + case Settings::RendererBackend::Vulkan: + Settings::values.shader_backend.SetGlobal(true); Settings::values.vulkan_device.SetGlobal(false); Settings::values.vulkan_device.SetValue(vulkan_device); - } else { - Settings::values.vulkan_device.SetGlobal(true); + break; } } @@ -188,32 +216,32 @@ void ConfigureGraphics::UpdateBackgroundColorButton(QColor color) { ui->bg_button->setIcon(color_icon); } -void ConfigureGraphics::UpdateDeviceComboBox() { - ui->device->clear(); - - bool enabled = false; - +void ConfigureGraphics::UpdateAPILayout() { if (!Settings::IsConfiguringGlobal() && ui->api->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { + vulkan_device = Settings::values.vulkan_device.GetValue(true); + shader_backend = Settings::values.shader_backend.GetValue(true); + ui->device_widget->setEnabled(false); + ui->backend_widget->setEnabled(false); + } else { vulkan_device = Settings::values.vulkan_device.GetValue(); + shader_backend = Settings::values.shader_backend.GetValue(); + ui->device_widget->setEnabled(true); + ui->backend_widget->setEnabled(true); } + switch (GetCurrentGraphicsBackend()) { case Settings::RendererBackend::OpenGL: - ui->device->addItem(tr("OpenGL Graphics Device")); - enabled = false; + ui->backend->setCurrentIndex(static_cast<u32>(shader_backend)); + ui->device_widget->setVisible(false); + ui->backend_widget->setVisible(true); break; case Settings::RendererBackend::Vulkan: - for (const auto& device : vulkan_devices) { - ui->device->addItem(device); - } ui->device->setCurrentIndex(vulkan_device); - enabled = !vulkan_devices.empty(); + ui->device_widget->setVisible(true); + ui->backend_widget->setVisible(false); break; } - // If in per-game config and use global is selected, don't enable. 
- enabled &= !(!Settings::IsConfiguringGlobal() && - ui->api->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX); - ui->device->setEnabled(enabled && !Core::System::GetInstance().IsPoweredOn()); } void ConfigureGraphics::RetrieveVulkanDevices() try { diff --git a/src/yuzu/configuration/configure_graphics.h b/src/yuzu/configuration/configure_graphics.h index 6418115cf..c866b911b 100644 --- a/src/yuzu/configuration/configure_graphics.h +++ b/src/yuzu/configuration/configure_graphics.h @@ -34,8 +34,9 @@ private: void SetConfiguration(); void UpdateBackgroundColorButton(QColor color); - void UpdateDeviceComboBox(); + void UpdateAPILayout(); void UpdateDeviceSelection(int device); + void UpdateShaderBackendSelection(int backend); void RetrieveVulkanDevices(); @@ -53,4 +54,5 @@ private: std::vector<QString> vulkan_devices; u32 vulkan_device{}; + Settings::ShaderBackend shader_backend{}; }; diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index 5b999d84d..099ddbb7c 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui @@ -23,7 +23,7 @@ </property> <layout class="QVBoxLayout" name="verticalLayout_3"> <item> - <widget class="QWidget" name="api_layout" native="true"> + <widget class="QWidget" name="api_widget" native="true"> <layout class="QGridLayout" name="gridLayout"> <property name="leftMargin"> <number>0</number> @@ -40,37 +40,107 @@ <property name="horizontalSpacing"> <number>6</number> </property> - <item row="0" column="0"> - <widget class="QLabel" name="api_label"> - <property name="text"> - <string>API:</string> - </property> + <item row="4" column="0"> + <widget class="QWidget" name="backend_widget" native="true"> + <layout class="QHBoxLayout" name="backend_layout"> + <property name="leftMargin"> + <number>0</number> + </property> + <property name="topMargin"> + <number>0</number> + </property> + <property name="rightMargin"> + <number>0</number> + </property> + <property name="bottomMargin"> + <number>0</number> + </property> + <item> + <widget class="QLabel" name="backend_label"> + <property name="text"> + <string>Shader Backend:</string> + </property> + </widget> + </item> + <item> + <widget class="QComboBox" name="backend"/> + </item> + </layout> </widget> </item> - <item row="0" column="1"> - <widget class="QComboBox" name="api"> - <item> - <property name="text"> - <string notr="true">OpenGL</string> + <item row="2" column="0"> + <widget class="QWidget" name="device_widget" native="true"> + <layout class="QHBoxLayout" name="device_layout"> + <property name="leftMargin"> + <number>0</number> </property> - </item> - <item> - <property name="text"> - <string notr="true">Vulkan</string> + <property name="topMargin"> + <number>0</number> </property> - </item> + <property name="rightMargin"> + <number>0</number> + </property> + <property name="bottomMargin"> + <number>0</number> + </property> + <item> + <widget class="QLabel" name="device_label"> + <property name="text"> + <string>Device:</string> + </property> + </widget> + </item> + <item> + <widget class="QComboBox" name="device"/> + </item> + </layout> </widget> </item> - <item row="1" column="0"> - <widget class="QLabel" name="device_label"> - <property name="text"> - <string>Device:</string> - </property> + <item row="0" column="0"> + <widget class="QWidget" name="api_layout_2" native="true"> + <layout class="QHBoxLayout" name="api_layout"> + <property name="leftMargin"> + <number>0</number> + </property> + <property 
name="topMargin"> + <number>0</number> + </property> + <property name="rightMargin"> + <number>0</number> + </property> + <property name="bottomMargin"> + <number>0</number> + </property> + <item> + <widget class="QLabel" name="api_label"> + <property name="text"> + <string>API:</string> + </property> + </widget> + </item> + <item> + <widget class="QComboBox" name="api"> + <property name="sizePolicy"> + <sizepolicy hsizetype="Preferred" vsizetype="Fixed"> + <horstretch>0</horstretch> + <verstretch>0</verstretch> + </sizepolicy> + </property> + <item> + <property name="text"> + <string notr="true">OpenGL</string> + </property> + </item> + <item> + <property name="text"> + <string notr="true">Vulkan</string> + </property> + </item> + </widget> + </item> + </layout> </widget> </item> - <item row="1" column="1"> - <widget class="QComboBox" name="device"/> - </item> </layout> </widget> </item> diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp index a9e611125..38276feb1 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.cpp +++ b/src/yuzu/configuration/configure_graphics_advanced.cpp @@ -23,12 +23,10 @@ ConfigureGraphicsAdvanced::~ConfigureGraphicsAdvanced() = default; void ConfigureGraphicsAdvanced::SetConfiguration() { const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn(); ui->use_vsync->setEnabled(runtime_lock); - ui->use_assembly_shaders->setEnabled(runtime_lock); ui->use_asynchronous_shaders->setEnabled(runtime_lock); ui->anisotropic_filtering_combobox->setEnabled(runtime_lock); ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue()); - ui->use_assembly_shaders->setChecked(Settings::values.use_assembly_shaders.GetValue()); ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue()); ui->use_caches_gc->setChecked(Settings::values.use_caches_gc.GetValue()); ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue()); @@ -58,8 +56,6 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() { ConfigurationShared::ApplyPerGameSetting(&Settings::values.max_anisotropy, ui->anisotropic_filtering_combobox); ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_vsync, ui->use_vsync, use_vsync); - ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_assembly_shaders, - ui->use_assembly_shaders, use_assembly_shaders); ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_shaders, ui->use_asynchronous_shaders, use_asynchronous_shaders); @@ -100,7 +96,6 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { if (Settings::IsConfiguringGlobal()) { ui->gpu_accuracy->setEnabled(Settings::values.gpu_accuracy.UsingGlobal()); ui->use_vsync->setEnabled(Settings::values.use_vsync.UsingGlobal()); - ui->use_assembly_shaders->setEnabled(Settings::values.use_assembly_shaders.UsingGlobal()); ui->use_asynchronous_shaders->setEnabled( Settings::values.use_asynchronous_shaders.UsingGlobal()); ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal()); @@ -112,8 +107,6 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { } ConfigurationShared::SetColoredTristate(ui->use_vsync, Settings::values.use_vsync, use_vsync); - ConfigurationShared::SetColoredTristate( - ui->use_assembly_shaders, Settings::values.use_assembly_shaders, use_assembly_shaders); ConfigurationShared::SetColoredTristate(ui->use_asynchronous_shaders, Settings::values.use_asynchronous_shaders, use_asynchronous_shaders); diff --git 
a/src/yuzu/configuration/configure_graphics_advanced.h b/src/yuzu/configuration/configure_graphics_advanced.h index 9148aacf2..7356e6916 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.h +++ b/src/yuzu/configuration/configure_graphics_advanced.h @@ -35,7 +35,6 @@ private: std::unique_ptr<Ui::ConfigureGraphicsAdvanced> ui; ConfigurationShared::CheckState use_vsync; - ConfigurationShared::CheckState use_assembly_shaders; ConfigurationShared::CheckState use_asynchronous_shaders; ConfigurationShared::CheckState use_fast_gpu_time; ConfigurationShared::CheckState use_caches_gc; diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui index ad0840355..379dc5d2e 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.ui +++ b/src/yuzu/configuration/configure_graphics_advanced.ui @@ -77,22 +77,12 @@ </widget> </item> <item> - <widget class="QCheckBox" name="use_assembly_shaders"> - <property name="toolTip"> - <string>Enabling this reduces shader stutter. Enables OpenGL assembly shaders on supported Nvidia devices (NV_gpu_program5 is required). This feature is experimental.</string> - </property> - <property name="text"> - <string>Use assembly shaders (experimental, Nvidia OpenGL only)</string> - </property> - </widget> - </item> - <item> <widget class="QCheckBox" name="use_asynchronous_shaders"> <property name="toolTip"> <string>Enables asynchronous shader compilation, which may reduce shader stutter. This feature is experimental.</string> </property> <property name="text"> - <string>Use asynchronous shader building (experimental)</string> + <string>Use asynchronous shader building</string> </property> </widget> </item> @@ -144,22 +134,22 @@ </item> <item> <property name="text"> - <string>2x</string> + <string>2x (WILL BREAK THINGS)</string> </property> </item> <item> <property name="text"> - <string>4x</string> + <string>4x (WILL BREAK THINGS)</string> </property> </item> <item> <property name="text"> - <string>8x</string> + <string>8x (WILL BREAK THINGS)</string> </property> </item> <item> <property name="text"> - <string>16x</string> + <string>16x (WILL BREAK THINGS)</string> </property> </item> </widget> diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp index 76c063c97..f746bd85d 100644 --- a/src/yuzu/game_list.cpp +++ b/src/yuzu/game_list.cpp @@ -520,9 +520,11 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri QMenu* remove_menu = context_menu.addMenu(tr("Remove")); QAction* remove_update = remove_menu->addAction(tr("Remove Installed Update")); QAction* remove_dlc = remove_menu->addAction(tr("Remove All Installed DLC")); - QAction* remove_shader_cache = remove_menu->addAction(tr("Remove Shader Cache")); QAction* remove_custom_config = remove_menu->addAction(tr("Remove Custom Configuration")); + QAction* remove_gl_shader_cache = remove_menu->addAction(tr("Remove OpenGL Shader Cache")); + QAction* remove_vk_shader_cache = remove_menu->addAction(tr("Remove Vulkan Shader Cache")); remove_menu->addSeparator(); + QAction* remove_shader_cache = remove_menu->addAction(tr("Remove All Shader Caches")); QAction* remove_all_content = remove_menu->addAction(tr("Remove All Installed Contents")); QMenu* dump_romfs_menu = context_menu.addMenu(tr("Dump RomFS")); QAction* dump_romfs = dump_romfs_menu->addAction(tr("Dump RomFS")); @@ -540,6 +542,8 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri 
open_transferable_shader_cache->setVisible(program_id != 0); remove_update->setVisible(program_id != 0); remove_dlc->setVisible(program_id != 0); + remove_gl_shader_cache->setVisible(program_id != 0); + remove_vk_shader_cache->setVisible(program_id != 0); remove_shader_cache->setVisible(program_id != 0); remove_all_content->setVisible(program_id != 0); auto it = FindMatchingCompatibilityEntry(compatibility_list, program_id); @@ -569,8 +573,14 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri connect(remove_dlc, &QAction::triggered, [this, program_id]() { emit RemoveInstalledEntryRequested(program_id, InstalledEntryType::AddOnContent); }); + connect(remove_gl_shader_cache, &QAction::triggered, [this, program_id, path]() { + emit RemoveFileRequested(program_id, GameListRemoveTarget::GlShaderCache, path); + }); + connect(remove_vk_shader_cache, &QAction::triggered, [this, program_id, path]() { + emit RemoveFileRequested(program_id, GameListRemoveTarget::VkShaderCache, path); + }); connect(remove_shader_cache, &QAction::triggered, [this, program_id, path]() { - emit RemoveFileRequested(program_id, GameListRemoveTarget::ShaderCache, path); + emit RemoveFileRequested(program_id, GameListRemoveTarget::AllShaderCache, path); }); connect(remove_custom_config, &QAction::triggered, [this, program_id, path]() { emit RemoveFileRequested(program_id, GameListRemoveTarget::CustomConfiguration, path); diff --git a/src/yuzu/game_list.h b/src/yuzu/game_list.h index c9a9f4654..10339dcca 100644 --- a/src/yuzu/game_list.h +++ b/src/yuzu/game_list.h @@ -41,7 +41,9 @@ enum class GameListOpenTarget { }; enum class GameListRemoveTarget { - ShaderCache, + GlShaderCache, + VkShaderCache, + AllShaderCache, CustomConfiguration, }; diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 03a909d17..a5159a1ee 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -1654,35 +1654,15 @@ void GMainWindow::OnGameListOpenFolder(u64 program_id, GameListOpenTarget target void GMainWindow::OnTransferableShaderCacheOpenFile(u64 program_id) { const auto shader_cache_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir); - const auto transferable_shader_cache_folder_path = shader_cache_dir / "opengl" / "transferable"; - const auto transferable_shader_cache_file_path = - transferable_shader_cache_folder_path / fmt::format("{:016X}.bin", program_id); - - if (!Common::FS::Exists(transferable_shader_cache_file_path)) { + const auto shader_cache_folder_path{shader_cache_dir / fmt::format("{:016x}", program_id)}; + if (!Common::FS::CreateDirs(shader_cache_folder_path)) { QMessageBox::warning(this, tr("Error Opening Transferable Shader Cache"), - tr("A shader cache for this title does not exist.")); + tr("Failed to create the shader cache directory for this title.")); return; } - - const auto qt_shader_cache_folder_path = - QString::fromStdString(Common::FS::PathToUTF8String(transferable_shader_cache_folder_path)); - const auto qt_shader_cache_file_path = - QString::fromStdString(Common::FS::PathToUTF8String(transferable_shader_cache_file_path)); - - // Windows supports opening a folder with selecting a specified file in explorer. On every other - // OS we just open the transferable shader cache folder without preselecting the transferable - // shader cache file for the selected game. 
-#if defined(Q_OS_WIN) - const QString explorer = QStringLiteral("explorer"); - QStringList param; - if (!QFileInfo(qt_shader_cache_file_path).isDir()) { - param << QStringLiteral("/select,"); - } - param << QDir::toNativeSeparators(qt_shader_cache_file_path); - QProcess::startDetached(explorer, param); -#else - QDesktopServices::openUrl(QUrl::fromLocalFile(qt_shader_cache_folder_path)); -#endif + const auto shader_path_string{Common::FS::PathToUTF8String(shader_cache_folder_path)}; + const auto qt_shader_cache_path = QString::fromStdString(shader_path_string); + QDesktopServices::openUrl(QUrl::fromLocalFile(qt_shader_cache_path)); } static std::size_t CalculateRomFSEntrySize(const FileSys::VirtualDir& dir, bool full) { @@ -1825,8 +1805,12 @@ void GMainWindow::OnGameListRemoveFile(u64 program_id, GameListRemoveTarget targ const std::string& game_path) { const QString question = [this, target] { switch (target) { - case GameListRemoveTarget::ShaderCache: - return tr("Delete Transferable Shader Cache?"); + case GameListRemoveTarget::GlShaderCache: + return tr("Delete OpenGL Transferable Shader Cache?"); + case GameListRemoveTarget::VkShaderCache: + return tr("Delete Vulkan Transferable Shader Cache?"); + case GameListRemoveTarget::AllShaderCache: + return tr("Delete All Transferable Shader Caches?"); case GameListRemoveTarget::CustomConfiguration: return tr("Remove Custom Game Configuration?"); default: @@ -1840,8 +1824,12 @@ void GMainWindow::OnGameListRemoveFile(u64 program_id, GameListRemoveTarget targ } switch (target) { - case GameListRemoveTarget::ShaderCache: - RemoveTransferableShaderCache(program_id); + case GameListRemoveTarget::GlShaderCache: + case GameListRemoveTarget::VkShaderCache: + RemoveTransferableShaderCache(program_id, target); + break; + case GameListRemoveTarget::AllShaderCache: + RemoveAllTransferableShaderCaches(program_id); break; case GameListRemoveTarget::CustomConfiguration: RemoveCustomConfiguration(program_id, game_path); @@ -1849,18 +1837,27 @@ void GMainWindow::OnGameListRemoveFile(u64 program_id, GameListRemoveTarget targ } } -void GMainWindow::RemoveTransferableShaderCache(u64 program_id) { +void GMainWindow::RemoveTransferableShaderCache(u64 program_id, GameListRemoveTarget target) { + const auto target_file_name = [target] { + switch (target) { + case GameListRemoveTarget::GlShaderCache: + return "opengl.bin"; + case GameListRemoveTarget::VkShaderCache: + return "vulkan.bin"; + default: + return ""; + } + }(); const auto shader_cache_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir); - const auto transferable_shader_cache_file_path = - shader_cache_dir / "opengl" / "transferable" / fmt::format("{:016X}.bin", program_id); + const auto shader_cache_folder_path = shader_cache_dir / fmt::format("{:016x}", program_id); + const auto target_file = shader_cache_folder_path / target_file_name; - if (!Common::FS::Exists(transferable_shader_cache_file_path)) { + if (!Common::FS::Exists(target_file)) { QMessageBox::warning(this, tr("Error Removing Transferable Shader Cache"), tr("A shader cache for this title does not exist.")); return; } - - if (Common::FS::RemoveFile(transferable_shader_cache_file_path)) { + if (Common::FS::RemoveFile(target_file)) { QMessageBox::information(this, tr("Successfully Removed"), tr("Successfully removed the transferable shader cache.")); } else { @@ -1869,6 +1866,24 @@ void GMainWindow::RemoveTransferableShaderCache(u64 program_id) { } } +void GMainWindow::RemoveAllTransferableShaderCaches(u64 program_id) { + const auto 
shader_cache_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir); + const auto program_shader_cache_dir = shader_cache_dir / fmt::format("{:016x}", program_id); + + if (!Common::FS::Exists(program_shader_cache_dir)) { + QMessageBox::warning(this, tr("Error Removing Transferable Shader Caches"), + tr("A shader cache for this title does not exist.")); + return; + } + if (Common::FS::RemoveDirRecursively(program_shader_cache_dir)) { + QMessageBox::information(this, tr("Successfully Removed"), + tr("Successfully removed the transferable shader caches.")); + } else { + QMessageBox::warning(this, tr("Error Removing Transferable Shader Caches"), + tr("Failed to remove the transferable shader cache directory.")); + } +} + void GMainWindow::RemoveCustomConfiguration(u64 program_id, const std::string& game_path) { const auto file_path = std::filesystem::path(Common::FS::ToU8String(game_path)); const auto config_file_name = @@ -2900,13 +2915,13 @@ void GMainWindow::UpdateStatusBar() { return; } - auto results = Core::System::GetInstance().GetAndResetPerfStats(); - auto& shader_notify = Core::System::GetInstance().GPU().ShaderNotify(); - const auto shaders_building = shader_notify.GetShadersBuilding(); + auto& system = Core::System::GetInstance(); + auto results = system.GetAndResetPerfStats(); + auto& shader_notify = system.GPU().ShaderNotify(); + const int shaders_building = shader_notify.ShadersBuilding(); - if (shaders_building != 0) { - shader_building_label->setText( - tr("Building: %n shader(s)", "", static_cast<int>(shaders_building))); + if (shaders_building > 0) { + shader_building_label->setText(tr("Building: %n shader(s)", "", shaders_building)); shader_building_label->setVisible(true); } else { shader_building_label->setVisible(false); diff --git a/src/yuzu/main.h b/src/yuzu/main.h index a50e5b9fe..3eb6aed56 100644 --- a/src/yuzu/main.h +++ b/src/yuzu/main.h @@ -282,7 +282,8 @@ private: void RemoveBaseContent(u64 program_id, const QString& entry_type); void RemoveUpdateContent(u64 program_id, const QString& entry_type); void RemoveAddOnContent(u64 program_id, const QString& entry_type); - void RemoveTransferableShaderCache(u64 program_id); + void RemoveTransferableShaderCache(u64 program_id, GameListRemoveTarget target); + void RemoveAllTransferableShaderCaches(u64 program_id); void RemoveCustomConfiguration(u64 program_id, const std::string& game_path); std::optional<u64> SelectRomFSDumpTarget(const FileSys::ContentProvider&, u64 program_id); InstallResult InstallNSPXCI(const QString& filename); diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 3e22fee37..640d7d111 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -444,6 +444,8 @@ void Config::ReadValues() { // Renderer ReadSetting("Renderer", Settings::values.renderer_backend); ReadSetting("Renderer", Settings::values.renderer_debug); + ReadSetting("Renderer", Settings::values.enable_nsight_aftermath); + ReadSetting("Renderer", Settings::values.disable_shader_loop_safety_checks); ReadSetting("Renderer", Settings::values.vulkan_device); ReadSetting("Renderer", Settings::values.fullscreen_mode); @@ -456,7 +458,7 @@ void Config::ReadValues() { ReadSetting("Renderer", Settings::values.use_asynchronous_gpu_emulation); ReadSetting("Renderer", Settings::values.use_vsync); ReadSetting("Renderer", Settings::values.disable_fps_limit); - ReadSetting("Renderer", Settings::values.use_assembly_shaders); + ReadSetting("Renderer", Settings::values.shader_backend); ReadSetting("Renderer", 
Settings::values.use_asynchronous_shaders); ReadSetting("Renderer", Settings::values.use_nvdec_emulation); ReadSetting("Renderer", Settings::values.accelerate_astc); diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index 88d33ecab..b7115b06a 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h @@ -221,6 +221,14 @@ backend = # 0 (default): Disabled, 1: Enabled debug = +# Enable Nsight Aftermath crash dumps +# 0 (default): Disabled, 1: Enabled +nsight_aftermath = + +# Disable shader loop safety checks, executing the shader without loop logic changes +# 0 (default): Disabled, 1: Enabled +disable_shader_loop_safety_checks = + # Which Vulkan physical device to use (defaults to 0) vulkan_device = @@ -240,9 +248,10 @@ max_anisotropy = # 0 (default): Off, 1: On use_vsync = -# Whether to use OpenGL assembly shaders or not. NV_gpu_program5 is required. -# 0: Off, 1 (default): On -use_assembly_shaders = +# Selects the OpenGL shader backend. NV_gpu_program5 is required for GLASM. If NV_gpu_program5 is +# not available and GLASM is selected, GLSL will be used. +# 0: GLSL, 1 (default): GLASM, 2: SPIR-V +shader_backend = # Whether to allow asynchronous shader building. # 0 (default): Off, 1: On diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp index ac4ea88d3..35ce23696 100644 --- a/src/yuzu_cmd/yuzu.cpp +++ b/src/yuzu_cmd/yuzu.cpp @@ -218,9 +218,11 @@ int main(int argc, char** argv) { // Core is loaded, start the GPU (makes the GPU contexts current to this thread) system.GPU().Start(); - system.Renderer().ReadRasterizer()->LoadDiskResources( - system.CurrentProcess()->GetTitleID(), std::stop_token{}, - [](VideoCore::LoadCallbackStage, size_t value, size_t total) {}); + if (Settings::values.use_disk_shader_cache.GetValue()) { + system.Renderer().ReadRasterizer()->LoadDiskResources( + system.CurrentProcess()->GetTitleID(), std::stop_token{}, + [](VideoCore::LoadCallbackStage, size_t value, size_t total) {}); + } void(system.Run()); while (emu_window->IsOpen()) { |