diff options
20 files changed, 260 insertions, 93 deletions
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 6047f3ebe..fbd4ec6dc 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -32,6 +32,7 @@ add_library(shader_recompiler STATIC frontend/ir/ir_emitter.h frontend/ir/microinstruction.cpp frontend/ir/microinstruction.h + frontend/ir/modifiers.h frontend/ir/opcodes.cpp frontend/ir/opcodes.h frontend/ir/opcodes.inc @@ -94,9 +95,7 @@ add_library(shader_recompiler STATIC shader_info.h ) -target_include_directories(shader_recompiler PRIVATE sirit) -target_link_libraries(shader_recompiler PRIVATE fmt::fmt sirit) -target_link_libraries(shader_recompiler INTERFACE fmt::fmt sirit) +target_link_libraries(shader_recompiler PUBLIC fmt::fmt sirit) add_executable(shader_util main.cpp) target_link_libraries(shader_util PRIVATE shader_recompiler) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 4ce07c281..2519e446a 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -14,8 +14,6 @@ #include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/program.h" -#pragma optimize("", off) - namespace Shader::Backend::SPIRV { namespace { template <class Func> @@ -113,9 +111,61 @@ Id TypeId(const EmitContext& ctx, IR::Type type) { throw NotImplementedException("Phi node type {}", type); } } + +void SetupDenormControl(const Profile& profile, const IR::Program& program, EmitContext& ctx, + Id main_func) { + if (!profile.support_float_controls) { + return; + } + const Info& info{program.info}; + if (!info.uses_fp32_denorms_flush && !info.uses_fp32_denorms_preserve && + !info.uses_fp16_denorms_flush && !info.uses_fp16_denorms_preserve) { + return; + } + ctx.AddExtension("SPV_KHR_float_controls"); + + if (info.uses_fp32_denorms_flush && info.uses_fp32_denorms_preserve) { + // LOG_ERROR(HW_GPU, "Fp32 denorm flush and preserve on the same shader"); + } else if (info.uses_fp32_denorms_flush) { + if (profile.support_fp32_denorm_flush) { + ctx.AddCapability(spv::Capability::DenormFlushToZero); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormFlushToZero, 32U); + } else { + // Drivers will most likely flush denorms by default, no need to warn + } + } else if (info.uses_fp32_denorms_preserve) { + if (profile.support_fp32_denorm_preserve) { + ctx.AddCapability(spv::Capability::DenormPreserve); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 32U); + } else { + // LOG_WARNING(HW_GPU, "Fp32 denorm preserve used in shader without host support"); + } + } + if (!profile.support_separate_denorm_behavior) { + // No separate denorm behavior + return; + } + if (info.uses_fp16_denorms_flush && info.uses_fp16_denorms_preserve) { + // LOG_ERROR(HW_GPU, "Fp16 denorm flush and preserve on the same shader"); + } else if (info.uses_fp16_denorms_flush) { + if (profile.support_fp16_denorm_flush) { + ctx.AddCapability(spv::Capability::DenormFlushToZero); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 16U); + } else { + // Same as fp32, no need to warn as most drivers will flush by default + } + } else if (info.uses_fp32_denorms_preserve) { + if (profile.support_fp16_denorm_preserve) { + ctx.AddCapability(spv::Capability::DenormPreserve); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 16U); + } else { + // LOG_WARNING(HW_GPU, "Fp16 denorm preserve used in shader without host support"); + } + } +} } // Anonymous namespace -std::vector<u32> EmitSPIRV(Environment& env, IR::Program& program) { +std::vector<u32> EmitSPIRV(const Profile& profile, Environment& env, IR::Program& program) { EmitContext ctx{program}; const Id void_function{ctx.TypeFunction(ctx.void_id)}; // FIXME: Forward declare functions (needs sirit support) @@ -131,10 +181,11 @@ std::vector<u32> EmitSPIRV(Environment& env, IR::Program& program) { ctx.OpFunctionEnd(); } boost::container::small_vector<Id, 32> interfaces; - if (program.info.uses_workgroup_id) { + const Info& info{program.info}; + if (info.uses_workgroup_id) { interfaces.push_back(ctx.workgroup_id); } - if (program.info.uses_local_invocation_id) { + if (info.uses_local_invocation_id) { interfaces.push_back(ctx.local_invocation_id); } const std::span interfaces_span(interfaces.data(), interfaces.size()); @@ -144,6 +195,8 @@ std::vector<u32> EmitSPIRV(Environment& env, IR::Program& program) { ctx.AddExecutionMode(func, spv::ExecutionMode::LocalSize, workgroup_size[0], workgroup_size[1], workgroup_size[2]); + SetupDenormControl(profile, program, ctx, func); + return ctx.Assemble(); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 2b59c0b72..de624a151 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -11,10 +11,12 @@ #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/profile.h" namespace Shader::Backend::SPIRV { -[[nodiscard]] std::vector<u32> EmitSPIRV(Environment& env, IR::Program& program); +[[nodiscard]] std::vector<u32> EmitSPIRV(const Profile& profile, Environment& env, + IR::Program& program); // Microinstruction emitters Id EmitPhi(EmitContext& ctx, IR::Inst* inst); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index 9ef180531..c9687de37 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -13,7 +13,10 @@ Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) { ctx.Decorate(op, spv::Decoration::NoContraction); } switch (flags.rounding) { + case IR::FpRounding::DontCare: + break; case IR::FpRounding::RN: + ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTE); break; case IR::FpRounding::RM: ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTN); @@ -25,9 +28,6 @@ Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) { ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTZ); break; } - if (flags.fmz_mode != IR::FmzMode::FTZ) { - throw NotImplementedException("Denorm management not implemented"); - } return op; } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 559ab9cca..8f120a2f6 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -558,53 +558,53 @@ F16F32F64 IREmitter::FPSaturate(const F16F32F64& value) { } } -F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value) { +F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value, FpControl control) { switch (value.Type()) { case Type::F16: - return Inst<F16>(Opcode::FPRoundEven16, value); + return Inst<F16>(Opcode::FPRoundEven16, Flags{control}, value); case Type::F32: - return Inst<F32>(Opcode::FPRoundEven32, value); + return Inst<F32>(Opcode::FPRoundEven32, Flags{control}, value); case Type::F64: - return Inst<F64>(Opcode::FPRoundEven64, value); + return Inst<F64>(Opcode::FPRoundEven64, Flags{control}, value); default: ThrowInvalidType(value.Type()); } } -F16F32F64 IREmitter::FPFloor(const F16F32F64& value) { +F16F32F64 IREmitter::FPFloor(const F16F32F64& value, FpControl control) { switch (value.Type()) { case Type::F16: - return Inst<F16>(Opcode::FPFloor16, value); + return Inst<F16>(Opcode::FPFloor16, Flags{control}, value); case Type::F32: - return Inst<F32>(Opcode::FPFloor32, value); + return Inst<F32>(Opcode::FPFloor32, Flags{control}, value); case Type::F64: - return Inst<F64>(Opcode::FPFloor64, value); + return Inst<F64>(Opcode::FPFloor64, Flags{control}, value); default: ThrowInvalidType(value.Type()); } } -F16F32F64 IREmitter::FPCeil(const F16F32F64& value) { +F16F32F64 IREmitter::FPCeil(const F16F32F64& value, FpControl control) { switch (value.Type()) { case Type::F16: - return Inst<F16>(Opcode::FPCeil16, value); + return Inst<F16>(Opcode::FPCeil16, Flags{control}, value); case Type::F32: - return Inst<F32>(Opcode::FPCeil32, value); + return Inst<F32>(Opcode::FPCeil32, Flags{control}, value); case Type::F64: - return Inst<F64>(Opcode::FPCeil64, value); + return Inst<F64>(Opcode::FPCeil64, Flags{control}, value); default: ThrowInvalidType(value.Type()); } } -F16F32F64 IREmitter::FPTrunc(const F16F32F64& value) { +F16F32F64 IREmitter::FPTrunc(const F16F32F64& value, FpControl control) { switch (value.Type()) { case Type::F16: - return Inst<F16>(Opcode::FPTrunc16, value); + return Inst<F16>(Opcode::FPTrunc16, Flags{control}, value); case Type::F32: - return Inst<F32>(Opcode::FPTrunc32, value); + return Inst<F32>(Opcode::FPTrunc32, Flags{control}, value); case Type::F64: - return Inst<F64>(Opcode::FPTrunc64, value); + return Inst<F64>(Opcode::FPTrunc64, Flags{control}, value); default: ThrowInvalidType(value.Type()); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 24b012a39..959f4f9da 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -129,10 +129,10 @@ public: [[nodiscard]] F32 FPSinNotReduced(const F32& value); [[nodiscard]] F32 FPSqrt(const F32& value); [[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value); - [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value); - [[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value); - [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value); - [[nodiscard]] F16F32F64 FPTrunc(const F16F32F64& value); + [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value, FpControl control = {}); + [[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value, FpControl control = {}); + [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {}); + [[nodiscard]] F16F32F64 FPTrunc(const F16F32F64& value, FpControl control = {}); [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b); [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b); diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h index c288eede0..44652eae7 100644 --- a/src/shader_recompiler/frontend/ir/modifiers.h +++ b/src/shader_recompiler/frontend/ir/modifiers.h @@ -4,25 +4,30 @@ #pragma once +#include "common/common_types.h" + namespace Shader::IR { enum class FmzMode : u8 { - None, // Denorms are not flushed, NAN is propagated (nouveau) - FTZ, // Flush denorms to zero, NAN is propagated (D3D11, NVN, GL, VK) - FMZ, // Flush denorms to zero, x * 0 == 0 (D3D9) + DontCare, // Not specified for this instruction + FTZ, // Flush denorms to zero, NAN is propagated (D3D11, NVN, GL, VK) + FMZ, // Flush denorms to zero, x * 0 == 0 (D3D9) + None, // Denorms are not flushed, NAN is propagated (nouveau) }; enum class FpRounding : u8 { - RN, // Round to nearest even, - RM, // Round towards negative infinity - RP, // Round towards positive infinity - RZ, // Round towards zero + DontCare, // Not specified for this instruction + RN, // Round to nearest even, + RM, // Round towards negative infinity + RP, // Round towards positive infinity + RZ, // Round towards zero }; struct FpControl { bool no_contraction{false}; - FpRounding rounding{FpRounding::RN}; - FmzMode fmz_mode{FmzMode::FTZ}; + FpRounding rounding{FpRounding::DontCare}; + FmzMode fmz_mode{FmzMode::DontCare}; }; static_assert(sizeof(FpControl) <= sizeof(u32)); + } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp index ae2d37405..4d82a0009 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp @@ -81,17 +81,28 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { // F2I is used to convert from a floating point value to an integer const F2I f2i{insn}; + const bool denorm_cares{f2i.src_format != SrcFormat::F16 && f2i.src_format != SrcFormat::F64 && + f2i.dest_format != DestFormat::I64}; + IR::FmzMode fmz_mode{IR::FmzMode::DontCare}; + if (denorm_cares) { + fmz_mode = f2i.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None; + } + const IR::FpControl fp_control{ + .no_contraction{true}, + .rounding{IR::FpRounding::DontCare}, + .fmz_mode{fmz_mode}, + }; const IR::F16F32F64 op_a{v.ir.FPAbsNeg(src_a, f2i.abs != 0, f2i.neg != 0)}; const IR::F16F32F64 rounded_value{[&] { switch (f2i.rounding) { case Rounding::Round: - return v.ir.FPRoundEven(op_a); + return v.ir.FPRoundEven(op_a, fp_control); case Rounding::Floor: - return v.ir.FPFloor(op_a); + return v.ir.FPFloor(op_a, fp_control); case Rounding::Ceil: - return v.ir.FPCeil(op_a); + return v.ir.FPCeil(op_a, fp_control); case Rounding::Trunc: - return v.ir.FPTrunc(op_a); + return v.ir.FPTrunc(op_a, fp_control); default: throw NotImplementedException("Invalid F2I rounding {}", f2i.rounding.Value()); } diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index f7f102f53..6662ef4cd 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -2,23 +2,28 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/ir/modifiers.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/shader_info.h" namespace Shader::Optimization { namespace { -void AddConstantBufferDescriptor(Info& info, u32 index) { - auto& descriptor{info.constant_buffers.at(index)}; - if (descriptor) { +void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) { + if (count != 1) { + throw NotImplementedException("Constant buffer descriptor indexing"); + } + if ((info.constant_buffer_mask & (1U << index)) != 0) { return; } - descriptor = &info.constant_buffer_descriptors.emplace_back(Info::ConstantBufferDescriptor{ + info.constant_buffer_mask |= 1U << index; + info.constant_buffer_descriptors.push_back({ .index{index}, .count{1}, }); } -void Visit(Info& info, IR::Inst& inst) { +void VisitUsages(Info& info, IR::Inst& inst) { switch (inst.Opcode()) { case IR::Opcode::WorkgroupId: info.uses_workgroup_id = true; @@ -72,7 +77,7 @@ void Visit(Info& info, IR::Inst& inst) { break; case IR::Opcode::GetCbuf: if (const IR::Value index{inst.Arg(0)}; index.IsImmediate()) { - AddConstantBufferDescriptor(info, index.U32()); + AddConstantBufferDescriptor(info, index.U32(), 1); } else { throw NotImplementedException("Constant buffer with non-immediate index"); } @@ -81,6 +86,60 @@ void Visit(Info& info, IR::Inst& inst) { break; } } + +void VisitFpModifiers(Info& info, IR::Inst& inst) { + switch (inst.Opcode()) { + case IR::Opcode::FPAdd16: + case IR::Opcode::FPFma16: + case IR::Opcode::FPMul16: + case IR::Opcode::FPRoundEven16: + case IR::Opcode::FPFloor16: + case IR::Opcode::FPCeil16: + case IR::Opcode::FPTrunc16: { + const auto control{inst.Flags<IR::FpControl>()}; + switch (control.fmz_mode) { + case IR::FmzMode::DontCare: + break; + case IR::FmzMode::FTZ: + case IR::FmzMode::FMZ: + info.uses_fp16_denorms_flush = true; + break; + case IR::FmzMode::None: + info.uses_fp16_denorms_preserve = true; + break; + } + break; + } + case IR::Opcode::FPAdd32: + case IR::Opcode::FPFma32: + case IR::Opcode::FPMul32: + case IR::Opcode::FPRoundEven32: + case IR::Opcode::FPFloor32: + case IR::Opcode::FPCeil32: + case IR::Opcode::FPTrunc32: { + const auto control{inst.Flags<IR::FpControl>()}; + switch (control.fmz_mode) { + case IR::FmzMode::DontCare: + break; + case IR::FmzMode::FTZ: + case IR::FmzMode::FMZ: + info.uses_fp32_denorms_flush = true; + break; + case IR::FmzMode::None: + info.uses_fp32_denorms_preserve = true; + break; + } + break; + } + default: + break; + } +} + +void Visit(Info& info, IR::Inst& inst) { + VisitUsages(info, inst); + VisitFpModifiers(info, inst); +} } // Anonymous namespace void CollectShaderInfoPass(IR::Program& program) { diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index bf230a850..03bd547b7 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -351,7 +351,6 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) { .cbuf_offset{storage_buffer.offset}, .count{1}, }); - info.storage_buffers[storage_index] = &info.storage_buffers_descriptors.back(); ++storage_index; } for (const StorageInst& storage_inst : to_replace) { diff --git a/src/shader_recompiler/main.cpp b/src/shader_recompiler/main.cpp index abd44e323..72565f477 100644 --- a/src/shader_recompiler/main.cpp +++ b/src/shader_recompiler/main.cpp @@ -60,6 +60,17 @@ void RunDatabase() { fmt::print(stdout, "{} ms", duration_cast<milliseconds>(t - t0).count() / double(N)); } +static constexpr Profile PROFILE{ + .unified_descriptor_binding = true, + .support_float_controls = true, + .support_separate_denorm_behavior = true, + .support_separate_rounding_mode = true, + .support_fp16_denorm_preserve = true, + .support_fp32_denorm_preserve = true, + .support_fp16_denorm_flush = true, + .support_fp32_denorm_flush = true, +}; + int main() { // RunDatabase(); @@ -76,7 +87,7 @@ int main() { fmt::print(stdout, "{}\n", cfg.Dot()); IR::Program program{TranslateProgram(inst_pool, block_pool, env, cfg)}; fmt::print(stdout, "{}\n", IR::DumpProgram(program)); - const std::vector<u32> spirv{Backend::SPIRV::EmitSPIRV(env, program)}; + const std::vector<u32> spirv{Backend::SPIRV::EmitSPIRV(PROFILE, env, program)}; std::FILE* const file{std::fopen("D:\\shader.spv", "wb")}; std::fwrite(spirv.data(), spirv.size(), sizeof(u32), file); std::fclose(file); diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index c96d783b7..9881bebab 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -7,7 +7,14 @@ namespace Shader { struct Profile { - bool unified_descriptor_binding; + bool unified_descriptor_binding{}; + bool support_float_controls{}; + bool support_separate_denorm_behavior{}; + bool support_separate_rounding_mode{}; + bool support_fp16_denorm_preserve{}; + bool support_fp32_denorm_preserve{}; + bool support_fp16_denorm_flush{}; + bool support_fp32_denorm_flush{}; }; } // namespace Shader diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index b25081e39..527e19c27 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -14,14 +14,15 @@ namespace Shader { -std::pair<Info, std::vector<u32>> RecompileSPIRV(Environment& env, u32 start_address) { +std::pair<Info, std::vector<u32>> RecompileSPIRV(const Profile& profile, Environment& env, + u32 start_address) { ObjectPool<Maxwell::Flow::Block> flow_block_pool; ObjectPool<IR::Inst> inst_pool; ObjectPool<IR::Block> block_pool; Maxwell::Flow::CFG cfg{env, flow_block_pool, start_address}; IR::Program program{Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg)}; - return {std::move(program.info), Backend::SPIRV::EmitSPIRV(env, program)}; + return {std::move(program.info), Backend::SPIRV::EmitSPIRV(profile, env, program)}; } } // namespace Shader diff --git a/src/shader_recompiler/recompiler.h b/src/shader_recompiler/recompiler.h index 4cb973878..2529463ae 100644 --- a/src/shader_recompiler/recompiler.h +++ b/src/shader_recompiler/recompiler.h @@ -9,10 +9,12 @@ #include "common/common_types.h" #include "shader_recompiler/environment.h" +#include "shader_recompiler/profile.h" #include "shader_recompiler/shader_info.h" namespace Shader { -[[nodiscard]] std::pair<Info, std::vector<u32>> RecompileSPIRV(Environment& env, u32 start_address); +[[nodiscard]] std::pair<Info, std::vector<u32>> RecompileSPIRV(const Profile& profile, + Environment& env, u32 start_address); } // namespace Shader diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index f49a79368..8766bf13e 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -31,14 +31,15 @@ struct Info { bool uses_local_invocation_id{}; bool uses_fp16{}; bool uses_fp64{}; + bool uses_fp16_denorms_flush{}; + bool uses_fp16_denorms_preserve{}; + bool uses_fp32_denorms_flush{}; + bool uses_fp32_denorms_preserve{}; u32 constant_buffer_mask{}; - std::array<ConstantBufferDescriptor*, MAX_CBUFS> constant_buffers{}; boost::container::static_vector<ConstantBufferDescriptor, MAX_CBUFS> constant_buffer_descriptors; - - std::array<StorageBufferDescriptor*, MAX_SSBOS> storage_buffers{}; boost::container::static_vector<StorageBufferDescriptor, MAX_SSBOS> storage_buffers_descriptors; }; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 588ce6139..a658a3276 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -131,12 +131,7 @@ ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descrip })} {} void ComputePipeline::ConfigureBufferCache(BufferCache& buffer_cache) { - u32 enabled_uniforms{}; - for (const auto& desc : info.constant_buffer_descriptors) { - enabled_uniforms |= ((1ULL << desc.count) - 1) << desc.index; - } - buffer_cache.SetEnabledComputeUniformBuffers(enabled_uniforms); - + buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); buffer_cache.UnbindComputeStorageBuffers(); size_t index{}; for (const auto& desc : info.storage_buffers_descriptors) { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index c2a41a360..49ff911d6 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -177,7 +177,20 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) { if (const std::optional<u128> cached_hash{env.Analyze(qmd.program_start)}) { // TODO: Load from cache } - const auto [info, code]{Shader::RecompileSPIRV(env, qmd.program_start)}; + const auto& float_control{device.FloatControlProperties()}; + const Shader::Profile profile{ + .unified_descriptor_binding = true, + .support_float_controls = true, + .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == + VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, + .support_separate_rounding_mode = + float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, + .support_fp16_denorm_preserve = float_control.shaderDenormPreserveFloat16 != VK_FALSE, + .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE, + .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE, + .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE, + }; + const auto [info, code]{Shader::RecompileSPIRV(profile, env, qmd.program_start)}; FILE* file = fopen("D:\\shader.spv", "wb"); fwrite(code.data(), 4, code.size(), file); diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 85f903125..4887d6fd9 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -43,6 +43,7 @@ constexpr std::array REQUIRED_EXTENSIONS{ VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME, VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME, + VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME, VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, @@ -200,6 +201,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR CheckSuitability(surface != nullptr); SetupFamilies(surface); SetupFeatures(); + SetupProperties(); const auto queue_cis = GetDeviceQueueCreateInfos(); const std::vector extensions = LoadExtensions(surface != nullptr); @@ -426,8 +428,6 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR graphics_queue = logical.GetQueue(graphics_family); present_queue = logical.GetQueue(present_family); - - use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue(); } Device::~Device() = default; @@ -600,7 +600,7 @@ void Device::CheckSuitability(bool requires_swapchain) const { VkPhysicalDeviceRobustness2FeaturesEXT robustness2{}; robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; - VkPhysicalDeviceFeatures2 features2{}; + VkPhysicalDeviceFeatures2KHR features2{}; features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; features2.pNext = &robustness2; @@ -684,7 +684,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { true); } } - VkPhysicalDeviceFeatures2KHR features; + VkPhysicalDeviceFeatures2KHR features{}; features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR; VkPhysicalDeviceProperties2KHR physical_properties; @@ -806,11 +806,21 @@ void Device::SetupFamilies(VkSurfaceKHR surface) { } void Device::SetupFeatures() { - const auto supported_features{physical.GetFeatures()}; - is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat; - is_shader_storage_image_multisample = supported_features.shaderStorageImageMultisample; + const VkPhysicalDeviceFeatures features{physical.GetFeatures()}; + is_formatless_image_load_supported = features.shaderStorageImageReadWithoutFormat; + is_shader_storage_image_multisample = features.shaderStorageImageMultisample; is_blit_depth_stencil_supported = TestDepthStencilBlits(); - is_optimal_astc_supported = IsOptimalAstcSupported(supported_features); + is_optimal_astc_supported = IsOptimalAstcSupported(features); +} + +void Device::SetupProperties() { + float_controls.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES_KHR; + + VkPhysicalDeviceProperties2KHR properties2{}; + properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; + properties2.pNext = &float_controls; + + physical.GetProperties2KHR(properties2); } void Device::CollectTelemetryParameters() { diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 96c0f8c60..82bccc8f0 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -128,6 +128,11 @@ public: return properties.limits.maxComputeSharedMemorySize; } + /// Returns float control properties of the device. + const VkPhysicalDeviceFloatControlsPropertiesKHR& FloatControlProperties() const { + return float_controls; + } + /// Returns true if ASTC is natively supported. bool IsOptimalAstcSupported() const { return is_optimal_astc_supported; @@ -223,11 +228,6 @@ public: return reported_extensions; } - /// Returns true if the setting for async shader compilation is enabled. - bool UseAsynchronousShaders() const { - return use_asynchronous_shaders; - } - u64 GetDeviceLocalMemory() const { return device_access_memory; } @@ -245,6 +245,9 @@ private: /// Sets up device features. void SetupFeatures(); + /// Sets up device properties. + void SetupProperties(); + /// Collects telemetry information from the device. void CollectTelemetryParameters(); @@ -267,14 +270,15 @@ private: bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, FormatType format_type) const; - VkInstance instance; ///< Vulkan instance. - vk::DeviceDispatch dld; ///< Device function pointers. - vk::PhysicalDevice physical; ///< Physical device. - VkPhysicalDeviceProperties properties; ///< Device properties. - vk::Device logical; ///< Logical device. - vk::Queue graphics_queue; ///< Main graphics queue. - vk::Queue present_queue; ///< Main present queue. - u32 instance_version{}; ///< Vulkan onstance version. + VkInstance instance; ///< Vulkan instance. + vk::DeviceDispatch dld; ///< Device function pointers. + vk::PhysicalDevice physical; ///< Physical device. + VkPhysicalDeviceProperties properties; ///< Device properties. + VkPhysicalDeviceFloatControlsPropertiesKHR float_controls{}; ///< Float control properties. + vk::Device logical; ///< Logical device. + vk::Queue graphics_queue; ///< Main graphics queue. + vk::Queue present_queue; ///< Main present queue. + u32 instance_version{}; ///< Vulkan onstance version. u32 graphics_family{}; ///< Main graphics queue family index. u32 present_family{}; ///< Main present queue family index. VkDriverIdKHR driver_id{}; ///< Driver ID. @@ -301,9 +305,6 @@ private: bool has_renderdoc{}; ///< Has RenderDoc attached bool has_nsight_graphics{}; ///< Has Nsight Graphics attached - // Asynchronous Graphics Pipeline setting - bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline - // Telemetry parameters std::string vendor_name; ///< Device's driver name. std::vector<std::string> reported_extensions; ///< Reported Vulkan extensions. diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index 2aa0ffbe6..33fb74bfb 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp @@ -311,8 +311,6 @@ const char* ToString(VkResult result) noexcept { return "VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT"; case VkResult::VK_ERROR_UNKNOWN: return "VK_ERROR_UNKNOWN"; - case VkResult::VK_ERROR_INCOMPATIBLE_VERSION_KHR: - return "VK_ERROR_INCOMPATIBLE_VERSION_KHR"; case VkResult::VK_THREAD_IDLE_KHR: return "VK_THREAD_IDLE_KHR"; case VkResult::VK_THREAD_DONE_KHR: |